In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm

In [2]:
# Load the Boston housing table from the CSV next to the notebook
# (the relative path is resolved against the kernel's working directory),
# then preview the first five rows.
boston = pd.read_csv(filepath_or_buffer='Boston_house.csv')
boston.head(n=5)

Unnamed: 0,AGE,B,RM,CRIM,DIS,INDUS,LSTAT,NOX,PTRATIO,RAD,ZN,TAX,CHAS,Target
0,65.2,396.9,6.575,0.00632,4.09,2.31,4.98,0.538,15.3,1,18.0,296,0,24.0
1,78.9,396.9,6.421,0.02731,4.9671,7.07,9.14,0.469,17.8,2,0.0,242,0,21.6
2,61.1,392.83,7.185,0.02729,4.9671,7.07,4.03,0.469,17.8,2,0.0,242,0,34.7
3,45.8,394.63,6.998,0.03237,6.0622,2.18,2.94,0.458,18.7,3,0.0,222,0,33.4
4,54.2,396.9,7.147,0.06905,6.0622,2.18,5.33,0.458,18.7,3,0.0,222,0,36.2


In [3]:
# Feature table: every column of `boston` except the regression response 'Target'.
# `columns=` names the axis being dropped explicitly instead of relying on axis=1.
boston_data = boston.drop(columns=['Target'])
# This cell originally bound the result to the misspelled name `boston_date`;
# keep that name as an alias so any cell still using the old spelling works.
boston_date = boston_data
# Summary statistics (count, mean, std, quartiles, min/max) for each feature.
boston_data.describe()

Unnamed: 0,AGE,B,RM,CRIM,DIS,INDUS,LSTAT,NOX,PTRATIO,RAD,ZN,TAX,CHAS
count,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0
mean,68.574901,356.674032,6.284634,3.613524,3.795043,11.136779,12.653063,0.554695,18.455534,9.549407,11.363636,408.237154,0.06917
std,28.148861,91.294864,0.702617,8.601545,2.10571,6.860353,7.141062,0.115878,2.164946,8.707259,23.322453,168.537116,0.253994
min,2.9,0.32,3.561,0.00632,1.1296,0.46,1.73,0.385,12.6,1.0,0.0,187.0,0.0
25%,45.025,375.3775,5.8855,0.082045,2.100175,5.19,6.95,0.449,17.4,4.0,0.0,279.0,0.0
50%,77.5,391.44,6.2085,0.25651,3.20745,9.69,11.36,0.538,19.05,5.0,0.0,330.0,0.0
75%,94.075,396.225,6.6235,3.677082,5.188425,18.1,16.955,0.624,20.2,24.0,12.5,666.0,0.0
max,100.0,396.9,8.78,88.9762,12.1265,27.74,37.97,0.871,22.0,24.0,100.0,711.0,1.0


### 1978 Boston housing price data

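- 1. AGE : proportion of owner-occupied units built before 1940
- 2. B : $1000(B_k - 0.63)^2$, where $B_k$ is the proportion of Black residents by town
- 3. RM : average number of rooms per dwelling
- 4. DIS : weighted distance to five Boston employment centres
- 5. INDUS : proportion of non-retail business acres per town
- 6. LSTAT : percentage of lower-status population
- 7. NOX : nitric oxides concentration (parts per 10 million)
- 8. PTRATIO : pupil-teacher ratio by town
- 9. RAD : index of accessibility to radial highways
- 10. ZN : proportion of residential land zoned for lots over 25,000 sq. ft.
- 11. TAX : full-value property-tax rate per $10,000
- 12. CHAS : Charles River dummy variable (1 = tract bounds the river, 0 = otherwise)
- 13. CRIM : per-capita crime rate by town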

### Multiple regression model
- Predictors: CRIM, RM, LSTAT; response: Target

In [5]:
# Response and predictors for the three-variable model.
# Both selections use a list of column names, so each result is a DataFrame
# rather than a Series.
target = boston[['Target']]
x_data = boston[['CRIM', 'RM', 'LSTAT']]
x_data.head()

Unnamed: 0,CRIM,RM,LSTAT
0,0.00632,6.575,4.98
1,0.02731,6.421,9.14
2,0.02729,7.185,4.03
3,0.03237,6.998,2.94
4,0.06905,7.147,5.33


In [6]:
# Prepend an intercept column to the predictors; statsmodels' OLS does not
# add one on its own. has_constant='add' requests the column even if the
# data already contains a constant.
x_data1 = sm.add_constant(data=x_data, prepend=True, has_constant='add')

  return ptp(axis=axis, out=out, **kwargs)


In [7]:
# Specify the ordinary-least-squares model (response = target,
# regressors = predictors plus intercept), then estimate its coefficients.
multi_model = sm.OLS(endog=target, exog=x_data1)
fitted_multi_model = multi_model.fit()

In [8]:
# Show the fitted model's summary: fit statistics (R-squared, F-statistic, AIC/BIC),
# the coefficient table with standard errors and confidence intervals,
# and residual diagnostics (Omnibus, Durbin-Watson, Jarque-Bera, condition number).
fitted_multi_model.summary()

0,1,2,3
Dep. Variable:,Target,R-squared:,0.646
Model:,OLS,Adj. R-squared:,0.644
Method:,Least Squares,F-statistic:,305.2
Date:,"Thu, 30 Jul 2020",Prob (F-statistic):,1.01e-112
Time:,14:58:37,Log-Likelihood:,-1577.6
No. Observations:,506,AIC:,3163.0
Df Residuals:,502,BIC:,3180.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-2.5623,3.166,-0.809,0.419,-8.783,3.658
CRIM,-0.1029,0.032,-3.215,0.001,-0.166,-0.040
RM,5.2170,0.442,11.802,0.000,4.348,6.085
LSTAT,-0.5785,0.048,-12.135,0.000,-0.672,-0.485

0,1,2,3
Omnibus:,171.754,Durbin-Watson:,0.822
Prob(Omnibus):,0.0,Jarque-Bera (JB):,628.308
Skew:,1.535,Prob(JB):,3.67e-137
Kurtosis:,7.514,Cond. No.,216.0


### Multiple regression using the CRIM, RM, LSTAT, B, TAX, AGE, ZN, NOX and INDUS variables

In [10]:
# Predictors for the larger model: nine columns selected from `boston`
# (the three used previously plus B, TAX, AGE, ZN, NOX and INDUS).
x_data2 = boston[[
    'CRIM', 'RM', 'LSTAT',
    'B', 'TAX', 'AGE',
    'ZN', 'NOX', 'INDUS',
]]
x_data2.head()

Unnamed: 0,CRIM,RM,LSTAT,B,TAX,AGE,ZN,NOX,INDUS
0,0.00632,6.575,4.98,396.9,296,65.2,18.0,0.538,2.31
1,0.02731,6.421,9.14,396.9,242,78.9,0.0,0.469,7.07
2,0.02729,7.185,4.03,392.83,242,61.1,0.0,0.469,7.07
3,0.03237,6.998,2.94,394.63,222,45.8,0.0,0.458,2.18
4,0.06905,7.147,5.33,396.9,222,54.2,0.0,0.458,2.18
