#### Import libraries

In [1]:
import pandas as pd ### Library for handling dataframes
from scipy.stats import ttest_ind ### Library for performing unpaired t-test
import statsmodels.api as sm ### Library for running linear regressions

#### Read Data

In [2]:
# Load the dataset from the CSV file in the working directory.
# NOTE(review): the preview below shows an 'Unnamed: 0' column, which looks like
# a row index that was written out with the file - confirm, and consider
# index_col=0 if that column is not wanted.
data = pd.read_csv(filepath_or_buffer='data.csv')

In [3]:
# Show the first five rows as a quick sanity check of the columns and values.
data.head(n=5)

Unnamed: 0,price,dist,bathrooms,garages,area,SES,popdens,perc_com
0,40000,0.69,1,1,58,3,462.56,0.0
1,40000,0.69,1,1,58,3,462.56,0.0
2,57900,0.68,2,1,73,3,461.99,0.0
3,65000,0.92,2,0,120,3,436.21,0.0
4,140000,0.69,3,1,375,3,462.56,0.0


#### Perform multivariable linear regression

Estimate a multivariable regression model

Dependent variable: price

Main variable of interest: distance to BRT station

Other variables to control for: number of bathrooms, number of garages, area of apartment, SES, neighborhood population density, % of neighborhood area in commercial use

###### Regression without controls

In [6]:
# Dependent variable: price.
y = data.loc[:, 'price']

# Only regressor is distance to the BRT station. sm.OLS does not add a constant
# on its own, so a column of ones named 'Intercept' is appended explicitly.
x = data.loc[:, ['dist']].assign(Intercept=1)

# Fit by ordinary least squares; the summary table is the cell's output.
sm.OLS(endog=y, exog=x).fit().summary2()

0,1,2,3
Model:,OLS,Adj. R-squared:,0.005
Dependent Variable:,price,AIC:,72648.8722
Date:,2020-02-04 12:41,BIC:,72660.8363
No. Observations:,2928,Log-Likelihood:,-36322.0
Df Model:,1,F-statistic:,14.83
Df Residuals:,2926,Prob (F-statistic):,0.00012
R-squared:,0.005,Scale:,3490200000.0

0,1,2,3,4,5,6
,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
dist,2095.2766,544.0826,3.8510,0.0001,1028.4530,3162.1001
Intercept,74327.2220,1517.9166,48.9666,0.0000,71350.9290,77303.5150

0,1,2,3
Omnibus:,2158.618,Durbin-Watson:,0.695
Prob(Omnibus):,0.0,Jarque-Bera (JB):,49664.788
Skew:,3.275,Prob(JB):,0.0
Kurtosis:,22.084,Condition No.:,4.0


###### Regression with controls

In [5]:
# Dependent variable: price.
y = data.loc[:, 'price']

# Distance to the BRT station plus the control variables. sm.OLS does not add a
# constant on its own, so a column of ones named 'Intercept' is appended last.
x = data.loc[:, [
    'dist',
    'bathrooms',
    'garages',
    'area',
    'SES',
    'popdens',
    'perc_com',
]].assign(Intercept=1)

# Fit by ordinary least squares; the summary table is the cell's output.
sm.OLS(endog=y, exog=x).fit().summary2()

0,1,2,3
Model:,OLS,Adj. R-squared:,0.549
Dependent Variable:,price,AIC:,70334.6885
Date:,2020-02-04 12:39,BIC:,70382.5451
No. Observations:,2928,Log-Likelihood:,-35159.0
Df Model:,7,F-statistic:,510.8
Df Residuals:,2920,Prob (F-statistic):,0.0
R-squared:,0.550,Scale:,1580200000.0

0,1,2,3,4,5,6
,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
dist,933.7241,388.2226,2.4051,0.0162,172.5063,1694.9419
bathrooms,13647.0590,1021.7015,13.3572,0.0000,11643.7304,15650.3876
garages,13207.9686,1203.5132,10.9745,0.0000,10848.1480,15567.7892
area,283.6504,12.5247,22.6472,0.0000,259.0922,308.2086
SES,26767.8617,1181.5197,22.6555,0.0000,24451.1653,29084.5581
popdens,3.7981,3.8012,0.9992,0.3178,-3.6552,11.2513
perc_com,-2433.2817,3007.0532,-0.8092,0.4185,-8329.4417,3462.8782
Intercept,-82406.3586,4139.3676,-19.9080,0.0000,-90522.7343,-74289.9830

0,1,2,3
Omnibus:,3184.65,Durbin-Watson:,1.298
Prob(Omnibus):,0.0,Jarque-Bera (JB):,376829.526
Skew:,5.299,Prob(JB):,0.0
Kurtosis:,57.557,Condition No.:,2868.0
