In [1]:
%load_ext lab_black

import numpy as np
import pandas as pd
from sklearn import linear_model
from sklearn.linear_model import LinearRegression
import statsmodels.api as sm
from scipy import stats

In [2]:
data_2021 = pd.read_csv("../data/airbnb_listings_2021.csv")
data_2021.columns

Index(['id', 'name', 'host_id', 'host_name', 'neighbourhood_group',
       'neighbourhood', 'latitude', 'longitude', 'room_type', 'price',
       'minimum_nights', 'number_of_reviews', 'last_review',
       'reviews_per_month', 'calculated_host_listings_count',
       'availability_365', 'number_of_reviews_ltm', 'license'],
      dtype='object')

In [3]:
# Dropping irrelevant columns
data_2021 = data_2021[
    [
        "price",
        "neighbourhood",
    ]
]
data_2021.dropna()
data_2021 = pd.get_dummies(data_2021, columns=["neighbourhood"])
data_2021.drop(["neighbourhood_Midtown"], axis=1, inplace=True)
data_2021.astype(float)
data_2021

Unnamed: 0,price,neighbourhood_Allerton,neighbourhood_Arden Heights,neighbourhood_Arrochar,neighbourhood_Arverne,neighbourhood_Astoria,neighbourhood_Bath Beach,neighbourhood_Battery Park City,neighbourhood_Bay Ridge,neighbourhood_Bay Terrace,...,neighbourhood_Westchester Square,neighbourhood_Westerleigh,neighbourhood_Whitestone,neighbourhood_Williamsbridge,neighbourhood_Williamsburg,neighbourhood_Willowbrook,neighbourhood_Windsor Terrace,neighbourhood_Woodhaven,neighbourhood_Woodlawn,neighbourhood_Woodside
0,150,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,76,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,60,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,275,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,68,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
36918,86,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
36919,71,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
36920,85,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
36921,66,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [4]:
Y = data_2021["price"]
Y

0        150
1         76
2         60
3        275
4         68
        ... 
36918     86
36919     71
36920     85
36921     66
36922    114
Name: price, Length: 36923, dtype: int64

In [5]:
X = data_2021
data_2021.drop(["price"], axis=1, inplace=True)
Xc = sm.add_constant(X)
Xc

Unnamed: 0,const,neighbourhood_Allerton,neighbourhood_Arden Heights,neighbourhood_Arrochar,neighbourhood_Arverne,neighbourhood_Astoria,neighbourhood_Bath Beach,neighbourhood_Battery Park City,neighbourhood_Bay Ridge,neighbourhood_Bay Terrace,...,neighbourhood_Westchester Square,neighbourhood_Westerleigh,neighbourhood_Whitestone,neighbourhood_Williamsbridge,neighbourhood_Williamsburg,neighbourhood_Willowbrook,neighbourhood_Windsor Terrace,neighbourhood_Woodhaven,neighbourhood_Woodlawn,neighbourhood_Woodside
0,1.0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1.0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1.0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1.0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,1.0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
36918,1.0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
36919,1.0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
36920,1.0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
36921,1.0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [6]:
esta = sm.OLS(Y, Xc)
est1 = esta.fit()
est1.summary()

0,1,2,3
Dep. Variable:,price,R-squared:,0.046
Model:,OLS,Adj. R-squared:,0.04
Method:,Least Squares,F-statistic:,7.97
Date:,"Mon, 09 May 2022",Prob (F-statistic):,4.65e-232
Time:,20:20:42,Log-Likelihood:,-262010.0
No. Observations:,36923,AIC:,524500.0
Df Residuals:,36700,BIC:,526400.0
Df Model:,222,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,301.5707,7.514,40.135,0.000,286.843,316.298
neighbourhood_Allerton,-192.1019,52.345,-3.670,0.000,-294.699,-89.505
neighbourhood_Arden Heights,-116.8207,146.713,-0.796,0.426,-404.382,170.740
neighbourhood_Arrochar,-180.4040,69.478,-2.597,0.009,-316.582,-44.226
neighbourhood_Arverne,-46.0530,32.661,-1.410,0.159,-110.069,17.963
neighbourhood_Astoria,-188.1121,13.900,-13.534,0.000,-215.356,-160.869
neighbourhood_Bath Beach,-197.1262,69.478,-2.837,0.005,-333.305,-60.948
neighbourhood_Battery Park City,-88.3759,34.230,-2.582,0.010,-155.467,-21.284
neighbourhood_Bay Ridge,-200.6914,28.227,-7.110,0.000,-256.016,-145.367

0,1,2,3
Omnibus:,78683.374,Durbin-Watson:,1.828
Prob(Omnibus):,0.0,Jarque-Bera (JB):,443179051.85
Skew:,18.804,Prob(JB):,0.0
Kurtosis:,538.399,Cond. No.,195.0
