In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
import statsmodels.formula.api as smf

In [2]:
data = pd.read_csv("../data/clean_listings.csv")

In [3]:
dataForward = data.drop(["id"], axis=1)

# Linear Regression #

### Forward Selection ###

In [9]:
currentRegression = []
available = list(dataForward.columns)
# Dependent/responding variable
available.remove('review_scores_rating')

bestAllTimeAic = np.inf
bestForwardModel = None
flag = True

while flag:
    currentBestAic = np.inf
    print("Current best AIC = " + str(bestAllTimeAic))
    print("Current regression: " + ', '.join(currentRegression))
    toAdd = None
    for each in available:
        regTemp = currentRegression[:]
        regTemp.extend([each])
        currentLm = sm.OLS(dataForward.review_scores_rating, sm.add_constant(dataForward[regTemp])).fit()
        currentAic = currentLm.aic
        print("\tChecking " + each + ": AIC = " + str(currentAic))
        if currentAic < currentBestAic:
            toAdd = each
            bestForwardModel = currentLm
            currentBestAic = currentAic
    if currentBestAic < bestAllTimeAic:
        print("\t\tAdding " + toAdd + " to model. New best AIC = " + str(currentBestAic) + '\n\n')
        currentRegression.append(toAdd)
        available.remove(toAdd)
        bestAllTimeAic = currentBestAic
    else:
        print("\t\tAdding no new variables to model. Ending forward selection.")
        flag = False

Current best AIC = inf
Current regression: 
	Checking accommodates: AIC = 20766.586914455347
	Checking bathrooms: AIC = 20760.745490466707
	Checking bedrooms: AIC = 20765.788736942646
	Checking beds: AIC = 20767.327697899796
	Checking price: AIC = 20758.088601087842
	Checking number_of_reviews: AIC = 20763.184849815083
	Checking review_scores_cleanliness: AIC = 19109.323542124912
	Checking review_scores_checkin: AIC = 19779.658671528
	Checking review_scores_communication: AIC = 19675.546692072494
	Checking review_scores_location: AIC = 20313.2426341461
	Checking review_scores_value: AIC = 18691.04228755772
	Checking 24-HourCheck-in: AIC = 20759.614200929147
	Checking AirConditioning: AIC = 20754.06571518285
	Checking Breakfast: AIC = 20762.891511488044
	Checking Buzzer/WirelessIntercom: AIC = 20767.18399264816
	Checking CableTV: AIC = 20761.71920890884
	Checking CarbonMonoxideDetector: AIC = 20736.841641850406
	Checking Cat(s): AIC = 20753.897281132824
	Checking Dog(s): AIC = 20757.520

	Checking Cat(s): AIC = 17690.675649310255
	Checking Dog(s): AIC = 17689.853343826082
	Checking Doorman: AIC = 17683.875012984387
	Checking Dryer: AIC = 17681.448399045665
	Checking ElevatorinBuilding: AIC = 17678.472342655674
	Checking Essentials: AIC = 17690.3439734666
	Checking Family/KidFriendly: AIC = 17690.51538796245
	Checking FireExtinguisher: AIC = 17688.628321111788
	Checking FirstAidKit: AIC = 17685.579572943978
	Checking FreeParkingonPremises: AIC = 17690.444466050885
	Checking Gym: AIC = 17688.51964355797
	Checking HairDryer: AIC = 17690.585115526555
	Checking Hangers: AIC = 17690.719868319222
	Checking Heating: AIC = 17686.149047616356
	Checking HotTub: AIC = 17690.503643591514
	Checking IndoorFireplace: AIC = 17685.46689720712
	Checking Internet: AIC = 17688.226659394193
	Checking Iron: AIC = 17690.73103340493
	Checking Kitchen: AIC = 17679.558171859688
	Checking LaptopFriendlyWorkspace: AIC = 17689.461772645842
	Checking LockonBedroomDoor: AIC = 17689.420904148385
	Chec

	Checking Iron: AIC = 17568.01223659728
	Checking Kitchen: AIC = 17562.54891938819
	Checking LaptopFriendlyWorkspace: AIC = 17567.914122924354
	Checking LockonBedroomDoor: AIC = 17567.84744770492
	Checking Otherpet(s): AIC = 17567.724529977975
	Checking PetsAllowed: AIC = 17567.138683540445
	Checking Petsliveonthisproperty: AIC = 17564.305474718705
	Checking Pool: AIC = 17568.224276435492
	Checking SafetyCard: AIC = 17555.510833484997
	Checking Shampoo: AIC = 17565.023174636903
	Checking SmokeDetector: AIC = 17566.796577696274
	Checking SmokingAllowed: AIC = 17563.313479431738
	Checking SuitableforEvents: AIC = 17567.124287048784
	Checking TV: AIC = 17567.049622518178
	Checking Washer: AIC = 17562.771857118518
	Checking Washer/Dryer: AIC = 17568.222326388426
	Checking WheelchairAccessible: AIC = 17568.2467823425
	Checking WirelessInternet: AIC = 17561.540557571177
		Adding SafetyCard to model. New best AIC = 17555.510833484997


Current best AIC = 17555.510833484997
Current regression:

	Checking Dog(s): AIC = 17545.59249081886
	Checking Doorman: AIC = 17545.51909657843
	Checking Dryer: AIC = 17542.475970909865
	Checking ElevatorinBuilding: AIC = 17544.259916240444
	Checking Essentials: AIC = 17546.689295512548
	Checking Family/KidFriendly: AIC = 17546.710783243016
	Checking FireExtinguisher: AIC = 17546.089056089055
	Checking FirstAidKit: AIC = 17545.981809935205
	Checking FreeParkingonPremises: AIC = 17546.718425716972
	Checking Gym: AIC = 17546.707717037232
	Checking HairDryer: AIC = 17545.185808516522
	Checking Hangers: AIC = 17545.957801944303
	Checking Heating: AIC = 17544.4013412376
	Checking HotTub: AIC = 17546.553314240955
	Checking IndoorFireplace: AIC = 17542.90735683956
	Checking Internet: AIC = 17546.071049267375
	Checking Iron: AIC = 17546.02161056768
	Checking Kitchen: AIC = 17541.392943560164
	Checking LaptopFriendlyWorkspace: AIC = 17546.66426151343
	Checking LockonBedroomDoor: AIC = 17545.534528155793
	Checking Otherpet(s): AIC = 17546.335670435165
	

	Checking number_of_reviews: AIC = 17537.077448071206
	Checking 24-HourCheck-in: AIC = 17537.764704213125
	Checking Breakfast: AIC = 17538.69932316448
	Checking Buzzer/WirelessIntercom: AIC = 17538.364753391266
	Checking CableTV: AIC = 17537.968554112693
	Checking CarbonMonoxideDetector: AIC = 17536.6240654851
	Checking Cat(s): AIC = 17538.74473752825
	Checking Dog(s): AIC = 17538.855545637012
	Checking Doorman: AIC = 17538.20942622353
	Checking Dryer: AIC = 17536.807080307924
	Checking ElevatorinBuilding: AIC = 17537.262029649206
	Checking Essentials: AIC = 17538.85951615351
	Checking Family/KidFriendly: AIC = 17538.759842229305
	Checking FireExtinguisher: AIC = 17538.691694064804
	Checking FirstAidKit: AIC = 17538.65396038889
	Checking FreeParkingonPremises: AIC = 17538.68777485489
	Checking Gym: AIC = 17538.4613045185
	Checking HairDryer: AIC = 17536.920218863965
	Checking Hangers: AIC = 17537.749783216685
	Checking Heating: AIC = 17537.71617124785
	Checking HotTub: AIC = 17538.2095

	Checking bedrooms: AIC = 17536.833956682123
	Checking beds: AIC = 17537.23332078888
	Checking number_of_reviews: AIC = 17535.528736243996
	Checking 24-HourCheck-in: AIC = 17536.533656892218
	Checking Breakfast: AIC = 17537.009227863826
	Checking Buzzer/WirelessIntercom: AIC = 17537.215243044757
	Checking CableTV: AIC = 17536.52094915999
	Checking Cat(s): AIC = 17537.1645687722
	Checking Dog(s): AIC = 17537.232820293066
	Checking Doorman: AIC = 17536.62416057996
	Checking Dryer: AIC = 17535.834817424715
	Checking Essentials: AIC = 17537.087512682418
	Checking Family/KidFriendly: AIC = 17537.17090587178
	Checking FireExtinguisher: AIC = 17536.244759804475
	Checking FirstAidKit: AIC = 17536.710714035038
	Checking FreeParkingonPremises: AIC = 17537.165740040226
	Checking HairDryer: AIC = 17535.731944879055
	Checking Hangers: AIC = 17536.429567505693
	Checking Heating: AIC = 17536.022905711598
	Checking HotTub: AIC = 17536.58701098195
	Checking Internet: AIC = 17537.11628245593
	Checking I

In [10]:
bestForwardModel.summary()

0,1,2,3
Dep. Variable:,review_scores_rating,R-squared:,0.647
Model:,OLS,Adj. R-squared:,0.645
Method:,Least Squares,F-statistic:,300.7
Date:,"Mon, 03 Dec 2018",Prob (F-statistic):,0.0
Time:,17:46:03,Log-Likelihood:,-8747.7
No. Observations:,3141,AIC:,17540.0
Df Residuals:,3121,BIC:,17660.0
Df Model:,19,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.4019,1.607,-0.250,0.803,-3.553,2.749
review_scores_value,3.4511,0.122,28.317,0.000,3.212,3.690
review_scores_cleanliness,2.5753,0.109,23.689,0.000,2.362,2.788
review_scores_checkin,1.4533,0.169,8.622,0.000,1.123,1.784
review_scores_communication,1.5355,0.185,8.280,0.000,1.172,1.899
review_scores_location,0.6539,0.125,5.243,0.000,0.409,0.898
price,0.0038,0.001,3.473,0.001,0.002,0.006
SafetyCard,0.6357,0.184,3.447,0.001,0.274,0.997
WirelessInternet,0.8811,0.432,2.038,0.042,0.033,1.729

0,1,2,3
Omnibus:,1205.889,Durbin-Watson:,1.989
Prob(Omnibus):,0.0,Jarque-Bera (JB):,15671.129
Skew:,-1.453,Prob(JB):,0.0
Kurtosis:,13.55,Cond. No.,3610.0


### Backward Selection ###

In [8]:
currentRegression = list(dataForward.columns)
currentRegression.remove('review_scores_rating')

bestAllTimeAic = np.inf
flag = True

while flag:
    currentBestAic = np.inf
    print("Current best AIC = " + str(bestAllTimeAic))
    print("Current regression: " + ', '.join(currentRegression))
    toDrop = None
    for each in currentRegression:
        regTemp = currentRegression[:]
        regTemp.remove(each)
        currentLm = sm.OLS(dataForward.review_scores_rating, sm.add_constant(dataForward[regTemp])).fit()
        currentAic = currentLm.aic
        print("\tChecking " + each + ": AIC = " + str(currentAic))
        if currentAic <= currentBestAic:
            toDrop = each
            currentBestAic = currentAic
    if currentBestAic <= bestAllTimeAic:
        print("\t\tDropping " + toDrop + " from model. New best AIC = " + str(currentBestAic) + '\n\n')
        currentRegression.remove(toDrop)
        bestAllTimeAic = currentBestAic
    else:
        print("\t\tDroping no new variables from model. Ending backward selection.")
        flag = False

Current best AIC = inf
Current regression: accommodates, bathrooms, bedrooms, beds, price, number_of_reviews, review_scores_cleanliness, review_scores_checkin, review_scores_communication, review_scores_location, review_scores_value, 24-HourCheck-in, AirConditioning, Breakfast, Buzzer/WirelessIntercom, CableTV, CarbonMonoxideDetector, Cat(s), Dog(s), Doorman, Dryer, ElevatorinBuilding, Essentials, Family/KidFriendly, FireExtinguisher, FirstAidKit, FreeParkingonPremises, Gym, HairDryer, Hangers, Heating, HotTub, IndoorFireplace, Internet, Iron, Kitchen, LaptopFriendlyWorkspace, LockonBedroomDoor, Otherpet(s), PetsAllowed, Petsliveonthisproperty, Pool, SafetyCard, Shampoo, SmokeDetector, SmokingAllowed, SuitableforEvents, TV, Washer, Washer/Dryer, WheelchairAccessible, WirelessInternet
	Checking accommodates: AIC = 17585.78561888817
	Checking bathrooms: AIC = 17586.290756978382
	Checking bedrooms: AIC = 17582.921406648715
	Checking beds: AIC = 17582.541620427888
	Checking price: AIC = 17

	Checking Buzzer/WirelessIntercom: AIC = 17578.566626306303
	Checking CableTV: AIC = 17579.437074411126
	Checking CarbonMonoxideDetector: AIC = 17581.34429175074
	Checking Cat(s): AIC = 17578.59319992626
	Checking Dog(s): AIC = 17578.508937335566
	Checking Doorman: AIC = 17579.07369357226
	Checking Dryer: AIC = 17579.259972831103
	Checking ElevatorinBuilding: AIC = 17582.645123297607
	Checking Essentials: AIC = 17578.53174256373
	Checking Family/KidFriendly: AIC = 17578.78248386396
	Checking FireExtinguisher: AIC = 17579.192394390302
	Checking FirstAidKit: AIC = 17578.54915713408
	Checking FreeParkingonPremises: AIC = 17578.77520175489
	Checking Gym: AIC = 17579.790861667803
	Checking HairDryer: AIC = 17579.753107121513
	Checking Hangers: AIC = 17578.59718149949
	Checking Heating: AIC = 17579.476880016922
	Checking HotTub: AIC = 17579.012574234326
	Checking IndoorFireplace: AIC = 17580.625031641222
	Checking Internet: AIC = 17578.530939898923
	Checking Iron: AIC = 17578.515315843266
	C

	Checking beds: AIC = 17572.559844339783
	Checking price: AIC = 17580.335991923268
	Checking number_of_reviews: AIC = 17574.053967485168
	Checking review_scores_cleanliness: AIC = 18083.383503098255
	Checking review_scores_checkin: AIC = 17647.711459171875
	Checking review_scores_communication: AIC = 17641.574976109016
	Checking review_scores_location: AIC = 17599.05940354171
	Checking review_scores_value: AIC = 18277.504268401157
	Checking 24-HourCheck-in: AIC = 17573.044067062612
	Checking AirConditioning: AIC = 17575.68771473818
	Checking Breakfast: AIC = 17572.68935048808
	Checking Buzzer/WirelessIntercom: AIC = 17572.581877798646
	Checking CableTV: AIC = 17573.500674673298
	Checking CarbonMonoxideDetector: AIC = 17575.372177769124
	Checking Cat(s): AIC = 17572.612691909813
	Checking Doorman: AIC = 17573.09110722386
	Checking Dryer: AIC = 17573.27619487006
	Checking ElevatorinBuilding: AIC = 17576.661669852092
	Checking Essentials: AIC = 17572.546763875176
	Checking Family/KidFrien

	Checking Washer/Dryer: AIC = 17568.679465680594
	Checking WheelchairAccessible: AIC = 17568.98760305522
	Checking WirelessInternet: AIC = 17571.662794416585
		Dropping beds from model. New best AIC = 17568.608200628914


Current best AIC = 17568.608200628914
Current regression: accommodates, bathrooms, bedrooms, price, number_of_reviews, review_scores_cleanliness, review_scores_checkin, review_scores_communication, review_scores_location, review_scores_value, 24-HourCheck-in, AirConditioning, Breakfast, Buzzer/WirelessIntercom, CableTV, CarbonMonoxideDetector, Cat(s), Doorman, Dryer, ElevatorinBuilding, Family/KidFriendly, FireExtinguisher, FirstAidKit, FreeParkingonPremises, Gym, HairDryer, Hangers, Heating, HotTub, IndoorFireplace, Kitchen, LaptopFriendlyWorkspace, LockonBedroomDoor, Otherpet(s), PetsAllowed, Petsliveonthisproperty, SafetyCard, Shampoo, SmokeDetector, SmokingAllowed, SuitableforEvents, Washer/Dryer, WheelchairAccessible, WirelessInternet
	Checking accommodates: AIC 

	Checking Otherpet(s): AIC = 17562.877885737256
	Checking PetsAllowed: AIC = 17562.9867618514
	Checking Petsliveonthisproperty: AIC = 17564.74048081222
	Checking SafetyCard: AIC = 17572.35069211913
	Checking Shampoo: AIC = 17565.376467188253
	Checking SmokeDetector: AIC = 17562.762839518084
	Checking SmokingAllowed: AIC = 17565.185328529875
	Checking SuitableforEvents: AIC = 17562.870849252635
	Checking Washer/Dryer: AIC = 17562.807039566018
	Checking WheelchairAccessible: AIC = 17563.128894719626
	Checking WirelessInternet: AIC = 17565.7746221697
		Dropping SmokeDetector from model. New best AIC = 17562.762839518084


Current best AIC = 17562.762839518084
Current regression: accommodates, bathrooms, bedrooms, price, number_of_reviews, review_scores_cleanliness, review_scores_checkin, review_scores_communication, review_scores_location, review_scores_value, 24-HourCheck-in, AirConditioning, Breakfast, CableTV, CarbonMonoxideDetector, Cat(s), Doorman, Dryer, ElevatorinBuilding, Family/K

	Checking SafetyCard: AIC = 17560.967267642936
	Checking Shampoo: AIC = 17554.15870054891
	Checking SmokingAllowed: AIC = 17553.930213493128
	Checking WheelchairAccessible: AIC = 17551.812777427647
	Checking WirelessInternet: AIC = 17554.5979396144
		Dropping Otherpet(s) from model. New best AIC = 17551.58154903337


Current best AIC = 17551.58154903337
Current regression: accommodates, bathrooms, bedrooms, price, number_of_reviews, review_scores_cleanliness, review_scores_checkin, review_scores_communication, review_scores_location, review_scores_value, 24-HourCheck-in, AirConditioning, CableTV, CarbonMonoxideDetector, Doorman, Dryer, ElevatorinBuilding, Family/KidFriendly, FireExtinguisher, FreeParkingonPremises, Gym, HairDryer, Heating, HotTub, IndoorFireplace, Kitchen, LaptopFriendlyWorkspace, LockonBedroomDoor, PetsAllowed, Petsliveonthisproperty, SafetyCard, Shampoo, SmokingAllowed, WheelchairAccessible, WirelessInternet
	Checking accommodates: AIC = 17555.67570794307
	Checking b

	Checking review_scores_checkin: AIC = 17619.765370131652
	Checking review_scores_communication: AIC = 17613.719165587365
	Checking review_scores_location: AIC = 17572.197119245466
	Checking review_scores_value: AIC = 18254.12823750873
	Checking 24-HourCheck-in: AIC = 17545.111693692506
	Checking AirConditioning: AIC = 17547.7150227582
	Checking CableTV: AIC = 17545.392846443723
	Checking CarbonMonoxideDetector: AIC = 17547.698697736152
	Checking Doorman: AIC = 17545.14714646122
	Checking Dryer: AIC = 17545.22025268434
	Checking ElevatorinBuilding: AIC = 17549.143890446256
	Checking FireExtinguisher: AIC = 17545.360699190438
	Checking Gym: AIC = 17545.932469791598
	Checking HairDryer: AIC = 17547.959900969385
	Checking Heating: AIC = 17545.385207270654
	Checking HotTub: AIC = 17545.126479383267
	Checking IndoorFireplace: AIC = 17546.64776352128
	Checking Kitchen: AIC = 17546.516818765453
	Checking LaptopFriendlyWorkspace: AIC = 17548.188298833968
	Checking LockonBedroomDoor: AIC = 1754

	Checking CableTV: AIC = 17539.336297809823
	Checking CarbonMonoxideDetector: AIC = 17541.853282315682
	Checking Dryer: AIC = 17539.234372143568
	Checking ElevatorinBuilding: AIC = 17542.895906868445
	Checking FireExtinguisher: AIC = 17539.455026144522
	Checking Gym: AIC = 17539.921178215813
	Checking HairDryer: AIC = 17543.68194728772
	Checking Heating: AIC = 17539.239509278144
	Checking HotTub: AIC = 17539.01947040481
	Checking IndoorFireplace: AIC = 17540.88828520751
	Checking Kitchen: AIC = 17540.470740385397
	Checking LaptopFriendlyWorkspace: AIC = 17541.696155297515
	Checking LockonBedroomDoor: AIC = 17539.582625476407
	Checking Petsliveonthisproperty: AIC = 17541.421682861066
	Checking SafetyCard: AIC = 17548.048765632026
	Checking Shampoo: AIC = 17540.899748610413
	Checking SmokingAllowed: AIC = 17540.935613724712
	Checking WirelessInternet: AIC = 17541.561285486216
		Dropping HotTub from model. New best AIC = 17539.01947040481


Current best AIC = 17539.01947040481
Current reg

	Checking review_scores_checkin: AIC = 17608.73318873374
	Checking review_scores_communication: AIC = 17601.022469884305
	Checking review_scores_location: AIC = 17560.580009760484
	Checking review_scores_value: AIC = 18248.142796781718
	Checking AirConditioning: AIC = 17536.56780483821
	Checking CarbonMonoxideDetector: AIC = 17535.634658739327
	Checking ElevatorinBuilding: AIC = 17537.085000123192
	Checking Gym: AIC = 17534.604857657592
	Checking HairDryer: AIC = 17538.297754768184
	Checking Heating: AIC = 17533.868130568775
	Checking IndoorFireplace: AIC = 17535.659888772407
	Checking Kitchen: AIC = 17535.994080019405
	Checking LaptopFriendlyWorkspace: AIC = 17536.23583906514
	Checking Petsliveonthisproperty: AIC = 17535.685126612498
	Checking SafetyCard: AIC = 17545.13513563189
	Checking Shampoo: AIC = 17535.254132055044
	Checking SmokingAllowed: AIC = 17535.320474258962
	Checking WirelessInternet: AIC = 17536.297499352313
		Dropping Heating from model. New best AIC = 17533.868130568