In [26]:
from warnings import simplefilter

import numpy as np
import pandas as pd
from sklearn.compose import TransformedTargetRegressor
from sklearn.feature_selection import RFE
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVR

from sklearn_helpers import (
    ResultContainer,
    fit_models,
    get_column_transformer,
    get_models,
    get_preprocessor,
    show_coefficients,
)

# Silence FutureWarning messages so cell outputs stay readable.
simplefilter(action="ignore", category=FutureWarning)

# Use fully qualified option keys. The short pattern "precision" is ambiguous in
# newer pandas releases (it also matches "styler.format.precision") and raises
# an OptionError there; "display.precision" works across versions.
pd.set_option("display.precision", 3)
pd.set_option("display.max_columns", 100)


In [28]:
# NOTE: This notebook is exploratory, so models are fit on the entire data set;
# no split into training and test set is made here.
# NOTE(review): read_pickle can execute arbitrary code -- only load trusted files.
listings_extended = pd.read_pickle("../data-clean/listings_extended.pkl")

# Separate the target column from the remaining feature columns.
y = listings_extended["price"]
X = listings_extended.drop(columns=["price"])

X.shape

(2958, 38)

In [33]:
# BOOKMARK: Hyperparameters
log_y = True
n_folds = 10
random_state = 42

# The encoded version of listings_extended has 112 features in total.
# Selecting all 112 is left out: it leads to an error when evaluating the metrics.
num_features_list = [10, 25, 50, 75]


In [34]:
# Build the column transformer once; the cells below reuse this object, either
# directly as the preprocessor or combined with an RFE feature selector.
column_transformer = get_column_transformer()


In [35]:
# SUBSECTION: Analyze Performance for different values of num_features
result_list = []
for num_features in num_features_list:
    # None means "no feature selection": the bare column transformer is used.
    if num_features is not None:
        # Recursive feature elimination driven by a linear-kernel SVR. A float
        # step in (0, 1) is interpreted by RFE as a fraction of features to
        # remove per iteration.
        rfe = RFE(
            estimator=SVR(kernel="linear"),
            n_features_to_select=num_features,
            step=0.5,
        )
        preprocessor = get_preprocessor(column_transformer, rfe)
    else:
        preprocessor = column_transformer

    # A fresh container per setting, so each results table only holds the
    # models fitted in this iteration.
    result_container = ResultContainer()
    models = get_models(
        preprocessor,
        models=["linear"],
        random_state=random_state,
        log_y=log_y,
    )

    result = fit_models(
        X, y, models, result_container, n_folds, random_state=random_state, log_y=log_y
    )
    result_list.append(result.display_df())

# Stack the per-setting result tables into one frame for comparison.
collected_results = pd.concat(result_list)


Fitting LinearRegression
Finished training in 40.25 seconds
Fitting LinearRegression
Finished training in 41.34 seconds
Fitting LinearRegression
Finished training in 40.11 seconds
Fitting LinearRegression
Finished training in 29.95 seconds


In [36]:
# Rank the settings by validation MAE, in ascending order.
collected_results.sort_values(by="mae_val")


Unnamed: 0,mae_train,mae_val,r2_train,r2_val,mse_train,mse_val,hyperparam_keys,hyperparam_values,num_features,feature_selector,log_y
LinearRegression,449.992,463.27,0.224,0.181,2424000.0,2568000.0,,,50,RFE,True
LinearRegression,447.443,465.33,0.227,0.06,2416000.0,2726000.0,,,75,RFE,True
LinearRegression,461.098,471.376,0.224,0.188,2426000.0,2560000.0,,,25,RFE,True
LinearRegression,471.616,477.174,0.218,0.225,2445000.0,2532000.0,,,10,RFE,True


In [37]:
# SUBSECTION: Analyze Coefficients for different values of num_features
# Change num_features here to inspect the coefficients for another setting.
num_features = 25

# Same preprocessing as in the performance loop: the column transformer followed
# by RFE with a linear-kernel SVR, feeding a plain linear regression.
model = LinearRegression()
rfe = RFE(
    estimator=SVR(kernel="linear"),
    n_features_to_select=num_features,
    step=0.5,
)
preprocessor = get_preprocessor(column_transformer, rfe)
pipeline = make_pipeline(preprocessor, model)

# Fit on log(y); predictions are mapped back with exp.
log_transform = TransformedTargetRegressor(
    regressor=pipeline, func=np.log, inverse_func=np.exp
)
log_transform.fit(X, y)

show_coefficients(log_transform)


Unnamed: 0,feature,coefficient
0,property_type_Houseboat,2.506
1,property_type_Private room in guest suite,0.982
2,maximum_nights_avg_ntm,0.705
3,property_type_Private room in boat,0.51
4,property_type_Entire villa,0.375
5,room_type_Entire home/apt,0.31
6,property_type_Entire serviced apartment,0.241
7,accommodates,0.201
8,maximum_minimum_nights,0.185
9,property_type_Private room in bed and breakfast,0.13


In [38]:
# Number of listings per property type, to judge how much data backs each
# property_type coefficient.
X.loc[:, "property_type"].value_counts()


Entire rental unit                     1360
Entire condominium (condo)              654
Private room in rental unit             401
Entire residential home                 123
Private room in condominium (condo)     101
Entire townhouse                         67
Entire loft                              61
Private room in residential home         41
Entire villa                             21
Shared room in rental unit               18
Private room in loft                     17
Entire serviced apartment                15
Private room                             10
Entire guest suite                        9
Private room in townhouse                 8
Private room in villa                     6
Shared room in condominium (condo)        6
Entire guesthouse                         6
Tiny house                                5
Private room in bed and breakfast         5
Camper/RV                                 4
Entire cabin                              4
Private room in guesthouse      

In [39]:
# Show the rows behind the Houseboat property type.
listings_extended.loc[listings_extended["property_type"].eq("Houseboat")]

Unnamed: 0_level_0,neighbourhood,room_type,price,minimum_nights,number_of_reviews,calculated_host_listings_count,availability_365,number_of_reviews_ltm,host_is_superhost,host_listings_count,host_total_listings_count,host_has_profile_pic,host_identity_verified,neighbourhood_cleansed,property_type,accommodates,bedrooms,beds,maximum_nights,minimum_minimum_nights,maximum_minimum_nights,minimum_maximum_nights,maximum_maximum_nights,minimum_nights_avg_ntm,maximum_nights_avg_ntm,has_availability,availability_30,availability_60,availability_90,number_of_reviews_l30d,instant_bookable,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms,number_bathrooms,shared_bathrooms,host_gender,number_amenities,number_front_page_pictures
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1
24616982,Frogner,Entire home/apt,30000,2,0,18,180,0,f,20,20,t,t,Frogner,Houseboat,8,4,6,5,2,2,5,5,2.0,5.0,t,30,60,90,0,f,18,0,0,5.0,False,male,16,5
