In [1]:
import pandas as pd
import numpy as np
import re
import json
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
from pprint import pprint
import folium
from sklearn.model_selection import train_test_split
from scipy import stats
from sklearn.preprocessing import PowerTransformer
from sklearn.preprocessing import MinMaxScaler
from collections import Counter

# Models
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import GridSearchCV

# Metrics
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error

import pickle

In [2]:
class airbnb:
    """Price-regression pipeline over Airbnb listing data for one or more cities.

    The methods are meant to be called in order: ``clean_tested_columns`` ->
    ``remove_outliers`` -> ``label_encoding`` -> ``normalize`` -> ``tts`` ->
    ``train_model`` / ``grid_search_cv_tuning`` -> ``train_final_model`` ->
    ``predict``. Each step stores its result on the instance.
    """

    def __init__(self, data, city_names = None, file = "csv"):
        """Load the listings and concatenate them into ``self.df``.

        Parameters
        ----------
        data : list
            CSV paths when ``file == "csv"``, DataFrames when
            ``file == "dataframe"``.
        city_names : list of str, optional
            One name per CSV, stored lower-cased in a new ``city`` column.
            Required for ``file == "csv"``; not used for ``"dataframe"``.
        file : str
            Either ``"csv"`` or ``"dataframe"``.

        Raises
        ------
        ValueError
            If ``file`` is not a supported kind, if ``city_names`` is missing
            for CSV input, or if there are fewer names than CSV files.
        """
        if (file == "csv") and (city_names is not None):
            data = list(data)

            if len(city_names) < len(data):
                raise ValueError("city_names must provide one name per csv file")

            self.l_dfs = list()

            for dataset, city in zip(data, city_names):
                # "source" is dropped when present; files without it load as well
                frame = pd.read_csv(dataset).drop(columns = "source", errors = "ignore")
                frame["city"] = city.lower()
                self.l_dfs.append(frame)

        elif file == "dataframe":
            self.l_dfs = list(data)

        else:
            # Previously this branch only printed, leaving an instance without
            # self.df that failed later with an unrelated AttributeError.
            raise ValueError("Only csv or dataframe are valid inputs, and city_names cannot be empty")

        self.df = pd.concat(self.l_dfs)

        print("Instance created!")

    def return_initial_df(self):
        """Return the concatenated, uncleaned DataFrame."""
        return self.df

    def display__initial_df(self):
        """Render the concatenated, uncleaned DataFrame with IPython's ``display``."""
        display(self.df)

    # Correctly spelled alias; the original double-underscore name is kept for callers.
    display_initial_df = display__initial_df

    @staticmethod
    def _parse_price(value):
        """Turn a price such as ``"$1,250.00"`` into a float; nulls become NaN."""
        if pd.isnull(value):
            return float("nan")
        if isinstance(value, str):
            return float(value.strip("$").replace(",", ""))
        return float(value)

    @staticmethod
    def _parse_bathrooms(text):
        """Join the digit groups found in a bathrooms text with '.' (``"1.5 baths"`` -> 1.5).

        Texts containing no digits map to 0.
        """
        digits = re.findall(r'\d+', str(text))
        if not digits:
            return 0.0
        return float('.'.join(digits))

    def clean_tested_columns(self):
        """Build ``self.df_cleaned`` from the predefined subset of columns.

        Steps: keep the tested columns, convert ``price`` to float, drop rows
        without ``bathrooms_text`` and replace that text by the number it
        contains, add one 0/1 column per selected amenity, drop "Hotel room"
        listings, one-hot encode ``room_type`` and finally drop rows that
        still contain missing values.
        """
        tested_cols = ['neighbourhood_cleansed', 'city',
                       'room_type', 'accommodates', 'availability_365',
                       'bathrooms_text', 'bedrooms', 'beds', 'amenities', 'price',
                       'minimum_nights', 'maximum_nights',
                       'number_of_reviews', 'reviews_per_month', 'host_total_listings_count']

        # Explicit copies: the assignments below must not target a slice of self.df
        cleaned = self.df[tested_cols].copy()

        cleaned["price"] = [self._parse_price(p) for p in cleaned["price"]]

        cleaned = cleaned[cleaned["bathrooms_text"].notna()].copy()
        cleaned["bathrooms_text"] = [self._parse_bathrooms(t) for t in cleaned["bathrooms_text"]]
        cleaned["bathrooms_text"] = cleaned["bathrooms_text"].astype("float64")

        # Each amenities cell is a JSON-encoded list of amenity names
        l_amenities_cleaned = [json.loads(raw) for raw in cleaned["amenities"]]

        # Most relevant amenities, detailed analysis in the EDA file
        l_amenities_valuables = ['Long term stays allowed','Cooking basics','Dishes and silverware','Essentials','Coffee maker','Hair dryer','Microwave','Refrigerator','Heating','Air conditioning']

        for amenity in l_amenities_valuables:
            cleaned[amenity] = [1 if amenity in listed else 0 for listed in l_amenities_cleaned]

        cleaned = cleaned.drop("amenities", axis = 1)

        cleaned = cleaned[cleaned["room_type"] != "Hotel room"]
        room_dummies = pd.get_dummies(data = cleaned["room_type"])
        cleaned = pd.concat([cleaned, room_dummies], axis = 1).drop("room_type", axis = 1)

        self.df_cleaned = cleaned.dropna()

    def return_cleaned(self):
        """Return the cleaned DataFrame in its current pipeline state."""
        return self.df_cleaned

    def display_cleaned(self):
        """Render the cleaned DataFrame with IPython's ``display``."""
        display(self.df_cleaned)

    def remove_outliers(self, accommodates = 8, bathrooms_min = 1, bathrooms_max = 2, bedrooms = 4, beds_min = 1, beds_max = 5, minimum_nights = 30,
                       maximum_nights = 70000, nreviews = 375, reviews_pmonth = 9, price = 350, htlc = 50000):
        """Keep only the rows whose values lie within the given limits.

        ``*_min`` / ``*_max`` pairs are inclusive ranges; every other argument
        is an inclusive upper bound (``htlc`` bounds
        ``host_total_listings_count``). The filtered frame replaces
        ``self.df_cleaned`` and is also returned.
        """
        frame = self.df_cleaned

        keep = (
            (frame["accommodates"] <= accommodates)
            & frame["bathrooms_text"].between(bathrooms_min, bathrooms_max)
            & (frame["bedrooms"] <= bedrooms)
            & frame["beds"].between(beds_min, beds_max)
            & (frame["minimum_nights"] <= minimum_nights)
            & (frame["maximum_nights"] <= maximum_nights)
            & (frame["number_of_reviews"] <= nreviews)
            & (frame["reviews_per_month"] <= reviews_pmonth)
            & (frame["price"] <= price)
            & (frame["host_total_listings_count"] <= htlc)
        )

        # Copy so later in-place column assignments do not act on a slice
        self.df_cleaned = frame[keep].copy()

        return self.df_cleaned

    def display_outliers(self):
        """Print each column name of the cleaned frame and plot its KDE."""
        for column in self.df_cleaned.columns:

            print(column)
            sns.kdeplot(self.df_cleaned[column])
            plt.show()

    def label_encoding(self, df = None):
        """Label-encode ``city`` and ``neighbourhood_cleansed`` in place.

        Parameters
        ----------
        df : DataFrame, optional
            Frame to encode; defaults to ``self.df_cleaned``.

        Returns
        -------
        DataFrame
            The same frame, with both columns replaced by integer codes. The
            fitted encoders are kept as ``self.city_encoder`` and
            ``self.neighbourhood_encoder`` so the codes can be mapped back.
        """
        if df is None:
            df = self.df_cleaned

        self.city_encoder = LabelEncoder()
        df["city"] = self.city_encoder.fit_transform(df["city"])

        self.neighbourhood_encoder = LabelEncoder()
        df["neighbourhood_cleansed"] = self.neighbourhood_encoder.fit_transform(df["neighbourhood_cleansed"])

        return df

    def normalize(self):
        """Min-max scale the features and the price of ``self.df_cleaned``.

        The fitted scalers are kept as ``self.x_scaler`` and ``self.y_scaler``;
        ``predict`` needs ``self.y_scaler`` to convert predictions back to
        prices. Note that the scalers are fitted on the whole frame, i.e.
        before ``tts`` splits it.
        """
        feature_cols = self.df_cleaned.columns.drop("price")

        self.x_scaler = MinMaxScaler()
        self.df_cleaned[feature_cols] = self.x_scaler.fit_transform(self.df_cleaned[feature_cols])

        self.y_scaler = MinMaxScaler()
        self.df_cleaned["price"] = self.y_scaler.fit_transform(self.df_cleaned[["price"]]).flatten()

        return self.df_cleaned

    def tts(self):
        """Split ``self.df_cleaned`` 80/20 into train and test sets (seed 42) and print their shapes."""
        self.X = self.df_cleaned.drop(["price"], axis = 1)
        self.y = self.df_cleaned["price"]

        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(self.X, self.y, test_size = 0.2, random_state = 42)

        print(f"X_train: {self.X_train.shape} | y_train: {self.y_train.shape}")
        print(f"X_test: {self.X_test.shape} | y_test: {self.y_test.shape}")

    def train_model(self):
        """Fit a set of default regressors and rank them by test r2.

        The ranking (model name, r2, mse and the fitted model) is stored in
        ``self.df_metrics``; ``self.yhat`` holds the predictions of the last
        model fitted.
        """
        models = [LinearRegression(), KNeighborsRegressor(), DecisionTreeRegressor(),
                 RandomForestRegressor(), SVR(), AdaBoostRegressor(), GradientBoostingRegressor()]

        metrics = list()

        for model in models:

            model.fit(self.X_train, self.y_train)

            self.yhat = model.predict(self.X_test)

            r2 = r2_score(self.y_test, self.yhat)
            mse = mean_squared_error(self.y_test, self.yhat)

            metrics.append([str(model), r2, mse, model])

        self.df_metrics = pd.DataFrame(data = metrics, columns = ["model_name", "r2", "mse", "model"])
        self.df_metrics.sort_values(by = "r2", ascending = False, inplace= True)

    def return_metrics(self):
        """Return the model ranking built by ``train_model``."""
        return self.df_metrics

    def display_metrics(self):
        """Render the model ranking built by ``train_model``."""
        display(self.df_metrics)

    def model_feature_importances(self, model):
        """Print and return the feature importances of ``model``, highest first.

        Returns
        -------
        dict
            ``{feature_position: [importance_in_percent, feature_name]}``.
        """
        feature_names = self.df_cleaned.drop("price", axis = 1).columns
        ranking = np.argsort(model.feature_importances_)[::-1]
        d_importances = dict()

        for i in ranking:

            importance_pct = model.feature_importances_[i]*100
            d_importances[i] = [importance_pct, feature_names[i]]
            print(i, importance_pct, feature_names[i])

        return d_importances

    def grid_search_cv_tuning(self, params = None, cv = 10):
        """Tune a RandomForestRegressor with a cross-validated grid search.

        Parameters
        ----------
        params : dict, optional
            Parameter grid; defaults to the grid over ``n_estimators``,
            ``max_depth`` and ``max_features`` defined below.
        cv : int, optional
            Number of cross-validation folds (default 10).

        The fitted search is stored in ``self.model_result`` and a one-row
        summary in ``self.df_validations``.
        """
        if params is None:
            params = {"n_estimators" : [i for i in range(100, 1001, 50)],
                      "max_depth"    : [8, 10, 12, 14, 16],
                      "max_features" : ["log2", "sqrt"]}

        # A list rather than a set: multi-metric scoring is documented for
        # list / tuple / dict inputs.
        scorers = ["r2", "neg_mean_squared_error"]

        grid_solver = GridSearchCV(estimator  = RandomForestRegressor(random_state = 42),
                                   param_grid = params,
                                   scoring    = scorers,
                                   cv         = cv,
                                   refit      = "r2",
                                   n_jobs     = -1,
                                   verbose    = 2)

        self.model_result = grid_solver.fit(self.X_train, self.y_train)

        l_validations = [self.model_result.best_estimator_,
                         self.model_result.cv_results_["mean_test_r2"].max(),
                         self.model_result.best_score_]

        # Wrapped in a list so the three values form ONE row under three
        # columns; the flat list was read as three single-column rows.
        self.df_validations = pd.DataFrame(data    = [l_validations],
                                           columns = ["Best Estimator","Mean Test R**2","Best Score"])

    def return_model_result_gcv(self):
        """Return the fitted GridSearchCV object."""
        return self.model_result

    def return_validations_gcv(self):
        """Return the one-row summary built by ``grid_search_cv_tuning``."""
        return self.df_validations

    def final_trial_model(self):
        """Fit the recommended random forest on the train split and report its test r2 as a string."""
        model = RandomForestRegressor(max_depth=16, max_features='sqrt', n_estimators=650, random_state = 42)
        model.fit(self.X_train, self.y_train)

        self.yhat = model.predict(self.X_test)

        return f"r**2 = {r2_score(self.y_test, self.yhat)}"

    def train_final_model(self, max_depth, max_features, n_estimators,random_state):
        """Fit the definitive random forest on all rows of ``self.df_cleaned``.

        The fitted model is returned and also kept as ``self.model`` so that
        ``predict`` can use it.
        """
        self.X_def = self.df_cleaned.drop(["price"], axis = 1)
        self.y_def = self.df_cleaned["price"]

        model = RandomForestRegressor(max_depth = max_depth, max_features = max_features, n_estimators = n_estimators, random_state = random_state)
        model.fit(self.X_def, self.y_def)

        self.model = model

        return model

    def predict(self, array):
        """Predict the price for one listing and store it in ``self.price_predicted``.

        Parameters
        ----------
        array : sequence
            A single row of features, already cleaned and in the same
            feature space the model was fitted on.

        Raises
        ------
        RuntimeError
            If no model (``train_final_model`` / ``load_model``) or no price
            scaler (``normalize``) is available on the instance.
        """
        model = getattr(self, "model", None)
        y_scaler = getattr(self, "y_scaler", None)

        if model is None:
            raise RuntimeError("No model available: call train_final_model() or load_model() first")
        if y_scaler is None:
            raise RuntimeError("No price scaler available: call normalize() first")

        # The scaler expects a 2-D input, hence the extra list around the prediction
        self.price_predicted = y_scaler.inverse_transform([model.predict([array])])

    def return_prediction(self):
        """Return the value stored by the last ``predict`` call."""
        return self.price_predicted

    def save_model(self, name, ext, model):
        """Pickle ``model`` to the file ``{name}.{ext}``."""
        with open(f"{name}.{ext}", "wb") as file:
            pickle.dump(model, file)

    def load_model(self, name, ext):
        """Load a pickled model from ``{name}.{ext}`` into ``self.model``.

        SECURITY: unpickling can execute arbitrary code; only load files
        that come from a trusted source.
        """
        with open(f"{name}.{ext}", "rb") as file:
            self.model = pickle.load(file)
            
        
    

In [3]:
# Input CSV files, one per city
madrid = "datasets/madrid.csv"
barcelona = "datasets/barcelona.csv"
london = "datasets/london.csv"

# Group N of d_csvs pairs with group N of d_names (same order inside each list)
d_csvs = {"csvs1": [madrid, barcelona], "csvs2": [london]}
d_names = {"names1": ["madrid", "barcelona"], "names2": ["london"]}

In [4]:
%%time

d_dfs = dict()

# One pipeline instance per dataset group. The number of groups follows d_csvs
# instead of a hard-coded range, so adding a group only requires new dict entries.
for i in range(1, len(d_csvs) + 1):

    d_dfs[f"instance{i}"] = airbnb(d_csvs[f"csvs{i}"], d_names[f"names{i}"], "csv")

Instance created!
Instance created!
CPU times: user 2.65 s, sys: 211 ms, total: 2.86 s
Wall time: 2.87 s


In [13]:
# Raw (uncleaned) frames of both instances; the first one is displayed below
df_1, df_2 = (d_dfs[key].return_initial_df() for key in ("instance1", "instance2"))

df_1

Unnamed: 0,id,listing_url,scrape_id,last_scraped,name,description,neighborhood_overview,picture_url,host_id,host_url,...,review_scores_location,review_scores_value,license,instant_bookable,calculated_host_listings_count,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms,reviews_per_month,city
0,6369,https://www.airbnb.com/rooms/6369,20220911230855,2022-09-12,"Rooftop terrace room , ensuite bathroom",Excellent connection with the AIRPORT and EXHI...,,https://a0.muscache.com/pictures/683224/4cc318...,13660,https://www.airbnb.com/users/show/13660,...,4.82,4.85,,f,1,0,1,0,0.65,madrid
1,21853,https://www.airbnb.com/rooms/21853,20220911230855,2022-09-12,Bright and airy room,We have a quiet and sunny room with a good vie...,We live in a leafy neighbourhood with plenty o...,https://a0.muscache.com/pictures/68483181/87bc...,83531,https://www.airbnb.com/users/show/83531,...,4.21,4.67,,f,2,0,2,0,0.34,madrid
2,24805,https://www.airbnb.com/rooms/24805,20220911230855,2022-09-12,Gran Via Studio Madrid,"Studio located 50 meters from Gran Via, next t...","The area is next to the Gran Via, so people li...",https://a0.muscache.com/pictures/miso/Hosting-...,346366726,https://www.airbnb.com/users/show/346366726,...,5.00,4.83,,f,1,1,0,0,0.25,madrid
3,26825,https://www.airbnb.com/rooms/26825,20220911230855,2022-09-12,Single Room whith private Bathroom,Nice and cozy roon for one person with a priva...,"Es un barrio muy tranquilo, en una zona de Mad...",https://a0.muscache.com/pictures/149358/218d5b...,114340,https://www.airbnb.com/users/show/114340,...,4.73,4.74,,f,1,0,1,0,1.10,madrid
4,363476,https://www.airbnb.com/rooms/363476,20220911230855,2022-09-12,PUERTA DEL SOL-QUIET DOWNTOWN,<b>The space</b><br />We want to welcome this ...,,https://a0.muscache.com/pictures/4264879/ee070...,1836617,https://www.airbnb.com/users/show/1836617,...,4.97,4.52,,f,1,0,1,0,3.74,madrid
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16915,26383234,https://www.airbnb.com/rooms/26383234,20220910194401,2022-09-11,LOVELY PRIVATE ROOM FOR 6 GUESTS ENSUITE BATHROOM,"PRIVATE ROOM FOR 6 GUESTS, TOTAL PRICE FOR 6 G...",COLLBLANC QUARTER next to BARCELONA CAMP NOU S...,https://a0.muscache.com/pictures/miso/Hosting-...,198281707,https://www.airbnb.com/users/show/198281707,...,4.37,4.10,HUTB-172880,f,12,0,12,0,0.59,barcelona
16916,26383143,https://www.airbnb.com/rooms/26383143,20220910194401,2022-09-11,LOVELY PRIVATE ROOM FOR 6 GUESTS ENSUITE BATHROOM,"PRIVATE ROOM FOR 6 GUESTS, TOTAL PRICE FOR 6 G...",COLLBLANC QUARTER next to BARCELONA CAMP NOU S...,https://a0.muscache.com/pictures/miso/Hosting-...,198281707,https://www.airbnb.com/users/show/198281707,...,4.00,4.00,HUTB-172880,f,12,0,12,0,0.05,barcelona
16917,26384664,https://www.airbnb.com/rooms/26384664,20220910194401,2022-09-11,HABITACION PRIVADA DE 6 CAMAS CON TERRAZA PRIVADA,PRIVATE ROOM FOR 6 PERSONS WITH PRIVATE TERRAC...,COLLBLANC QUARTER next to BARCELONA CAMP NOU S...,https://a0.muscache.com/pictures/miso/Hosting-...,198281707,https://www.airbnb.com/users/show/198281707,...,4.31,4.31,HUTB-172880,f,12,0,12,0,0.33,barcelona
16918,18220085,https://www.airbnb.com/rooms/18220085,20220910194401,2022-09-11,"Chillout house, with garden and pool for family","Our house, build in 1958 and totally renewed i...",Esplugues de Llobregat / Sant Just is a peacef...,https://a0.muscache.com/pictures/ee758054-4870...,10030977,https://www.airbnb.com/users/show/10030977,...,4.64,4.64,HUTB-017296,f,1,1,0,0,0.19,barcelona


In [14]:
# A single instance over the frames of both groups; despite its name, df_mix
# is an airbnb instance, not a DataFrame.
df_mix = airbnb(
    data=[df_1, df_2],
    file="dataframe",
)

Instance created!


In [18]:
# Show the concatenated frame held by the combined instance
df_mix.return_initial_df()

Unnamed: 0,id,listing_url,scrape_id,last_scraped,name,description,neighborhood_overview,picture_url,host_id,host_url,...,review_scores_location,review_scores_value,license,instant_bookable,calculated_host_listings_count,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms,reviews_per_month,city
0,6369,https://www.airbnb.com/rooms/6369,20220911230855,2022-09-12,"Rooftop terrace room , ensuite bathroom",Excellent connection with the AIRPORT and EXHI...,,https://a0.muscache.com/pictures/683224/4cc318...,13660,https://www.airbnb.com/users/show/13660,...,4.82,4.85,,f,1,0,1,0,0.65,madrid
1,21853,https://www.airbnb.com/rooms/21853,20220911230855,2022-09-12,Bright and airy room,We have a quiet and sunny room with a good vie...,We live in a leafy neighbourhood with plenty o...,https://a0.muscache.com/pictures/68483181/87bc...,83531,https://www.airbnb.com/users/show/83531,...,4.21,4.67,,f,2,0,2,0,0.34,madrid
2,24805,https://www.airbnb.com/rooms/24805,20220911230855,2022-09-12,Gran Via Studio Madrid,"Studio located 50 meters from Gran Via, next t...","The area is next to the Gran Via, so people li...",https://a0.muscache.com/pictures/miso/Hosting-...,346366726,https://www.airbnb.com/users/show/346366726,...,5.00,4.83,,f,1,1,0,0,0.25,madrid
3,26825,https://www.airbnb.com/rooms/26825,20220911230855,2022-09-12,Single Room whith private Bathroom,Nice and cozy roon for one person with a priva...,"Es un barrio muy tranquilo, en una zona de Mad...",https://a0.muscache.com/pictures/149358/218d5b...,114340,https://www.airbnb.com/users/show/114340,...,4.73,4.74,,f,1,0,1,0,1.10,madrid
4,363476,https://www.airbnb.com/rooms/363476,20220911230855,2022-09-12,PUERTA DEL SOL-QUIET DOWNTOWN,<b>The space</b><br />We want to welcome this ...,,https://a0.muscache.com/pictures/4264879/ee070...,1836617,https://www.airbnb.com/users/show/1836617,...,4.97,4.52,,f,1,0,1,0,3.74,madrid
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
69346,9671724,https://www.airbnb.com/rooms/9671724,20220910194334,2022-09-11,"Cozy room in Greenwich, river view","Single room available in a new three bedroom, ...",The following attractions can be found in Gree...,https://a0.muscache.com/pictures/29f1fa4e-299c...,49977905,https://www.airbnb.com/users/show/49977905,...,4.86,4.93,,f,3,1,2,0,0.84,london
69347,20277958,https://www.airbnb.com/rooms/20277958,20220910194334,2022-09-12,River view Tate Modern,"Best location in London, overlooking Thames Ri...","Best location in London, overlooking Thames Ri...",https://a0.muscache.com/pictures/pro_photo_too...,1132775,https://www.airbnb.com/users/show/1132775,...,5.00,4.80,,f,1,1,0,0,0.99,london
69348,20512703,https://www.airbnb.com/rooms/20512703,20220910194334,2022-09-12,"Luxury 4 bed house, 100m to river and palace",Forget your worries in this spacious and seren...,"Quiet, community suburb of London. River comes...",https://a0.muscache.com/pictures/4e5fed61-7fc4...,1706966,https://www.airbnb.com/users/show/1706966,...,5.00,5.00,,f,1,1,0,0,1.49,london
69349,49698549,https://www.airbnb.com/rooms/49698549,20220910194334,2022-09-12,Waterfront Hampton Stay,This luxury waterfront home is located on a ti...,Hampton is located on the outskirts of London ...,https://a0.muscache.com/pictures/44b6e0a6-ffae...,34213667,https://www.airbnb.com/users/show/34213667,...,5.00,4.78,,t,1,1,0,0,0.61,london


In [10]:
# `df_instance` is not defined anywhere in this notebook, so this cell fails on a
# fresh kernel; the captured output (madrid + barcelona) matches instance1.
d_dfs["instance1"].return_initial_df()

Unnamed: 0,id,listing_url,scrape_id,last_scraped,name,description,neighborhood_overview,picture_url,host_id,host_url,...,review_scores_location,review_scores_value,license,instant_bookable,calculated_host_listings_count,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms,reviews_per_month,city
0,6369,https://www.airbnb.com/rooms/6369,20220911230855,2022-09-12,"Rooftop terrace room , ensuite bathroom",Excellent connection with the AIRPORT and EXHI...,,https://a0.muscache.com/pictures/683224/4cc318...,13660,https://www.airbnb.com/users/show/13660,...,4.82,4.85,,f,1,0,1,0,0.65,madrid
1,21853,https://www.airbnb.com/rooms/21853,20220911230855,2022-09-12,Bright and airy room,We have a quiet and sunny room with a good vie...,We live in a leafy neighbourhood with plenty o...,https://a0.muscache.com/pictures/68483181/87bc...,83531,https://www.airbnb.com/users/show/83531,...,4.21,4.67,,f,2,0,2,0,0.34,madrid
2,24805,https://www.airbnb.com/rooms/24805,20220911230855,2022-09-12,Gran Via Studio Madrid,"Studio located 50 meters from Gran Via, next t...","The area is next to the Gran Via, so people li...",https://a0.muscache.com/pictures/miso/Hosting-...,346366726,https://www.airbnb.com/users/show/346366726,...,5.00,4.83,,f,1,1,0,0,0.25,madrid
3,26825,https://www.airbnb.com/rooms/26825,20220911230855,2022-09-12,Single Room whith private Bathroom,Nice and cozy roon for one person with a priva...,"Es un barrio muy tranquilo, en una zona de Mad...",https://a0.muscache.com/pictures/149358/218d5b...,114340,https://www.airbnb.com/users/show/114340,...,4.73,4.74,,f,1,0,1,0,1.10,madrid
4,363476,https://www.airbnb.com/rooms/363476,20220911230855,2022-09-12,PUERTA DEL SOL-QUIET DOWNTOWN,<b>The space</b><br />We want to welcome this ...,,https://a0.muscache.com/pictures/4264879/ee070...,1836617,https://www.airbnb.com/users/show/1836617,...,4.97,4.52,,f,1,0,1,0,3.74,madrid
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16915,26383234,https://www.airbnb.com/rooms/26383234,20220910194401,2022-09-11,LOVELY PRIVATE ROOM FOR 6 GUESTS ENSUITE BATHROOM,"PRIVATE ROOM FOR 6 GUESTS, TOTAL PRICE FOR 6 G...",COLLBLANC QUARTER next to BARCELONA CAMP NOU S...,https://a0.muscache.com/pictures/miso/Hosting-...,198281707,https://www.airbnb.com/users/show/198281707,...,4.37,4.10,HUTB-172880,f,12,0,12,0,0.59,barcelona
16916,26383143,https://www.airbnb.com/rooms/26383143,20220910194401,2022-09-11,LOVELY PRIVATE ROOM FOR 6 GUESTS ENSUITE BATHROOM,"PRIVATE ROOM FOR 6 GUESTS, TOTAL PRICE FOR 6 G...",COLLBLANC QUARTER next to BARCELONA CAMP NOU S...,https://a0.muscache.com/pictures/miso/Hosting-...,198281707,https://www.airbnb.com/users/show/198281707,...,4.00,4.00,HUTB-172880,f,12,0,12,0,0.05,barcelona
16917,26384664,https://www.airbnb.com/rooms/26384664,20220910194401,2022-09-11,HABITACION PRIVADA DE 6 CAMAS CON TERRAZA PRIVADA,PRIVATE ROOM FOR 6 PERSONS WITH PRIVATE TERRAC...,COLLBLANC QUARTER next to BARCELONA CAMP NOU S...,https://a0.muscache.com/pictures/miso/Hosting-...,198281707,https://www.airbnb.com/users/show/198281707,...,4.31,4.31,HUTB-172880,f,12,0,12,0,0.33,barcelona
16918,18220085,https://www.airbnb.com/rooms/18220085,20220910194401,2022-09-11,"Chillout house, with garden and pool for family","Our house, build in 1958 and totally renewed i...",Esplugues de Llobregat / Sant Just is a peacef...,https://a0.muscache.com/pictures/ee758054-4870...,10030977,https://www.airbnb.com/users/show/10030977,...,4.64,4.64,HUTB-017296,f,1,1,0,0,0.19,barcelona


In [None]:
%%time
# Clean every pipeline instance; the result is stored on the instance itself
for pipeline in d_dfs.values():
    pipeline.clean_tested_columns()

In [None]:
%%time
# Cleaned frames of all instances, in insertion order of d_dfs
l_dfs = [pipeline.return_cleaned() for pipeline in d_dfs.values()]

In [None]:
%%time

# Apply the default outlier limits to every instance
for pipeline in d_dfs.values():
    pipeline.remove_outliers()
    

In [None]:
%%time
# Encode the city and neighbourhood columns of every instance
for pipeline in d_dfs.values():
    pipeline.label_encoding()

In [None]:
%%time
# Min-max scale features and price of every instance
for pipeline in d_dfs.values():
    pipeline.normalize()

In [None]:
%%time
# Train/test split for every instance (prints the resulting shapes)
for pipeline in d_dfs.values():
    pipeline.tts()

In [None]:
%%time
# NOTE(review): this cell repeats the train/test split of the previous cell.
# The split uses a fixed random_state, so running it again yields the same
# partitions; confirm whether another step was intended here.
for instance in d_dfs.values():
    instance.tts()

In [None]:
%%time
# Run the random-forest grid search on every instance (long-running)
for pipeline in d_dfs.values():
    pipeline.grid_search_cv_tuning()

In [None]:
%%time
# The class defines no grid_search_cv_validation() method (this call raised
# AttributeError). The validation summary is built by grid_search_cv_tuning(),
# so show the stored summary of every instance instead.
for instance in d_dfs.values():
    display(instance.return_validations_gcv())

In [None]:
%%time
# Fitted grid-search object of every instance, keyed by instance name
d_models_resulted = {
    key: pipeline.return_model_result_gcv()
    for key, pipeline in d_dfs.items()
}

In [None]:
# Highest value of the "mean_test_r2" entries recorded by instance1's grid search
d_models_resulted["instance1"].cv_results_["mean_test_r2"].max()

In [None]:
# best_score_ of instance1's grid search (the search refits on "r2")
d_models_resulted["instance1"].best_score_

In [None]:
# The original line ended in a bare "." (SyntaxError); show the winning
# parameter combination of instance1's grid search.
d_models_resulted["instance1"].best_params_

In [None]:
%%time
# Validation summary of every instance, keyed by instance name
d_validations_resulted = {
    key: pipeline.return_validations_gcv()
    for key, pipeline in d_dfs.items()
}
    

In [None]:
# Persist instance1's fitted grid search as model_1.pkl
d_dfs["instance1"].save_model(
    name="model_1",
    ext="pkl",
    model=d_models_resulted["instance1"],
)

In [None]:
# The original line ended in a bare "." (SyntaxError); show the estimator
# refitted with the best parameters of instance1's grid search.
d_models_resulted["instance1"].best_estimator_