In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn import metrics


import mlflow
import mlflow.sklearn
import logging

from airbnb_prediction.mlflow_utils import UiConn, MLflowRegressionModel

# Render every float in pandas output with exactly two decimal places.
pd.options.display.float_format = '{:.2f}'.format

In [2]:
# Open a UI session through the project's mlflow_utils helper; the cell output
# reports the local address where the UI can be reached.
# NOTE(review): presumably this launches the MLflow tracking UI — confirm in UiConn.
conn = UiConn()
conn.create_ui_session()

Access for UI at: http://127.0.0.1:5555


In [3]:
# Load the cleaned modelling table and replace every missing value with 0.
# Chaining keeps the cell idempotent: re-running it always rebuilds `df` from disk.
df = pd.read_csv('../data/cleaned/df_to_model.csv').fillna(0)


In [4]:
# Build one modelling frame per target: each keeps its own target column and
# drops the other one, so 'price' and 'log_price' never appear together.
price_df = df.drop(columns='log_price')
log_price_df = df.drop(columns='price')

In [5]:
# Random-forest hyperparameter sets used by the sweep cells below.
# Each dict is passed as-is to MLflowRegressionModel together with
# RandomForestRegressor.
# NOTE(review): no random seed is set in any of these configurations, so
# repeated runs may not reproduce the same metrics — confirm whether the
# wrapper seeds the model or the data split.
rfr_params_01 = dict(n_estimators=200, max_depth=200)
rfr_params_02 = dict(n_estimators=200, max_depth=300)
rfr_params_03 = dict(n_estimators=400, max_depth=450)
# max_depth=None: no explicit depth limit is supplied for this configuration.
rfr_params_04 = dict(n_estimators=500, max_depth=None)

# Sweep order: the loops iterate over this list from first to last.
param_list = [rfr_params_01, rfr_params_02, rfr_params_03, rfr_params_04]

In [6]:
# Wrap scikit-learn's LinearRegression class in the project's MLflow helper.
# The class itself (not an instance) is passed, and no hyperparameters are given.
lr_model =  MLflowRegressionModel(LinearRegression)

In [7]:
# Baseline: run the linear-regression wrapper on the raw-price frame with
# 'price' as the target. The call is the last expression in the cell so the
# returned (experiment_id, run_id) pair is displayed below the printed metrics.
lr_model.mlflow_run(
    price_df,
    'price',
    'linear_regressor',
    r_name='linear_regression_price',
)

----------------------------------------------------------------------------------------------------
Inside MLflow Run with run_id 0161722365a9496583ab02b76d78872c and experiment_id 0
Mean Absolute Error    : 856.6034189765395
Mean Squared Error     : 95743308.608073
Root Mean Squared Error: 9784.85097526135
R2                     : 0.0007777069323278152


('0', '0161722365a9496583ab02b76d78872c')

In [8]:
# Same linear-regression wrapper, now on the log-price frame with 'log_price'
# as the target and the log_price flag switched on.
# NOTE(review): the reported errors are on the same scale as the raw-price run,
# so the flag presumably converts predictions back to price — confirm in
# MLflowRegressionModel.mlflow_run.
lr_model.mlflow_run(
    log_price_df,
    'log_price',
    'linear_regressor',
    r_name='linear_regression_log_price',
    log_price=True,
)


----------------------------------------------------------------------------------------------------
Inside MLflow Run with run_id 6b0f41b1067546ffa74ffeed885c153c and experiment_id 0
Mean Absolute Error    : 708.7293158070956
Mean Squared Error     : 97067276.37732108
Root Mean Squared Error: 9852.272650374687
R2                     : -0.013039844702024794


('0', '6b0f41b1067546ffa74ffeed885c153c')

In [9]:
# Sweep every random-forest configuration against the raw 'price' target.
# A fresh wrapper is built per configuration and each one is logged as its own
# MLflow run; every run in this sweep shares the run name 'rfr_price'.
for params in param_list:
    random_forest_model = MLflowRegressionModel(RandomForestRegressor, params)
    random_forest_model.mlflow_run(
        price_df,
        'price',
        'random_forest_regressor',
        r_name='rfr_price',
    )

----------------------------------------------------------------------------------------------------
Inside MLflow Run with run_id e3fcd2520651443b816d55cd5440fb2d and experiment_id 0
Mean Absolute Error    : 802.1369230754998
Mean Squared Error     : 91983185.6926815
Root Mean Squared Error: 9590.786500213708
R2                     : 0.040020121847337164
----------------------------------------------------------------------------------------------------
Inside MLflow Run with run_id 8ccefb57d53049ff87634f76553c61da and experiment_id 0
Mean Absolute Error    : 800.5283849410308
Mean Squared Error     : 91880169.56359115
Root Mean Squared Error: 9585.414417936825
R2                     : 0.04109524672268472
----------------------------------------------------------------------------------------------------
Inside MLflow Run with run_id ee3a42fe760d499bba8c2c1cd1ce886f and experiment_id 0
Mean Absolute Error    : 797.7462413818395
Mean Squared Error     : 91861776.8775682
Root Mean Squar

In [10]:
# Repeat the random-forest sweep on the log-price frame, targeting 'log_price'
# with the log_price flag enabled. Every run in this sweep shares the run name
# 'rfr_log_price'.
for params in param_list:
    random_forest_model = MLflowRegressionModel(RandomForestRegressor, params)
    random_forest_model.mlflow_run(
        log_price_df,
        'log_price',
        'random_forest_regressor',
        r_name='rfr_log_price',
        log_price=True,
    )

----------------------------------------------------------------------------------------------------
Inside MLflow Run with run_id 38310805e1b94b6c95da119dcdc05cb2 and experiment_id 0
Mean Absolute Error    : 675.9145540194304
Mean Squared Error     : 95401634.64850183
Root Mean Squared Error: 9767.376037017406
R2                     : 0.004343577407534882
----------------------------------------------------------------------------------------------------
Inside MLflow Run with run_id aa310442c01d40da8e7470ec3af2c193 and experiment_id 0
Mean Absolute Error    : 675.806602022683
Mean Squared Error     : 95421748.28312238
Root Mean Squared Error: 9768.405616226344
R2                     : 0.004133662036950092
----------------------------------------------------------------------------------------------------
Inside MLflow Run with run_id dc3d0ff67835489791820c55fc2e3630 and experiment_id 0
Mean Absolute Error    : 676.015404129211
Mean Squared Error     : 95502515.59112431
Root Mean Squa

In [11]:
# Display the tracking URI MLflow is currently using; the output shows a local
# file:// store under this notebook directory's mlruns folder.
mlflow.get_tracking_uri()

'file:///home/victordualibi/%C3%81rea%20De%20Trabalho/projects/personal/airbnb/notebooks/mlruns'

In [None]:
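# NOTE(review): stray URI left in an unexecuted cell; it is not valid Python, so it is kept only as a comment.
# file:///home/victordualibi/%C3%81rea%20De%20Trabalho/projects/personal/airbnb/airbnb_prediction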