In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.metrics import mean_absolute_error, root_mean_squared_error, r2_score

In [2]:
%store -r listings_encoded

In [3]:
def train_and_eval(model, X_train, y_train, X_test, y_test, depth, estim=None):
    """
    Runs training and evaluation of given decision tree or random forest
    regressor and prints resulting metrics.
    """

    model.fit(X_train, y_train)
    extra_str = "" if estim is None else f"{estim} estimators and "
    print(f"\nTraining {model.__class__.__name__} with {extra_str} max_depth={depth}\n")

    # results on training set
    y_train_pred = model.predict(X_train)
    y_train_exp , y_train_pred_exp = np.exp(y_train), np.exp(y_train_pred)
    train_mae = mean_absolute_error(y_train, y_train_pred)
    train_rmse = root_mean_squared_error(y_train, y_train_pred)
    train_r2 = r2_score(y_train, y_train_pred)
    # results on test set
    y_pred = model.predict(X_test)
    y_test_exp , y_train_pred_exp = np.exp(y_test), np.exp(y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    rmse = root_mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    print(f"Mean Absolute Error - train: {train_mae}, test: {mae}")
    print(f"Root Mean Squared Error - train: {train_rmse}, test: {rmse}")
    print(f"R^2 Score - train: {train_r2}, test: {r2}")

In [4]:
X_train, X_test, y_train, y_test = train_test_split(
    listings_encoded.drop("price", axis=1),
    listings_encoded["price"],
    test_size=0.2,
)

In [5]:
y_train, y_test = np.log(y_train),np.log(y_test)

In [6]:
# ----------------------------------
# Random Forest Hyperparameter Search
# ----------------------------------
for estim in [100, 200]:
    for depth in [5,6,8]:

        model = RandomForestRegressor(
            n_estimators=estim, random_state=42, max_depth=depth
        )

        train_and_eval(
            model, X_train, y_train, X_test, y_test, estim=estim, depth=depth
        )

# ----------------------------------
# Decision Tree Hyperparameter Search
# ----------------------------------

for depth in [6, 8, 10, 12]:
    model = DecisionTreeRegressor(max_depth=depth)
    train_and_eval(model, X_train, y_train, X_test, y_test, depth=depth)


Training RandomForestRegressor with 100 estimators and  max_depth=5

Mean Absolute Error - train: 0.30197419938174386, test: 0.31816808180555184
Root Mean Squared Error - train: 0.39501628360797136, test: 0.4189831076389828
R^2 Score - train: 0.561501565821888, test: 0.49625706935992464

Training RandomForestRegressor with 100 estimators and  max_depth=6

Mean Absolute Error - train: 0.2832840125356306, test: 0.30606539469843336
Root Mean Squared Error - train: 0.36973119023402107, test: 0.40268300180104377
R^2 Score - train: 0.6158416944644176, test: 0.5346898436930045

Training RandomForestRegressor with 100 estimators and  max_depth=8

Mean Absolute Error - train: 0.24582206438541337, test: 0.2885122915783986
Root Mean Squared Error - train: 0.3192533369878678, test: 0.38001277155898283
R^2 Score - train: 0.7135763068676253, test: 0.585607083955335

Training RandomForestRegressor with 200 estimators and  max_depth=5

Mean Absolute Error - train: 0.3025585245499273, test: 0.31888632