# Random Forest Regressor

### Import Libraries

In [2]:
import pandas as pd
import numpy  as np

from sklearn import metrics as mt
from sklearn import ensemble as en

### Load Dataset

In [3]:
X_train = pd.read_csv('Training/X_training.csv')
y_train = pd.read_csv('Training/y_training.csv')
X_val = pd.read_csv('Validation/X_val.csv')
y_val = pd.read_csv('Validation/y_val.csv')
X_test = pd.read_csv('Test/X_test.csv')
y_test = pd.read_csv('Test/y_test.csv')

In [4]:
y_train = y_train.values.ravel()
y_val = y_val.values.ravel()

### Model Training

In [5]:
m = np.arange( 1, 41, 1)
mse_list = []
max_r2 = 0
min_mse = float('inf')
min_rmse = float('inf')
min_mae = float('inf')
min_mape = float('inf')

for i in m:
    # Define
    model = en.RandomForestRegressor(max_depth=i, n_estimators=50)

    # Fit
    model.fit(X_train, y_train)

    # Precidt
    yhat_train = model.predict(X_train)

    r2 = mt.r2_score(y_train, yhat_train)
    if r2 > max_r2:
        max_r2 = r2
    
    mse = mt.mean_squared_error(y_train, yhat_train)
    mse_list.append( mse )
    if mse < min_mse:
        min_mse = mse

    rmse = np.sqrt(mse)
    if rmse < min_rmse:
        min_rmse = rmse
    
    mae = mt.mean_absolute_error(y_train, yhat_train)
    if mae < min_mae:
        min_mae = mae    

    mape = mt.mean_absolute_percentage_error(y_train, yhat_train)
    if mape < min_mape:
        min_mape = mape
   

print(f'Max R2: {max_r2}\n'
      f'Min MSE: {min_mse}\n'
      f'Min RMSE: {min_rmse}\n'
      f'Min MAE: {min_mae}\n'
      f'Min MAPE: {min_mape}')

Max R2: 0.8995249014702715
Min MSE: 48.02835906241995
Min RMSE: 6.930249567109395
Min MAE: 4.918861317542759
Min MAPE: 2.538294491101316


### Validation

In [9]:
# The best parameters retraining with validation data
best_m = mse_list.index( min ( mse_list ) )

# Define
model = en.RandomForestRegressor(max_depth=m[best_m], n_estimators=50)

# Fit
model.fit(X_train, y_train)

# Precidt
yhat_val = model.predict(X_val)

r2 = mt.r2_score(y_val, yhat_val)

mse = mt.mean_squared_error(y_val, yhat_val)

rmse = np.sqrt(mse)

mae = mt.mean_absolute_error(y_val, yhat_val)   

mape = mt.mean_absolute_percentage_error(y_val, yhat_val)   

print(f'R2: {r2}\n'
      f'MSE: {mse}\n'
      f'RMSE: {rmse}\n'
      f'MAE: {mae}\n'
      f'MAPE: {mape}')

R2: 0.32392991961979134
MSE: 322.8313138779214
RMSE: 17.967507169274246
MAE: 13.088959430009396
MAPE: 7.025925844193222


### Test

In [8]:
# The best parameters retraining with test data
best_m = mse_list.index( min ( mse_list ) )

# Define
model = en.RandomForestRegressor(max_depth=m[best_m], n_estimators=50)

# Fit
model.fit( np.concatenate( ( X_train, X_val ) ),
           np.concatenate( ( y_train, y_val ) ) )

# Precidt
y_pred = model.predict(X_test)

r2 = mt.r2_score(y_test, y_pred)

mse = mt.mean_squared_error(y_test, y_pred)

rmse = np.sqrt(mse)

mae = mt.mean_absolute_error(y_test, y_pred)   

mape = mt.mean_absolute_percentage_error(y_test, y_pred)   

print(f'R2: {r2}\n'
      f'MSE: {mse}\n'
      f'RMSE: {rmse}\n'
      f'MAE: {mae}\n'
      f'MAPE: {mape}')

R2: 0.3980213881962088
MSE: 293.10397355042846
RMSE: 17.120279599072806
MAE: 12.291762019991143
MAPE: 6.292061873043673


