# Importing required libraries

In [1]:
import numpy as np
import pandas as pd

from sklearn import metrics
from sklearn.ensemble import RandomForestRegressor


# Loading the feature and response matrices previously saved as CSV files

## Training Dataset

In [2]:
# Training split: feature matrix, read from a comma-delimited text file.
# NOTE(review): the path is an absolute, machine-specific location.
X_train = np.loadtxt(
    fname=r'R:\Ryerson\Misc\Datasets\Preprocessed Data Files\90_Day_Window_Scaled\X_train_90_day_window_scaled.csv',
    delimiter=',',
)

# Training split: response vector, read the same way.
y_target_train = np.loadtxt(
    fname=r'R:\Ryerson\Misc\Datasets\Preprocessed Data Files\90_Day_Window_Scaled\y_target_train_90_day_window_scaled.csv',
    delimiter=',',
)


In [3]:
# Print both shapes explicitly: with default notebook settings only the last
# bare expression of a cell is echoed, so the feature-matrix shape would be lost.
print(X_train.shape)
print(y_target_train.shape)

# Features and targets must be row-aligned before fitting.
assert X_train.shape[0] == y_target_train.shape[0], 'X_train / y_target_train row count mismatch'


(211099, 18)

(211099,)

## Validation Dataset

In [4]:
# Validation split: feature matrix, read from a comma-delimited text file.
# NOTE(review): the path is an absolute, machine-specific location.
X_valid = np.loadtxt(
    fname=r'R:\Ryerson\Misc\Datasets\Preprocessed Data Files\90_Day_Window_Scaled\X_valid_90_day_window_scaled.csv',
    delimiter=',',
)

# Validation split: response vector, read the same way.
y_target_valid = np.loadtxt(
    fname=r'R:\Ryerson\Misc\Datasets\Preprocessed Data Files\90_Day_Window_Scaled\y_target_valid_90_day_window_scaled.csv',
    delimiter=',',
)


In [5]:
# Print both shapes explicitly: with default notebook settings only the last
# bare expression of a cell is echoed, so the feature-matrix shape would be lost.
print(X_valid.shape)
print(y_target_valid.shape)

# Features and targets must be row-aligned before scoring.
assert X_valid.shape[0] == y_target_valid.shape[0], 'X_valid / y_target_valid row count mismatch'


(21839, 18)

(21839,)

## Testing Dataset

In [6]:
# Test split: feature matrix, read from a comma-delimited text file.
# NOTE(review): the path is an absolute, machine-specific location.
X_test = np.loadtxt(
    fname=r'R:\Ryerson\Misc\Datasets\Preprocessed Data Files\90_Day_Window_Scaled\X_test_90_day_window_scaled.csv',
    delimiter=',',
)

# Test split: response vector, read the same way.
y_target_test = np.loadtxt(
    fname=r'R:\Ryerson\Misc\Datasets\Preprocessed Data Files\90_Day_Window_Scaled\y_target_test_90_day_window_scaled.csv',
    delimiter=',',
)


In [7]:
# Print both shapes explicitly: with default notebook settings only the last
# bare expression of a cell is echoed, so the feature-matrix shape would be lost.
print(X_test.shape)
print(y_target_test.shape)

# Features and targets must be row-aligned before scoring.
assert X_test.shape[0] == y_target_test.shape[0], 'X_test / y_target_test row count mismatch'


(21881, 18)

(21881,)

# Training a Random Forest with 1000 Trees

In [8]:
# 1000-tree random forest regressor; n_jobs=-1 requests all available cores
# and random_state pins the seed.
rf = RandomForestRegressor(
    n_estimators=1000,
    n_jobs=-1,
    random_state=42,
)


In [9]:
# Fit the 1000-tree forest on the training split (the cell echoes the fitted estimator).
rf.fit(X=X_train, y=y_target_train)


RandomForestRegressor(n_estimators=1000, n_jobs=-1, random_state=42)

In [11]:
# Predictions of the 1000-tree forest on the validation features.
yPredictedValidation = rf.predict(X=X_valid)

In [12]:
# Validation-set error metrics for the 1000-tree forest (`rf`).
# `metrics` is sklearn.metrics, imported with the other dependencies in the import cell.
print('Validation Metrics:')
print('Mean Absolute Error:', metrics.mean_absolute_error(y_target_valid, yPredictedValidation))

# Compute the MSE once and derive the RMSE from it instead of re-scoring the predictions.
mse_valid_rf = metrics.mean_squared_error(y_target_valid, yPredictedValidation)
print('Mean Squared Error:', mse_valid_rf)
print('Root Mean Squared Error:', np.sqrt(mse_valid_rf))

print('R^2 Score:', metrics.r2_score(y_target_valid, yPredictedValidation))

Validation Metrics:
Mean Absolute Error: 0.49167515422041713
Mean Squared Error: 0.5456668905483621
Root Mean Squared Error: 0.7386926901955658
R^2 Score: 0.34641260392010553


In [15]:
# Predictions of the 1000-tree forest on the test features.
yPredictedTesting = rf.predict(X=X_test)


In [16]:
# Test-set error metrics for the 1000-tree forest (`rf`).
print('Testing Metrics:')
print('Mean Absolute Error:', metrics.mean_absolute_error(y_target_test, yPredictedTesting))

# Compute the MSE once and derive the RMSE from it instead of re-scoring the predictions.
mse_test_rf = metrics.mean_squared_error(y_target_test, yPredictedTesting)
print('Mean Squared Error:', mse_test_rf)
print('Root Mean Squared Error:', np.sqrt(mse_test_rf))

print('R^2 Score:', metrics.r2_score(y_target_test, yPredictedTesting))

Testing Metrics:
Mean Absolute Error: 0.48449318515312806
Mean Squared Error: 0.5299518322945836
Root Mean Squared Error: 0.7279779064604802
R^2 Score: 0.2719428292651286


In [17]:
# Estimator's built-in score on the validation split; the recorded value
# equals the R^2 printed in the validation metrics.
rf.score(X=X_valid, y=y_target_valid)

0.34641260392010553

In [18]:
# Estimator's built-in score on the test split; the recorded value
# equals the R^2 printed in the testing metrics.
rf.score(X=X_test, y=y_target_test)

0.2719428292651286

In [19]:
# Echo the validation predictions of `rf`; the recorded output is the
# abbreviated array repr (first and last few values only).
yPredictedValidation

array([0.3798527 , 0.0264804 , 0.8607079 , ..., 0.7352861 , 2.0032945 ,
       1.48547023])

In [20]:
# Echo the test predictions of `rf`; the recorded output is the
# abbreviated array repr (first and last few values only).
yPredictedTesting

array([0.8281098, 1.0808252, 1.3807517, ..., 1.4577227, 1.1730794,
       1.7385242])

# Training a Random Forest with 100 Trees

In [29]:
# 100-tree random forest regressor, for comparison with the other forest sizes.
# NOTE(review): this cell and the following fit cell carry execution counts
# 29/30, while the rf_2 predict/metric cells after them carry 25-28, so the
# recorded rf_2 outputs may predate this definition. Restart & Run All to confirm.
# NOTE(review): the seed (10) differs from the 1000-tree model's (42), so the
# comparison mixes seed variance with the effect of tree count.
rf_2 = RandomForestRegressor(
    n_estimators=100,
    n_jobs=-1,
    random_state=10,
)


In [30]:
# Fit the 100-tree forest on the training split (the cell echoes the fitted estimator).
rf_2.fit(X=X_train, y=y_target_train)


RandomForestRegressor(n_jobs=-1, random_state=10)

In [25]:
# Predictions of the 100-tree forest on the validation features.
yPredictedValidation_rf2 = rf_2.predict(X=X_valid)

In [26]:
# Validation-set error metrics for the 100-tree forest (`rf_2`).
print('Validation Metrics:')
print('Mean Absolute Error:', metrics.mean_absolute_error(y_target_valid, yPredictedValidation_rf2))

# Compute the MSE once and derive the RMSE from it instead of re-scoring the predictions.
mse_valid_rf2 = metrics.mean_squared_error(y_target_valid, yPredictedValidation_rf2)
print('Mean Squared Error:', mse_valid_rf2)
print('Root Mean Squared Error:', np.sqrt(mse_valid_rf2))

print('R^2 Score:', metrics.r2_score(y_target_valid, yPredictedValidation_rf2))

Validation Metrics:
Mean Absolute Error: 0.4926437918919081
Mean Squared Error: 0.5507769299174985
Root Mean Squared Error: 0.7421434699015403
R^2 Score: 0.34029191493752275


In [27]:
# Predictions of the 100-tree forest on the test features.
yPredictedTesting_rf2 = rf_2.predict(X=X_test)


In [28]:
# Test-set error metrics for the 100-tree forest (`rf_2`).
print('Testing Metrics:')
print('Mean Absolute Error:', metrics.mean_absolute_error(y_target_test, yPredictedTesting_rf2))

# Compute the MSE once and derive the RMSE from it instead of re-scoring the predictions.
mse_test_rf2 = metrics.mean_squared_error(y_target_test, yPredictedTesting_rf2)
print('Mean Squared Error:', mse_test_rf2)
print('Root Mean Squared Error:', np.sqrt(mse_test_rf2))

print('R^2 Score:', metrics.r2_score(y_target_test, yPredictedTesting_rf2))

Testing Metrics:
Mean Absolute Error: 0.48668791671704736
Mean Squared Error: 0.5398769546257258
Root Mean Squared Error: 0.7347631962923332
R^2 Score: 0.258307521217751


# Training a Random Forest with 500 Trees

In [31]:
# 500-tree random forest regressor, for comparison with the other forest sizes.
# NOTE(review): the seed (12) differs from the other two models' (42 and 10),
# so the comparison mixes seed variance with the effect of tree count.
rf_3 = RandomForestRegressor(
    n_estimators=500,
    n_jobs=-1,
    random_state=12,
)


In [32]:
# Fit the 500-tree forest on the training split (the cell echoes the fitted estimator).
rf_3.fit(X=X_train, y=y_target_train)


RandomForestRegressor(n_estimators=500, n_jobs=-1, random_state=12)

In [33]:
# Predictions of the 500-tree forest on the validation features.
yPredictedValidation_rf3 = rf_3.predict(X=X_valid)

In [34]:
# Validation-set error metrics for the 500-tree forest (`rf_3`).
print('Validation Metrics:')
print('Mean Absolute Error:', metrics.mean_absolute_error(y_target_valid, yPredictedValidation_rf3))

# Compute the MSE once and derive the RMSE from it instead of re-scoring the predictions.
mse_valid_rf3 = metrics.mean_squared_error(y_target_valid, yPredictedValidation_rf3)
print('Mean Squared Error:', mse_valid_rf3)
print('Root Mean Squared Error:', np.sqrt(mse_valid_rf3))

print('R^2 Score:', metrics.r2_score(y_target_valid, yPredictedValidation_rf3))

Validation Metrics:
Mean Absolute Error: 0.49187506886944654
Mean Squared Error: 0.5458887840077624
Root Mean Squared Error: 0.738842868279692
R^2 Score: 0.34614682497905447


In [35]:
# Predictions of the 500-tree forest on the test features.
yPredictedTesting_rf3 = rf_3.predict(X=X_test)


In [36]:
# Test-set error metrics for the 500-tree forest (`rf_3`).
print('Testing Metrics:')
print('Mean Absolute Error:', metrics.mean_absolute_error(y_target_test, yPredictedTesting_rf3))

# Compute the MSE once and derive the RMSE from it instead of re-scoring the predictions.
mse_test_rf3 = metrics.mean_squared_error(y_target_test, yPredictedTesting_rf3)
print('Mean Squared Error:', mse_test_rf3)
print('Root Mean Squared Error:', np.sqrt(mse_test_rf3))

print('R^2 Score:', metrics.r2_score(y_target_test, yPredictedTesting_rf3))

Testing Metrics:
Mean Absolute Error: 0.4847769038680257
Mean Squared Error: 0.5312332680108222
Root Mean Squared Error: 0.7288575087154019
R^2 Score: 0.2701823702852931
