In [None]:
import pandas as pd
import pickle
import joblib
import numpy as np

# Models
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor

#Tuning and Cross Validation
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_val_score

#### Importing Data

In [None]:
# Read the feature and label splits from the training-data folder, in the
# order x_train, x_test, y_train, y_test.
x_train, x_test, y_train, y_test = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../../Data Files/Training Data/x_train.csv',
        '../../Data Files/Training Data/x_test.csv',
        '../../Data Files/Training Data/y_train.csv',
        '../../Data Files/Training Data/y_test.csv',
    )
)

#### Defining RandomForest Architecture

In [None]:
# Baseline classifier: 100 trees, fixed seed so repeated runs build the same forest.
rf = RandomForestClassifier(n_estimators=100, random_state=123)

# Flatten the label frames read from CSV into 1-D arrays, the target shape
# scikit-learn estimators and metrics expect.
# (assumes each label file holds a single column -- TODO confirm)
y_train = np.ravel(y_train)
# Each split must be flattened from its own frame; flattening y_train here
# would silently replace the test labels with the training labels.
y_test = np.ravel(y_test)

#### Training RandomForest

In [None]:
# Fit the baseline forest in place. `rf_model` is kept as an alias of the same
# fitted estimator, since scikit-learn's `fit` returns the estimator itself.
rf.fit(x_train, y_train)
rf_model = rf

# Baseline predictions for the rows in x_test.
y_pred_rf = rf.predict(x_test)

#### Tuning Hyperparameters with Cross-Validation

In [None]:
# Regressor with the same seed as the classifier. It is defined here but is
# not the estimator handed to the grid search below.
rf_base = RandomForestRegressor(random_state=123)

# Candidate values: 3 x 3 x 3 = 27 combinations, each scored with 3-fold CV.
param_grid = {
    'n_estimators': [100, 200, 400],
    'max_depth': [5, 10, 20],
    'min_samples_split': [2, 5, 10],
}

# NOTE(review): the search runs on `rf_model` (the RandomForestClassifier),
# not on `rf_base` -- confirm which estimator is meant to be tuned.
estimator = GridSearchCV(estimator=rf_model, param_grid=param_grid, cv=3)
estimator.fit(x_train, y_train)

In [None]:
# Best configuration found by the grid search (GridSearchCV refits it on the
# full training data by default).
optimal_rf = estimator.best_estimator_
y_pred = optimal_rf.predict(x_test)

# Score the tuned model's predictions against the test labels.
mse_after_tuning = mean_squared_error(y_true=y_test, y_pred=y_pred)
print(f"mean_squared_error = {mse_after_tuning:.2%}")

#### Cross-Validating the Baseline Model

In [None]:
# 5-fold cross-validation of the default-parameter regressor on the training
# split. scikit-learn reports this metric negated (higher is better), so the
# sign is flipped when printing the mean.
scores = cross_val_score(
    estimator=rf_base,
    X=x_train,
    y=y_train,
    scoring='neg_mean_absolute_error',
    cv=5,
)
print("Mean Absolute Error:", -scores.mean())
print("Standard deviation:", scores.std())

#### Saving Model File and Predictions

In [None]:
# Root folder for everything this notebook writes out.
save_path = '../../Data Files/'

# Persist the fitted baseline forest. The `with` block guarantees the file
# handle is flushed and closed even if pickling raises.
with open(save_path + 'Model Files/' + 'rf.pkl', 'wb') as model_file:
    pickle.dump(rf, model_file)

# Write the baseline (untuned) predictions, one value per row.
np.savetxt(save_path + 'Predictions/' + 'randomforest_output.csv', y_pred_rf, delimiter=",")