# Titanic Notebook

## Random Forest Classifier

In this notebook, we will complete a supervised machine learning project: training a Random Forest classifier on the Titanic data set and tuning its hyperparameters.

In [8]:
# Import libraries
import pandas as pd

# Importing the machine learning model
from sklearn.ensemble import RandomForestClassifier

# Import GridSearchCV to find the model with the best parameters
from sklearn.model_selection import GridSearchCV

# Import the functions used to measure the model's metrics
from sklearn.metrics import accuracy_score, precision_score, recall_score

In [9]:
# Load the three data splits from CSV files in the working directory.
# Each split is stored as a pair of files: one for features, one for labels.
trainF, trainL = (
    pd.read_csv('./train_features.csv'),
    pd.read_csv('./train_labels.csv'),
)

valF, valL = (
    pd.read_csv('./validation_features.csv'),
    pd.read_csv('./validation_labels.csv'),
)

testF, testL = (
    pd.read_csv('./test_features.csv'),
    pd.read_csv('./test_labels.csv'),
)

### Tuning the Hyperparameters 

In [18]:
def printResults(results):
    """Print the best hyperparameters and the score of every candidate.

    Parameters
    ----------
    results : object
        A fitted search object exposing ``best_params_`` and a
        ``cv_results_`` mapping with the keys ``'mean_test_score'``,
        ``'std_test_score'`` and ``'params'`` (e.g. a fitted GridSearchCV).

    Returns
    -------
    None
        Everything is written to stdout: first the best parameter set,
        then one line per candidate in the form
        ``<mean> (+/-<2 * std>) for <params>``, both rounded to 3 decimals.
    """
    print('Best ML Params: {}\n'.format(results.best_params_))

    cvResults = results.cv_results_
    candidates = zip(
        cvResults['mean_test_score'],
        cvResults['std_test_score'],
        cvResults['params'],
    )

    # Per-candidate names are kept distinct from the score arrays so that
    # nothing is shadowed or rebound while iterating.
    for meanScore, stdScore, params in candidates:
        # The spread is reported as two standard deviations.
        print('{} (+/-{}) for {}'.format(
                round(meanScore, 3),
                round(stdScore * 2, 3),
                params
            )
        )


In [19]:
# A random forest is stochastic; fixing the seed makes the cross-validation
# scores (and therefore the selected "best" parameters) reproducible when
# the notebook is re-run from a fresh kernel.
RANDOM_STATE = 42
rfModel = RandomForestClassifier(random_state=RANDOM_STATE)

# Hyperparameter grid (parameter names as given in the scikit-learn docs).
# Every combination of the values below is evaluated: 3 x 4 = 12 candidates.
parameters = {
    'n_estimators': [5, 50, 100],
    'max_depth': [2, 10, 20, None]
}

# Exhaustive search over the grid using 5-fold cross-validation.
gridSearch = GridSearchCV(rfModel, parameters, cv=5)
# ravel() flattens the labels frame's values into a 1-D array for `y`.
gridSearch.fit(trainF, trainL.values.ravel())

printResults(gridSearch)

Best ML Params: {'max_depth': 10, 'n_estimators': 50}

0.799 (+/-0.039) for {'max_depth': 2, 'n_estimators': 5}
0.801 (+/-0.059) for {'max_depth': 2, 'n_estimators': 50}
0.799 (+/-0.057) for {'max_depth': 2, 'n_estimators': 100}
0.809 (+/-0.096) for {'max_depth': 10, 'n_estimators': 5}
0.833 (+/-0.047) for {'max_depth': 10, 'n_estimators': 50}
0.818 (+/-0.045) for {'max_depth': 10, 'n_estimators': 100}
0.811 (+/-0.04) for {'max_depth': 20, 'n_estimators': 5}
0.818 (+/-0.036) for {'max_depth': 20, 'n_estimators': 50}
0.824 (+/-0.042) for {'max_depth': 20, 'n_estimators': 100}
0.79 (+/-0.041) for {'max_depth': None, 'n_estimators': 5}
0.822 (+/-0.034) for {'max_depth': None, 'n_estimators': 50}
0.82 (+/-0.028) for {'max_depth': None, 'n_estimators': 100}
