### Random Forest

Important Parameters

- **max_depth**: maximum depth of each tree (how deep an individual tree is allowed to grow)

- **n_estimators**: number of estimators (N independent decision trees in the forest)

In [1]:
import joblib
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

import warnings
# Suppress only these two warning categories; everything else still surfaces.
warnings.filterwarnings(action='ignore', category=FutureWarning)
warnings.filterwarnings(action='ignore', category=DeprecationWarning)

# Load the training features and the training labels from CSV.
X_train, y_train = (
    pd.read_csv('../Data/X_train.csv'),
    pd.read_csv('../Data/Y_train.csv'),
)

In [2]:
def performance(results):
    """Print the best parameter set followed by one summary line per candidate.

    `results` must expose `best_params_` and a `cv_results_` mapping holding
    'mean_test_score', 'std_test_score' and 'params' sequences of equal
    length. Everything is written to stdout; the function returns None.
    """
    print(f'Best Parameters : {results.best_params_}\n')

    cv = results.cv_results_
    rows = zip(cv['mean_test_score'], cv['std_test_score'], cv['params'])
    for avg_score, score_std, candidate in rows:
        # The spread is reported as twice the standard deviation.
        print(f'{round(avg_score,2)} | (+/-{round(score_std*2,2)}) for {candidate}')

In [3]:
# Base estimator for the search. No random_state is set, so the scores and
# the selected parameters can differ slightly from one run to the next.
rfc = RandomForestClassifier()

# Search grid: 3 forest sizes x 6 depth limits = 18 candidate settings.
# max_depth=None places no explicit limit on tree depth.
parameters = {
    'n_estimators':[5,50,100],
    'max_depth':[2,4,8,16,32,None]
}

# Evaluate every combination in the grid with 5-fold cross-validation.
gscv = GridSearchCV(rfc, parameters, cv=5)
# .values.ravel() flattens the label DataFrame's values into a 1-D array.
gscv.fit(X_train, y_train.values.ravel())

# performance() prints its report itself and returns None; wrapping it in
# print() would only append a stray "None" line to the cell output.
performance(gscv)

Best Parameters : {'max_depth': 4, 'n_estimators': 50}

0.79 | (+/-0.09) for {'max_depth': 2, 'n_estimators': 5}
0.8 | (+/-0.11) for {'max_depth': 2, 'n_estimators': 50}
0.81 | (+/-0.11) for {'max_depth': 2, 'n_estimators': 100}
0.83 | (+/-0.06) for {'max_depth': 4, 'n_estimators': 5}
0.83 | (+/-0.11) for {'max_depth': 4, 'n_estimators': 50}
0.82 | (+/-0.12) for {'max_depth': 4, 'n_estimators': 100}
0.8 | (+/-0.08) for {'max_depth': 8, 'n_estimators': 5}
0.82 | (+/-0.07) for {'max_depth': 8, 'n_estimators': 50}
0.83 | (+/-0.07) for {'max_depth': 8, 'n_estimators': 100}
0.79 | (+/-0.04) for {'max_depth': 16, 'n_estimators': 5}
0.82 | (+/-0.03) for {'max_depth': 16, 'n_estimators': 50}
0.81 | (+/-0.02) for {'max_depth': 16, 'n_estimators': 100}
0.78 | (+/-0.05) for {'max_depth': 32, 'n_estimators': 5}
0.82 | (+/-0.02) for {'max_depth': 32, 'n_estimators': 50}
0.81 | (+/-0.03) for {'max_depth': 32, 'n_estimators': 100}
0.78 | (+/-0.05) for {'max_depth': None, 'n_estimators': 5}
0.8 | (+/-

In [4]:
# Display the estimator selected by the grid search.
gscv.best_estimator_

RandomForestClassifier(max_depth=4, n_estimators=50)

Save the best model to disk as a **pickle** file

In [5]:
# Serialize the selected estimator to disk; joblib.dump returns the list of
# file names it wrote, which the notebook shows as the cell output.
joblib.dump(gscv.best_estimator_,'../Data/RFC_Model.pkl')

['../Data/RFC_Model.pkl']