In [1]:
import joblib
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
import warnings
# Ignore FutureWarning and DeprecationWarning
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=DeprecationWarning)

In [2]:
train_features = pd.read_csv('train_features.csv')
train_labels = pd.read_csv('train_labels.csv')

In [3]:
print(train_features.shape[0])
print(train_labels.shape[0])

59
59


In [4]:
def print_result(results):
    # Display the best parameters found during grid search
    print('BEST PARAMETERS: {}\n'.format(results.best_params_))

    # Extract mean test scores, standard deviations, and corresponding parameter sets
    mean_scores = results.cv_results_['mean_test_score']
    std_scores = results.cv_results_['std_test_score']
    parameters = results.cv_results_['params']

    # Display mean test scores, along with their standard deviations, for each parameter set
    for mean, std, params in zip(mean_scores, std_scores, parameters):
        print('{} (+/- {}) for {}'.format(round(mean, 3), round(std * 2, 3), params))

In [5]:
# Create a Random Forest model
rf = RandomForestClassifier()

# Set up the parameter grid for grid search
parameters = {
    'n_estimators': [5, 50, 250],
    'max_depth': [2, 4, 8, 16, 32, None]
}

# Perform grid search
cv = GridSearchCV(rf, parameters, cv=3)
cv.fit(train_features, train_labels.values.ravel())

# Display the results
print_result(cv)

BEST PARAMETERS: {'max_depth': None, 'n_estimators': 5}

0.509 (+/- 0.025) for {'max_depth': 2, 'n_estimators': 5}
0.541 (+/- 0.104) for {'max_depth': 2, 'n_estimators': 50}
0.542 (+/- 0.085) for {'max_depth': 2, 'n_estimators': 250}
0.525 (+/- 0.041) for {'max_depth': 4, 'n_estimators': 5}
0.525 (+/- 0.072) for {'max_depth': 4, 'n_estimators': 50}
0.542 (+/- 0.085) for {'max_depth': 4, 'n_estimators': 250}
0.391 (+/- 0.142) for {'max_depth': 8, 'n_estimators': 5}
0.475 (+/- 0.041) for {'max_depth': 8, 'n_estimators': 50}
0.509 (+/- 0.025) for {'max_depth': 8, 'n_estimators': 250}
0.541 (+/- 0.104) for {'max_depth': 16, 'n_estimators': 5}
0.509 (+/- 0.025) for {'max_depth': 16, 'n_estimators': 50}
0.492 (+/- 0.063) for {'max_depth': 16, 'n_estimators': 250}
0.508 (+/- 0.063) for {'max_depth': 32, 'n_estimators': 5}
0.525 (+/- 0.122) for {'max_depth': 32, 'n_estimators': 50}
0.509 (+/- 0.025) for {'max_depth': 32, 'n_estimators': 250}
0.578 (+/- 0.156) for {'max_depth': None, 'n_estimat

In [6]:
joblib.dump(cv.best_estimator_,'RF_model.pkl')

['RF_model.pkl']