In [11]:
import joblib
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.multioutput import MultiOutputClassifier
import warnings
# Ignore FutureWarning and DeprecationWarning
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=DeprecationWarning)

In [12]:
# Feature matrix, read in full from disk.
train_features = pd.read_csv('train_features.csv')

# Label columns the multi-output model is trained to predict.
target_variables = [
    'O_Cor',
    'I_Cor',
    'P_Co',
    'Top_Cat',
    'Low_Cat',
]

# Keep only the target columns from the labels file, in the order listed above.
train_labels = pd.read_csv('train_labels.csv').loc[:, target_variables]

In [13]:
def print_result(results):
    """Print the best parameters and a score summary for every candidate.

    Args:
        results: a fitted search object exposing ``best_params_`` and
            ``cv_results_`` (with the ``mean_test_score``,
            ``std_test_score`` and ``params`` entries), e.g. a fitted
            ``GridSearchCV``.

    Prints one header line with the best parameters, then one line per
    candidate showing the mean test score, twice the standard deviation of
    the test score, and the parameter set. Both numbers are rounded to three
    decimals. Returns ``None``.
    """
    print('BEST PARAMETERS: {}\n'.format(results.best_params_))

    cv_table = results.cv_results_
    line_template = '{} (+/- {}) for {}'

    # The three entries are parallel sequences; zip them into per-candidate rows.
    columns = ('mean_test_score', 'std_test_score', 'params')
    rows = zip(*[cv_table[name] for name in columns])

    for avg, spread, candidate in rows:
        # Spread is reported as two standard deviations.
        print(line_template.format(round(avg, 3), round(spread * 2, 3), candidate))

In [15]:
# Fixed seed: random forests are stochastic, so without it the grid-search
# scores (and which candidate wins a near-tie) change on every re-run.
RANDOM_SEED = 42

# Create a Random Forest model (seeded for reproducible results)
rf = RandomForestClassifier(random_state=RANDOM_SEED)

# Use the MultiOutputClassifier for multi-output classification;
# it wraps the forest so one model is fitted per target column.
multi_output_classifier = MultiOutputClassifier(rf)

# Set up the parameter grid for grid search.
# The 'estimator__' prefix routes each parameter to the wrapped random forest.
parameters = {
    'estimator__n_estimators': [5, 50, 250],
    'estimator__max_depth': [2, 4, 8, 16, 32, None]
}

# Perform grid search with 3-fold cross-validation over every combination
cv = GridSearchCV(multi_output_classifier, parameters, cv=3)
cv.fit(train_features, train_labels)

# Display the results
print_result(cv)

BEST PARAMETERS: {'estimator__max_depth': 4, 'estimator__n_estimators': 50}

0.05 (+/- 0.082) for {'estimator__max_depth': 2, 'estimator__n_estimators': 5}
0.117 (+/- 0.125) for {'estimator__max_depth': 2, 'estimator__n_estimators': 50}
0.117 (+/- 0.125) for {'estimator__max_depth': 2, 'estimator__n_estimators': 250}
0.217 (+/- 0.17) for {'estimator__max_depth': 4, 'estimator__n_estimators': 5}
0.233 (+/- 0.205) for {'estimator__max_depth': 4, 'estimator__n_estimators': 50}
0.217 (+/- 0.205) for {'estimator__max_depth': 4, 'estimator__n_estimators': 250}
0.183 (+/- 0.205) for {'estimator__max_depth': 8, 'estimator__n_estimators': 5}
0.2 (+/- 0.163) for {'estimator__max_depth': 8, 'estimator__n_estimators': 50}
0.233 (+/- 0.205) for {'estimator__max_depth': 8, 'estimator__n_estimators': 250}
0.167 (+/- 0.262) for {'estimator__max_depth': 16, 'estimator__n_estimators': 5}
0.233 (+/- 0.205) for {'estimator__max_depth': 16, 'estimator__n_estimators': 50}
0.217 (+/- 0.205) for {'estimator__

In [16]:
# Persist the best estimator found by the grid search to disk.
# Kept as the cell's last expression so the written file list is displayed.
joblib.dump(
    value=cv.best_estimator_,
    filename='Multi_output_RF_model.pkl',
)

['Multi_output_RF_model.pkl']