In [2]:
import joblib
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.multioutput import MultiOutputClassifier
import warnings
# Silence the two warning categories below for the remainder of the session.
# Filters are registered in the same order as before (Future, then Deprecation).
for _category in (FutureWarning, DeprecationWarning):
    warnings.filterwarnings('ignore', category=_category)

In [3]:
# Label columns the multi-output model is trained to predict
target_variables = ['O_Cor', 'I_Cor', 'P_Co', 'Top_Cat', 'Low_Cat']

# Feature table: every column of the CSV is used as a model input
train_features = pd.read_csv('train_features.csv')

# Label table: read the whole file, then keep only the target columns,
# in the order listed above.
# NOTE(review): assumes rows line up one-to-one with train_features — confirm.
train_labels = pd.read_csv('train_labels.csv').loc[:, target_variables]

In [4]:
def print_result(results):
    """Print the best hyper-parameters and the CV score of every candidate.

    Args:
        results: A fitted search object exposing ``best_params_`` and a
            ``cv_results_`` mapping that contains the ``mean_test_score``,
            ``std_test_score`` and ``params`` entries (for example a fitted
            ``GridSearchCV``).

    Returns:
        None. Everything is written to standard output.
    """
    # Headline: the winning parameter combination, followed by a blank line
    print('BEST PARAMETERS: {}\n'.format(results.best_params_))

    cv_table = results.cv_results_
    row_template = '{} (+/- {}) for {}'

    # Walk the three parallel sequences together, one candidate per row
    candidates = zip(
        cv_table['mean_test_score'],
        cv_table['std_test_score'],
        cv_table['params'],
    )
    for avg, spread, combo in candidates:
        # The reported interval is two standard deviations wide on each side
        print(row_template.format(round(avg, 3), round(spread * 2, 3), combo))

In [5]:
# Base learner: a default KNN classifier, wrapped so that one model is fitted
# per target column
knn = KNeighborsClassifier()
multi_output_classifier = MultiOutputClassifier(estimator=knn)

# Candidate hyper-parameters; the 'estimator__' prefix routes each setting to
# the wrapped KNN classifier
parameters = {
    'estimator__n_neighbors': [3, 5, 7],
    'estimator__weights': ['uniform', 'distance'],
    'estimator__p': [1, 2],  # Minkowski power: 1 = Manhattan, 2 = Euclidean
}

# Exhaustive search over all 3 x 2 x 2 = 12 combinations, 3-fold cross-validated
cv = GridSearchCV(estimator=multi_output_classifier, param_grid=parameters, cv=3)
cv.fit(X=train_features, y=train_labels)

# Report the winning combination and the score of every candidate
print_result(cv)

BEST PARAMETERS: {'estimator__n_neighbors': 3, 'estimator__p': 1, 'estimator__weights': 'distance'}

0.117 (+/- 0.125) for {'estimator__n_neighbors': 3, 'estimator__p': 1, 'estimator__weights': 'uniform'}
0.233 (+/- 0.17) for {'estimator__n_neighbors': 3, 'estimator__p': 1, 'estimator__weights': 'distance'}
0.133 (+/- 0.094) for {'estimator__n_neighbors': 3, 'estimator__p': 2, 'estimator__weights': 'uniform'}
0.233 (+/- 0.094) for {'estimator__n_neighbors': 3, 'estimator__p': 2, 'estimator__weights': 'distance'}
0.117 (+/- 0.125) for {'estimator__n_neighbors': 5, 'estimator__p': 1, 'estimator__weights': 'uniform'}
0.2 (+/- 0.245) for {'estimator__n_neighbors': 5, 'estimator__p': 1, 'estimator__weights': 'distance'}
0.117 (+/- 0.125) for {'estimator__n_neighbors': 5, 'estimator__p': 2, 'estimator__weights': 'uniform'}
0.2 (+/- 0.163) for {'estimator__n_neighbors': 5, 'estimator__p': 2, 'estimator__weights': 'distance'}
0.133 (+/- 0.094) for {'estimator__n_neighbors': 7, 'estimator__p': 

In [6]:
# Persist the refitted best estimator from the grid search; kept as the cell's
# last expression so the list of written file names is displayed
joblib.dump(value=cv.best_estimator_, filename='Multi_output_KNN_model.pkl')

['Multi_output_KNN_model.pkl']