In [5]:
import joblib
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
import warnings 
warnings.filterwarnings('ignore',category=FutureWarning)
warnings.filterwarnings('ignore',category=DeprecationWarning)

In [6]:
# Read the training inputs and their matching labels from CSV files
# located relative to the notebook's working directory.
train_features = pd.read_csv(filepath_or_buffer='train_features.csv')
train_labels = pd.read_csv(filepath_or_buffer='train_labels.csv')

In [7]:
def print_result(results):
    """Print a summary of a finished hyper-parameter search.

    The first line printed is the best parameter combination, followed by a
    blank line. After that, one line is printed per evaluated candidate,
    showing its mean test score, twice its test-score standard deviation,
    and the parameter set itself (scores are rounded to 3 decimals).

    Parameters
    ----------
    results : object
        Any object exposing ``best_params_`` and a ``cv_results_`` mapping
        with the keys ``'mean_test_score'``, ``'std_test_score'`` and
        ``'params'`` (e.g. a fitted ``GridSearchCV``).

    Returns
    -------
    None
        Everything is written to standard output.
    """
    print('BEST PARAMETERS: {}\n'.format(results.best_params_))

    # Look the results table up once and walk its three columns in lockstep.
    cv_table = results.cv_results_
    row_template = '{} (+/- {}) for {}'
    candidates = zip(
        cv_table['mean_test_score'],
        cv_table['std_test_score'],
        cv_table['params'],
    )
    for avg_score, score_std, combo in candidates:
        # The spread is reported as two standard deviations.
        spread = round(score_std * 2, 3)
        print(row_template.format(round(avg_score, 3), spread, combo))

In [9]:
# Candidate values for each KNN hyper-parameter; every combination is tried
# (3 x 2 x 2 = 12 candidates).
parameters = dict(
    n_neighbors=[3, 5, 7],
    weights=['uniform', 'distance'],
    p=[1, 2],  # p=1 for Manhattan distance, p=2 for Euclidean distance
)

# Base estimator: a plain k-nearest-neighbours classifier with default settings.
knn = KNeighborsClassifier()

# Exhaustive grid search scored with 3-fold cross-validation.
cv = GridSearchCV(estimator=knn, param_grid=parameters, cv=3)

# The labels frame is flattened to a 1-D array before fitting
# (presumably it holds a single label column -- confirm against the CSV).
cv.fit(train_features, y=train_labels.values.ravel())

# Report the best combination and the per-candidate scores.
print_result(cv)

BEST PARAMETERS: {'n_neighbors': 7, 'p': 2, 'weights': 'distance'}

0.593 (+/- 0.084) for {'n_neighbors': 3, 'p': 1, 'weights': 'uniform'}
0.611 (+/- 0.03) for {'n_neighbors': 3, 'p': 1, 'weights': 'distance'}
0.54 (+/- 0.169) for {'n_neighbors': 3, 'p': 2, 'weights': 'uniform'}
0.558 (+/- 0.119) for {'n_neighbors': 3, 'p': 2, 'weights': 'distance'}
0.541 (+/- 0.104) for {'n_neighbors': 5, 'p': 1, 'weights': 'uniform'}
0.559 (+/- 0.061) for {'n_neighbors': 5, 'p': 1, 'weights': 'distance'}
0.559 (+/- 0.061) for {'n_neighbors': 5, 'p': 2, 'weights': 'uniform'}
0.576 (+/- 0.041) for {'n_neighbors': 5, 'p': 2, 'weights': 'distance'}
0.559 (+/- 0.131) for {'n_neighbors': 7, 'p': 1, 'weights': 'uniform'}
0.594 (+/- 0.134) for {'n_neighbors': 7, 'p': 1, 'weights': 'distance'}
0.595 (+/- 0.151) for {'n_neighbors': 7, 'p': 2, 'weights': 'uniform'}
0.612 (+/- 0.194) for {'n_neighbors': 7, 'p': 2, 'weights': 'distance'}


In [10]:
# Save the best estimator found by the grid search so it can be reloaded later.
joblib.dump(value=cv.best_estimator_, filename='KNN_model.pkl')

['KNN_model.pkl']