## Grid search for the best hyperparameters of the KNN model
The search covers `n_neighbors`, `weights`, and `power_param`.

In [3]:
import os
import json
import knn as knn_lib

# Data source for the whole search: one spreadsheet holding the
# train/validation rows and a separate one holding the test rows, both under
# data/.  Feature columns are A1..A17 and the label column is 'Type'.
# k_fold = 5 is reused further down when naming output directories.
datareader = knn_lib.DataReader(
    data_dir = 'data/',
    train_valid_file = 'Data_train_validation_2019-1126.xlsx',
    test_file = 'Data_test_2019-1126.xlsx',
    feature_in = [f'A{idx}' for idx in range(1, 18)],  # 'A1' ... 'A17'
    output_col = 'Type',
    k_fold = 5,
)

# Search space: every combination of these values is passed to knn_lib.KNN.
n_neighbors_list = list(range(1, 8))      # 1 .. 7
weights_list = ['uniform', 'distance']
# NOTE(review): power_param is presumably the Minkowski distance exponent --
# confirm against knn_lib.KNN.
power_param_list = list(range(1, 4))      # 1 .. 3

# Exhaustive grid search over (weights, n_neighbors, power_param).
#
# For every combination this:
#   1. builds a knn_lib.KNN with those hyperparameters,
#   2. writes the configuration returned by
#      knn_lib.configuration_grid_search to <output_dir>/config.json,
#   3. runs knn_lib.k_fold_validation and receives three values, bound here to
#      acc_train_avg / acc_valid_avg / acc_test_avg,
#   4. keeps the combination with the highest acc_valid_avg.
#
# best_acc starts at -inf rather than 0 so that the first evaluated
# combination is always recorded; otherwise a run in which every combination
# scores 0 would leave best_hyper_param / best_performance empty and break
# any later lookup of their keys.
best_acc = float('-inf')
best_hyper_param = {}
best_performance = {}
for weights in weights_list:
    for n_neighbors in n_neighbors_list:
        for power_param in power_param_list:
            knn = knn_lib.KNN(n_neighbors = n_neighbors,
                              weights = weights,
                              power_param = power_param)

            config = knn_lib.configuration_grid_search(datareader, knn)

            # One output directory per hyperparameter combination.
            output_dir = f'output/{datareader.k_fold}-fold_{weights}_n-{n_neighbors}_p-{power_param}'
            # exist_ok avoids the check-then-create race and makes re-runs
            # idempotent.
            os.makedirs(output_dir, exist_ok=True)

            with open(os.path.join(output_dir, 'config.json'), 'w') as fout:
                json.dump(config, fout, indent = 4)

            print (output_dir)
            acc_train_avg, acc_valid_avg, acc_test_avg = knn_lib.k_fold_validation(datareader, knn, output_dir)

            # Model selection uses the validation score only.  The strict '>'
            # means ties keep the earliest combination in iteration order.
            if acc_valid_avg > best_acc:
                best_hyper_param['weights'] = weights
                best_hyper_param['n_neighbors'] = n_neighbors
                best_hyper_param['power_param'] = power_param
                best_performance['acc_train_avg'] = acc_train_avg
                best_performance['acc_valid_avg'] = acc_valid_avg
                best_performance['acc_test_avg'] = acc_test_avg
                best_acc = acc_valid_avg

# Report the winning hyperparameters, then the three average accuracies
# stored alongside them in best_performance.
print ()
print ('best_hyper_param of knn model:')
print (best_hyper_param)
print ()
print ('best performance of knn model:')
for split_name, metric_key in (('train', 'acc_train_avg'),
                               ('valid', 'acc_valid_avg'),
                               ('test', 'acc_test_avg')):
    print (f"average accuracy of {split_name} set in {datareader.k_fold}-fold: {best_performance[metric_key]}")

output/5-fold_uniform_n-1_p-1
output/5-fold_uniform_n-1_p-2
output/5-fold_uniform_n-1_p-3
output/5-fold_uniform_n-2_p-1
output/5-fold_uniform_n-2_p-2
output/5-fold_uniform_n-2_p-3
output/5-fold_uniform_n-3_p-1
output/5-fold_uniform_n-3_p-2
output/5-fold_uniform_n-3_p-3
output/5-fold_uniform_n-4_p-1
output/5-fold_uniform_n-4_p-2
output/5-fold_uniform_n-4_p-3
output/5-fold_uniform_n-5_p-1
output/5-fold_uniform_n-5_p-2
output/5-fold_uniform_n-5_p-3
output/5-fold_uniform_n-6_p-1
output/5-fold_uniform_n-6_p-2
output/5-fold_uniform_n-6_p-3
output/5-fold_uniform_n-7_p-1
output/5-fold_uniform_n-7_p-2
output/5-fold_uniform_n-7_p-3
output/5-fold_distance_n-1_p-1
output/5-fold_distance_n-1_p-2
output/5-fold_distance_n-1_p-3
output/5-fold_distance_n-2_p-1
output/5-fold_distance_n-2_p-2
output/5-fold_distance_n-2_p-3
output/5-fold_distance_n-3_p-1
output/5-fold_distance_n-3_p-2
output/5-fold_distance_n-3_p-3
output/5-fold_distance_n-4_p-1
output/5-fold_distance_n-4_p-2
output/5-fold_distance_n-4_p-