In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

from model_knn import KnnModel
from model_svm import SVM_Model
from model_multilog import MultilogRegression
from model_bayes import BayesianClassifier
from model_decisiontree import DecisionTreeClassifier
from metrics import (return_precision, show_metrics_matrix, return_accuracy, return_recall, 
                     return_f1, return_roc_auc_ovr)
from image_feature_detector import PCA_transform, get_features, get_plain_data, threshold_mid, convert_to_emnist
from model_runner import ModelRunner

In [2]:
# Data

def _load_datapack(train_path, eval_path):
    """Read one train/evaluation CSV pair and split the evaluation set.

    Both files are parsed with ``np.genfromtxt`` (comma-delimited, missing
    fields replaced by 0).  Column 0 of the evaluation file is returned as the
    answers and the remaining columns as the evaluation input; the training
    array is returned unsplit.

    Args:
        train_path: path to the training CSV.
        eval_path: path to the evaluation CSV.

    Returns:
        Tuple ``(training, evaluation, evaluation_input, evaluation_answers)``.
    """
    training = np.genfromtxt(train_path, delimiter=",", filling_values=0)
    evaluation = np.genfromtxt(eval_path, delimiter=",", filling_values=0)
    return training, evaluation, evaluation[:, 1:], evaluation[:, 0]


# lite data
(training_data_lite, evaluation_data_lite,
 evaluation_input_lite, evaluation_answers_lite) = _load_datapack(
    "datasets/light-train.csv", "datasets/medium-test.csv")

datapack_lite = (training_data_lite, evaluation_input_lite, evaluation_answers_lite)

# medium size
# FIXME(review): these are exactly the same two files as the lite pack above,
# so datapack_mid and datapack_lite contain identical data.  Point this at the
# intended medium-size files once their names are confirmed.
(training_data_mid, evaluation_data_mid,
 evaluation_input_mid, evaluation_answers_mid) = _load_datapack(
    "datasets/light-train.csv", "datasets/medium-test.csv")

datapack_mid = (training_data_mid, evaluation_input_mid, evaluation_answers_mid)

# full size
(training_data, evaluation_data,
 evaluation_input, evaluation_answers) = _load_datapack(
    "datasets/emnist-letters-train.csv", "datasets/emnist-letters-test.csv")

datapack_full = (training_data, evaluation_input, evaluation_answers)

In [3]:
# Metric callables passed to every ModelRunner in this notebook.
my_metrics = [
    return_accuracy,
    return_recall,
    return_precision,
    return_f1,
]

# Сравнение моделей уменьшения размерности данных

In [4]:
# Candidate data converters compared in the cells below.
#
# get_plain_data --> np.ndarray with shape(784,)
# get_features   --> np.ndarray with shape(13,)
#
# PCA converters, all fitted on the full training set.  The argument is the
# number of output components; relative to the 784 plain inputs the output is
# roughly x32 (24), x16 (49), x8 (98), x4 (196) and x2 (392) smaller.
PCA_extreme, PCA_severe, PCA_rought, PCA_medium, PCA_fine = (
    PCA_transform(n_components).fit(training_data)
    for n_components in (24, 49, 98, 196, 392)
)

# Single-parameter sweep: only the data converter is varied.
params_to_change = dict(
    data_converter=[
        get_plain_data,
        get_features,
        PCA_extreme,
        PCA_severe,
        PCA_rought,
        PCA_medium,
        PCA_fine,
    ],
)

In [5]:
# Logistic regression: compare data converters.
# `hp` holds the defaults; `params_to_change` (the converter list defined in
# the cell above) supplies the values that are swept.
hp = dict(
    data_converter=get_plain_data,
    normalization=True,
    shift_column=True,
    learning_rate=0.05,
    batch_size=300,
    epochs=300,
    num_classes=26,
    reg='l1',
    reg_w=0.05,
)

MultilogRunner = ModelRunner(
    MultilogRegression,
    defaults=hp,
    metrics=my_metrics,
    responsive_bar=True,
)
MultilogRunner.run(*datapack_mid, params_to_change, one_vs_one=True)

on 0: -----With parameters-----                                                                                         
on 0: data_converter = <function get_plain_data at 0x7f923ede8940>                                                      
on 0: ~fit complete in                                                                                                  
on 300: 8.972s                                                                                                          
on 300: ~eval complete in 0.159s                                                                                        
on 300:     return_accuracy = 0.560                                                                                     
on 300:     return_recall = 0.683                                                                                       
on 300:     return_precision = 0.466                                                                                    
on 300:     return_f1 = 0.433   

on 2106:     return_precision = 0.313                                                                                    
on 2106:     return_f1 = 0.273                                                                                           
on 2106: -----End with-----                                                                                             
Проверка модели MultilogRegression |████████████████████████████████████████✗︎ (!) 2107/2100 [100%] in 48.4s (43.55/s)   
On iteration 0:
With hyperparameters: [<function get_plain_data at 0x7f923ede8940>]
Got metrics: [0.5604, 0.6833872922517765, 0.46563957655496757, 0.433005939182378]


In [6]:
# k-nearest neighbours: compare data converters with k fixed at 1.
hp = dict(
    data_converter=get_plain_data,
    k=1,
)

KNNrunner = ModelRunner(
    KnnModel,
    defaults=hp,
    metrics=my_metrics,
    responsive_bar=True,
)
KNNrunner.run(*datapack_mid, params_to_change, one_vs_one=True)

on 0: -----With parameters-----                                                                                         
on 0: data_converter = <function get_plain_data at 0x7f923ede8940>                                                      
on 0: ~fit complete in 0.004s                                                                                           
on 0: ~eval complete in                                                                                                 
on 5000: 3.505s                                                                                                         
on 5000:     return_accuracy = 0.233                                                                                    
on 5000:     return_recall = 0.437                                                                                      
on 5000:     return_precision = 0.367                                                                                   
on 5000:     return_f1 = 0.185  

on 35006:     return_precision = 0.488                                                                                  
on 35006:     return_f1 = 0.454                                                                                         
on 35006: -----End with-----                                                                                            
Проверка модели KnnModel |████████████████████████████████████████| 35007/35007 [100%] in 22.8s (1535.25/s)             
On iteration 3:
With hyperparameters: [<PCA_transform(49);vector:(784, 49)>]
Got metrics: [0.6292, 0.7339819667849938, 0.49995519026928925, 0.4768180509485743]


In [7]:
# SVM: compare data converters; all other hyperparameters stay at the
# defaults given here.
hp = dict(
    data_converter=get_plain_data,
    num_classes=26,
    epochs=100,
    batch_size=1000,
    learning_rate=0.01,
    regularization=0.0005,
    normalization=True,
    shift_column=True,
)

SVMRunner = ModelRunner(
    SVM_Model,
    defaults=hp,
    metrics=my_metrics,
    responsive_bar=True,
)
SVMRunner.run(*datapack_mid, params_to_change, one_vs_one=True)

on 0: -----With parameters-----                                                                                         
on 0: data_converter = <function get_plain_data at 0x7f923ede8940>                                                      
on 0: ~fit complete in                                                                                                  
on 100: 2.332s                                                                                                          
on 100: ~eval complete in 0.077s                                                                                        
on 100:     return_accuracy = 0.434                                                                                     
on 100:     return_recall = 0.591                                                                                       
on 100:     return_precision = 0.424                                                                                    
on 100:     return_f1 = 0.330   

on 706:     return_precision = 0.318                                                                                    
on 706:     return_f1 = 0.271                                                                                           
on 706: -----End with-----                                                                                              
Проверка модели SVM_Model |████████████████████████████████████████✗︎ (!) 707/700 [101%] in 15.6s (45.18/s)              
On iteration 2:
With hyperparameters: [<PCA_transform(24);vector:(784, 24)>]
Got metrics: [0.4754, 0.6221787483547424, 0.4192542449065625, 0.3548350002631533]


In [8]:
# Bayesian classifier: compare data converters.
# Unlike the other runners in this section, this one is created without
# `responsive_bar`.
hp = dict(
    data_converter=get_plain_data,
    num_classes=26,
)

BayesRunner = ModelRunner(
    BayesianClassifier,
    defaults=hp,
    metrics=my_metrics,
)
BayesRunner.run(*datapack_mid, params_to_change, one_vs_one=True)

on 0: -----With parameters-----                                                                                         
on 0: data_converter = <function get_plain_data at 0x7f923ede8940>                                                      
on 0: ~fit complete in 0.010s                                                                                           
on 0: ~eval complete in 0.123s                                                                                          
on 0:     return_accuracy = 0.270                                                                                       
on 0:     return_recall = 0.396                                                                                         
on 0:     return_precision = 0.520                                                                                      
on 0:     return_f1 = 0.196                                                                                             
on 0: -----End with-----        

        prob_inverse = np.power(prob_in, -1)/10000


on 1: -----With parameters-----                                                                                         
on 1: data_converter = <function get_features at 0x7f923ede8550>                                                        
on 1: ~fit complete in 0.503s                                                                                           
on 1: ~eval complete in 2.549s                                                                                          
on 1:     return_accuracy = 0.056                                                                                       
on 1:     return_recall = 0.099                                                                                         
on 1:     return_precision = 0.809                                                                                      
on 1:     return_f1 = 0.006                                                                                             
on 1: -----End with-----        

        softvals = (np.exp(mod_array - np.max(x)).T /


on 3: -----With parameters-----                                                                                         
on 3: data_converter = <PCA_transform(49);vector:(784, 49)>                                                             
on 3: ~fit complete in 0.047s                                                                                           
on 3: ~eval complete in 0.192s                                                                                          
on 3:     return_accuracy = 0.036                                                                                       
on 3:     return_recall = 0.204                                                                                         
on 3:     return_precision = 0.373                                                                                      
on 3:     return_f1 = 0.012                                                                                             
on 3: -----End with-----        

        softvals = (np.exp(mod_array - np.max(x)).T /


on 4: -----With parameters-----                                                                                         
on 4: data_converter = <PCA_transform(98);vector:(784, 98)>                                                             
on 4: ~fit complete in 0.038s                                                                                           
on 4: ~eval complete in 0.285s                                                                                          
on 4:     return_accuracy = 0.040                                                                                       
on 4:     return_recall = 0.170                                                                                         
on 4:     return_precision = 0.288                                                                                      
on 4:     return_f1 = 0.009                                                                                             
on 4: -----End with-----        

        softvals = (np.exp(mod_array - np.max(x)).T /


on 5: -----With parameters-----                                                                                         
on 5: data_converter = <PCA_transform(196);vector:(784, 196)>                                                           
on 5: ~fit complete in 0.037s                                                                                           
on 5: ~eval complete in 0.230s                                                                                          
on 5:     return_accuracy = 0.053                                                                                       
on 5:     return_recall = 0.143                                                                                         
on 5:     return_precision = 0.538                                                                                      
on 5:     return_f1 = 0.007                                                                                             
on 5: -----End with-----        

        softvals = (np.exp(mod_array - np.max(x)).T /


on 6: -----With parameters-----                                                                                         
on 6: data_converter = <PCA_transform(392);vector:(784, 392)>                                                           
on 6: ~fit complete in 0.057s                                                                                           
on 6: ~eval complete in 0.282s                                                                                          
on 6:     return_accuracy = 0.053                                                                                       
on 6:     return_recall = 0.217                                                                                         
on 6:     return_precision = 0.633                                                                                      
on 6:     return_f1 = 0.005                                                                                             
on 6: -----End with-----        

        softvals = (np.exp(mod_array - np.max(x)).T /


Проверка модели BayesianClassifier |████████████████████████████████████████| 7/7 [100%] in 4.9s (1.43/s)               
On iteration 0:
With hyperparameters: [<function get_plain_data at 0x7f923ede8940>]
Got metrics: [0.2702, 0.3956182433071156, 0.5202723933041188, 0.19553933860467618]


In [9]:
# Decision tree: compare data converters.
# NOTE: this sweep is slow -- the recorded run took about 2 h 15 min in total,
# with the plain-data fit alone taking ~5500 s.
hp = dict(
    data_converter=PCA_severe,
    sample_len=16,
    num_classes=26,
    window_size=-1,
    min_samples=4,
    max_depth=6,
    tree_type='multilabel_ovr',
)

TreeRunner = ModelRunner(
    DecisionTreeClassifier,
    defaults=hp,
    metrics=my_metrics,
    responsive_bar=True,
)
TreeRunner.run(*datapack_mid, params_to_change, one_vs_one=False)

on 0: -----With parameters-----                                                                                         
on 0: data_converter = <function get_plain_data at 0x7f923ede8940>                                                      
on 0: ~fit complete in                                                                                                  
                                                                                                                        

        self.right_mean = data[is_true][:, -1].mean()
        ret = ret.dtype.type(ret / rcount)
        self.left_mean = data[is_false][:, -1].mean()


on 26: 5535.868s                                                                                                        
on 26: ~eval complete in 0.321s                                                                                         
on 26:     return_accuracy = 0.344                                                                                      
on 26:     return_recall = 0.521                                                                                        
on 26:     return_precision = 0.297                                                                                     
on 26:     return_f1 = 0.268                                                                                            
on 27: -----End with-----                                                                                               
on 27: -----With parameters-----                                                                                        
on 27: data_converter = <functio

         self.left_mean = data[is_false][:, -1].mean()
         ret = ret.dtype.type(ret / rcount)
         self.right_mean = data[is_true][:, -1].mean()


on 53: 48.067s                                                                                                          
on 53: ~eval complete in 2.579s                                                                                         
on 53:     return_accuracy = 0.208                                                                                      
on 53:     return_recall = 0.424                                                                                        
on 53:     return_precision = 0.190                                                                                     
on 53:     return_f1 = 0.165                                                                                            
on 54: -----End with-----                                                                                               
on 54: -----With parameters-----                                                                                        
on 54: data_converter = <PCA_tra

         self.right_mean = data[is_true][:, -1].mean()
         ret = ret.dtype.type(ret / rcount)
         self.left_mean = data[is_false][:, -1].mean()


on 80: 71.243s                                                                                                          
on 80: ~eval complete in 0.329s                                                                                         
on 80:     return_accuracy = 0.336                                                                                      
on 80:     return_recall = 0.518                                                                                        
on 80:     return_precision = 0.289                                                                                     
on 80:     return_f1 = 0.264                                                                                            
on 81: -----End with-----                                                                                               
on 81: -----With parameters-----                                                                                        
on 81: data_converter = <PCA_tra

         self.right_mean = data[is_true][:, -1].mean()
         ret = ret.dtype.type(ret / rcount)
         self.left_mean = data[is_false][:, -1].mean()


on 107: 154.429s                                                                                                        
on 107: ~eval complete in 0.331s                                                                                        
on 107:     return_accuracy = 0.301                                                                                     
on 107:     return_recall = 0.493                                                                                       
on 107:     return_precision = 0.269                                                                                    
on 107:     return_f1 = 0.239                                                                                           
on 108: -----End with-----                                                                                              
on 108: -----With parameters-----                                                                                       
on 108: data_converter = <PCA_tr

          self.right_mean = data[is_true][:, -1].mean()
          ret = ret.dtype.type(ret / rcount)
          self.left_mean = data[is_false][:, -1].mean()


on 134: 295.117s                                                                                                        
on 134: ~eval complete in 0.344s                                                                                        
on 134:     return_accuracy = 0.289                                                                                     
on 134:     return_recall = 0.480                                                                                       
on 134:     return_precision = 0.257                                                                                    
on 134:     return_f1 = 0.230                                                                                           
on 135: -----End with-----                                                                                              
on 135: -----With parameters-----                                                                                       
on 135: data_converter = <PCA_tr

          self.right_mean = data[is_true][:, -1].mean()
          ret = ret.dtype.type(ret / rcount)
          self.left_mean = data[is_false][:, -1].mean()


on 161: 620.239s                                                                                                        
on 161: ~eval complete in 0.375s                                                                                        
on 161:     return_accuracy = 0.269                                                                                     
on 161:     return_recall = 0.465                                                                                       
on 161:     return_precision = 0.248                                                                                    
on 161:     return_f1 = 0.216                                                                                           
on 162: -----End with-----                                                                                              
on 162: -----With parameters-----                                                                                       
on 162: data_converter = <PCA_tr

          self.right_mean = data[is_true][:, -1].mean()
          ret = ret.dtype.type(ret / rcount)
          self.left_mean = data[is_false][:, -1].mean()


on 188: 1399.767s                                                                                                        
on 188: ~eval complete in 0.454s                                                                                         
on 188:     return_accuracy = 0.254                                                                                      
on 188:     return_recall = 0.455                                                                                        
on 188:     return_precision = 0.223                                                                                     
on 188:     return_f1 = 0.198                                                                                            
on 189: -----End with-----                                                                                              
Проверка модели DecisionTreeClassifier |████████████████████████████████████████✗︎ (!) 189/182 [104%] in 2:15:29.7 (0.02/
On iteration 0:
With hype

# Поиск оптимальных гиперпараметров моделей

In [10]:
# Logistic regression: search over learning rate and epoch count, using the
# PCA(49) converter and no regularisation as defaults.
# NOTE(review): the saved output below shows learning rates (0.1, 0.8) that
# are not in the grid defined here, so it appears to come from an earlier
# version of this cell -- re-run to refresh it.
hp = dict(
    data_converter=PCA_severe,
    normalization=True,
    shift_column=True,
    learning_rate=1,
    batch_size=50,
    epochs=300,
    num_classes=26,
    reg=None,
    reg_w=0.01,
)

MultilogRunner = ModelRunner(
    MultilogRegression,
    defaults=hp,
    metrics=my_metrics,
    responsive_bar=True,
)
params_to_change = dict(
    learning_rate=[0.2, 0.5, 1],
    epochs=[400, 250, 150],
)
MultilogRunner.run(*datapack_lite, params_to_change, one_vs_one=True)

on 0: -----With parameters-----                                                                                         
on 0: learning_rate = 0.1                                                                                               
on 0: epochs = 300                                                                                                      
on 0: ~fit complete in                                                                                                  
on 300: 7.953s                                                                                                          
on 300: ~eval complete in 0.191s                                                                                        
on 300:     return_accuracy = 0.286                                                                                     
on 300:     return_recall = 0.461                                                                                       
on 300:     return_precision = 0

on 1206: learning_rate = 0.8                                                                                            
on 1206: epochs = 300                                                                                                   
on 1206: ~fit complete in                                                                                               
on 1506: 8.090s                                                                                                         
on 1506: ~eval complete in 0.193s                                                                                       
on 1506:     return_accuracy = 0.442                                                                                    
on 1506:     return_recall = 0.600                                                                                      
on 1506:     return_precision = 0.377                                                                                   
on 1506:     return_f1 = 0.343  

In [11]:
# Regularisation sweep, reusing the MultilogRunner built in the previous cell:
# every `reg` option is tried with a single regularisation weight.
params_to_change = dict(
    reg=[None, 'l1', 'l2'],
    reg_w=[0.05],
)
MultilogRunner.run(*datapack_lite, params_to_change, one_vs_one=False)

on 0: -----With parameters-----                                                                                         
on 0: reg = None                                                                                                        
on 0: reg_w = 0.05                                                                                                      
on 0: ~fit complete in                                                                                                  
on 300: 8.892s                                                                                                          
on 300: ~eval complete in 0.203s                                                                                        
on 300:     return_accuracy = 0.387                                                                                     
on 300:     return_recall = 0.558                                                                                       
on 300:     return_precision = 0

In [12]:
# k-nearest neighbours: search over the neighbour count k on PCA(49) features.
hp = dict(
    data_converter=PCA_severe,
    k=3,
)

KNNrunner = ModelRunner(
    KnnModel,
    defaults=hp,
    metrics=my_metrics,
    responsive_bar=True,
)
params_to_change = dict(k=[1, 3, 5, 7, 10])
KNNrunner.run(*datapack_lite, params_to_change, one_vs_one=True)

on 0: -----With parameters-----                                                                                         
on 0: k = 1                                                                                                             
on 0: ~fit complete in 0.050s                                                                                           
on 0: ~eval complete in                                                                                                 
on 5000: 1.709s                                                                                                         
on 5000:     return_accuracy = 0.629                                                                                    
on 5000:     return_recall = 0.734                                                                                      
on 5000:     return_precision = 0.500                                                                                   
on 5000:     return_f1 = 0.477  

In [13]:
# Bayesian classifier: plain pixels versus the threshold_mid converter.
hp = dict(
    data_converter=threshold_mid,
    num_classes=26,
)

BayesRunner = ModelRunner(
    BayesianClassifier,
    defaults=hp,
    metrics=my_metrics,
)
params_to_change = dict(
    data_converter=[get_plain_data, threshold_mid],
)
BayesRunner.run(*datapack_mid, params_to_change, one_vs_one=False)

on 0: -----With parameters-----                                                                                         
on 0: data_converter = <function get_plain_data at 0x7f923ede8940>                                                      
on 0: ~fit complete in 0.030s                                                                                           
on 0: ~eval complete in 0.122s                                                                                          
on 0:     return_accuracy = 0.270                                                                                       
on 0:     return_recall = 0.396                                                                                         
on 0:     return_precision = 0.520                                                                                      
on 0:     return_f1 = 0.196                                                                                             
                                

        prob_inverse = np.power(prob_in, -1)/10000


on 1: -----End with-----                                                                                                
on 1: -----With parameters-----                                                                                         
on 1: data_converter = <function threshold_mid at 0x7f923ede89d0>                                                       
on 1: ~fit complete in 0.024s                                                                                           
on 1: ~eval complete in 0.184s                                                                                          
on 1:     return_accuracy = 0.278                                                                                       
on 1:     return_recall = 0.428                                                                                         
on 1:     return_precision = 0.433                                                                                      
on 1:     return_f1 = 0.198     

        prob_inverse = np.power(prob_in, -1)/10000


on 2: -----End with-----                                                                                                
Проверка модели BayesianClassifier |████████████████████████████████████████| 2/2 [100%] in 0.4s (4.86/s)               
On iteration 0:
With hyperparameters: [<function get_plain_data at 0x7f923ede8940>]
Got metrics: [0.2702, 0.3956182433071156, 0.5202723933041188, 0.19553933860467618]


In [14]:
# SVM

# Default hyperparameters handed to ModelRunner as `defaults`.
hp = dict(
    data_converter=PCA_extreme,
    num_classes=26,
    epochs=200,
    batch_size=100,
    learning_rate=0.01,
    regularization=0.0005,
    normalization=True,
    shift_column=True,
)

SVMRunner = ModelRunner(
    SVM_Model,
    defaults=hp,
    metrics=my_metrics,
    responsive_bar=True,
)

# Candidate values passed to `run`; the logged output reports one
# learning_rate/epochs combination per evaluated model.
params_to_change = dict(
    learning_rate=[0.001, 0.01, 0.1],
    epochs=[50, 150, 450],
)
SVMRunner.run(*datapack_mid, params_to_change, one_vs_one=True)

on 0: -----With parameters-----                                                                                         
on 0: learning_rate = 0.001                                                                                             
on 0: epochs = 50                                                                                                       
on 0: ~fit complete in                                                                                                  
on 50: 0.758s                                                                                                           
on 50: ~eval complete in 0.092s                                                                                         
on 50:     return_accuracy = 0.476                                                                                      
on 50:     return_recall = 0.623                                                                                        
on 50:     return_precision = 0.

on 1306: learning_rate = 0.1                                                                                            
on 1306: epochs = 50                                                                                                    
on 1306: ~fit complete in                                                                                               
on 1356: 0.704s                                                                                                         
on 1356: ~eval complete in 0.084s                                                                                       
on 1356:     return_accuracy = 0.476                                                                                    
on 1356:     return_recall = 0.623                                                                                      
on 1356:     return_precision = 0.420                                                                                   
on 1356:     return_f1 = 0.356  

In [None]:
# Tree
# lite data

# Default hyperparameters handed to ModelRunner as `defaults`.
hp = dict(
    data_converter=PCA_extreme,
    sample_len=32,
    num_classes=26,
    window_size=-1,
    min_samples=3,
    max_depth=7,
    tree_type='multilabel_ovo',
)

TreeRunner = ModelRunner(
    DecisionTreeClassifier,
    defaults=hp,
    metrics=my_metrics,
    responsive_bar=True,
)

# Candidate values passed to `run` for the tree type and sample length.
params_to_change = dict(
    tree_type=['multilabel_ovo', 'multilabel_ovr'],
    sample_len=[8, 64],
)
TreeRunner.run(*datapack_lite, params_to_change, one_vs_one=True)

on 0: -----With parameters-----                                                                                         
on 0: tree_type = multilabel_ovo                                                                                        
on 0: sample_len = 8                                                                                                    
on 0: ~fit complete in                                                                                                  
                                                                                                                        

        self.right_mean = data[is_true][:, -1].mean()
        ret = ret.dtype.type(ret / rcount)
        self.left_mean = data[is_false][:, -1].mean()


on 325: 632.535s                                                                                                        
on 325: ~eval complete in 3.524s                                                                                        
on 325:     return_accuracy = 0.344                                                                                     
on 325:     return_recall = 0.498                                                                                       
on 325:     return_precision = 0.287                                                                                    
on 325:     return_f1 = 0.265                                                                                           
on 325: -----End with-----                                                                                              
on 326: -----With parameters-----                                                                                       
on 326: tree_type = multilabel_o

          self.right_mean = data[is_true][:, -1].mean()
          ret = ret.dtype.type(ret / rcount)
          self.left_mean = data[is_false][:, -1].mean()


Проверка модели DecisionTreeClassifier |███████████████████▃                    | ▁▃▅ 629/1300 [48%] in 1:13:15 (0.1/s, 

In [None]:
# Vary only `window_size`, reusing the `TreeRunner` object already present in
# the kernel (this cell does not construct one itself).
params_to_change = dict(window_size=[5, 25])
TreeRunner.run(*datapack_lite, params_to_change, one_vs_one=True)

In [None]:
# Tree
# medium sized data

# Default hyperparameters handed to ModelRunner as `defaults`.
hp = dict(
    data_converter=PCA_extreme,
    sample_len=32,
    num_classes=26,
    window_size=-1,
    min_samples=4,
    max_depth=8,
    tree_type='multilabel_ovo',
)

TreeRunner = ModelRunner(
    DecisionTreeClassifier,
    defaults=hp,
    metrics=my_metrics,
    responsive_bar=True,
)

# Only the tree type is varied on this dataset.
params_to_change = dict(tree_type=['multilabel_ovo', 'multilabel_ovr'])
TreeRunner.run(*datapack_mid, params_to_change, one_vs_one=False)

In [None]:
# Tree
# full sized data

# Default hyperparameters handed to ModelRunner as `defaults`.
hp = dict(
    data_converter=PCA_extreme,
    sample_len=32,
    num_classes=26,
    window_size=-1,
    min_samples=5,
    max_depth=9,
    tree_type='multilabel_ovo',
)

TreeRunner = ModelRunner(
    DecisionTreeClassifier,
    defaults=hp,
    metrics=my_metrics,
    responsive_bar=True,
)

# Only the tree type is varied on this dataset.
params_to_change = dict(tree_type=['multilabel_ovo', 'multilabel_ovr'])
TreeRunner.run(*datapack_full, params_to_change, one_vs_one=False)

# Проверка моделей с оптимальными параметрами на полном наборе данных

In [None]:
# Deliberate stop: this cell always raises, which halts a "Run All" at this
# point. An explicit raise (same AssertionError type as before) carries a
# message and is not stripped when Python runs with -O, unlike `assert`.
# NOTE(review): presumably a guard/placeholder for this section - confirm.
raise AssertionError("Intentional stop: run the cells below this point manually.")

# Получение предсказаний из набора данных промежуточной аттестации

In [None]:
# Load the attestation CSV. Row 0 is dropped; column 0 becomes the labels and
# the remaining columns become the model input.
# NOTE(review): this rebinds `evaluation_input` and `evaluation_answers`, names
# that the data-loading cell at the top of the notebook also assigns.
evaluation_input = np.genfromtxt(
    "datasets/emnist-letters-test-data.csv",
    delimiter=",",
    filling_values=0,
)
evaluation_labels, evaluation_input = evaluation_input[1:, 0], evaluation_input[1:, 1:]

# Run every input row through convert_to_emnist before prediction.
evaluation_input = np.array([convert_to_emnist(row) for row in evaluation_input])

# NOTE(review): presumably the label column is an id laid out in groups of 800
# per class, giving 1-based class numbers - confirm against the dataset.
evaluation_answers = evaluation_labels // 800 + 1

# NOTE(review): index 3 selects one model from an earlier KNN run; which
# configuration that is cannot be seen from this cell - confirm.
model = KNNrunner.get_models()[3]
preds = model.predict(evaluation_input)
print(return_accuracy(preds, evaluation_answers))

In [None]:
import csv

# Export the predictions as a two-column CSV: a running row id and the class.
header = ['id', 'class']

# Derive the id column from the number of predictions instead of a hard-coded
# 20800, so np.hstack cannot fail on a shape mismatch when `preds` has a
# different length.
pos = np.arange(len(preds))[:, np.newaxis].astype(np.uint32)
data = np.hstack((pos, preds[:, np.newaxis])).astype(np.uint32)

# newline="" lets the csv module control line endings itself.
with open('Suleimanov_KNN_predictions.csv', 'w', newline="") as file:
    writer = csv.writer(
        file, delimiter=",", quotechar="|", quoting=csv.QUOTE_MINIMAL)
    writer.writerow(header)
    writer.writerows(data)