In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

from model_knn import KnnModel
from model_svm import SVM_Model
from model_multilog import MultilogRegression
from model_bayes import BayesianClassifier
from model_decisiontree import DecisionTreeClassifier
from metrics import return_precision, show_metrics_matrix, return_accuracy, return_recall, return_f1
from image_feature_detector import PCA_transform, get_features, get_plain_data
from model_runner import ModelRunner

In [2]:
# Medium-sized dataset used by the dimensionality-reduction comparison below.
# Empty CSV fields are filled with 0 while parsing.
training_data = np.genfromtxt(
    "datasets/medium-train.csv",
    delimiter=",",
    filling_values=0,
)
evaluation_data = np.genfromtxt(
    "datasets/medium-test.csv",
    delimiter=",",
    filling_values=0,
)

# Split the evaluation table: column 0 is kept as the answers, every other
# column as the model input.
evaluation_answers = evaluation_data[:, 0]
evaluation_input = evaluation_data[:, 1:]

# Metric callables handed to every ModelRunner created in this notebook.
my_metrics = [
    return_accuracy,
    return_recall,
    return_precision,
    return_f1,
]

# Сравнение моделей уменьшения размерности данных

In [3]:
# PCA converters of decreasing aggressiveness, all fitted on the same training
# table. The x-factors are the reduction relative to the 784 input features
# (the repr printed further down for the first one is "vector:(784, 24)").
(
    PCA_extreme,  # 24  -> x32
    PCA_severe,   # 49  -> x16
    PCA_rought,   # 98  -> x8
    PCA_medium,   # 196 -> x4
    PCA_fine,     # 392 -> x2
) = (
    PCA_transform(size).fit(training_data)
    for size in (24, 49, 98, 196, 392)
)

# Grid shared by the comparison runs below: each model is evaluated with the
# raw data, the hand-made features and every PCA variant.
params_to_change = {
    'data_converter': [
        get_plain_data,
        get_features,
        PCA_extreme,
        PCA_severe,
        PCA_rought,
        PCA_medium,
        PCA_fine,
    ]
}

In [4]:
# Logistic regression (MultilogRegression) over the data-converter grid.

# Default hyperparameters; 'data_converter' is the entry swept by
# params_to_change.
hp = {
    'data_converter': get_plain_data,
    'normalization': True,
    'shift_column': True,
    'learning_rate': 0.05,
    'batch_size': 500,
    'epochs': 300,
    'num_classes': 26,
    'reg': 'l1',
    'reg_w': 0.01,
}

MultilogRunner = ModelRunner(
    MultilogRegression,
    defaults=hp,
    metrics=my_metrics,
)
MultilogRunner.run(
    training_data,
    evaluation_input,
    evaluation_answers,
    params_to_change,
    one_vs_one=True,
)

on 0: -----With parameters-----                                                                                         
on 0: data_converter = <function get_plain_data at 0x7f3ef7198700>                                                      
on 0: ~fit complete in 242.437s                                                                                         
on 0: ~eval complete in 0.170s                                                                                          
on 0:     return_accuracy = 0.551                                                                                       
on 0:     return_recall = 0.676                                                                                         
on 0:     return_precision = 0.478                                                                                      
on 0:     return_f1 = 0.424                                                                                             
on 0: -----End with-----        

In [5]:
# k-nearest-neighbours (KnnModel) over the data-converter grid.

# Default hyperparameters; 'data_converter' is the entry swept by
# params_to_change.
hp = {
    'data_converter': get_plain_data,
    'k': 3,
}

KNNrunner = ModelRunner(
    KnnModel,
    defaults=hp,
    metrics=my_metrics,
)
KNNrunner.run(
    training_data,
    evaluation_input,
    evaluation_answers,
    params_to_change,
    one_vs_one=True,
)

on 0: -----With parameters-----                                                                                         
on 0: data_converter = <function get_plain_data at 0x7f3ef7198700>                                                      
on 0: ~fit complete in 0.028s                                                                                           
on 0: ~eval complete in 43.362s                                                                                         
on 0:     return_accuracy = 0.346                                                                                       
on 0:     return_recall = 0.516                                                                                         
on 0:     return_precision = 0.407                                                                                      
on 0:     return_f1 = 0.256                                                                                             
on 0: -----End with-----        

In [6]:
# Support vector machine (SVM_Model) over the data-converter grid.

# Default hyperparameters; 'data_converter' is the entry swept by
# params_to_change.
hp = {
    'data_converter': PCA_rought,
    'num_classes': 26,
    'epochs': 100,
    'batch_size': 1000,
    'learning_rate': 0.01,
    'regularization': 0.0005,
    'normalization': True,
    'shift_column': True,
}

SVMRunner = ModelRunner(
    SVM_Model,
    defaults=hp,
    metrics=my_metrics,
)
SVMRunner.run(
    training_data,
    evaluation_input,
    evaluation_answers,
    params_to_change,
    one_vs_one=True,
)

on 0: -----With parameters-----                                                                                         
on 0: data_converter = <function get_plain_data at 0x7f3ef7198700>                                                      
on 0: ~fit complete in 25.596s                                                                                          
on 0: ~eval complete in 0.072s                                                                                          
on 0:     return_accuracy = 0.361                                                                                       
on 0:     return_recall = 0.518                                                                                         
on 0:     return_precision = 0.466                                                                                      
on 0:     return_f1 = 0.277                                                                                             
on 0: -----End with-----        

In [7]:
# Bayesian classifier (BayesianClassifier) over the data-converter grid.

# Default hyperparameters; 'data_converter' is the entry swept by
# params_to_change.
hp = {
    'data_converter': PCA_rought,
    'num_classes': 26,
}

BayesRunner = ModelRunner(
    BayesianClassifier,
    defaults=hp,
    metrics=my_metrics,
)
BayesRunner.run(
    training_data,
    evaluation_input,
    evaluation_answers,
    params_to_change,
    one_vs_one=True,
)

on 0: -----With parameters-----                                                                                         
on 0: data_converter = <function get_plain_data at 0x7f3ef7198700>                                                      
on 0: ~fit complete in 0.087s                                                                                           
on 0: ~eval complete in 0.127s                                                                                          
on 0:     return_accuracy = 0.368                                                                                       
on 0:     return_recall = 0.509                                                                                         
on 0:     return_precision = 0.507                                                                                      
on 0:     return_f1 = 0.267                                                                                             
on 0: -----End with-----        

        prob_inverse = np.power(prob_in, -1)/10000


on 1: -----With parameters-----                                                                                         
on 1: data_converter = <function get_features at 0x7f3ef7198670>                                                        
on 1: ~fit complete in 5.071s                                                                                           
on 1: ~eval complete in 2.604s                                                                                          
on 1:     return_accuracy = 0.055                                                                                       
on 1:     return_recall = 0.101                                                                                         
on 1:     return_precision = 0.757                                                                                      
on 1:     return_f1 = 0.008                                                                                             
on 1: -----End with-----        

In [8]:
# Deliberate stop for "Run All": the cells below reload the full EMNIST letters
# data and run the hyperparameter searches, so they are meant to be executed by hand.
# An explicit raise (instead of a bare `assert(False)`) carries an explanation
# and is not removed when Python runs with assertions disabled.
raise AssertionError(
    "Intentional stop: run the hyperparameter-search cells below manually."
)

AssertionError: 

# Поиск оптимальных гиперпараметров моделей

In [None]:
# Full EMNIST letters dataset used by the hyperparameter searches below.
# Empty CSV fields are filled with 0 while parsing.
# Note: this rebinds training_data / evaluation_* defined earlier in the notebook.
training_data = np.genfromtxt(
    "datasets/emnist-letters-train.csv",
    delimiter=",",
    filling_values=0,
)
evaluation_data = np.genfromtxt(
    "datasets/emnist-letters-test.csv",
    delimiter=",",
    filling_values=0,
)

# Split the evaluation table: column 0 is kept as the answers, every other
# column as the model input.
evaluation_answers = evaluation_data[:, 0]
evaluation_input = evaluation_data[:, 1:]

# Metric callables handed to every ModelRunner created below.
my_metrics = [
    return_accuracy,
    return_recall,
    return_precision,
    return_f1,
]

In [None]:
# Logistic regression: search over learning rate and number of epochs.

# Defaults used for every hyperparameter that is not being varied.
hp = {
    'data_converter': get_plain_data,
    'normalization': True,
    'shift_column': True,
    'learning_rate': 0.02,
    'batch_size': 500,
    'epochs': 300,
    'num_classes': 26,
    'reg': 'l1',
    'reg_w': 0.01,
}

MultilogRunner = ModelRunner(
    MultilogRegression,
    defaults=hp,
    metrics=my_metrics,
)

# Candidate values per hyperparameter.
params_to_change = {
    'learning_rate': [0.01, 0.02, 0.05],
    'epochs': [300, 200, 100],
}
MultilogRunner.run(
    training_data,
    evaluation_input,
    evaluation_answers,
    params_to_change,
    one_vs_one=True,
)

In [None]:
# Logistic regression: compare regularisation kinds at a fixed weight.
# Reuses the MultilogRunner (and its defaults) created in the previous cell.
params_to_change = {
    'reg': [None, 'l1', 'l2'],
    'reg_w': [0.01],
}
MultilogRunner.run(
    training_data,
    evaluation_input,
    evaluation_answers,
    params_to_change,
    one_vs_one=False,
)

In [None]:
# k-nearest-neighbours: search over the neighbourhood size k.

# Defaults used for every hyperparameter that is not being varied.
hp = {
    'data_converter': get_plain_data,
    'k': 3,
}

KNNrunner = ModelRunner(
    KnnModel,
    defaults=hp,
    metrics=my_metrics,
)

# Candidate values for k.
params_to_change = {
    'k': [1, 3, 5, 7, 10]
}
KNNrunner.run(
    training_data,
    evaluation_input,
    evaluation_answers,
    params_to_change,
    one_vs_one=True,
)

In [None]:
# Bayes
#
# Evaluate the Bayesian classifier on the full dataset with its default settings.

# Defaults handed to the runner.
hp = {
    'data_converter': get_plain_data,
    'num_classes': 26,
}

# Fixed: the class is imported as `BayesianClassifier`; the former spelling
# `BayessianClassifier` raised NameError before anything ran.
BayesRunner = ModelRunner(BayesianClassifier, defaults=hp, metrics=my_metrics)

# Fixed: every other run in this notebook passes a {name: [candidates]} grid,
# whereas this call passed the scalar-valued defaults dict itself.
# A one-entry grid keeps the defaults and matches that shape.
# NOTE(review): ModelRunner.run is not visible here - confirm a single-candidate
# grid results in exactly one evaluation.
params_to_change = {
    'data_converter': [get_plain_data],
}
BayesRunner.run(training_data, evaluation_input, evaluation_answers, params_to_change, one_vs_one=True)

In [None]:
# Support vector machine: search over learning rate and number of epochs.

# Defaults used for every hyperparameter that is not being varied.
hp = {
    'data_converter': get_plain_data,
    'num_classes': 26,
    'epochs': 200,
    'batch_size': 1000,
    'learning_rate': 0.01,
    'regularization': 0.01,
    'normalization': True,
    'shift_column': True,
}

SVMRunner = ModelRunner(
    SVM_Model,
    defaults=hp,
    metrics=my_metrics,
)

# Candidate values per hyperparameter.
params_to_change = {
    'learning_rate': [0.001, 0.01, 0.05],
    'epochs': [100, 200, 300],
}
SVMRunner.run(
    training_data,
    evaluation_input,
    evaluation_answers,
    params_to_change,
    one_vs_one=True,
)

In [4]:
# Decision tree: compare the three most aggressive PCA converters.
# NOTE(review): the PCA_* converters are created in the earlier PCA cell, not
# here - confirm they were fitted on the data currently bound to training_data.

# Defaults used for every hyperparameter that is not being varied.
hp = {
    'data_converter': PCA_severe,
    'sample_len': 32,
    'num_classes': 26,
    'window_size': -1,
    'min_samples': 3,
    'max_depth': 7,
    'tree_type': 'multilabel_ovo',
}

TreeRunner = ModelRunner(
    DecisionTreeClassifier,
    defaults=hp,
    metrics=my_metrics,
)

# Candidate data converters.
params_to_change = {
    'data_converter': [PCA_extreme, PCA_severe, PCA_rought]
}
TreeRunner.run(
    training_data,
    evaluation_input,
    evaluation_answers,
    params_to_change,
    one_vs_one=False,
)

on 0: -----With parameters-----                                                                                         
on 0: data_converter = <PCA_transform(24);vector:(784, 24)>                                                             
on 0: ~fit complete in                                                                                                  
                                                                                                                        

        log_start = _nx.log10(start)
        self.right_mean = data[is_true][:, -1].mean()
        ret = ret.dtype.type(ret / rcount)
        self.left_mean = data[is_false][:, -1].mean()


Проверка модели DecisionTreeClassifier |⚠︎                                       | (!) 0/3 [0%] in 45:45.5 (0.00/s)      


KeyboardInterrupt: 