## Libraries and versions

### Libraries

In [16]:
#Record the version of the interpreter that is running this kernel.
#The original shelled out with `!Python -V`, which depends on an executable spelled
#`Python` being on PATH (fails on case-sensitive systems) and may describe a different
#interpreter than the kernel. A one-element list is kept so `python_version[0]` still works.
import platform
python_version = [f'Python {platform.python_version()}'] #version 3.7.9
import pandas as pd #version 1.2.3
import numpy as np #version 1.19.2


#for machine learning models
#pre processing
from category_encoders import OneHotEncoder, OrdinalEncoder, __version__ as ce_version #version 2.2.2

#split data in train and test
#version
from sklearn import __version__ as sk_version #version 0.24.1

#pre-processing
from sklearn.model_selection import train_test_split

#classificators
from sklearn.neighbors import KNeighborsClassifier, RadiusNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import CategoricalNB, ComplementNB
from sklearn.svm import SVC

#metrics
from sklearn.metrics import accuracy_score

#baseline
from sklearn.dummy import DummyClassifier

### Versions

In [2]:
#Collect one line per component, then emit them together; the printed text is the
#same five lines, in the same order, as printing each one separately.
version_lines = [
    f'{python_version[0]}',
    f'Pandas version: {pd.__version__}',
    f'Numpy version: {np.__version__}',
    f'Category Encoders version: {ce_version}',
    f'Sklearn version: {sk_version}',
]
print('\n'.join(version_lines))

Python 3.7.9
Pandas version: 1.2.3
Numpy version: 1.19.2
Category Encoders version: 2.2.2
Sklearn version: 0.24.1


## Configurations

In [3]:
import warnings
warnings.filterwarnings("ignore")

## Dataset

In [4]:
#load the cleaned training data; the relative path is resolved against the working directory
#NOTE(review): the preview of this frame lists an 'Unnamed: 0' column, which looks like a row
#index that was written into the CSV - confirm, since it is carried along as an ordinary column
dataset = pd.read_csv('train_cleaning.csv')

In [5]:
#preview the first rows of the loaded frame
dataset.head()

Unnamed: 0,ID,Gender,Ever_Married,Age,Graduated,Profession,Work_Experience,Spending_Score,Family_Size,Var_1,Segmentation
0,462809,Male,No,22,No,Healthcare,1.0,Low,4.0,Cat_4,D
1,462643,Female,Yes,38,Yes,Engineer,1.0,Average,3.0,Cat_4,A
2,466315,Female,Yes,67,Yes,Engineer,1.0,Low,1.0,Cat_6,B
3,461735,Male,Yes,67,Yes,Lawyer,0.0,High,2.0,Cat_6,B
4,462669,Female,Yes,40,Yes,Entertainment,1.0,High,6.0,Cat_6,A


## Tests

OPTIONS

Treatment of categorical variables:

variables = Profession, Spending_Score and Var_1
- onehotencoder
- ordinal

Models
- KNeighborsClassifier
- RadiusNeighborsClassifier
- RandomForestClassifier
- DecisionTreeClassifier
- ExtraTreesClassifier
- CategoricalNB
- ComplementNB

# Dataset transformation

## Encoders

### OrdinalEncoder

In [6]:
#OrdinalEncoder: every encoded column gets an explicit value -> integer code table
ordinal_codes = {
    'Ever_Married': {'No':0, 'Yes':1},
    'Graduated': {'No':0, 'Yes':1},
    'Profession': {'Homemaker':0, 'Doctor':4, 'Marketing':1, 'Healthcare':7, 'Entertainment':6,
                   'Engineer':5, 'Artist':8, 'Lawyer':3, 'Executive':2},
    'Spending_Score': {'Low':0, 'Average':1, 'High':2},
    'Var_1': {'Cat_1':1, 'Cat_2':3, 'Cat_3':4, 'Cat_4':5, 'Cat_5':0, 'Cat_6':6, 'Cat_7':2},
    'Segmentation': {'A':0, 'B':1, 'C':2, 'D':3},
}

#the encoder expects a list of {'col': ..., 'mapping': ...} entries, one per column
map_encoders = [{'col': column, 'mapping': codes} for column, codes in ordinal_codes.items()]
encoder = OrdinalEncoder(cols=list(ordinal_codes), mapping=map_encoders)

#encode, then discard the columns that are not used: ID, Gender and Family_Size
dataset_ordinal = encoder.fit_transform(dataset).drop(columns=['ID', 'Gender', 'Family_Size'])

### OneHotEncoder

In [7]:
#OneHotEncoder: expand each categorical column into indicator columns named after the category
encoder = OneHotEncoder(use_cat_names=True,
                        cols=['Ever_Married', 'Graduated','Profession', 'Spending_Score', 'Var_1'])

#the target is not one-hot encoded; its letters are mapped to integer class labels instead
map_segmentation = {'A':0, 'B':1, 'C':2, 'D':3}

#encode, convert the target, then discard the unused columns ID, Gender and Family_Size
dataset_onehorencoder = (
    encoder.fit_transform(dataset)
    .assign(Segmentation=lambda frame: frame['Segmentation'].map(map_segmentation))
    .drop(columns=['ID','Gender', 'Family_Size'])
)

## Variables

### OrdinalEncoder

In [8]:
#features used with the ordinal encoding, listed explicitly; the target is Segmentation
ordinal_feature_columns = [
    'Ever_Married',
    'Age',
    'Graduated',
    'Profession',
    'Work_Experience',
    'Spending_Score',
    'Var_1',
]
X_ordinal = dataset_ordinal.loc[:, ordinal_feature_columns]
y_ordinal = dataset_ordinal.loc[:, 'Segmentation']

### OneHotEncoder

In [9]:
#selection of all feature columns: everything except the target (Segmentation)
#NOTE(review): the dataset preview lists an 'Unnamed: 0' column, which looks like a row
#index saved into the CSV - confirm. "Every column except Segmentation" would hand it to
#the models as a feature (the ordinal feature list does not contain it), so it is dropped
#here as well; errors='ignore' keeps the cell working when that column does not exist.
X_onehotencoder = dataset_onehorencoder.drop(columns=['Segmentation', 'Unnamed: 0'], errors='ignore')
y_onehotencoder = dataset_onehorencoder['Segmentation']

## Test 1 - OneHotEncoder and KNeighborsClassifier

In [None]:
#Repeated hold-out evaluation: one-hot features + KNeighborsClassifier.
#Each repetition draws a 70/30 split, fits a new model and scores it on the held-out part.
accuracy_list = []
iterations = 100
#the loop variable is `seed` rather than `iter`, which shadowed the builtin iter() for every
#later cell; it is also used as random_state so a re-run reproduces the same splits
for seed in range(iterations):
    #first - split train and test data
    X_train, X_test, y_train, y_test = train_test_split(X_onehotencoder, y_onehotencoder, train_size=0.7,
                                                        random_state=seed)

    #second - fit the model and predict the held-out rows
    test1 = KNeighborsClassifier(n_neighbors=5, weights='distance')
    test1.fit(X_train, y_train)
    y_predict = test1.predict(X_test)

    #third - accuracy on the held-out rows
    accuracy = accuracy_score(y_test, y_predict)
    accuracy_list.append(accuracy)

#summary of the accuracy distribution (array built once instead of once per statistic)
accuracy_scores = np.asarray(accuracy_list)
max_accuracy = accuracy_scores.max()
mean_accuracy = accuracy_scores.mean()
min_accuracy = accuracy_scores.min()
std_accuracy = accuracy_scores.std()
print(f'ACCURACY FOR {iterations} ITERATIONS')
print(f'Max = {max_accuracy:.4f}\nMean = {mean_accuracy:.4f}\nMin = {min_accuracy:.4f}\nStd = {std_accuracy:.4f}')

## Test 2 - OrdinalEncoder and KNeighborsClassifier

In [None]:
#Repeated hold-out evaluation: ordinal features + KNeighborsClassifier.
#Each repetition draws a 70/30 split, fits a new model and scores it on the held-out part.
accuracy_list = []
iterations = 100
#the loop variable is `seed` rather than `iter`, which shadowed the builtin iter() for every
#later cell; it is also used as random_state so a re-run reproduces the same splits
for seed in range(iterations):
    #first - split train and test data
    X_train, X_test, y_train, y_test = train_test_split(X_ordinal, y_ordinal, train_size=0.7,
                                                        random_state=seed)

    #second - fit the model and predict the held-out rows
    test2 = KNeighborsClassifier(n_neighbors=4, weights='distance')
    test2.fit(X_train, y_train)
    y_predict = test2.predict(X_test)

    #third - accuracy on the held-out rows
    accuracy = accuracy_score(y_test, y_predict)
    accuracy_list.append(accuracy)

#summary of the accuracy distribution (array built once instead of once per statistic)
accuracy_scores = np.asarray(accuracy_list)
max_accuracy = accuracy_scores.max()
mean_accuracy = accuracy_scores.mean()
min_accuracy = accuracy_scores.min()
std_accuracy = accuracy_scores.std()
print(f'ACCURACY FOR {iterations} ITERATIONS')
print(f'Max = {max_accuracy:.4f}\nMean = {mean_accuracy:.4f}\nMin = {min_accuracy:.4f}\nStd = {std_accuracy:.4f}')

## Test 3 - OneHotEncoder and RadiusNeighborsClassifier

In [None]:
#Repeated hold-out evaluation: one-hot features + RadiusNeighborsClassifier.
#Each repetition draws a 70/30 split, fits a new model and scores it on the held-out part.
accuracy_list = []
iterations = 100
#the loop variable is `seed` rather than `iter`, which shadowed the builtin iter() for every
#later cell; it is also used as random_state so a re-run reproduces the same splits
for seed in range(iterations):
    #first - split train and test data
    X_train, X_test, y_train, y_test = train_test_split(X_onehotencoder, y_onehotencoder, train_size=0.7,
                                                        random_state=seed)

    #second - fit the model and predict the held-out rows
    #`n_neighbors` was removed: it is not a parameter of the radius-based classifier (older
    #scikit-learn releases accept and ignore extra keywords, newer ones reject them). The
    #neighbourhood is set by `radius`, written out here at its default value.
    test3 = RadiusNeighborsClassifier(radius=1.0, weights='distance', outlier_label='most_frequent')
    test3.fit(X_train, y_train)
    y_predict = test3.predict(X_test)

    #third - accuracy on the held-out rows
    accuracy = accuracy_score(y_test, y_predict)
    accuracy_list.append(accuracy)

#summary of the accuracy distribution (array built once instead of once per statistic)
accuracy_scores = np.asarray(accuracy_list)
max_accuracy = accuracy_scores.max()
mean_accuracy = accuracy_scores.mean()
min_accuracy = accuracy_scores.min()
std_accuracy = accuracy_scores.std()
print(f'ACCURACY FOR {iterations} ITERATIONS')
print(f'Max = {max_accuracy:.4f}\nMean = {mean_accuracy:.4f}\nMin = {min_accuracy:.4f}\nStd = {std_accuracy:.4f}')

## Test 4 - OrdinalEncoder and RadiusNeighborsClassifier

In [None]:
#Repeated hold-out evaluation: ordinal features + RadiusNeighborsClassifier.
#Each repetition draws a 70/30 split, fits a new model and scores it on the held-out part.
accuracy_list = []
iterations = 100
#the loop variable is `seed` rather than `iter`, which shadowed the builtin iter() for every
#later cell; it is also used as random_state so a re-run reproduces the same splits
for seed in range(iterations):
    #first - split train and test data
    X_train, X_test, y_train, y_test = train_test_split(X_ordinal, y_ordinal, train_size=0.7,
                                                        random_state=seed)

    #second - fit the model and predict the held-out rows
    #`n_neighbors` was removed: it is not a parameter of the radius-based classifier (older
    #scikit-learn releases accept and ignore extra keywords, newer ones reject them). The
    #neighbourhood is set by `radius`, written out here at its default value.
    test4 = RadiusNeighborsClassifier(radius=1.0, weights='distance', outlier_label='most_frequent')
    test4.fit(X_train, y_train)
    y_predict = test4.predict(X_test)

    #third - accuracy on the held-out rows
    accuracy = accuracy_score(y_test, y_predict)
    accuracy_list.append(accuracy)

#summary of the accuracy distribution (array built once instead of once per statistic)
accuracy_scores = np.asarray(accuracy_list)
max_accuracy = accuracy_scores.max()
mean_accuracy = accuracy_scores.mean()
min_accuracy = accuracy_scores.min()
std_accuracy = accuracy_scores.std()
print(f'ACCURACY FOR {iterations} ITERATIONS')
print(f'Max = {max_accuracy:.4f}\nMean = {mean_accuracy:.4f}\nMin = {min_accuracy:.4f}\nStd = {std_accuracy:.4f}')

## Test 5 - OneHotEncoder and RandomForestClassifier

In [None]:
#Repeated hold-out evaluation: one-hot features + RandomForestClassifier.
#Each repetition draws a 70/30 split, fits a new model and scores it on the held-out part.
accuracy_list = []
iterations = 100
#the loop variable is `seed` rather than `iter`, which shadowed the builtin iter() for every
#later cell; it seeds both the split and the forest so a re-run reproduces the same scores
for seed in range(iterations):
    #first - split train and test data
    X_train, X_test, y_train, y_test = train_test_split(X_onehotencoder, y_onehotencoder, train_size=0.7,
                                                        random_state=seed)

    #second - fit the model and predict the held-out rows
    test5 = RandomForestClassifier(max_depth=4, min_samples_leaf=30, random_state=seed)
    test5.fit(X_train, y_train)
    y_predict = test5.predict(X_test)

    #third - accuracy on the held-out rows
    accuracy = accuracy_score(y_test, y_predict)
    accuracy_list.append(accuracy)

#summary of the accuracy distribution (array built once instead of once per statistic)
accuracy_scores = np.asarray(accuracy_list)
max_accuracy = accuracy_scores.max()
mean_accuracy = accuracy_scores.mean()
min_accuracy = accuracy_scores.min()
std_accuracy = accuracy_scores.std()
print(f'ACCURACY FOR {iterations} ITERATIONS')
print(f'Max = {max_accuracy:.4f}\nMean = {mean_accuracy:.4f}\nMin = {min_accuracy:.4f}\nStd = {std_accuracy:.4f}')

## Test 6 - OrdinalEncoder and RandomForestClassifier

In [None]:
#Repeated hold-out evaluation: ordinal features + RandomForestClassifier.
#Each repetition draws a 70/30 split, fits a new model and scores it on the held-out part.
accuracy_list = []
iterations = 100
#the loop variable is `seed` rather than `iter`, which shadowed the builtin iter() for every
#later cell; it seeds both the split and the forest so a re-run reproduces the same scores
for seed in range(iterations):
    #first - split train and test data
    X_train, X_test, y_train, y_test = train_test_split(X_ordinal, y_ordinal, train_size=0.7,
                                                        random_state=seed)

    #second - fit the model and predict the held-out rows
    test6 = RandomForestClassifier(max_depth=4, min_samples_leaf=30, random_state=seed)
    test6.fit(X_train, y_train)
    y_predict = test6.predict(X_test)

    #third - accuracy on the held-out rows
    accuracy = accuracy_score(y_test, y_predict)
    accuracy_list.append(accuracy)

#summary of the accuracy distribution (array built once instead of once per statistic)
accuracy_scores = np.asarray(accuracy_list)
max_accuracy = accuracy_scores.max()
mean_accuracy = accuracy_scores.mean()
min_accuracy = accuracy_scores.min()
std_accuracy = accuracy_scores.std()
print(f'ACCURACY FOR {iterations} ITERATIONS')
print(f'Max = {max_accuracy:.4f}\nMean = {mean_accuracy:.4f}\nMin = {min_accuracy:.4f}\nStd = {std_accuracy:.4f}')

## Test 7 - OneHotEncoder and DecisionTreeClassifier

In [None]:
#Repeated hold-out evaluation: one-hot features + DecisionTreeClassifier.
#Each repetition draws a stratified 70/30 split, fits a new model and scores it on the held-out part.
accuracy_list = []
iterations = 100
#the loop variable is `seed` rather than `iter`, which shadowed the builtin iter() for every
#later cell; it seeds both the split and the tree so a re-run reproduces the same scores
for seed in range(iterations):
    #first - split train and test data
    #train_size is 0.7 (was 0.9) to match every other test in this notebook: a held-out set
    #one third the size inflates the spread of the accuracy and makes the statistics
    #incomparable. The stratified split is kept as written.
    X_train, X_test, y_train, y_test = train_test_split(X_onehotencoder, y_onehotencoder, train_size=0.7,
                                                        stratify=y_onehotencoder, random_state=seed)

    #second - fit the model and predict the held-out rows
    test7 = DecisionTreeClassifier(max_depth=5, min_samples_leaf=30, random_state=seed)
    test7.fit(X_train, y_train)
    y_predict = test7.predict(X_test)

    #third - accuracy on the held-out rows
    accuracy = accuracy_score(y_test, y_predict)
    accuracy_list.append(accuracy)

#summary of the accuracy distribution (array built once instead of once per statistic)
accuracy_scores = np.asarray(accuracy_list)
max_accuracy = accuracy_scores.max()
mean_accuracy = accuracy_scores.mean()
min_accuracy = accuracy_scores.min()
std_accuracy = accuracy_scores.std()
print(f'ACCURACY FOR {iterations} ITERATIONS')
print(f'Max = {max_accuracy:.4f}\nMean = {mean_accuracy:.4f}\nMin = {min_accuracy:.4f}\nStd = {std_accuracy:.4f}')

In [None]:
#class proportions of the training target from whichever cell assigned y_train most recently
#(the last split of the preceding loop when the notebook is run top to bottom)
y_train.value_counts(normalize=True)

## Test 8 - OrdinalEncoder and DecisionTreeClassifier

In [None]:
#Repeated hold-out evaluation: ordinal features + DecisionTreeClassifier.
#Each repetition draws a 70/30 split, fits a new model and scores it on the held-out part.
accuracy_list = []
iterations = 100
#the loop variable is `seed` rather than `iter`, which shadowed the builtin iter() for every
#later cell; it seeds both the split and the tree so a re-run reproduces the same scores
for seed in range(iterations):
    #first - split train and test data
    X_train, X_test, y_train, y_test = train_test_split(X_ordinal, y_ordinal, train_size=0.7,
                                                        random_state=seed)

    #second - fit the model and predict the held-out rows
    test8 = DecisionTreeClassifier(max_depth=5, min_samples_leaf=30, random_state=seed)
    test8.fit(X_train, y_train)
    y_predict = test8.predict(X_test)

    #third - accuracy on the held-out rows
    accuracy = accuracy_score(y_test, y_predict)
    accuracy_list.append(accuracy)

#summary of the accuracy distribution (array built once instead of once per statistic)
accuracy_scores = np.asarray(accuracy_list)
max_accuracy = accuracy_scores.max()
mean_accuracy = accuracy_scores.mean()
min_accuracy = accuracy_scores.min()
std_accuracy = accuracy_scores.std()
print(f'ACCURACY FOR {iterations} ITERATIONS')
print(f'Max = {max_accuracy:.4f}\nMean = {mean_accuracy:.4f}\nMin = {min_accuracy:.4f}\nStd = {std_accuracy:.4f}')

## Test 9 - OneHotEncoder and ExtraTreesClassifier

In [None]:
#Repeated hold-out evaluation: one-hot features + ExtraTreesClassifier.
#Each repetition draws a 70/30 split, fits a new model and scores it on the held-out part.
accuracy_list = []
iterations = 100
#the loop variable is `seed` rather than `iter`, which shadowed the builtin iter() for every
#later cell; it seeds both the split and the ensemble so a re-run reproduces the same scores
for seed in range(iterations):
    #first - split train and test data
    X_train, X_test, y_train, y_test = train_test_split(X_onehotencoder, y_onehotencoder, train_size=0.7,
                                                        random_state=seed)

    #second - fit the model and predict the held-out rows
    #stored as `test9`: the original reused the name `test7` and so overwrote the
    #DecisionTreeClassifier kept from Test 7
    test9 = ExtraTreesClassifier(max_depth=4, min_samples_leaf=30, random_state=seed)
    test9.fit(X_train, y_train)
    y_predict = test9.predict(X_test)

    #third - accuracy on the held-out rows
    accuracy = accuracy_score(y_test, y_predict)
    accuracy_list.append(accuracy)

#summary of the accuracy distribution (array built once instead of once per statistic)
accuracy_scores = np.asarray(accuracy_list)
max_accuracy = accuracy_scores.max()
mean_accuracy = accuracy_scores.mean()
min_accuracy = accuracy_scores.min()
std_accuracy = accuracy_scores.std()
print(f'ACCURACY FOR {iterations} ITERATIONS')
print(f'Max = {max_accuracy:.4f}\nMean = {mean_accuracy:.4f}\nMin = {min_accuracy:.4f}\nStd = {std_accuracy:.4f}')

## Test 10 - OrdinalEncoder and ExtraTreesClassifier

In [None]:
#Repeated hold-out evaluation: ordinal features + ExtraTreesClassifier.
#Each repetition draws a 70/30 split, fits a new model and scores it on the held-out part.
accuracy_list = []
iterations = 100
#the loop variable is `seed` rather than `iter`, which shadowed the builtin iter() for every
#later cell; it seeds both the split and the ensemble so a re-run reproduces the same scores
for seed in range(iterations):
    #first - split train and test data
    X_train, X_test, y_train, y_test = train_test_split(X_ordinal, y_ordinal, train_size=0.7,
                                                        random_state=seed)

    #second - fit the model and predict the held-out rows
    test10 = ExtraTreesClassifier(max_depth=4, min_samples_leaf=30, random_state=seed)
    test10.fit(X_train, y_train)
    y_predict = test10.predict(X_test)

    #third - accuracy on the held-out rows
    accuracy = accuracy_score(y_test, y_predict)
    accuracy_list.append(accuracy)

#summary of the accuracy distribution (array built once instead of once per statistic)
accuracy_scores = np.asarray(accuracy_list)
max_accuracy = accuracy_scores.max()
mean_accuracy = accuracy_scores.mean()
min_accuracy = accuracy_scores.min()
std_accuracy = accuracy_scores.std()
print(f'ACCURACY FOR {iterations} ITERATIONS')
print(f'Max = {max_accuracy:.4f}\nMean = {mean_accuracy:.4f}\nMin = {min_accuracy:.4f}\nStd = {std_accuracy:.4f}')

## Test 11 - OneHotEncoder and CategoricalNB

In [None]:
#Repeated hold-out evaluation: one-hot features + CategoricalNB.
#Each repetition draws a 70/30 split, fits a new model and scores it on the held-out part.
accuracy_list = []
iterations = 100
#the loop variable is `seed` rather than `iter`, which shadowed the builtin iter() for every
#later cell; it is also used as random_state so a re-run reproduces the same splits
for seed in range(iterations):
    #first - split train and test data
    X_train, X_test, y_train, y_test = train_test_split(X_onehotencoder, y_onehotencoder, train_size=0.7,
                                                        random_state=seed)

    #second - fit the model and predict the held-out rows
    test11 = CategoricalNB()
    test11.fit(X_train, y_train)
    y_predict = test11.predict(X_test)

    #third - accuracy on the held-out rows
    accuracy = accuracy_score(y_test, y_predict)
    accuracy_list.append(accuracy)

#summary of the accuracy distribution (array built once instead of once per statistic)
accuracy_scores = np.asarray(accuracy_list)
max_accuracy = accuracy_scores.max()
mean_accuracy = accuracy_scores.mean()
min_accuracy = accuracy_scores.min()
std_accuracy = accuracy_scores.std()
print(f'ACCURACY FOR {iterations} ITERATIONS')
print(f'Max = {max_accuracy:.4f}\nMean = {mean_accuracy:.4f}\nMin = {min_accuracy:.4f}\nStd = {std_accuracy:.4f}')

## Test 12 - OrdinalEncoder and CategoricalNB

In [None]:
#Repeated hold-out evaluation: ordinal features + CategoricalNB.
#Each repetition draws a 70/30 split, fits a new model and scores it on the held-out part.
accuracy_list = []
iterations = 100
#the loop variable is `seed` rather than `iter`, which shadowed the builtin iter() for every
#later cell; it is also used as random_state so a re-run reproduces the same splits
for seed in range(iterations):
    #first - split train and test data
    X_train, X_test, y_train, y_test = train_test_split(X_ordinal, y_ordinal, train_size=0.7,
                                                        random_state=seed)

    #second - fit the model and predict the held-out rows
    test12 = CategoricalNB()
    test12.fit(X_train, y_train)
    y_predict = test12.predict(X_test)

    #third - accuracy on the held-out rows
    accuracy = accuracy_score(y_test, y_predict)
    accuracy_list.append(accuracy)

#summary of the accuracy distribution (array built once instead of once per statistic)
accuracy_scores = np.asarray(accuracy_list)
max_accuracy = accuracy_scores.max()
mean_accuracy = accuracy_scores.mean()
min_accuracy = accuracy_scores.min()
std_accuracy = accuracy_scores.std()
print(f'ACCURACY FOR {iterations} ITERATIONS')
print(f'Max = {max_accuracy:.4f}\nMean = {mean_accuracy:.4f}\nMin = {min_accuracy:.4f}\nStd = {std_accuracy:.4f}')

## Test 13 - OneHotEncoder and ComplementNB

In [None]:
#Repeated hold-out evaluation: one-hot features + ComplementNB.
#Each repetition draws a 70/30 split, fits a new model and scores it on the held-out part.
accuracy_list = []
iterations = 100
#the loop variable is `seed` rather than `iter`, which shadowed the builtin iter() for every
#later cell; it is also used as random_state so a re-run reproduces the same splits
for seed in range(iterations):
    #first - split train and test data
    X_train, X_test, y_train, y_test = train_test_split(X_onehotencoder, y_onehotencoder, train_size=0.7,
                                                        random_state=seed)

    #second - fit the model and predict the held-out rows
    test13 = ComplementNB()
    test13.fit(X_train, y_train)
    y_predict = test13.predict(X_test)

    #third - accuracy on the held-out rows
    accuracy = accuracy_score(y_test, y_predict)
    accuracy_list.append(accuracy)

#summary of the accuracy distribution (array built once instead of once per statistic)
accuracy_scores = np.asarray(accuracy_list)
max_accuracy = accuracy_scores.max()
mean_accuracy = accuracy_scores.mean()
min_accuracy = accuracy_scores.min()
std_accuracy = accuracy_scores.std()
print(f'ACCURACY FOR {iterations} ITERATIONS')
print(f'Max = {max_accuracy:.4f}\nMean = {mean_accuracy:.4f}\nMin = {min_accuracy:.4f}\nStd = {std_accuracy:.4f}')

## Test 14 - OrdinalEncoder and ComplementNB

In [None]:
#Repeated hold-out evaluation: ordinal features + ComplementNB.
#Each repetition draws a 70/30 split, fits a new model and scores it on the held-out part.
accuracy_list = []
iterations = 100
#the loop variable is `seed` rather than `iter`, which shadowed the builtin iter() for every
#later cell; it is also used as random_state so a re-run reproduces the same splits
for seed in range(iterations):
    #first - split train and test data
    X_train, X_test, y_train, y_test = train_test_split(X_ordinal, y_ordinal, train_size=0.7,
                                                        random_state=seed)

    #second - fit the model and predict the held-out rows
    test14 = ComplementNB()
    test14.fit(X_train, y_train)
    y_predict = test14.predict(X_test)

    #third - accuracy on the held-out rows
    accuracy = accuracy_score(y_test, y_predict)
    accuracy_list.append(accuracy)

#summary of the accuracy distribution (array built once instead of once per statistic)
accuracy_scores = np.asarray(accuracy_list)
max_accuracy = accuracy_scores.max()
mean_accuracy = accuracy_scores.mean()
min_accuracy = accuracy_scores.min()
std_accuracy = accuracy_scores.std()
print(f'ACCURACY FOR {iterations} ITERATIONS')
print(f'Max = {max_accuracy:.4f}\nMean = {mean_accuracy:.4f}\nMin = {min_accuracy:.4f}\nStd = {std_accuracy:.4f}')

## Test 15 - OneHotEncoder and SVC

In [19]:
#Repeated hold-out evaluation: one-hot features + SVC (default settings).
#Each repetition draws a 70/30 split, fits a new model and scores it on the held-out part.
accuracy_list = []
iterations = 100
#the loop variable is `seed` rather than `iter`, which shadowed the builtin iter() for every
#later cell; it is also used as random_state so a re-run reproduces the same splits
for seed in range(iterations):
    #first - split train and test data
    X_train, X_test, y_train, y_test = train_test_split(X_onehotencoder, y_onehotencoder, train_size=0.7,
                                                        random_state=seed)

    #second - fit the model and predict the held-out rows
    test15 = SVC()
    test15.fit(X_train, y_train)
    y_predict = test15.predict(X_test)

    #third - accuracy on the held-out rows
    accuracy = accuracy_score(y_test, y_predict)
    accuracy_list.append(accuracy)

#summary of the accuracy distribution (array built once instead of once per statistic)
accuracy_scores = np.asarray(accuracy_list)
max_accuracy = accuracy_scores.max()
mean_accuracy = accuracy_scores.mean()
min_accuracy = accuracy_scores.min()
std_accuracy = accuracy_scores.std()
print(f'ACCURACY FOR {iterations} ITERATIONS')
print(f'Max = {max_accuracy:.4f}\nMean = {mean_accuracy:.4f}\nMin = {min_accuracy:.4f}\nStd = {std_accuracy:.4f}')

ACCURACY FOR 100 ITERATIONS
Max = 0.4841
Mean = 0.4575
Min = 0.4368
Std = 0.0077


## Test 16 - OrdinalEncoder and SVC

In [20]:
#Repeated hold-out evaluation: ordinal features + SVC (default settings).
#Each repetition draws a 70/30 split, fits a new model and scores it on the held-out part.
accuracy_list = []
iterations = 100
#the loop variable is `seed` rather than `iter`, which shadowed the builtin iter() for every
#later cell; it is also used as random_state so a re-run reproduces the same splits
for seed in range(iterations):
    #first - split train and test data
    X_train, X_test, y_train, y_test = train_test_split(X_ordinal, y_ordinal, train_size=0.7,
                                                        random_state=seed)

    #second - fit the model and predict the held-out rows
    #stored as `test16`: the original reused the name `test15` and so overwrote the
    #model kept from Test 15
    test16 = SVC()
    test16.fit(X_train, y_train)
    y_predict = test16.predict(X_test)

    #third - accuracy on the held-out rows
    accuracy = accuracy_score(y_test, y_predict)
    accuracy_list.append(accuracy)

#summary of the accuracy distribution (array built once instead of once per statistic)
accuracy_scores = np.asarray(accuracy_list)
max_accuracy = accuracy_scores.max()
mean_accuracy = accuracy_scores.mean()
min_accuracy = accuracy_scores.min()
std_accuracy = accuracy_scores.std()
print(f'ACCURACY FOR {iterations} ITERATIONS')
print(f'Max = {max_accuracy:.4f}\nMean = {mean_accuracy:.4f}\nMin = {min_accuracy:.4f}\nStd = {std_accuracy:.4f}')

ACCURACY FOR 100 ITERATIONS
Max = 0.4759
Mean = 0.4551
Min = 0.4350
Std = 0.0087


## Baseline (sklearn.dummy)

### OneHotEncoder

In [None]:
#Baseline on the one-hot features: DummyClassifier drawing labels uniformly at random.
#Each repetition draws a 70/30 split, fits the baseline and scores it on the held-out part.
accuracy_list = []
iterations = 100
#the loop variable is `seed` rather than `iter`, which shadowed the builtin iter() for every
#later cell; it seeds both the split and the random guesses so a re-run reproduces the scores
for seed in range(iterations):
    #first - split train and test data
    X_train, X_test, y_train, y_test = train_test_split(X_onehotencoder, y_onehotencoder, train_size=0.7,
                                                        random_state=seed)

    #second - fit the baseline and predict the held-out rows
    test_dummy_1 = DummyClassifier(strategy='uniform', random_state=seed)
    test_dummy_1.fit(X_train, y_train)
    y_predict = test_dummy_1.predict(X_test)

    #third - accuracy on the held-out rows
    accuracy = accuracy_score(y_test, y_predict)
    accuracy_list.append(accuracy)

#summary of the accuracy distribution (array built once instead of once per statistic)
accuracy_scores = np.asarray(accuracy_list)
max_accuracy = accuracy_scores.max()
mean_accuracy = accuracy_scores.mean()
min_accuracy = accuracy_scores.min()
std_accuracy = accuracy_scores.std()
print(f'ACCURACY FOR {iterations} ITERATIONS')
print(f'Max = {max_accuracy:.4f}\nMean = {mean_accuracy:.4f}\nMin = {min_accuracy:.4f}\nStd = {std_accuracy:.4f}')

### OrdinalEncoder

In [None]:
#Baseline on the ordinal features: DummyClassifier drawing labels uniformly at random.
#Each repetition draws a 70/30 split, fits the baseline and scores it on the held-out part.
accuracy_list = []
iterations = 100
#the loop variable is `seed` rather than `iter`, which shadowed the builtin iter() for every
#later cell; it seeds both the split and the random guesses so a re-run reproduces the scores
for seed in range(iterations):
    #first - split train and test data
    X_train, X_test, y_train, y_test = train_test_split(X_ordinal, y_ordinal, train_size=0.7,
                                                        random_state=seed)

    #the original re-encoded both splits here with a second OrdinalEncoder, calling
    #fit_transform on the test split too (so train and test got independent code tables)
    #and overwriting the notebook-level `encoder`. X_ordinal is taken from the frame that
    #was already ordinal-encoded, so the step is dropped.

    #second - fit the baseline and predict the held-out rows
    test_dummy_2 = DummyClassifier(strategy='uniform', random_state=seed)
    test_dummy_2.fit(X_train, y_train)
    y_predict = test_dummy_2.predict(X_test)

    #third - accuracy on the held-out rows
    accuracy = accuracy_score(y_test, y_predict)
    accuracy_list.append(accuracy)

#summary of the accuracy distribution (array built once instead of once per statistic)
accuracy_scores = np.asarray(accuracy_list)
max_accuracy = accuracy_scores.max()
mean_accuracy = accuracy_scores.mean()
min_accuracy = accuracy_scores.min()
std_accuracy = accuracy_scores.std()
print(f'ACCURACY FOR {iterations} ITERATIONS')
print(f'Max = {max_accuracy:.4f}\nMean = {mean_accuracy:.4f}\nMin = {min_accuracy:.4f}\nStd = {std_accuracy:.4f}')

## Conclusions

- test 6 reached the best scores (highest mean and lowest standard deviation) over 100 iterations
- test 12 had a mean close to that of test 6, but with a smaller standard deviation
- the scores of tests 6 and 12 are about 100% higher than (i.e. roughly double) those of the dummy classifier
- CategoricalNB appears to be faster than RandomForestClassifier while reaching the same accuracy

## Next step

- tune the model parameters to increase accuracy for tests 6 and 12