In [1]:
import numpy as np
import pandas as pd

from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score
from sklearn.utils import resample

# Data

In [2]:
np.random.seed(2)

dataset_size = ["small", "medium", "large", "beta", "alpha", "gamma"][-1]

dataset_info = {
    "small": {
        "dataset_name": "wine",
        "class_name": "Class",
        "drop_fields": []
    },
    "medium": {
        "dataset_name": "breast-cancer-wisconsin",
        "class_name": "Class",
        "drop_fields": ["Sample code number"]
    },
    "large": {
        "dataset_name": "seismic-bumps",
        "class_name": "class",
        "drop_fields": []
    },
    "beta": {
        "dataset_name": "Acoustic_Extinguisher_Fire_Dataset",
        "class_name": "Class",
        "drop_fields": []
    },
    "alpha": {
        "dataset_name": "ThoracicSurgery",
        "class_name": "Risk1Yr",
        "drop_fields": []
    },
    "gamma": {
        "dataset_name": "agaricus-lepiota",
        "class_name": "Class",
        "drop_fields": []
    },
}

dataset_name = dataset_info[dataset_size]["dataset_name"]
class_name = dataset_info[dataset_size]["class_name"]
drop_fields = dataset_info[dataset_size]["drop_fields"]

df = pd.read_csv('../data/' + dataset_name + ".csv")
df = df.drop(drop_fields, axis=1)
df = df.iloc[np.random.permutation(len(df))]

if dataset_name == "breast-cancer-wisconsin":
    df[class_name].replace({2: 0, 4: 1}, inplace=True)
    
if dataset_name == "ThoracicSurgery":
    df[class_name].replace({'T': 1, 'F': 0}, inplace=True)
    
if dataset_name == "agaricus-lepiota":
    df[class_name].replace({'p': 1, 'e': 0}, inplace=True)

n_cut = int(0.8*len(df))
df_trn = df[:n_cut]
df_tst = df[n_cut:]

if dataset_name == "ThoracicSurgery":
    df_1 = df_trn[df_trn[class_name] == 1]
    df_0 = df_trn[df_trn[class_name] != 1]  
    df_1_upsampled = resample(df_1, random_state=2, n_samples=len(df_0), replace=True)

    df_upsampled = pd.concat([df_1_upsampled, df_0], ignore_index=True)
    df_upsampled = df_upsampled.iloc[np.random.permutation(df_upsampled.index)]
    
    df_0_downsampled = resample(df_0, random_state=20, n_samples=len(df_1), replace=False)
    df_downsampled = pd.concat([df_0_downsampled, df_1], ignore_index=True)
    df_downsampled = df_downsampled.iloc[np.random.permutation(df_downsampled.index)]
    
    # df_trn = df_upsampled
    df_trn = df_downsampled

X_trn = df_trn.drop(class_name, axis=1)
y_trn = df_trn[class_name]

X_tst = df_tst.drop(class_name, axis=1)
y_tst = df_tst[class_name]

# Random Forest

In [3]:
from RandomForest_df import RandomForest_df

M = X_trn.shape[1]
CV_dict_params = {'NT': [1, 10, 25, 50, 75, 100],
                  'F': sorted(list(set([1, 3, int(np.log2(M + 1)), int(np.sqrt(M))])))
                  }

best_F, best_NT, best_score = None, None, -1
all_metrics_CV = []

for F_ in CV_dict_params['F']:
    for NT_ in CV_dict_params['NT']:
        all_scores = np.zeros(5)
        N = len(X_trn)
        for run in range(5):
            
            ind_tst_ = np.full(len(X_trn), False)
            ind_tst_[int(N*run/5):int(N*(run+1)/5)] = True
            
            X_trn_, y_trn_ = X_trn[ind_tst_], y_trn[ind_tst_]
            X_tst_, y_tst_ = X_trn[~ind_tst_], y_trn[~ind_tst_]
            
            clf = RandomForest_df(NT=NT_, F=F_)
            clf.fit(X_trn_, y_trn_)
            all_scores[run] = clf.score(X_tst_, y_tst_)
            
        score_ = all_scores.mean()
        all_metrics_CV.append(score_)
        
        if score_ > best_score:
            best_F, best_NT, best_score = F_, NT_, score_
        print(f'(F, NT) = {(F_, NT_)} \t--> \t F1-Score = {round(score_, 3)}')
        
all_metrics_CV = np.array(all_metrics_CV)


(F, NT) = (1, 1) 	--> 	 F1-Score = 0.982
(F, NT) = (1, 10) 	--> 	 F1-Score = 0.95
(F, NT) = (1, 25) 	--> 	 F1-Score = 0.979
(F, NT) = (1, 50) 	--> 	 F1-Score = 0.988
(F, NT) = (1, 75) 	--> 	 F1-Score = 0.989
(F, NT) = (1, 100) 	--> 	 F1-Score = 0.99
(F, NT) = (3, 1) 	--> 	 F1-Score = 0.936
(F, NT) = (3, 10) 	--> 	 F1-Score = 0.969
(F, NT) = (3, 25) 	--> 	 F1-Score = 0.986
(F, NT) = (3, 50) 	--> 	 F1-Score = 0.977
(F, NT) = (3, 75) 	--> 	 F1-Score = 0.977
(F, NT) = (3, 100) 	--> 	 F1-Score = 0.975
(F, NT) = (4, 1) 	--> 	 F1-Score = 0.951
(F, NT) = (4, 10) 	--> 	 F1-Score = 0.911
(F, NT) = (4, 25) 	--> 	 F1-Score = 0.969
(F, NT) = (4, 50) 	--> 	 F1-Score = 0.964
(F, NT) = (4, 75) 	--> 	 F1-Score = 0.973
(F, NT) = (4, 100) 	--> 	 F1-Score = 0.971


In [4]:
print(f'Best Parameters (F, NT): {(best_F, best_NT)}')
best_RF = RandomForest_df(NT=best_NT, F=best_F)
best_RF.fit(X_trn, y_trn, verbose=3)


Best Parameters (F, NT): (1, 100)
Feature importance: Index(['odor', 'stalk-root', 'gill-color', 'cap-color', 'cap-shape',
       'cap-surface', 'population', 'gill-spacing', 'spore-print-color',
       'stalk-surface-above-ring', 'habitat', 'stalk-surface-below-ring',
       'stalk-shape', 'gill-size', 'ring-type', 'stalk-color-below-ring',
       'stalk-color-above-ring', 'ring-number', 'bruises?', 'veil-color',
       'gill-attachment', 'veil-type'],
      dtype='object')


RandomForest_df()

In [5]:
y_trn_hat = best_RF.predict(X_trn)
y_tst_hat = best_RF.predict(X_tst)

In [6]:
print(f'Accuracy (test): {round(accuracy_score(y_tst.to_numpy(), y_tst_hat), 3)}')
print(f'Precision (test): {round(precision_score(y_tst.to_numpy(), y_tst_hat), 3)}')
print(f'Recall (test): {round(recall_score(y_tst.to_numpy(), y_tst_hat), 3)}')
print(f'F1 Score (test): {round(f1_score(y_tst.to_numpy(), y_tst_hat), 3)}')
print('-'*15)
print(f'Accuracy (train): {round(accuracy_score(y_trn.to_numpy(), y_trn_hat), 3)}')
print(f'Precision (train): {round(precision_score(y_trn.to_numpy(), y_trn_hat), 3)}')
print(f'Recall (train): {round(recall_score(y_trn.to_numpy(), y_trn_hat), 3)}')
print(f'F1 Score (train): {round(f1_score(y_trn.to_numpy(), y_trn_hat), 3)}')


Accuracy (test): 0.99
Precision (test): 1.0
Recall (test): 0.98
F1 Score (test): 0.99
---------------
Accuracy (train): 0.987
Precision (train): 1.0
Recall (train): 0.973
F1 Score (train): 0.986


In [8]:
all_importances = []
all_metrics = []

for F_ in CV_dict_params['F']:
    for NT_ in CV_dict_params['NT']:
        clf = RandomForest_df(NT=NT_, F=F_)
        print(f'(F, NT) = {(F_, NT_)}')
        clf.fit(X_trn, y_trn, verbose=3)
        all_importances.append(np.flip(np.argsort(clf.importance)))
        y_tst_hat = clf.predict(X_tst)     
        
        acc = accuracy_score(y_tst.to_numpy(), y_tst_hat)
        prec = precision_score(y_tst.to_numpy(), y_tst_hat)
        rec = recall_score(y_tst.to_numpy(), y_tst_hat)
        f1_ = f1_score(y_tst.to_numpy(), y_tst_hat)
           
        print(f'Accuracy (test): {round(acc, 3)}')
        print(f'Precision (test): {round(prec, 3)}')
        print(f'Recall (test): {round(rec, 3)}')
        print(f'F1 Score (test): {round(f1_, 3)}')
        print('-'*15)
        
        all_metrics.append([acc, prec, rec, f1_])
        
all_importances = np.array(all_importances)
all_metrics = np.array(all_metrics)

(F, NT) = (1, 1)
Feature importance: Index(['stalk-color-above-ring', 'stalk-root', 'cap-surface', 'ring-number',
       'veil-color', 'bruises?', 'stalk-color-below-ring', 'gill-spacing',
       'gill-size', 'gill-color', 'cap-color', 'odor', 'gill-attachment',
       'habitat', 'stalk-shape', 'population', 'stalk-surface-above-ring',
       'stalk-surface-below-ring', 'veil-type', 'ring-type',
       'spore-print-color', 'cap-shape'],
      dtype='object')
Accuracy (test): 0.808
Precision (test): 0.763
Recall (test): 0.885
F1 Score (test): 0.82
---------------
(F, NT) = (1, 10)
Feature importance: Index(['cap-surface', 'stalk-root', 'gill-color', 'spore-print-color',
       'stalk-color-below-ring', 'stalk-shape', 'cap-color', 'gill-spacing',
       'ring-type', 'stalk-color-above-ring', 'stalk-surface-above-ring',
       'gill-size', 'bruises?', 'habitat', 'population',
       'stalk-surface-below-ring', 'veil-color', 'cap-shape', 'odor',
       'ring-number', 'gill-attachment', 've

In [9]:
all_metrics_CV

array([0.98246708, 0.9501691 , 0.97927216, 0.98754945, 0.98932952,
       0.99012133, 0.93619155, 0.96929713, 0.98600619, 0.9772101 ,
       0.97657253, 0.97492239, 0.95112136, 0.91128487, 0.96870849,
       0.96409401, 0.97254011, 0.97127873])

In [10]:
all_metrics_CV

array([0.98246708, 0.9501691 , 0.97927216, 0.98754945, 0.98932952,
       0.99012133, 0.93619155, 0.96929713, 0.98600619, 0.9772101 ,
       0.97657253, 0.97492239, 0.95112136, 0.91128487, 0.96870849,
       0.96409401, 0.97254011, 0.97127873])

In [11]:
print(all_metrics.T)

[[0.808      0.95138462 0.98584615 0.98830769 0.99261538 0.99015385
  0.96307692 0.92061538 0.93969231 0.95938462 0.98461538 0.97969231
  0.78030769 0.99384615 0.98769231 0.98769231 0.98769231 0.98276923]
 [0.76318622 0.99724518 1.         1.         1.         1.
  0.93031359 1.         0.99858156 1.         1.         1.
  0.94758065 1.         1.         1.         1.         1.        ]
 [0.88514357 0.90387016 0.97128589 0.97627965 0.98501873 0.98002497
  1.         0.83895131 0.87890137 0.917603   0.96878901 0.9588015
  0.58676654 0.98751561 0.97503121 0.97503121 0.97503121 0.9650437 ]
 [0.81965318 0.94826457 0.98543382 0.98799747 0.99245283 0.98991173
  0.96389892 0.91242363 0.93492696 0.95703125 0.98414711 0.9789675
  0.72474942 0.99371859 0.98735777 0.98735777 0.98735777 0.98221093]]


In [12]:
print(all_metrics_CV.T)

[0.98246708 0.9501691  0.97927216 0.98754945 0.98932952 0.99012133
 0.93619155 0.96929713 0.98600619 0.9772101  0.97657253 0.97492239
 0.95112136 0.91128487 0.96870849 0.96409401 0.97254011 0.97127873]


In [13]:
from utils.print_latex import print_table

print("IMPORTANCES")
print_table(all_importances.T)
print("-"*15)

print("ACC - PRECISION - RECALL - F1 (TEST)")
print_table(all_metrics.T)
print("-"*15)

print("F1 (CV)")
print_table(all_metrics_CV)
print("-"*15)


IMPORTANCES
\textbf{\#1} & & 13 & 1 & 10 & 10 & 4 & 4 & 8 & 20 & 19 & 19 & 4 & 4 & 11 & 19 & 19 & 19 & 4 & 4\\
\hline
\textbf{\#2} & & 10 & 10 & 8 & 4 & 10 & 10 & 19 & 21 & 21 & 21 & 21 & 21 & 21 & 4 & 4 & 4 & 19 & 19\\
\hline
\textbf{\#3} & & 1 & 8 & 1 & 1 & 2 & 8 & 13 & 19 & 7 & 4 & 19 & 19 & 9 & 6 & 7 & 7 & 7 & 8\\
\hline
\textbf{\#4} & & 17 & 19 & 14 & 8 & 1 & 2 & 11 & 4 & 4 & 14 & 8 & 8 & 1 & 21 & 12 & 21 & 8 & 7\\
\hline
\textbf{\#5} & & 16 & 14 & 19 & 14 & 8 & 0 & 7 & 7 & 13 & 20 & 7 & 20 & 2 & 9 & 17 & 12 & 21 & 12\\
\hline
\textbf{\#6} & & 3 & 9 & 4 & 20 & 0 & 1 & 9 & 13 & 20 & 7 & 20 & 7 & 3 & 18 & 2 & 8 & 12 & 21\\
\hline
\textbf{\#7} & & 14 & 2 & 2 & 0 & 20 & 20 & 21 & 2 & 14 & 13 & 11 & 11 & 4 & 1 & 13 & 2 & 2 & 2\\
\hline
\textbf{\#8} & & 6 & 6 & 6 & 19 & 19 & 6 & 1 & 3 & 2 & 8 & 13 & 2 & 5 & 2 & 11 & 11 & 17 & 18\\
\hline
\textbf{\#9} & & 7 & 18 & 20 & 2 & 6 & 19 & 2 & 8 & 12 & 2 & 10 & 10 & 6 & 7 & 8 & 17 & 14 & 14\\
\hline
\textbf{\#10} & & 8 & 13 & 7 & 6 & 21 & 11 & 3

In [14]:
from sys import modules
del modules["utils.print_latex"]

# Decision Tree

In [15]:
from DecisionForest_df import DecisionForest_df

M = X_trn.shape[1]
CV_dict_params = {'NT': [1, 10, 25, 50, 75, 100],
                  'F': sorted(list(set([int(M/4), int(M/2), int(3*M/4)]))) + [-1]
                  }

best_F, best_NT, best_score = None, None, -1
all_metrics_CV = []

for F_ in CV_dict_params['F']:
    for NT_ in CV_dict_params['NT']:
        all_scores = np.zeros(5)
        N = len(X_trn)
        for run in range(5):
            
            ind_tst_ = np.full(len(X_trn), False)
            ind_tst_[int(N*run/5):int(N*(run+1)/5)] = True
            
            X_trn_, y_trn_ = X_trn[ind_tst_], y_trn[ind_tst_]
            X_tst_, y_tst_ = X_trn[~ind_tst_], y_trn[~ind_tst_]
            
            clf = DecisionForest_df(NT=NT_, F=F_)
            clf.fit(X_trn_, y_trn_)
            all_scores[run] = clf.score(X_tst_, y_tst_)
            
        score_ = all_scores.mean()
        all_metrics_CV.append(score_)
        if score_ > best_score:
            best_F, best_NT, best_score = F_, NT_, score_
        print(f'(F, NT) = {(F_, NT_)} \t--> \t F1-Score = {round(score_, 3)}')
        
all_metrics_CV = np.array(all_metrics_CV)


(F, NT) = (5, 1) 	--> 	 F1-Score = 0.819
(F, NT) = (5, 10) 	--> 	 F1-Score = 0.939
(F, NT) = (5, 25) 	--> 	 F1-Score = 0.913
(F, NT) = (5, 50) 	--> 	 F1-Score = 0.881
(F, NT) = (5, 75) 	--> 	 F1-Score = 0.889
(F, NT) = (5, 100) 	--> 	 F1-Score = 0.884
(F, NT) = (11, 1) 	--> 	 F1-Score = 0.91
(F, NT) = (11, 10) 	--> 	 F1-Score = 0.913
(F, NT) = (11, 25) 	--> 	 F1-Score = 0.0
(F, NT) = (11, 50) 	--> 	 F1-Score = 0.0
(F, NT) = (11, 75) 	--> 	 F1-Score = 0.0
(F, NT) = (11, 100) 	--> 	 F1-Score = 0.0
(F, NT) = (16, 1) 	--> 	 F1-Score = 0.997
(F, NT) = (16, 10) 	--> 	 F1-Score = 0.0
(F, NT) = (16, 25) 	--> 	 F1-Score = 0.0
(F, NT) = (16, 50) 	--> 	 F1-Score = 0.0
(F, NT) = (16, 75) 	--> 	 F1-Score = 0.0
(F, NT) = (16, 100) 	--> 	 F1-Score = 0.0
(F, NT) = (-1, 1) 	--> 	 F1-Score = 0.99
(F, NT) = (-1, 10) 	--> 	 F1-Score = 0.858
(F, NT) = (-1, 25) 	--> 	 F1-Score = 0.806
(F, NT) = (-1, 50) 	--> 	 F1-Score = 0.511
(F, NT) = (-1, 75) 	--> 	 F1-Score = 0.0
(F, NT) = (-1, 100) 	--> 	 F1-Score = 0.

In [16]:
print(f'Best Parameters (F, NT): {(best_F, best_NT)}')
best_DF = DecisionForest_df(NT=best_NT, F=best_F)
best_DF.fit(X_trn, y_trn, verbose=3)
print(np.flip(np.argsort(best_DF.importance)))

Best Parameters (F, NT): (16, 1)
Feature importance: Index(['stalk-color-below-ring', 'habitat', 'spore-print-color', 'cap-surface',
       'odor', 'stalk-shape', 'cap-color', 'bruises?', 'gill-attachment',
       'gill-spacing', 'gill-size', 'gill-color', 'stalk-root', 'population',
       'stalk-surface-above-ring', 'stalk-surface-below-ring',
       'stalk-color-above-ring', 'veil-type', 'veil-color', 'ring-number',
       'ring-type', 'cap-shape'],
      dtype='object')
[14 21 19  1  4  9  2  3  5  6  7  8 10 20 11 12 13 15 16 17 18  0]


In [17]:
y_trn_hat = best_DF.predict(X_trn)
y_tst_hat = best_DF.predict(X_tst)

In [18]:
print(f'Accuracy (test): {round(accuracy_score(y_tst.to_numpy(), y_tst_hat), 3)}')
print(f'Precision (test): {round(precision_score(y_tst.to_numpy(), y_tst_hat), 3)}')
print(f'Recall (test): {round(recall_score(y_tst.to_numpy(), y_tst_hat), 3)}')
print(f'F1 Score (test): {round(f1_score(y_tst.to_numpy(), y_tst_hat), 3)}')
print('-'*15)
print(f'Accuracy (train): {round(accuracy_score(y_trn.to_numpy(), y_trn_hat), 3)}')
print(f'Precision (train): {round(precision_score(y_trn.to_numpy(), y_trn_hat), 3)}')
print(f'Recall (train): {round(recall_score(y_trn.to_numpy(), y_trn_hat), 3)}')
print(f'F1 Score (train): {round(f1_score(y_trn.to_numpy(), y_trn_hat), 3)}')


Accuracy (test): 0.997
Precision (test): 1.0
Recall (test): 0.994
F1 Score (test): 0.997
---------------
Accuracy (train): 0.998
Precision (train): 1.0
Recall (train): 0.995
F1 Score (train): 0.998


In [19]:
all_importances = []
all_metrics = []

for F_ in CV_dict_params['F']:
    for NT_ in CV_dict_params['NT']:
        clf = DecisionForest_df(NT=NT_, F=F_)
        print(f'(F, NT) = {(F_, NT_)}')
        clf.fit(X_trn, y_trn, verbose=3)
        y_tst_hat = clf.predict(X_tst)   
        
        all_importances.append(np.flip(np.argsort(clf.importance)))     
        
        acc = accuracy_score(y_tst.to_numpy(), y_tst_hat)
        prec = precision_score(y_tst.to_numpy(), y_tst_hat)
        rec = recall_score(y_tst.to_numpy(), y_tst_hat)
        f1_ = f1_score(y_tst.to_numpy(), y_tst_hat)
           
        print(f'Accuracy (test): {round(acc, 3)}')
        print(f'Precision (test): {round(prec, 3)}')
        print(f'Recall (test): {round(rec, 3)}')
        print(f'F1 Score (test): {round(f1_, 3)}')
        print('-'*15)
        
        all_metrics.append([acc, prec, rec, f1_])
        
all_importances = np.array(all_importances)
all_metrics = np.array(all_metrics)

(F, NT) = (5, 1)
Feature importance: Index(['population', 'stalk-root', 'cap-surface', 'stalk-color-below-ring',
       'stalk-color-above-ring', 'gill-color', 'cap-color', 'bruises?', 'odor',
       'gill-attachment', 'gill-spacing', 'gill-size', 'habitat',
       'stalk-shape', 'stalk-surface-above-ring', 'stalk-surface-below-ring',
       'veil-type', 'veil-color', 'ring-number', 'ring-type',
       'spore-print-color', 'cap-shape'],
      dtype='object')
Accuracy (test): 0.806
Precision (test): 0.751
Recall (test): 0.905
F1 Score (test): 0.821
---------------
(F, NT) = (5, 10)
Feature importance: Index(['cap-color', 'spore-print-color', 'population',
       'stalk-color-above-ring', 'habitat', 'cap-surface', 'gill-spacing',
       'ring-type', 'bruises?', 'stalk-color-below-ring', 'odor', 'gill-size',
       'stalk-root', 'stalk-shape', 'stalk-surface-above-ring', 'ring-number',
       'cap-shape', 'gill-color', 'gill-attachment',
       'stalk-surface-below-ring', 'veil-type', 've

  _warn_prf(average, modifier, msg_start, len(result))


Accuracy (test): 0.507
Precision (test): 0.0
Recall (test): 0.0
F1 Score (test): 0.0
---------------
(F, NT) = (11, 50)
Feature importance: Index(['spore-print-color', 'stalk-color-below-ring', 'cap-color', 'odor',
       'habitat', 'population', 'gill-color', 'ring-number', 'cap-surface',
       'gill-size', 'stalk-shape', 'ring-type', 'gill-spacing', 'cap-shape',
       'stalk-color-above-ring', 'veil-color', 'stalk-surface-above-ring',
       'stalk-surface-below-ring', 'stalk-root', 'gill-attachment', 'bruises?',
       'veil-type'],
      dtype='object')


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy (test): 0.507
Precision (test): 0.0
Recall (test): 0.0
F1 Score (test): 0.0
---------------
(F, NT) = (11, 75)
Feature importance: Index(['spore-print-color', 'stalk-color-below-ring', 'odor', 'cap-color',
       'ring-number', 'gill-color', 'population', 'habitat', 'cap-surface',
       'gill-size', 'stalk-shape', 'ring-type', 'stalk-surface-above-ring',
       'stalk-color-above-ring', 'gill-spacing', 'veil-color', 'cap-shape',
       'stalk-surface-below-ring', 'stalk-root', 'gill-attachment', 'bruises?',
       'veil-type'],
      dtype='object')


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy (test): 0.507
Precision (test): 0.0
Recall (test): 0.0
F1 Score (test): 0.0
---------------
(F, NT) = (11, 100)
Feature importance: Index(['spore-print-color', 'stalk-color-below-ring', 'odor', 'cap-color',
       'gill-color', 'gill-size', 'ring-number', 'cap-surface', 'habitat',
       'population', 'ring-type', 'stalk-surface-above-ring', 'stalk-shape',
       'gill-spacing', 'veil-color', 'stalk-color-above-ring', 'cap-shape',
       'stalk-surface-below-ring', 'stalk-root', 'gill-attachment', 'bruises?',
       'veil-type'],
      dtype='object')


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy (test): 0.507
Precision (test): 0.0
Recall (test): 0.0
F1 Score (test): 0.0
---------------
(F, NT) = (16, 1)
Feature importance: Index(['stalk-color-below-ring', 'habitat', 'spore-print-color', 'cap-surface',
       'odor', 'stalk-shape', 'cap-color', 'bruises?', 'gill-attachment',
       'gill-spacing', 'gill-size', 'gill-color', 'stalk-root', 'population',
       'stalk-surface-above-ring', 'stalk-surface-below-ring',
       'stalk-color-above-ring', 'veil-type', 'veil-color', 'ring-number',
       'ring-type', 'cap-shape'],
      dtype='object')
Accuracy (test): 0.997
Precision (test): 1.0
Recall (test): 0.994
F1 Score (test): 0.997
---------------
(F, NT) = (16, 10)
Feature importance: Index(['stalk-color-below-ring', 'spore-print-color', 'cap-surface', 'odor',
       'habitat', 'gill-size', 'ring-type', 'ring-number',
       'stalk-color-above-ring', 'stalk-surface-above-ring', 'gill-spacing',
       'cap-color', 'bruises?', 'gill-attachment', 'stalk-root', 'gill-color',

  _warn_prf(average, modifier, msg_start, len(result))


Accuracy (test): 0.507
Precision (test): 0.0
Recall (test): 0.0
F1 Score (test): 0.0
---------------
(F, NT) = (16, 25)
Feature importance: Index(['stalk-color-below-ring', 'spore-print-color', 'cap-surface', 'odor',
       'gill-size', 'habitat', 'ring-number', 'stalk-color-above-ring',
       'gill-spacing', 'population', 'cap-color', 'stalk-root',
       'stalk-surface-above-ring', 'ring-type', 'stalk-shape', 'gill-color',
       'stalk-surface-below-ring', 'gill-attachment', 'veil-type', 'bruises?',
       'veil-color', 'cap-shape'],
      dtype='object')


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy (test): 0.507
Precision (test): 0.0
Recall (test): 0.0
F1 Score (test): 0.0
---------------
(F, NT) = (16, 50)
Feature importance: Index(['stalk-color-below-ring', 'spore-print-color', 'odor', 'cap-surface',
       'ring-number', 'habitat', 'gill-spacing', 'gill-size', 'cap-color',
       'ring-type', 'stalk-color-above-ring', 'gill-color', 'stalk-root',
       'population', 'stalk-surface-above-ring', 'stalk-shape',
       'gill-attachment', 'stalk-surface-below-ring', 'bruises?', 'veil-type',
       'veil-color', 'cap-shape'],
      dtype='object')


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy (test): 0.507
Precision (test): 0.0
Recall (test): 0.0
F1 Score (test): 0.0
---------------
(F, NT) = (16, 75)
Feature importance: Index(['stalk-color-below-ring', 'spore-print-color', 'odor', 'cap-surface',
       'ring-number', 'habitat', 'gill-size', 'cap-color', 'gill-spacing',
       'gill-color', 'ring-type', 'stalk-color-above-ring',
       'stalk-surface-above-ring', 'stalk-root', 'population', 'veil-color',
       'stalk-shape', 'gill-attachment', 'stalk-surface-below-ring',
       'bruises?', 'veil-type', 'cap-shape'],
      dtype='object')


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy (test): 0.507
Precision (test): 0.0
Recall (test): 0.0
F1 Score (test): 0.0
---------------
(F, NT) = (16, 100)
Feature importance: Index(['stalk-color-below-ring', 'spore-print-color', 'odor', 'cap-surface',
       'ring-number', 'habitat', 'gill-size', 'cap-color', 'gill-spacing',
       'gill-color', 'ring-type', 'stalk-color-above-ring',
       'stalk-surface-above-ring', 'population', 'stalk-root', 'veil-color',
       'stalk-shape', 'gill-attachment', 'stalk-surface-below-ring',
       'bruises?', 'veil-type', 'cap-shape'],
      dtype='object')


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy (test): 0.507
Precision (test): 0.0
Recall (test): 0.0
F1 Score (test): 0.0
---------------
(F, NT) = (-1, 1)
Feature importance: Index(['ring-number', 'odor', 'stalk-color-below-ring', 'gill-color',
       'habitat', 'stalk-shape', 'cap-surface', 'cap-color', 'bruises?',
       'gill-attachment', 'gill-spacing', 'gill-size', 'stalk-root',
       'population', 'stalk-surface-above-ring', 'stalk-surface-below-ring',
       'stalk-color-above-ring', 'veil-type', 'veil-color', 'ring-type',
       'spore-print-color', 'cap-shape'],
      dtype='object')
Accuracy (test): 0.991
Precision (test): 1.0
Recall (test): 0.983
F1 Score (test): 0.991
---------------
(F, NT) = (-1, 10)
Feature importance: Index(['odor', 'cap-color', 'ring-number', 'habitat', 'spore-print-color',
       'gill-color', 'stalk-color-below-ring', 'population', 'gill-size',
       'stalk-shape', 'stalk-color-above-ring', 'stalk-surface-above-ring',
       'stalk-surface-below-ring', 'veil-color', 'ring-type', 'cap

  _warn_prf(average, modifier, msg_start, len(result))


Accuracy (test): 0.507
Precision (test): 0.0
Recall (test): 0.0
F1 Score (test): 0.0
---------------
(F, NT) = (-1, 100)
Feature importance: Index(['stalk-color-below-ring', 'spore-print-color', 'habitat', 'cap-color',
       'odor', 'gill-size', 'ring-type', 'stalk-surface-above-ring',
       'population', 'stalk-color-above-ring', 'gill-color', 'ring-number',
       'stalk-surface-below-ring', 'gill-spacing', 'stalk-root', 'cap-surface',
       'stalk-shape', 'bruises?', 'veil-color', 'cap-shape', 'gill-attachment',
       'veil-type'],
      dtype='object')
Accuracy (test): 0.507
Precision (test): 0.0
Recall (test): 0.0
F1 Score (test): 0.0
---------------


  _warn_prf(average, modifier, msg_start, len(result))


In [20]:
print(all_metrics.T)


[[0.80553846 0.96123077 0.92430769 0.90584615 0.91446154 0.91323077
  0.94523077 0.93784615 0.50707692 0.50707692 0.50707692 0.50707692
  0.99692308 0.50707692 0.50707692 0.50707692 0.50707692 0.50707692
  0.99138462 0.88738462 0.82769231 0.73230769 0.50707692 0.50707692]
 [0.75129534 0.98938992 0.99273256 1.         1.         1.
  0.945      1.         0.         0.         0.         0.
  1.         0.         0.         0.         0.         0.
  1.         1.         1.         1.         0.         0.        ]
 [0.9051186  0.93133583 0.85268414 0.80898876 0.82646692 0.82397004
  0.94382022 0.87390762 0.         0.         0.         0.
  0.9937578  0.         0.         0.         0.         0.
  0.98252185 0.77153558 0.65043695 0.45692884 0.         0.        ]
 [0.82106455 0.95948553 0.91739422 0.89440994 0.90498975 0.90349076
  0.94440974 0.93271153 0.         0.         0.         0.
  0.99686913 0.         0.         0.         0.         0.
  0.99118388 0.87103594 0.7881997

In [21]:
print(all_metrics_CV.T)

[0.81876692 0.93864944 0.91288706 0.88081691 0.888659   0.88360293
 0.9099438  0.91324671 0.         0.         0.         0.
 0.99734331 0.         0.         0.         0.         0.
 0.99010784 0.85835245 0.80572396 0.5107593  0.         0.        ]


In [22]:
from utils.print_latex import print_table

print("IMPORTANCES")
print_table(all_importances.T)
print("-"*15)

print("ACC - PRECISION - RECALL - F1 (TEST)")
print_table(all_metrics.T)
print("-"*15)

print("F1 (CV)")
print_table(all_metrics_CV)
print("-"*15)


IMPORTANCES
\textbf{\#1} & & 20 & 2 & 19 & 2 & 2 & 2 & 20 & 19 & 19 & 19 & 19 & 19 & 14 & 14 & 14 & 14 & 14 & 14 & 17 & 4 & 19 & 21 & 14 & 14\\
\hline
\textbf{\#2} & & 10 & 19 & 2 & 19 & 20 & 19 & 21 & 14 & 14 & 14 & 14 & 14 & 21 & 19 & 19 & 19 & 19 & 19 & 4 & 2 & 14 & 19 & 19 & 19\\
\hline
\textbf{\#3} & & 1 & 20 & 20 & 21 & 19 & 21 & 13 & 20 & 20 & 2 & 4 & 4 & 19 & 1 & 1 & 4 & 4 & 4 & 14 & 17 & 4 & 4 & 21 & 21\\
\hline
\textbf{\#4} & & 14 & 13 & 14 & 20 & 13 & 13 & 10 & 2 & 2 & 4 & 2 & 2 & 1 & 4 & 4 & 1 & 1 & 1 & 8 & 21 & 20 & 20 & 4 & 2\\
\hline
\textbf{\#5} & & 13 & 21 & 13 & 14 & 21 & 20 & 19 & 21 & 4 & 21 & 17 & 8 & 4 & 21 & 7 & 17 & 17 & 17 & 21 & 19 & 7 & 14 & 20 & 4\\
\hline
\textbf{\#6} & & 8 & 1 & 1 & 13 & 14 & 14 & 17 & 4 & 21 & 20 & 8 & 7 & 9 & 7 & 21 & 21 & 21 & 21 & 9 & 8 & 21 & 11 & 7 & 7\\
\hline
\textbf{\#7} & & 2 & 6 & 21 & 4 & 4 & 11 & 7 & 1 & 1 & 8 & 20 & 17 & 2 & 18 & 17 & 6 & 7 & 7 & 1 & 14 & 17 & 7 & 11 & 18\\
\hline
\textbf{\#8} & & 3 & 18 & 3 & 1 & 10 & 8 & 1 