In [13]:
import pandas as pd
import numpy as np
from sklearn.metrics import balanced_accuracy_score, accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, make_scorer
from sklearn.model_selection import train_test_split, cross_val_score, cross_validate, StratifiedKFold, RandomizedSearchCV
from sklearn.linear_model import LogisticRegression, RidgeClassifier, LassoCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier, NearestCentroid
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.base import clone
from sklearn.naive_bayes import BernoulliNB, GaussianNB
from sklearn.svm import SVC
from sklearn.svm import LinearSVC
from sklearn.ensemble import StackingClassifier

import warnings
    
# Notebook-wide configuration: warning policy, print format, seeds and the
# label encodings shared by the cells below.

# Every warning category is suppressed for the whole session.
warnings.filterwarnings("ignore")
np.set_printoptions(precision=5, suppress=True)

# Seed handed to the estimators / splitters, and the joblib worker count.
RANDOM_STATE = 1
N_JOBS = -1

# Short display names, in the same order as the integer class codes below.
class_names = ["Canis", "Dysg. Equisimilis", "Dysg. Dysgalactiae"]

# Full subspecies label -> integer class code.
map_target = {
    label: code
    for code, label in enumerate((
        "Streptococcus canis",
        "Streptococcus dysgalactiae subsp. equisimilis",
        "Streptococcus dysgalactiae subsp. dysgalactiae",
    ))
}

# Integer class code -> abbreviated subspecies label.
map_target_inv = dict(enumerate((
    "Strept. canis",
    "Strept. dysg. equisimilis",
    "Strept. dysg. dysgalactiae",
)))

# Antibiotic test outcome -> binary code ("S" -> 1, "NS" -> 0).
map_target_antibiotici = dict(S=1, NS=0)

# Column layout used for the positional slicing of the data sheet:
# `start` is the position of the first spectrum column; the remaining values
# are the widths of the column groups that follow the spectrum.
start = 9
n_antibiotici = 9
n_geni = 27
n_virulenza = 18

In [14]:
# Load the data
# `n` appears in the file name and is later used as the width of the
# spectrum column group (presumably the number of MALDI peaks -- confirm).
n = 46
df = pd.read_csv(
    f"../data/Dati_Matemaldomics_{n}picchi.csv",
    delimiter=';',
    index_col='ID Strain',
)

In [15]:
# Encode the subspecies label as an integer class code.
df['subspecies'] = df["Putative Subspecies"].map(map_target)

# Auxiliary descriptive columns, selected by position.
feat_agg = df.iloc[:, [7, 8]]
display(feat_agg)
st = df.iloc[:, [4]]
display(st)
subspecies = df[['subspecies']]

# Boundaries of the consecutive column groups: spectrum, antibiotic
# phenotypes, resistance genes, virulence genes.
stop_maldi = start + n
stop_antibiotici = stop_maldi + n_antibiotici
stop_geni = stop_antibiotici + n_geni
stop_virulenza = stop_geni + n_virulenza

# Spectrum features: missing values become 0, decimal commas become dots,
# then everything is converted to float in one pass.
maldi = (
    df.iloc[:, start:stop_maldi]
    .fillna(0)
    .replace(',', '.', regex=True)
    .astype(float)
)
display(maldi)

# Explicit copies: these frames are modified below (re-encoded and pruned),
# which must not go through a slice of `df`.
antibiotici = df.iloc[:, stop_maldi:stop_antibiotici].copy()
geni_antibiotici = df.iloc[:, stop_antibiotici:stop_geni].copy()
virulenza = df.iloc[:, stop_geni:stop_virulenza].copy()

targets = {'antibiotici' : antibiotici,
            'geni_antibiotici' : geni_antibiotici,
            'virulenza' : virulenza}

for str_target, target in targets.items():
    unbalanced = []
    for column in list(target.columns):
        if str_target == 'antibiotici':
            # Antibiotic phenotypes are stored as "S"/"NS" strings.
            target[column] = df[column].map(map_target_antibiotici)
        # Share of rows equal to 0 for this target column.
        rapporto = (target[column] == 0).sum() / target.shape[0]
        print(column+" : "+str(rapporto))
        # Discard targets where one outcome covers more than 85% of the rows.
        if rapporto < 0.15 or rapporto > 0.85:
            unbalanced.append(column)
    target.drop(columns=unbalanced, inplace=True)

    display(target)

# Register the remaining label frames as target groups of their own.
# (Previously the second line wrote to `target`, i.e. it added a 'subspecies'
# column to the last frame of the loop instead of a new entry in `targets`.)
targets['st'] = st
targets['subspecies'] = subspecies

Unnamed: 0_level_0,LANCEFIELD GROUP,Haemolysis
ID Strain,Unnamed: 1_level_1,Unnamed: 2_level_1
V13,G,b
V142,G,b
V151,G,b
V160,G,b
V161,G,b
...,...,...
V800,C,a
V82,G,b
V90,G,b
V91,G,b


Unnamed: 0_level_0,ST
ID Strain,Unnamed: 1_level_1
V13,ST13
V142,ST23
V151,ST95
V160,ST15
V161,ST9
...,...
V800,ST307
V82,ST9
V90,ST13
V91,ST9


Unnamed: 0_level_0,"2223,140967","2241,073989","2262,75751","2679,802856","2978,296408","3159,441237","3354,28405","3364,608472","3397,909861","3418,174965",...,"9030,351844","9073,208159","9487,183195","10103,20284","10400,80576","10491,16654","10930,54833","13276,73249","14943,03835","15048,89449"
ID Strain,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
V13,0.000665,0.000180,0.000198,0.000059,0.000865,0.000497,0.000197,0.000272,0.000578,0.000198,...,0.000206,0.000133,0.000587,0.000180,0.000065,0.000065,0.000172,0.000076,0.000058,0.000142
V142,0.000648,0.000156,0.000192,0.000400,0.000698,0.000414,0.000684,0.000349,0.000617,0.000494,...,0.000219,0.000205,0.000465,0.000193,0.000142,0.000116,0.000165,0.000067,0.000067,0.000194
V151,0.000545,0.000331,0.000204,0.000277,0.000613,0.000464,0.000279,0.001031,0.000624,0.000409,...,0.000261,0.000263,0.000585,0.000235,0.000198,0.000152,0.000208,0.000084,0.000092,0.000069
V160,0.000509,0.000191,0.000193,0.000266,0.000489,0.000358,0.000628,0.000692,0.000564,0.000611,...,0.000311,0.000312,0.000083,0.000241,0.000242,0.000187,0.000245,0.000082,0.000075,0.000136
V161,0.000517,0.000118,0.000158,0.000163,0.000610,0.000502,0.000621,0.000315,0.000507,0.000638,...,0.000206,0.000224,0.000711,0.000242,0.000156,0.000116,0.000154,0.000093,0.000072,0.000203
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
V800,0.000456,0.000253,0.000125,0.000061,0.000586,0.000365,0.000258,0.001095,0.000537,0.000214,...,0.000102,0.000195,0.000843,0.000250,0.000088,0.000109,0.000298,0.000060,0.000073,0.000059
V82,0.000459,0.000121,0.000147,0.000270,0.000571,0.000338,0.000596,0.000321,0.000537,0.000668,...,0.000263,0.000277,0.000452,0.000254,0.000211,0.000162,0.000203,0.000059,0.000070,0.000144
V90,0.000505,0.000154,0.000158,0.000181,0.000610,0.000473,0.000374,0.000525,0.000513,0.000377,...,0.000288,0.000310,0.000671,0.000237,0.000224,0.000185,0.000230,0.000089,0.000080,0.000137
V91,0.000520,0.000124,0.000169,0.000174,0.000597,0.000414,0.000572,0.000310,0.000532,0.000646,...,0.000237,0.000259,0.000514,0.000237,0.000202,0.000150,0.000175,0.000092,0.000078,0.000214


Eritromicina : 0.461038961038961
Ceftiofur : 0.0
Tetraciclina : 0.5194805194805194
Gentamicina : 0.6233766233766234
Penicillina : 0.0
Ampicillina : 0.0
Sulfametossazolo_trimethoprim : 0.01948051948051948
Clindamicina : 0.2662337662337662
Enrofloxacin : 0.6688311688311688


Unnamed: 0_level_0,Eritromicina,Tetraciclina,Gentamicina,Clindamicina,Enrofloxacin
ID Strain,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
V13,0,0,0,1,0
V142,0,1,1,1,0
V151,1,1,0,1,0
V160,1,0,0,1,0
V161,1,1,0,1,0
...,...,...,...,...,...
V800,1,0,1,1,0
V82,1,1,0,1,1
V90,1,0,0,1,0
V91,1,1,0,1,0


aad(6) : 0.935064935064935
ANT(6)-Ia : 0.8246753246753247
APH(2'')-IIIa : 1.0
APH(3')-IIIa : 0.9025974025974026
catS : 0.9675324675324676
dfrF : 0.9805194805194806
E. faecalis chloramphenicol acetyltransferase : 0.9935064935064936
Erm(47) : 0.987012987012987
ErmB : 0.8181818181818182
fexA : 0.9935064935064936
L._reuteri cat-TC : 1.0
lmrP : 0.006493506493506494
lnuC : 0.987012987012987
lnuD : 0.9935064935064936
lsaC : 0.961038961038961
lsaE : 0.7857142857142857
mefE : 0.8506493506493507
optrA : 0.9935064935064936
poxtA : 0.9935064935064936
SAT-4 : 0.922077922077922
tet(40) : 0.987012987012987
tet(L) : 0.9935064935064936
tetM : 0.8181818181818182
tetO : 0.7402597402597403
tetS : 0.9805194805194806
tetT : 0.974025974025974
vatE : 0.9935064935064936


Unnamed: 0_level_0,ANT(6)-Ia,ErmB,lsaE,tetM,tetO
ID Strain,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
V13,0,0,0,0,0
V142,0,0,0,0,0
V151,0,0,0,0,0
V160,0,0,0,0,0
V161,0,0,0,0,0
...,...,...,...,...,...
V800,0,0,0,1,0
V82,0,0,0,0,0
V90,0,0,0,0,0
V91,0,0,0,0,0


fbp54 : 0.0
gbs0630 : 0.9935064935064936
gbs0631 : 0.9935064935064936
gbs0632 : 0.9935064935064936
hasC : 0.0
lmb : 0.9935064935064936
mf2 : 0.961038961038961
mf3 : 0.6753246753246753
scpA : 0.9935064935064936
sda : 0.8766233766233766
ska : 0.9935064935064936
slo : 0.9935064935064936
smeZ : 0.9935064935064936
spec : 0.974025974025974
speg : 0.9090909090909091
spek : 0.961038961038961
spel : 0.974025974025974
spem : 0.948051948051948


Unnamed: 0_level_0,mf3
ID Strain,Unnamed: 1_level_1
V13,0
V142,1
V151,0
V160,0
V161,1
...,...
V800,0
V82,1
V90,0
V91,1


In [16]:
def standard_scaler(X_train, X_test):
    """Standardise both splits with statistics learned on the training split.

    Parameters
    ----------
    X_train, X_test : array-like
        Feature matrices; the scaler is fitted on ``X_train`` only.

    Returns
    -------
    tuple
        ``(X_train_scaled, X_test_scaled)`` as returned by the scaler.
    """
    fitted = StandardScaler().fit(X_train)
    return fitted.transform(X_train), fitted.transform(X_test)

def dimensionality_reduction(X_train, X_test, n_components):
    """Project both splits with a PCA fitted on the training split.

    Parameters
    ----------
    X_train, X_test : array-like
        Feature matrices; the PCA is fitted on ``X_train`` only.
    n_components
        Forwarded unchanged to ``PCA(n_components=...)``.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(X_train_pca, X_test_pca)``; both frames get a fresh default index
        and integer column labels.
    """
    reducer = PCA(n_components=n_components)
    train_projected = pd.DataFrame(reducer.fit_transform(X_train))
    test_projected = pd.DataFrame(reducer.transform(X_test))
    return train_projected, test_projected

def makeScoreMeanWithoutNaN(metrics):
    """Replace every array in ``metrics`` with the mean of its non-NaN entries.

    The dictionary is updated in place and also returned. For each entry the
    key, the raw values, the NaN-filtered values and the resulting mean are
    printed; the final dictionary is printed once at the end.

    Parameters
    ----------
    metrics : dict
        Maps a metric name to a numpy array of scores.

    Returns
    -------
    dict
        The same ``metrics`` object, now mapping each name to a scalar mean.
    """
    for name in list(metrics):
        raw_values = metrics[name]
        print(name)
        print(raw_values)
        valid_values = raw_values[~np.isnan(raw_values)]
        print(valid_values)
        average = np.mean(valid_values)
        metrics[name] = average
        print(average)
    print(metrics)
    return metrics

In [17]:
# define the models
# Candidate classifiers, keyed by the label used in the results tables and as
# the lookup key into `param_grid`. Estimators that accept a seed share
# RANDOM_STATE so that repeated runs give the same results.
models = {
        'LogisticRegression': LogisticRegression(random_state=RANDOM_STATE),
        'Ridge' : RidgeClassifier(random_state=RANDOM_STATE),
        'DecisionTree': DecisionTreeClassifier(random_state=RANDOM_STATE),
        'K-nn': KNeighborsClassifier(),
        'RandomForest': RandomForestClassifier(random_state=RANDOM_STATE),
        'BernoulliNB': BernoulliNB(),
        'GaussianNB': GaussianNB(),
        #'NearestCentroid': NearestCentroid()
        'SVC' : SVC(),
        # LinearSVC was the only stochastic estimator left unseeded.
        'LinearSVC' : LinearSVC(random_state=RANDOM_STATE)
        # Disabled candidates:
        #'LabelPropagation' : LabelPropagation(),
        #'LabelSpreading' : LabelSpreading(),
        #'SGDClassifier' : SGDClassifier()
}

# Placeholder class list; it is only used below to size the BernoulliNB
# `class_prior` candidate.
n_classes = [0,1]
# Hyperparameter tuning using RandomizedSearchCV
# Search space per model label: param_grid[label] is passed as
# `param_distributions` to RandomizedSearchCV in makeTuning.
# The entries for 'NearestCentroid', 'LabelPropagation', 'LabelSpreading' and
# 'SGDClassifier' have no active counterpart in `models`.
# NOTE(review): several value combinations (e.g. penalty/solver pairs for
# LogisticRegression, penalty/loss pairs for LinearSVC) look mutually
# incompatible, and some values ('none', 'auto', 'log') may not be accepted by
# every scikit-learn release -- confirm how failing candidates are scored.
param_grid = {'LogisticRegression': {'C': np.logspace(-4, 4, 25), 
                                    'penalty': ['l1', 'l2', 'elasticnet', 'none'],
                                    'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'],
                                    'fit_intercept': [True, False],
                                    'intercept_scaling': [0.5, 1, 2],
                                    'class_weight': [None, 'balanced']
                                    },
              'Ridge' : {'alpha': np.logspace(-5, 5, 75)},
              'DecisionTree': {'ccp_alpha': [0.0] + list(np.logspace(-3, 1, 25)),
                                'class_weight': [None, 'balanced'],
                                'criterion': ['gini', 'entropy', 'log_loss'],
                                'max_depth': [None] + list(range(1, 20)),
                                'max_features': [None, 'auto', 'sqrt', 'log2'],
                                'min_samples_leaf': range(1, 10),
                                'min_samples_split': range(2, 10),
                                'splitter': ['best', 'random']
                                },
              'K-nn': {'n_neighbors': list(range(1, 20, 1)),
                        'weights': ['uniform', 'distance'],
                        'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
                        'p': [1,2]
                        },
              'RandomForest': {'ccp_alpha': [0] + list(np.logspace(-3, 1, 25)),
                                'class_weight': [None, 'balanced'],
                                'n_estimators': range(50,500,50),
                                'max_features': [None, 'auto', 'sqrt', 'log2'],
                                'max_depth' : [None,4,6,8,10],
                                'criterion' :['gini', 'entropy']
                                },
              # 'class_prior' holds either None or a list with one 0.1 entry per
              # class in `n_classes`.
              # NOTE(review): these priors do not sum to 1 -- confirm intent.
              'BernoulliNB': {'alpha': np.logspace(-2, 1, 10),
                            'fit_prior': [True, False],
                            'class_prior': [None, [0.1,]* len(n_classes)],
                            'binarize': [None, -5, -2, 0.0, 2, 5, 10.0]
                            },
              'GaussianNB': {'var_smoothing': np.logspace(0,-9, num=20)
                             },
              'NearestCentroid': {'shrink_threshold': np.logspace(0, 1, 20),
                                'metric': ['euclidean', 'manhattan']
                                },
              'SVC': {'C': np.logspace(-4, 4, 25),
                      'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
                      'degree': range(2,5),
                      'gamma': np.logspace(-3, 1, 25)},
              'LabelPropagation': {'n_neighbors': [7, 21, 41, 81, 121, 181, 241],
                                   'gamma': [0.1, 1, 5, 10, 20, 30, 50]},
              'LabelSpreading': {'n_neighbors': [7, 21, 41, 81, 121, 181, 241],
                                'gamma': [0.1, 1, 5, 10, 20, 30, 50],
                                'alpha': [0.15, 0.2, 0.35, 0.55, 0.75, 0.95]},
              # The class_weight dictionaries below only cover the labels 0 and 1.
              'SGDClassifier': {'loss' : ['hinge', 'log', 'modified_huber', 'squared_hinge', 'perceptron'],
                            'penalty' : ['l1', 'l2', 'elasticnet'],
                            'alpha' : np.logspace(-4, 4, 25),
                            'learning_rate' : ['constant', 'optimal', 'invscaling', 'adaptive'],
                            'class_weight' : [{1:0.5, 0:0.5}, {1:0.4, 0:0.6}, {1:0.6, 0:0.4}, {1:0.7, 0:0.3}],
                            'eta0' : [1, 10, 100]},
              'LinearSVC': {'penalty': ['l1', 'l2'],
                            'loss': ['hinge', 'squared_hinge'],
                            'class_weight': [None, 'balanced']}
              }

In [18]:
# Scorer names evaluated on every cross-validation fold.
metrics = [
    'accuracy',
    'recall_weighted',
    'precision_weighted',
    'f1_weighted',
]

# Shared splitter: 5 shuffled, stratified folds with a fixed seed.
skfold = StratifiedKFold(
    n_splits=5,
    shuffle=True,
    random_state=42,
)

In [19]:
def makeScore(y_test, y_pred):
    """Score one set of hold-out predictions.

    Parameters
    ----------
    y_test : array-like
        True labels of the hold-out split.
    y_pred : array-like
        Predicted labels for the same samples.

    Returns
    -------
    dict
        ``'acc'`` (accuracy), ``'b_acc'`` (balanced accuracy), ``'st'``,
        ``'prec'``, ``'rec'`` and ``'f1'`` (the last three weighted by class
        support).
    """
    return {
        'acc': accuracy_score(y_test, y_pred),
        'b_acc': balanced_accuracy_score(y_test, y_pred),
        # A single hold-out evaluation has no spread. The key is kept so the
        # dictionary has the same shape as the cross-validation scores, but
        # the value is the constant 0.0 rather than `.std()` of a scalar,
        # which fails when the accuracy is a plain Python float.
        'st': 0.0,
        'prec': precision_score(y_test, y_pred, average='weighted'),
        'rec': recall_score(y_test, y_pred, average='weighted'),
        'f1': f1_score(y_test, y_pred, average='weighted'),
    }

In [20]:
def makeCrossValidation(model, X_train, y_train):
    """Cross-validate ``model`` and summarise the per-fold test scores.

    Uses the module-level ``metrics`` scorers, the ``skfold`` splitter and
    ``N_JOBS`` workers.

    Parameters
    ----------
    model : estimator
        Estimator passed to ``cross_validate``.
    X_train, y_train
        Training features and labels.

    Returns
    -------
    dict
        ``'acc'`` and ``'st'`` (mean and standard deviation of the fold
        accuracies) plus the fold means ``'prec'``, ``'rec'`` and ``'f1'``.
    """
    folds = cross_validate(
        estimator=model,
        X=X_train,
        y=y_train,
        scoring=metrics,
        cv=skfold,
        n_jobs=N_JOBS,
        verbose=0,
    )
    fold_accuracy = folds.get('test_accuracy')

    return {
        'acc': fold_accuracy.mean(),
        'st': fold_accuracy.std(),
        'prec': folds.get('test_precision_weighted').mean(),
        'rec': folds.get('test_recall_weighted').mean(),
        'f1': folds.get('test_f1_weighted').mean(),
    }

In [21]:
def makeTuning(model, X_train, y_train, name):
    """Tune ``model`` with a randomized search and cross-validate the winner.

    The search space is ``param_grid[name]``; candidates are ranked by
    accuracy on the shared ``skfold`` splitter.

    Parameters
    ----------
    model : estimator
        Base estimator handed to ``RandomizedSearchCV``.
    X_train, y_train
        Training features and labels.
    name : str
        Key of the model's search space in ``param_grid``.

    Returns
    -------
    tuple
        ``(model_best, score)``: the best estimator, already fitted on the
        full training data, and its ``makeCrossValidation`` summary.

    Raises
    ------
    KeyError
        If ``name`` has no entry in ``param_grid``.
    """
    search = RandomizedSearchCV(estimator=model, param_distributions=param_grid[name],
                                scoring=metrics, refit="accuracy", cv=skfold,
                                n_jobs=N_JOBS, random_state=RANDOM_STATE, verbose=0)
    search.fit(X_train, y_train)

    # refit="accuracy" already refits the best candidate on all of
    # (X_train, y_train), so no extra fit is needed here.
    model_best = search.best_estimator_

    # NOTE(review): these scores come from the same folds that selected the
    # hyperparameters, so they are likely optimistic.
    score = makeCrossValidation(model_best, X_train, y_train)

    return model_best, score

In [23]:
# Benchmark every model on every target column, with default and tuned
# hyperparameters, on the raw features and on their PCA projection.
# All metrics are cross-validated and reported with their standard deviation;
# the cross-validated metric is the one used in the thesis.

# Column layout of each per-target results table.
RESULT_COLUMNS = ['Target', 'Model', 'Accuracy CV', 'St. Dev. CV',
                  'Precision CV', 'Recall CV', 'F1-Score CV', 'Accuracy', 'Bal. Accuracy']


def _result_row(target_name, model_label, score_cv, score):
    """Build one results-table row from CV scores and hold-out scores."""
    return {'Target': target_name,
            'Model': model_label,
            'Accuracy CV': score_cv['acc'],
            'St. Dev. CV': score_cv['st'],
            'Precision CV': score_cv['prec'],
            'Recall CV': score_cv['rec'],
            'F1-Score CV': score_cv['f1'],
            'Accuracy': score['acc'],
            'Bal. Accuracy': score['b_acc']}


score_target = {}
pred_ensemble = pd.DataFrame()
pred_ensemble_best = pd.DataFrame()

X = maldi
for str_target, target in targets.items():
    for column in target.columns:
        print("Colonna:"+column)
        y = target[column]

        # Rows are collected in a list and turned into a DataFrame once:
        # DataFrame.append is quadratic in a loop and no longer exists in
        # pandas >= 2.0.
        rows = []

        # parameter range for BernoulliNB based on the number of classes in the target
        n_classes = np.unique(y)
        param_grid['BernoulliNB']['class_prior'] = [None, [0.1,]* len(n_classes)]

        # split the data into training and testing sets
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=RANDOM_STATE)
        X_pca_train, X_pca_test = dimensionality_reduction(X_train, X_test, n_components=0.95)

        # evaluate the models on the original dataset and on the reduced dataset
        dataframes = {'' : (X_train, X_test),
                      '_PCA' : (X_pca_train, X_pca_test)}

        for name, model in models.items():
            for pca, dataframe in dataframes.items():
                X_train, X_test = dataframe
                model_base = model
                model_best = model

                # Base model: cross-validated scores, then fit and predict on the hold-out split.
                score_cv = makeCrossValidation(model_base, X_train, y_train)

                model_base.fit(X_train, y_train)
                y_pred = model_base.predict(X_test)

                score = makeScore(y_test, y_pred)
                # NOTE(review): the column is keyed by `name` only, so the
                # '_PCA' pass overwrites the predictions of the raw-feature
                # pass. Kept as is because later cells may rely on these
                # column names.
                pred_ensemble[name] = y_pred

                ris = _result_row(column, name+pca, score_cv, score)
                rows.append(ris)

                # Hyperparameter tuning. makeTuning returns an estimator that
                # is already fitted on (X_train, y_train), so it can predict
                # right away.
                model_best, score_cv = makeTuning(model_best, X_train, y_train, name)
                y_pred = model_best.predict(X_test)

                score = makeScore(y_test, y_pred)
                pred_ensemble_best[name] = y_pred

                ris = _result_row(column, name+'_Best'+pca, score_cv, score)
                rows.append(ris)

        pred_ensemble['Target'] = y_test.values
        display(pred_ensemble)
        print("Model with tuning")
        pred_ensemble_best['Target'] = y_test.values
        display(pred_ensemble_best)

        # TODO: an ensemble evaluation over pred_ensemble / pred_ensemble_best
        # (all models and a reduced subset, with and without tuning) was
        # sketched here but is disabled; it relied on a `makeEnsemble` helper.

        print('\n')
        metrics_df = pd.DataFrame(rows, columns=RESULT_COLUMNS)
        score_target[column] = metrics_df
        #metrics_df.to_csv('..\Risultati\Results_Def_'+str(n)+'picchi\Results_'+column+'_'+str(n)+'.csv', index = False)

        display(score_target[column])

Colonna:Eritromicina


Unnamed: 0,LogisticRegression,Ridge,DecisionTree,K-nn,RandomForest,BernoulliNB,GaussianNB,SVC,LinearSVC,Target
0,1,1,1,1,1,1,1,1,1,1
1,1,1,1,0,1,1,1,0,1,1
2,1,1,1,1,1,1,1,0,1,0
3,1,1,0,0,0,1,0,0,1,0
4,1,1,1,1,1,1,0,1,1,1
5,1,1,1,0,1,0,0,1,1,1
6,1,1,0,1,0,1,0,0,1,1
7,1,1,0,1,1,1,0,0,1,0
8,1,1,0,0,1,1,0,0,1,1
9,1,1,0,1,1,1,1,1,1,1


Model with tuning


Unnamed: 0,LogisticRegression,Ridge,DecisionTree,K-nn,RandomForest,BernoulliNB,GaussianNB,SVC,LinearSVC,Target
0,1,1,1,1,1,1,1,1,1,1
1,0,1,1,1,1,1,0,1,0,1
2,0,1,0,0,1,1,0,1,0,0
3,1,1,0,1,0,1,0,1,1,0
4,1,1,1,1,1,1,1,1,1,1
5,0,1,1,0,0,1,0,1,0,1
6,0,1,0,0,0,1,0,1,0,1
7,0,1,0,0,1,1,0,1,0,0
8,0,1,1,0,1,1,0,1,0,1
9,1,1,1,1,1,1,1,1,1,1






Unnamed: 0,Target,Model,Accuracy CV,St. Dev. CV,Precision CV,Recall CV,F1-Score CV,Accuracy,Bal. Accuracy
0,Eritromicina,LogisticRegression,0.528667,0.010614,0.279601,0.528667,0.365726,0.580645,0.5
1,Eritromicina,LogisticRegression_Best,0.587333,0.109309,0.537911,0.587333,0.542752,0.677419,0.700855
2,Eritromicina,LogisticRegression_PCA,0.528667,0.010614,0.279601,0.528667,0.365726,0.580645,0.5
3,Eritromicina,LogisticRegression_Best_PCA,0.603,0.118773,0.629034,0.603,0.591286,0.677419,0.700855
4,Eritromicina,Ridge,0.528667,0.010614,0.279601,0.528667,0.365726,0.580645,0.5
5,Eritromicina,Ridge_Best,0.528667,0.010614,0.279601,0.528667,0.365726,0.580645,0.5
6,Eritromicina,Ridge_PCA,0.528667,0.010614,0.279601,0.528667,0.365726,0.580645,0.5
7,Eritromicina,Ridge_Best_PCA,0.528667,0.010614,0.279601,0.528667,0.365726,0.580645,0.5
8,Eritromicina,DecisionTree,0.505333,0.074858,0.50595,0.505333,0.500182,0.580645,0.542735
9,Eritromicina,DecisionTree_Best,0.585333,0.038678,0.593426,0.585333,0.582772,0.387097,0.344017


Colonna:Tetraciclina


Unnamed: 0,LogisticRegression,Ridge,DecisionTree,K-nn,RandomForest,BernoulliNB,GaussianNB,SVC,LinearSVC,Target
0,0,0,0,1,1,1,1,1,0,1
1,0,0,1,0,0,0,1,1,0,1
2,0,0,1,0,0,1,1,0,0,0
3,0,0,0,0,0,0,0,0,0,0
4,0,0,0,1,0,0,0,0,0,0
5,0,0,0,0,0,0,0,0,0,1
6,0,0,0,0,0,0,0,0,0,1
7,0,0,0,1,1,1,1,1,0,1
8,0,0,0,0,0,0,0,0,0,0
9,0,0,1,1,1,1,1,1,0,1


Model with tuning


Unnamed: 0,LogisticRegression,Ridge,DecisionTree,K-nn,RandomForest,BernoulliNB,GaussianNB,SVC,LinearSVC,Target
0,1,1,0,1,1,1,1,0,1,1
1,1,0,0,0,1,0,1,0,1,1
2,0,0,1,0,0,1,1,0,0,0
3,0,0,0,0,0,0,0,0,1,0
4,0,0,1,1,0,0,0,0,1,0
5,0,0,1,0,0,1,0,0,0,1
6,0,0,0,0,0,0,0,0,0,1
7,1,0,1,1,1,1,1,0,0,1
8,0,0,0,0,0,0,0,0,0,0
9,1,0,0,1,1,1,1,0,1,1






Unnamed: 0,Target,Model,Accuracy CV,St. Dev. CV,Precision CV,Recall CV,F1-Score CV,Accuracy,Bal. Accuracy
0,Tetraciclina,LogisticRegression,0.528667,0.010614,0.279601,0.528667,0.365726,0.483871,0.5
1,Tetraciclina,LogisticRegression_Best,0.683667,0.098862,0.695475,0.683667,0.67434,0.645161,0.647917
2,Tetraciclina,LogisticRegression_PCA,0.528667,0.010614,0.279601,0.528667,0.365726,0.483871,0.5
3,Tetraciclina,LogisticRegression_Best_PCA,0.690333,0.058428,0.695028,0.690333,0.690077,0.774194,0.775
4,Tetraciclina,Ridge,0.528667,0.010614,0.279601,0.528667,0.365726,0.483871,0.5
5,Tetraciclina,Ridge_Best,0.626333,0.045048,0.665476,0.626333,0.59427,0.612903,0.620833
6,Tetraciclina,Ridge_PCA,0.528667,0.010614,0.279601,0.528667,0.365726,0.483871,0.5
7,Tetraciclina,Ridge_Best_PCA,0.618333,0.052026,0.657309,0.618333,0.585316,0.612903,0.620833
8,Tetraciclina,DecisionTree,0.609333,0.051366,0.609929,0.609333,0.607369,0.580645,0.579167
9,Tetraciclina,DecisionTree_Best,0.633,0.111626,0.634414,0.633,0.630647,0.677419,0.679167


Colonna:Gentamicina


Unnamed: 0,LogisticRegression,Ridge,DecisionTree,K-nn,RandomForest,BernoulliNB,GaussianNB,SVC,LinearSVC,Target
0,0,0,0,0,0,0,0,0,0,0
1,0,0,1,0,0,0,0,0,0,0
2,0,0,0,1,0,0,0,0,0,0
3,0,0,0,1,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0
5,0,0,0,0,0,0,0,0,0,1
6,0,0,1,1,1,1,1,1,0,1
7,0,0,1,0,0,1,1,1,0,1
8,0,0,0,1,1,0,0,0,0,0
9,0,0,0,0,0,0,0,0,0,1


Model with tuning


Unnamed: 0,LogisticRegression,Ridge,DecisionTree,K-nn,RandomForest,BernoulliNB,GaussianNB,SVC,LinearSVC,Target
0,0,0,0,0,0,1,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0
5,0,0,0,0,0,0,0,0,0,1
6,1,0,1,1,1,1,1,0,1,1
7,1,0,1,1,1,1,1,0,1,1
8,0,0,0,0,0,0,0,0,0,0
9,0,0,0,0,0,0,0,0,0,1






Unnamed: 0,Target,Model,Accuracy CV,St. Dev. CV,Precision CV,Recall CV,F1-Score CV,Accuracy,Bal. Accuracy
0,Gentamicina,LogisticRegression,0.61,0.012247,0.37225,0.61,0.462308,0.677419,0.5
1,Gentamicina,LogisticRegression_Best,0.657,0.102991,0.676783,0.657,0.65935,0.709677,0.654762
2,Gentamicina,LogisticRegression_PCA,0.61,0.012247,0.37225,0.61,0.462308,0.677419,0.5
3,Gentamicina,LogisticRegression_Best_PCA,0.657,0.102991,0.676783,0.657,0.65935,0.709677,0.654762
4,Gentamicina,Ridge,0.61,0.012247,0.37225,0.61,0.462308,0.677419,0.5
5,Gentamicina,Ridge_Best,0.61,0.012247,0.37225,0.61,0.462308,0.677419,0.5
6,Gentamicina,Ridge_PCA,0.61,0.012247,0.37225,0.61,0.462308,0.677419,0.5
7,Gentamicina,Ridge_Best_PCA,0.61,0.012247,0.37225,0.61,0.462308,0.677419,0.5
8,Gentamicina,DecisionTree,0.496333,0.067251,0.506726,0.496333,0.497869,0.645161,0.580952
9,Gentamicina,DecisionTree_Best,0.642333,0.092205,0.64675,0.642333,0.642464,0.741935,0.652381


Colonna:Clindamicina


Unnamed: 0,LogisticRegression,Ridge,DecisionTree,K-nn,RandomForest,BernoulliNB,GaussianNB,SVC,LinearSVC,Target
0,1,1,1,1,1,1,1,1,1,1
1,1,1,1,1,1,1,1,1,1,1
2,1,1,1,0,0,1,1,0,1,0
3,1,1,0,0,0,0,0,0,1,0
4,1,1,1,1,1,1,1,1,1,1
5,1,1,1,0,0,1,0,1,1,1
6,1,1,1,1,1,1,1,1,1,1
7,1,1,1,1,1,1,1,1,1,1
8,1,1,0,0,0,0,0,0,1,0
9,1,1,1,1,1,1,1,1,1,1


Model with tuning


Unnamed: 0,LogisticRegression,Ridge,DecisionTree,K-nn,RandomForest,BernoulliNB,GaussianNB,SVC,LinearSVC,Target
0,1,1,1,1,1,1,1,1,1,1
1,1,1,1,1,1,1,1,1,1,1
2,0,1,1,0,0,1,0,1,0,0
3,0,1,1,0,0,1,0,1,0,0
4,0,1,1,1,1,1,1,1,0,1
5,0,1,1,0,0,1,0,1,0,1
6,1,1,1,1,1,1,1,1,1,1
7,1,1,1,1,1,1,1,1,1,1
8,0,1,1,0,0,1,0,1,0,0
9,1,1,1,1,1,1,1,1,1,1






Unnamed: 0,Target,Model,Accuracy CV,St. Dev. CV,Precision CV,Recall CV,F1-Score CV,Accuracy,Bal. Accuracy
0,Clindamicina,LogisticRegression,0.756,0.004899,0.57156,0.756,0.650961,0.645161,0.5
1,Clindamicina,LogisticRegression_Best,0.813,0.106209,0.81927,0.813,0.815197,0.83871,0.834091
2,Clindamicina,LogisticRegression_PCA,0.756,0.004899,0.57156,0.756,0.650961,0.645161,0.5
3,Clindamicina,LogisticRegression_Best_PCA,0.813,0.106209,0.81927,0.813,0.815197,0.83871,0.834091
4,Clindamicina,Ridge,0.756,0.004899,0.57156,0.756,0.650961,0.645161,0.5
5,Clindamicina,Ridge_Best,0.756,0.004899,0.57156,0.756,0.650961,0.645161,0.5
6,Clindamicina,Ridge_PCA,0.756,0.004899,0.57156,0.756,0.650961,0.645161,0.5
7,Clindamicina,Ridge_Best_PCA,0.756,0.004899,0.57156,0.756,0.650961,0.645161,0.5
8,Clindamicina,DecisionTree,0.738667,0.075125,0.758119,0.738667,0.736356,0.83871,0.834091
9,Clindamicina,DecisionTree_Best,0.772333,0.084351,0.742541,0.772333,0.750979,0.677419,0.627273


Colonna:Enrofloxacin


Unnamed: 0,LogisticRegression,Ridge,DecisionTree,K-nn,RandomForest,BernoulliNB,GaussianNB,SVC,LinearSVC,Target
0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,1
2,0,0,1,0,1,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,1
4,0,0,0,0,0,0,0,0,0,0
5,0,0,1,1,1,0,1,1,0,1
6,0,0,0,1,0,1,0,0,0,0
7,0,0,0,0,0,0,0,0,0,0
8,0,0,0,0,0,0,0,0,0,0
9,0,0,0,0,0,0,0,0,0,1


Model with tuning


Unnamed: 0,LogisticRegression,Ridge,DecisionTree,K-nn,RandomForest,BernoulliNB,GaussianNB,SVC,LinearSVC,Target
0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,1
2,1,0,0,0,1,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,1
4,0,0,0,0,0,0,0,0,0,0
5,1,0,0,0,1,0,1,0,0,1
6,0,0,0,0,0,0,0,0,0,0
7,0,0,0,0,0,0,0,0,0,0
8,0,0,0,0,0,0,0,0,0,0
9,0,0,0,0,0,0,0,0,0,1






Unnamed: 0,Target,Model,Accuracy CV,St. Dev. CV,Precision CV,Recall CV,F1-Score CV,Accuracy,Bal. Accuracy
0,Enrofloxacin,LogisticRegression,0.699333,0.01635,0.489334,0.699333,0.575708,0.548387,0.5
1,Enrofloxacin,LogisticRegression_Best,0.731333,0.066378,0.720613,0.731333,0.704548,0.645161,0.613445
2,Enrofloxacin,LogisticRegression_PCA,0.699333,0.01635,0.489334,0.699333,0.575708,0.548387,0.5
3,Enrofloxacin,LogisticRegression_Best_PCA,0.707333,0.077292,0.699277,0.707333,0.683271,0.612903,0.584034
4,Enrofloxacin,Ridge,0.699333,0.01635,0.489334,0.699333,0.575708,0.548387,0.5
5,Enrofloxacin,Ridge_Best,0.699333,0.01635,0.489334,0.699333,0.575708,0.548387,0.5
6,Enrofloxacin,Ridge_PCA,0.699333,0.01635,0.489334,0.699333,0.575708,0.548387,0.5
7,Enrofloxacin,Ridge_Best_PCA,0.699333,0.01635,0.489334,0.699333,0.575708,0.548387,0.5
8,Enrofloxacin,DecisionTree,0.600667,0.054412,0.557115,0.600667,0.570603,0.580645,0.560924
9,Enrofloxacin,DecisionTree_Best,0.699333,0.01635,0.489334,0.699333,0.575708,0.548387,0.5


Colonna:ANT(6)-Ia


Unnamed: 0,LogisticRegression,Ridge,DecisionTree,K-nn,RandomForest,BernoulliNB,GaussianNB,SVC,LinearSVC,Target
0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0
2,0,0,1,1,1,0,1,1,0,1
3,0,0,1,1,1,1,1,1,0,1
4,0,0,0,0,0,0,0,0,0,0
5,0,0,0,1,1,0,1,0,0,0
6,0,0,0,0,0,0,0,0,0,0
7,0,0,0,0,0,0,0,0,0,0
8,0,0,1,1,1,1,1,1,0,1
9,0,0,0,0,0,0,0,0,0,0


Model with tuning


Unnamed: 0,LogisticRegression,Ridge,DecisionTree,K-nn,RandomForest,BernoulliNB,GaussianNB,SVC,LinearSVC,Target
0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0
2,1,0,0,1,1,0,1,0,0,1
3,1,0,0,1,1,0,1,0,0,1
4,0,0,0,0,0,0,0,0,0,0
5,1,0,0,1,1,0,1,0,0,0
6,0,0,0,0,0,0,0,0,0,0
7,0,0,0,0,0,0,0,0,0,0
8,1,0,0,1,1,0,1,0,0,1
9,0,0,0,0,0,0,0,0,0,0






Unnamed: 0,Target,Model,Accuracy CV,St. Dev. CV,Precision CV,Recall CV,F1-Score CV,Accuracy,Bal. Accuracy
0,ANT(6)-Ia,LogisticRegression,0.837333,0.003266,0.701138,0.837333,0.763204,0.774194,0.5
1,ANT(6)-Ia,LogisticRegression_Best,0.853,0.04367,0.813005,0.853,0.829339,0.83871,0.845238
2,ANT(6)-Ia,LogisticRegression_PCA,0.837333,0.003266,0.701138,0.837333,0.763204,0.774194,0.5
3,ANT(6)-Ia,LogisticRegression_Best_PCA,0.845,0.048694,0.829784,0.845,0.83164,0.774194,0.702381
4,ANT(6)-Ia,Ridge,0.837333,0.003266,0.701138,0.837333,0.763204,0.774194,0.5
5,ANT(6)-Ia,Ridge_Best,0.837333,0.003266,0.701138,0.837333,0.763204,0.774194,0.5
6,ANT(6)-Ia,Ridge_PCA,0.837333,0.003266,0.701138,0.837333,0.763204,0.774194,0.5
7,ANT(6)-Ia,Ridge_Best_PCA,0.837333,0.003266,0.701138,0.837333,0.763204,0.774194,0.5
8,ANT(6)-Ia,DecisionTree,0.764667,0.062445,0.790293,0.764667,0.772303,0.806452,0.824405
9,ANT(6)-Ia,DecisionTree_Best,0.837333,0.003266,0.701138,0.837333,0.763204,0.774194,0.5


Colonna:ErmB


Unnamed: 0,LogisticRegression,Ridge,DecisionTree,K-nn,RandomForest,BernoulliNB,GaussianNB,SVC,LinearSVC,Target
0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,1
3,0,0,0,0,0,0,0,0,0,1
4,0,0,0,0,0,1,0,0,0,0
5,0,0,0,1,0,0,1,0,0,0
6,0,0,0,0,0,0,0,0,0,0
7,0,0,0,0,0,0,0,0,0,0
8,0,0,0,1,0,0,1,1,0,0
9,0,0,0,0,0,0,0,0,0,0


Model with tuning


Unnamed: 0,LogisticRegression,Ridge,DecisionTree,K-nn,RandomForest,BernoulliNB,GaussianNB,SVC,LinearSVC,Target
0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0
2,0,0,0,1,1,0,0,0,0,1
3,0,0,0,0,1,0,0,0,0,1
4,0,0,0,0,0,0,0,0,0,0
5,1,0,0,1,1,0,1,0,0,0
6,0,0,0,0,0,0,0,0,0,0
7,0,0,0,0,0,0,0,0,0,0
8,1,0,0,1,1,0,0,0,0,0
9,0,0,0,0,0,0,0,0,0,0






Unnamed: 0,Target,Model,Accuracy CV,St. Dev. CV,Precision CV,Recall CV,F1-Score CV,Accuracy,Bal. Accuracy
0,ErmB,LogisticRegression,0.821333,0.017588,0.674898,0.821333,0.740866,0.806452,0.5
1,ErmB,LogisticRegression_Best,0.861667,0.020602,0.870331,0.861667,0.830359,0.806452,0.626667
2,ErmB,LogisticRegression_PCA,0.821333,0.017588,0.674898,0.821333,0.740866,0.806452,0.5
3,ErmB,LogisticRegression_Best_PCA,0.827667,0.086574,0.850456,0.827667,0.82071,0.741935,0.586667
4,ErmB,Ridge,0.821333,0.017588,0.674898,0.821333,0.740866,0.806452,0.5
5,ErmB,Ridge_Best,0.821333,0.017588,0.674898,0.821333,0.740866,0.806452,0.5
6,ErmB,Ridge_PCA,0.821333,0.017588,0.674898,0.821333,0.740866,0.806452,0.5
7,ErmB,Ridge_Best_PCA,0.821333,0.017588,0.674898,0.821333,0.740866,0.806452,0.5
8,ErmB,DecisionTree,0.764333,0.052553,0.769958,0.764333,0.764941,0.741935,0.586667
9,ErmB,DecisionTree_Best,0.837667,0.023842,0.772835,0.837667,0.790652,0.806452,0.626667


Colonna:lsaE


Unnamed: 0,LogisticRegression,Ridge,DecisionTree,K-nn,RandomForest,BernoulliNB,GaussianNB,SVC,LinearSVC,Target
0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0
2,0,0,1,1,0,0,0,1,0,1
3,0,0,1,1,0,1,1,0,0,1
4,0,0,0,0,0,0,0,0,0,0
5,0,0,0,1,1,0,1,0,0,0
6,0,0,0,0,0,0,0,0,0,0
7,0,0,1,0,0,0,0,0,0,0
8,0,0,1,1,1,0,1,1,0,1
9,0,0,0,0,0,0,0,0,0,0


Model with tuning


Unnamed: 0,LogisticRegression,Ridge,DecisionTree,K-nn,RandomForest,BernoulliNB,GaussianNB,SVC,LinearSVC,Target
0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0
2,0,0,0,1,1,0,1,0,1,1
3,0,0,0,0,0,0,0,0,1,1
4,0,0,0,0,0,0,0,0,0,0
5,1,0,0,1,1,0,1,0,1,0
6,0,0,0,0,0,0,0,0,0,0
7,0,0,0,0,0,0,0,0,0,0
8,1,0,0,1,1,0,1,0,1,1
9,0,0,0,0,0,0,0,0,0,0






Unnamed: 0,Target,Model,Accuracy CV,St. Dev. CV,Precision CV,Recall CV,F1-Score CV,Accuracy,Bal. Accuracy
0,lsaE,LogisticRegression,0.805,0.01453,0.648236,0.805,0.718104,0.709677,0.5
1,lsaE,LogisticRegression_Best,0.878,0.043886,0.851576,0.878,0.854393,0.870968,0.843434
2,lsaE,LogisticRegression_PCA,0.805,0.01453,0.648236,0.805,0.718104,0.709677,0.5
3,lsaE,LogisticRegression_Best_PCA,0.894333,0.0543,0.890149,0.894333,0.880182,0.806452,0.732323
4,lsaE,Ridge,0.805,0.01453,0.648236,0.805,0.718104,0.709677,0.5
5,lsaE,Ridge_Best,0.805,0.01453,0.648236,0.805,0.718104,0.709677,0.5
6,lsaE,Ridge_PCA,0.805,0.01453,0.648236,0.805,0.718104,0.709677,0.5
7,lsaE,Ridge_Best_PCA,0.805,0.01453,0.648236,0.805,0.718104,0.709677,0.5
8,lsaE,DecisionTree,0.828333,0.056843,0.837113,0.828333,0.826829,0.774194,0.676768
9,lsaE,DecisionTree_Best,0.845667,0.046279,0.806093,0.845667,0.813859,0.83871,0.722222


Colonna:tetM


Unnamed: 0,LogisticRegression,Ridge,DecisionTree,K-nn,RandomForest,BernoulliNB,GaussianNB,SVC,LinearSVC,Target
0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,1
5,0,0,0,0,0,0,0,0,0,0
6,0,0,1,1,1,1,1,0,0,0
7,0,0,0,0,0,0,0,0,0,0
8,0,0,0,0,0,0,0,0,0,0
9,0,0,1,0,0,0,0,0,0,0


Model with tuning


Unnamed: 0,LogisticRegression,Ridge,DecisionTree,K-nn,RandomForest,BernoulliNB,GaussianNB,SVC,LinearSVC,Target
0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,1
5,0,0,0,0,0,0,0,0,0,0
6,1,0,0,0,1,0,0,0,0,0
7,0,0,0,0,0,0,0,0,0,0
8,0,0,0,0,0,0,0,0,0,0
9,0,0,0,0,0,0,0,0,0,0






Unnamed: 0,Target,Model,Accuracy CV,St. Dev. CV,Precision CV,Recall CV,F1-Score CV,Accuracy,Bal. Accuracy
0,tetM,LogisticRegression,0.805,0.01453,0.648236,0.805,0.718104,0.870968,0.5
1,tetM,LogisticRegression_Best,0.764,0.018547,0.749739,0.764,0.750651,0.806452,0.462963
2,tetM,LogisticRegression_PCA,0.805,0.01453,0.648236,0.805,0.718104,0.870968,0.5
3,tetM,LogisticRegression_Best_PCA,0.780667,0.03144,0.750803,0.780667,0.760608,0.83871,0.481481
4,tetM,Ridge,0.805,0.01453,0.648236,0.805,0.718104,0.870968,0.5
5,tetM,Ridge_Best,0.805,0.01453,0.648236,0.805,0.718104,0.870968,0.5
6,tetM,Ridge_PCA,0.805,0.01453,0.648236,0.805,0.718104,0.870968,0.5
7,tetM,Ridge_Best_PCA,0.805,0.01453,0.648236,0.805,0.718104,0.870968,0.5
8,tetM,DecisionTree,0.74,0.074952,0.78255,0.74,0.749811,0.774194,0.444444
9,tetM,DecisionTree_Best,0.813,0.019675,0.688236,0.813,0.743882,0.870968,0.5


Colonna:tetO


Unnamed: 0,LogisticRegression,Ridge,DecisionTree,K-nn,RandomForest,BernoulliNB,GaussianNB,SVC,LinearSVC,Target
0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0
2,0,0,0,1,0,0,0,1,0,1
3,0,0,1,1,0,0,1,1,0,1
4,0,0,0,0,0,1,0,0,0,0
5,0,0,0,1,0,0,1,0,0,0
6,0,0,0,0,0,0,0,0,0,0
7,0,0,1,0,0,0,0,0,0,0
8,0,0,1,1,1,0,1,1,0,1
9,0,0,0,0,0,0,0,0,0,0


Model with tuning


Unnamed: 0,LogisticRegression,Ridge,DecisionTree,K-nn,RandomForest,BernoulliNB,GaussianNB,SVC,LinearSVC,Target
0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0
2,0,0,0,1,0,0,0,0,0,1
3,0,0,0,1,1,0,0,0,0,1
4,0,0,0,0,0,0,0,0,0,0
5,1,0,0,1,0,0,1,0,0,0
6,0,0,0,0,0,0,0,0,0,0
7,0,0,0,0,0,0,0,0,0,0
8,1,0,0,1,1,0,1,0,0,1
9,0,0,0,0,0,0,0,0,0,0






Unnamed: 0,Target,Model,Accuracy CV,St. Dev. CV,Precision CV,Recall CV,F1-Score CV,Accuracy,Bal. Accuracy
0,tetO,LogisticRegression,0.74,0.016733,0.54788,0.74,0.629532,0.741935,0.5
1,tetO,LogisticRegression_Best,0.756333,0.034163,0.680438,0.756333,0.695621,0.806452,0.665761
2,tetO,LogisticRegression_PCA,0.74,0.016733,0.54788,0.74,0.629532,0.741935,0.5
3,tetO,LogisticRegression_Best_PCA,0.732,0.039376,0.697597,0.732,0.699789,0.870968,0.831522
4,tetO,Ridge,0.74,0.016733,0.54788,0.74,0.629532,0.741935,0.5
5,tetO,Ridge_Best,0.74,0.016733,0.54788,0.74,0.629532,0.741935,0.5
6,tetO,Ridge_PCA,0.74,0.016733,0.54788,0.74,0.629532,0.741935,0.5
7,tetO,Ridge_Best_PCA,0.74,0.016733,0.54788,0.74,0.629532,0.741935,0.5
8,tetO,DecisionTree,0.691,0.065078,0.698144,0.691,0.687897,0.774194,0.847826
9,tetO,DecisionTree_Best,0.772667,0.052921,0.702416,0.772667,0.721038,0.806452,0.665761


Colonna:mf3


Unnamed: 0,LogisticRegression,Ridge,DecisionTree,K-nn,RandomForest,BernoulliNB,GaussianNB,SVC,LinearSVC,Target
0,0,0,0,0,0,0,1,1,0,1
1,0,0,0,0,0,0,0,0,0,0
2,0,0,1,0,0,1,0,0,0,1
3,0,0,0,0,0,0,0,0,0,1
4,0,0,0,0,0,0,0,0,0,0
5,0,0,1,0,0,0,0,0,0,0
6,0,0,0,0,0,0,1,0,0,1
7,0,0,1,1,1,1,1,0,0,1
8,0,0,0,0,0,0,0,0,0,1
9,0,0,1,1,1,0,1,1,0,1


Model with tuning


Unnamed: 0,LogisticRegression,Ridge,DecisionTree,K-nn,RandomForest,BernoulliNB,GaussianNB,SVC,LinearSVC,Target
0,0,0,1,1,1,0,1,0,0,1
1,0,0,0,0,0,0,0,0,0,0
2,1,0,1,0,0,0,0,0,0,1
3,0,0,0,0,0,0,0,0,0,1
4,0,0,0,0,0,0,0,0,0,0
5,0,0,1,0,0,0,0,0,0,0
6,0,0,0,0,0,0,1,0,0,1
7,1,0,1,1,1,0,1,0,0,1
8,0,0,0,0,0,0,0,0,0,1
9,1,0,0,1,1,0,1,0,0,1






Unnamed: 0,Target,Model,Accuracy CV,St. Dev. CV,Precision CV,Recall CV,F1-Score CV,Accuracy,Bal. Accuracy
0,mf3,LogisticRegression,0.707333,0.014629,0.500534,0.707333,0.586171,0.548387,0.5
1,mf3,LogisticRegression_Best,0.723667,0.039783,0.746961,0.723667,0.719577,0.645161,0.62605
2,mf3,LogisticRegression_PCA,0.707333,0.014629,0.500534,0.707333,0.586171,0.548387,0.5
3,mf3,LogisticRegression_Best_PCA,0.707667,0.026365,0.704626,0.707667,0.682023,0.774194,0.762605
4,mf3,Ridge,0.707333,0.014629,0.500534,0.707333,0.586171,0.548387,0.5
5,mf3,Ridge_Best,0.707333,0.014629,0.500534,0.707333,0.586171,0.548387,0.5
6,mf3,Ridge_PCA,0.707333,0.014629,0.500534,0.707333,0.586171,0.548387,0.5
7,mf3,Ridge_Best_PCA,0.707333,0.014629,0.500534,0.707333,0.586171,0.548387,0.5
8,mf3,DecisionTree,0.674333,0.078469,0.701242,0.674333,0.678563,0.741935,0.720588
9,mf3,DecisionTree_Best,0.740333,0.053129,0.71733,0.740333,0.714825,0.741935,0.733193


Colonna:subspecies


Unnamed: 0,LogisticRegression,Ridge,DecisionTree,K-nn,RandomForest,BernoulliNB,GaussianNB,SVC,LinearSVC,Target
0,2,2,0,0,0,0,0,0,2,0
1,2,2,0,1,1,1,1,1,2,1
2,2,2,1,1,1,1,1,1,2,1
3,2,2,1,1,1,1,1,1,2,1
4,2,2,0,0,0,0,0,0,2,0
5,2,2,1,1,1,1,1,1,2,1
6,2,2,2,2,2,2,2,2,2,2
7,2,2,2,2,2,2,2,2,2,2
8,2,2,1,1,1,1,1,1,2,1
9,2,2,0,0,0,1,0,0,2,0


Model with tuning


Unnamed: 0,LogisticRegression,Ridge,DecisionTree,K-nn,RandomForest,BernoulliNB,GaussianNB,SVC,LinearSVC,Target
0,0,0,0,0,0,0,0,2,0,0
1,0,0,1,1,1,1,1,2,0,1
2,1,1,1,1,1,1,1,2,1,1
3,1,0,0,1,1,1,1,2,1,1
4,0,0,1,0,0,0,0,2,0,0
5,1,1,1,1,1,1,1,2,1,1
6,2,2,2,2,2,2,2,2,2,2
7,2,2,2,2,2,2,2,2,2,2
8,1,0,1,1,1,1,1,2,1,1
9,0,0,0,0,0,1,0,2,0,0






Unnamed: 0,Target,Model,Accuracy CV,St. Dev. CV,Precision CV,Recall CV,F1-Score CV,Accuracy,Bal. Accuracy
0,subspecies,LogisticRegression,0.390667,0.025768,0.153284,0.390667,0.219989,0.258065,0.333333
1,subspecies,LogisticRegression_Best,0.950667,0.040969,0.95846,0.950667,0.949012,1.0,1.0
2,subspecies,LogisticRegression_PCA,0.390667,0.025768,0.153284,0.390667,0.219989,0.258065,0.333333
3,subspecies,LogisticRegression_Best_PCA,0.926333,0.048949,0.934361,0.926333,0.925009,0.967742,0.974359
4,subspecies,Ridge,0.390667,0.025768,0.153284,0.390667,0.219989,0.258065,0.333333
5,subspecies,Ridge_Best,0.739667,0.033173,0.603954,0.739667,0.642958,0.645161,0.717949
6,subspecies,Ridge_PCA,0.390667,0.025768,0.153284,0.390667,0.219989,0.258065,0.333333
7,subspecies,Ridge_Best_PCA,0.739667,0.033173,0.603954,0.739667,0.642958,0.645161,0.717949
8,subspecies,DecisionTree,0.927333,0.068513,0.93617,0.927333,0.926372,1.0,1.0
9,subspecies,DecisionTree_Best,0.846333,0.104788,0.848908,0.846333,0.842684,0.967742,0.974359


Colonna:ST


Unnamed: 0,LogisticRegression,Ridge,DecisionTree,K-nn,RandomForest,BernoulliNB,GaussianNB,SVC,LinearSVC,Target
0,ST9,ST9,ST9,ST9,ST9,ST9,ST9,ST9,ST9,ST9
1,ST9,ST9,ST701,ST13,ST619,ST9,ST13,ST9,ST9,ST338
2,ST9,ST9,ST704,ST338,ST338,ST9,ST9,ST338,ST9,ST699
3,ST9,ST9,ST634,ST338,ST634,ST9,ST9,ST338,ST9,ST634
4,ST9,ST9,ST2,ST13,ST2,ST705,ST2,ST9,ST9,ST24
5,ST9,ST9,ST700,ST705,ST700,ST302,ST13,ST700,ST9,ST701
6,ST9,ST9,ST302,ST308,ST454,ST533,ST302,ST454,ST9,ST453
7,ST9,ST9,ST460,ST460,ST460,ST460,ST460,ST454,ST9,ST460
8,ST9,ST9,ST688,ST338,ST338,ST338,ST338,ST338,ST9,ST634
9,ST9,ST9,ST9,ST9,ST9,ST9,ST9,ST9,ST9,ST9


Model with tuning


Unnamed: 0,LogisticRegression,Ridge,DecisionTree,K-nn,RandomForest,BernoulliNB,GaussianNB,SVC,LinearSVC,Target
0,ST9,ST9,ST9,ST9,ST9,ST9,ST9,ST9,ST9,ST9
1,ST95,ST9,ST9,ST231,ST9,ST9,ST2,ST9,ST94,ST338
2,ST460,ST9,ST338,ST338,ST9,ST9,ST9,ST9,ST94,ST699
3,ST634,ST9,ST338,ST634,ST9,ST9,ST634,ST9,ST9,ST634
4,ST2,ST9,ST9,ST2,ST9,ST9,ST2,ST9,ST94,ST24
5,ST700,ST9,ST338,ST700,ST9,ST9,ST700,ST9,ST705,ST701
6,ST454,ST454,ST454,ST454,ST454,ST9,ST533,ST9,ST302,ST453
7,ST460,ST9,ST460,ST460,ST460,ST9,ST460,ST9,ST94,ST460
8,ST688,ST9,ST338,ST338,ST9,ST9,ST688,ST9,ST94,ST634
9,ST9,ST9,ST9,ST9,ST9,ST9,ST9,ST9,ST9,ST9






Unnamed: 0,Target,Model,Accuracy CV,St. Dev. CV,Precision CV,Recall CV,F1-Score CV,Accuracy,Bal. Accuracy
0,ST,LogisticRegression,0.122,0.002449,0.01489,0.122,0.02654,0.16129,0.052632
1,ST,LogisticRegression_Best,0.334333,0.05393,0.316528,0.334333,0.315695,0.419355,0.331579
2,ST,LogisticRegression_PCA,0.122,0.002449,0.01489,0.122,0.02654,0.16129,0.052632
3,ST,LogisticRegression_Best_PCA,0.293,0.061123,0.253317,0.293,0.262756,0.322581,0.278947
4,ST,Ridge,0.122,0.002449,0.01489,0.122,0.02654,0.16129,0.052632
5,ST,Ridge_Best,0.162333,0.023842,0.048111,0.162333,0.06294,0.16129,0.052632
6,ST,Ridge_PCA,0.122,0.002449,0.01489,0.122,0.02654,0.16129,0.052632
7,ST,Ridge_Best_PCA,0.162333,0.023842,0.048253,0.162333,0.063162,0.16129,0.052632
8,ST,DecisionTree,0.235667,0.046063,0.235428,0.235667,0.22044,0.354839,0.360526
9,ST,DecisionTree_Best,0.204,0.04815,0.117957,0.204,0.142137,0.225806,0.189474
