In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import balanced_accuracy_score, accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, make_scorer
from sklearn.model_selection import train_test_split, cross_val_score, cross_validate, StratifiedKFold, RandomizedSearchCV
from sklearn.linear_model import LogisticRegression, RidgeClassifier, LassoCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier, NearestCentroid
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.base import clone
from sklearn.naive_bayes import BernoulliNB, GaussianNB

import warnings
    
# Global notebook options: ignore every warning raised by the libraries and
# print NumPy floats with 5 decimals and no scientific notation.
warnings.filterwarnings("ignore")
np.set_printoptions(precision=5, suppress=True)

# Seed shared by the seeded estimators / searches, and the parallelism setting
# handed to scikit-learn (-1 is passed through unchanged as n_jobs).
RANDOM_STATE, N_JOBS = 1, -1

# Short display names, listed in the same order as the integer codes below.
class_names = [
    "Canis",
    "Dysg. Equisimilis",
    "Dysg. Dysgalactiae",
]

# Full subspecies label -> integer class code.
map_target = {
    label: code
    for code, label in enumerate((
        "Streptococcus canis",
        "Streptococcus dysgalactiae subsp. equisimilis",
        "Streptococcus dysgalactiae subsp. dysgalactiae",
    ))
}

# Integer class code -> abbreviated label.
map_target_inv = dict(enumerate((
    "Strept. canis",
    "Strept. dysg. equisimilis",
    "Strept. dysg. dysgalactiae",
)))

# Antibiotic phenotype label -> binary code.
map_target_antibiotici = {"S": 1, "NS": 0}

# Column layout of the raw table: `start` is the positional index of the first
# MALDI column; the n_* values are the widths of the column groups that follow
# the MALDI block (antibiotics, resistance genes, virulence genes).
start = 9
n_antibiotici = 9
n_geni, n_virulenza = 27, 18

In [2]:
# Read the semicolon-separated strain table, indexed by strain ID.
# `n` selects the input file and is later used as the width of the MALDI column block.
n = 46
df = pd.read_csv(
    f"../data/Dati_Matemaldomics_{n}picchi.csv",
    sep=';',
    index_col='ID Strain',
)

In [3]:
# Encode the subspecies label as an integer class code (appended as the last column of df).
df['subspecies'] = df["Putative Subspecies"].map(map_target)

# Auxiliary descriptive columns and the ST column, selected by position.
feat_agg = df.iloc[:, [7, 8]]
display(feat_agg)
st = df.iloc[:, [4]]
display(st)
subspecies = df[['subspecies']]

# The raw table stores four contiguous column groups after `start`:
# MALDI peaks | antibiotic phenotypes | resistance genes | virulence genes.
maldi_end = start + n
antibiotici_end = maldi_end + n_antibiotici
geni_end = antibiotici_end + n_geni
virulenza_end = geni_end + n_virulenza

# Explicit copies: the cleaning below modifies these frames, and working on
# copies keeps `df` untouched so the cell can be re-run safely.
maldi = df.iloc[:, start:maldi_end].copy()
antibiotici = df.iloc[:, maldi_end:antibiotici_end].copy()
geni_antibiotici = df.iloc[:, antibiotici_end:geni_end].copy()
virulenza = df.iloc[:, geni_end:virulenza_end].copy()

# MALDI intensities: missing -> 0, decimal comma -> decimal point, then cast to float.
maldi = (
    maldi
    .fillna(0)
    .replace(',', '.', regex=True)
    .astype(float)
)
display(maldi)

targets = {'antibiotici': antibiotici,
           'geni_antibiotici': geni_antibiotici,
           'virulenza': virulenza}

for str_target, target in targets.items():
    # Snapshot the column names: columns are dropped while we iterate.
    for column in list(target.columns):
        if str_target == 'antibiotici':
            # Phenotypes are stored as "S"/"NS" labels; encode them as 1/0.
            target[column] = df[column].map(map_target_antibiotici)
        # Fraction of strains whose value is 0 for this column.
        rapporto = (target[column] == 0).sum() / target.shape[0]
        print(column+" : "+str(rapporto))
        # Discard columns where the 0 class covers less than 15% or more than 85% of the strains.
        if rapporto < 0.15 or rapporto > 0.85:
            # In place on purpose: the frame is shared between `targets` and
            # the module-level name (antibiotici / geni_antibiotici / virulenza).
            target.drop(columns=[column], inplace=True)

    display(target)

# Register the two remaining targets as their own groups. `subspecies` is a
# separate entry of `targets` (not an extra column of the last frame visited
# by the loop above) and is inserted before `st`, so targets are processed in
# the order shown by the recorded outputs (..., mf3, subspecies, ST).
targets['subspecies'] = subspecies
targets['st'] = st

Unnamed: 0_level_0,LANCEFIELD GROUP,Haemolysis
ID Strain,Unnamed: 1_level_1,Unnamed: 2_level_1
V13,G,b
V142,G,b
V151,G,b
V160,G,b
V161,G,b
...,...,...
V800,C,a
V82,G,b
V90,G,b
V91,G,b


Unnamed: 0_level_0,ST
ID Strain,Unnamed: 1_level_1
V13,ST13
V142,ST23
V151,ST95
V160,ST15
V161,ST9
...,...
V800,ST307
V82,ST9
V90,ST13
V91,ST9


Unnamed: 0_level_0,"2223,140967","2241,073989","2262,75751","2679,802856","2978,296408","3159,441237","3354,28405","3364,608472","3397,909861","3418,174965",...,"9030,351844","9073,208159","9487,183195","10103,20284","10400,80576","10491,16654","10930,54833","13276,73249","14943,03835","15048,89449"
ID Strain,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
V13,0.000665,0.000180,0.000198,0.000059,0.000865,0.000497,0.000197,0.000272,0.000578,0.000198,...,0.000206,0.000133,0.000587,0.000180,0.000065,0.000065,0.000172,0.000076,0.000058,0.000142
V142,0.000648,0.000156,0.000192,0.000400,0.000698,0.000414,0.000684,0.000349,0.000617,0.000494,...,0.000219,0.000205,0.000465,0.000193,0.000142,0.000116,0.000165,0.000067,0.000067,0.000194
V151,0.000545,0.000331,0.000204,0.000277,0.000613,0.000464,0.000279,0.001031,0.000624,0.000409,...,0.000261,0.000263,0.000585,0.000235,0.000198,0.000152,0.000208,0.000084,0.000092,0.000069
V160,0.000509,0.000191,0.000193,0.000266,0.000489,0.000358,0.000628,0.000692,0.000564,0.000611,...,0.000311,0.000312,0.000083,0.000241,0.000242,0.000187,0.000245,0.000082,0.000075,0.000136
V161,0.000517,0.000118,0.000158,0.000163,0.000610,0.000502,0.000621,0.000315,0.000507,0.000638,...,0.000206,0.000224,0.000711,0.000242,0.000156,0.000116,0.000154,0.000093,0.000072,0.000203
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
V800,0.000456,0.000253,0.000125,0.000061,0.000586,0.000365,0.000258,0.001095,0.000537,0.000214,...,0.000102,0.000195,0.000843,0.000250,0.000088,0.000109,0.000298,0.000060,0.000073,0.000059
V82,0.000459,0.000121,0.000147,0.000270,0.000571,0.000338,0.000596,0.000321,0.000537,0.000668,...,0.000263,0.000277,0.000452,0.000254,0.000211,0.000162,0.000203,0.000059,0.000070,0.000144
V90,0.000505,0.000154,0.000158,0.000181,0.000610,0.000473,0.000374,0.000525,0.000513,0.000377,...,0.000288,0.000310,0.000671,0.000237,0.000224,0.000185,0.000230,0.000089,0.000080,0.000137
V91,0.000520,0.000124,0.000169,0.000174,0.000597,0.000414,0.000572,0.000310,0.000532,0.000646,...,0.000237,0.000259,0.000514,0.000237,0.000202,0.000150,0.000175,0.000092,0.000078,0.000214


Eritromicina : 0.461038961038961
Ceftiofur : 0.0
Tetraciclina : 0.5194805194805194
Gentamicina : 0.6233766233766234
Penicillina : 0.0
Ampicillina : 0.0
Sulfametossazolo_trimethoprim : 0.01948051948051948
Clindamicina : 0.2662337662337662
Enrofloxacin : 0.6688311688311688


Unnamed: 0_level_0,Eritromicina,Tetraciclina,Gentamicina,Clindamicina,Enrofloxacin
ID Strain,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
V13,0,0,0,1,0
V142,0,1,1,1,0
V151,1,1,0,1,0
V160,1,0,0,1,0
V161,1,1,0,1,0
...,...,...,...,...,...
V800,1,0,1,1,0
V82,1,1,0,1,1
V90,1,0,0,1,0
V91,1,1,0,1,0


aad(6) : 0.935064935064935
ANT(6)-Ia : 0.8246753246753247
APH(2'')-IIIa : 1.0
APH(3')-IIIa : 0.9025974025974026
catS : 0.9675324675324676
dfrF : 0.9805194805194806
E. faecalis chloramphenicol acetyltransferase : 0.9935064935064936
Erm(47) : 0.987012987012987
ErmB : 0.8181818181818182
fexA : 0.9935064935064936
L._reuteri cat-TC : 1.0
lmrP : 0.006493506493506494
lnuC : 0.987012987012987
lnuD : 0.9935064935064936
lsaC : 0.961038961038961
lsaE : 0.7857142857142857
mefE : 0.8506493506493507
optrA : 0.9935064935064936
poxtA : 0.9935064935064936
SAT-4 : 0.922077922077922
tet(40) : 0.987012987012987
tet(L) : 0.9935064935064936
tetM : 0.8181818181818182
tetO : 0.7402597402597403
tetS : 0.9805194805194806
tetT : 0.974025974025974
vatE : 0.9935064935064936


Unnamed: 0_level_0,ANT(6)-Ia,ErmB,lsaE,tetM,tetO
ID Strain,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
V13,0,0,0,0,0
V142,0,0,0,0,0
V151,0,0,0,0,0
V160,0,0,0,0,0
V161,0,0,0,0,0
...,...,...,...,...,...
V800,0,0,0,1,0
V82,0,0,0,0,0
V90,0,0,0,0,0
V91,0,0,0,0,0


fbp54 : 0.0
gbs0630 : 0.9935064935064936
gbs0631 : 0.9935064935064936
gbs0632 : 0.9935064935064936
hasC : 0.0
lmb : 0.9935064935064936
mf2 : 0.961038961038961
mf3 : 0.6753246753246753
scpA : 0.9935064935064936
sda : 0.8766233766233766
ska : 0.9935064935064936
slo : 0.9935064935064936
smeZ : 0.9935064935064936
spec : 0.974025974025974
speg : 0.9090909090909091
spek : 0.961038961038961
spel : 0.974025974025974
spem : 0.948051948051948


Unnamed: 0_level_0,mf3
ID Strain,Unnamed: 1_level_1
V13,0
V142,1
V151,0
V160,0
V161,1
...,...
V800,0
V82,1
V90,0
V91,1


In [4]:
# Standardise both splits with a scaler fitted on the training split only
def standard_scaler(X_train, X_test):
    """Fit a ``StandardScaler`` on ``X_train`` and apply it to both splits.

    Parameters
    ----------
    X_train : array-like
        Training features; the scaler is fitted on these.
    X_test : array-like
        Held-out features; transformed with the fitted scaler, never used for fitting.

    Returns
    -------
    tuple
        ``(X_train_scaled, X_test_scaled)`` as returned by the scaler's ``transform``.
    """
    fitted = StandardScaler().fit(X_train)
    return fitted.transform(X_train), fitted.transform(X_test)

# Define a function for dimensionality reduction using PCA
def dimensionality_reduction(X_train, X_test, n_components):
    """Project both splits onto principal components fitted on the training split.

    Parameters
    ----------
    X_train : DataFrame or array-like
        Training features; the PCA is fitted on these.
    X_test : DataFrame or array-like
        Held-out features; projected with the fitted PCA, never used for fitting.
    n_components :
        Forwarded unchanged to ``PCA(n_components=...)``.

    Returns
    -------
    tuple of DataFrame
        ``(X_train_pca, X_test_pca)`` with integer component columns. When an
        input carries a pandas index, the matching output keeps it so rows stay
        label-aligned with the original samples (and with ``y``); otherwise a
        default RangeIndex is used.
    """
    pca = PCA(n_components=n_components)
    X_train_pca = pd.DataFrame(pca.fit_transform(X_train),
                               index=getattr(X_train, "index", None))
    X_test_pca = pd.DataFrame(pca.transform(X_test),
                              index=getattr(X_test, "index", None))
    # Report how many samples / components were retained.
    print(X_train_pca.shape)
    return X_train_pca, X_test_pca

def makeScoreMeanWithoutNaN(metrics, verbose=True):
    """Replace every score collection in ``metrics`` with the mean of its non-NaN values.

    Parameters
    ----------
    metrics : dict
        Maps a metric name to an array-like of scores (NumPy array, list, tuple
        or scalar). The dict is updated in place.
    verbose : bool, default True
        When True, print the name, raw scores, NaN-filtered scores and mean for
        each metric, then the final dict.

    Returns
    -------
    dict
        The same ``metrics`` object, each value replaced by a scalar mean.
        A metric whose scores are all NaN (or empty) maps to NaN.
    """
    for name in metrics:
        # asarray lets plain lists/tuples be filtered with a boolean mask.
        scores = np.asarray(metrics[name], dtype=float)
        valid = scores[~np.isnan(scores)]
        # Averaging an empty selection is undefined; report NaN explicitly.
        mean_score = np.mean(valid) if valid.size else np.nan
        if verbose:
            print(name)
            print(scores)
            print(valid)
            print(mean_score)
        metrics[name] = mean_score
    if verbose:
        print(metrics)
    return metrics

In [5]:
# Candidate estimators, keyed by the display name used in the result tables.
# The same keys are used to look up each estimator's search space in `param_grid`.
# Estimators that accept a seed are created with RANDOM_STATE.
models = {'LogisticRegression': LogisticRegression(random_state=RANDOM_STATE),
          'Ridge' : RidgeClassifier(random_state=RANDOM_STATE),
          'DecisionTree': DecisionTreeClassifier(random_state=RANDOM_STATE),
          'K-nn': KNeighborsClassifier(),
          'RandomForest': RandomForestClassifier(random_state=RANDOM_STATE),
          'BernoulliNB': BernoulliNB(),
          'GaussianNB': GaussianNB(),
          # NearestCentroid is currently disabled (its search space is still defined below):
          #'NearestCentroid': NearestCentroid()
          }

# Default class list; here it only sizes the BernoulliNB `class_prior` candidate.
# The training loop recomputes it (and that candidate) for every target column.
n_classes = [0,1]
# Hyperparameter search spaces for RandomizedSearchCV, one dict per estimator name.
# Entries for NearestCentroid, SVC, LabelPropagation, LabelSpreading, SGDClassifier
# and LinearSVC have no matching key in `models` as defined in this cell.
# NOTE(review): not every penalty/solver combination listed for LogisticRegression
# is expected to be valid in scikit-learn — confirm how failed fits are scored.
# NOTE(review): 'none' (penalty), 'auto' (max_features) and 'log' (SGD loss) look
# like values accepted only by older scikit-learn releases — confirm against the
# installed version.
param_grid = {'LogisticRegression': {'C': np.logspace(-4, 4, 25), 
                                    'penalty': ['l1', 'l2', 'elasticnet', 'none'],
                                    'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'],
                                    'fit_intercept': [True, False],
                                    'intercept_scaling': [0.5, 1, 2],
                                    'class_weight': [None, 'balanced']
                                    },
              'Ridge' : {'alpha': np.logspace(-5, 5, 75)},
              'DecisionTree': {'ccp_alpha': [0.0] + list(np.logspace(-3, 1, 25)),
                                'class_weight': [None, 'balanced'],
                                'criterion': ['gini', 'entropy', 'log_loss'],
                                'max_depth': [None] + list(range(1, 20)),
                                'max_features': [None, 'auto', 'sqrt', 'log2'],
                                'min_samples_leaf': range(1, 10),
                                'min_samples_split': range(2, 10),
                                'splitter': ['best', 'random']
                                },
              'K-nn': {'n_neighbors': list(range(1, 20, 1)),
                        'weights': ['uniform', 'distance'],
                        'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
                        'p': [1,2]
                        },
              'RandomForest': {'ccp_alpha': [0] + list(np.logspace(-3, 1, 25)),
                                'class_weight': [None, 'balanced'],
                                'n_estimators': range(50,500,50),
                                'max_features': [None, 'auto', 'sqrt', 'log2'],
                                'max_depth' : [None,4,6,8,10],
                                'criterion' :['gini', 'entropy']
                                },
              # The class_prior candidate is a list of 0.1 repeated once per class.
              # NOTE(review): these values do not sum to 1 for 2 or 3 classes — confirm this is intended.
              'BernoulliNB': {'alpha': np.logspace(-2, 1, 10),
                            'fit_prior': [True, False],
                            'class_prior': [None, [0.1,]* len(n_classes)],
                            'binarize': [None, -5, -2, 0.0, 2, 5, 10.0]
                            },
              'GaussianNB': {'var_smoothing': np.logspace(0,-9, num=20)
                             },
              'NearestCentroid': {'shrink_threshold': np.logspace(0, 1, 20),
                                'metric': ['euclidean', 'manhattan']
                                },
              'SVC': {'C': np.logspace(-4, 4, 25),
                      'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
                      'degree': range(2,5),
                      'gamma': np.logspace(-3, 1, 25)},
              'LabelPropagation': {'n_neighbors': [7, 21, 41, 81, 121, 181, 241],
                                   'gamma': [0.1, 1, 5, 10, 20, 30, 50]},
              'LabelSpreading': {'n_neighbors': [7, 21, 41, 81, 121, 181, 241],
                                'gamma': [0.1, 1, 5, 10, 20, 30, 50],
                                'alpha': [0.15, 0.2, 0.35, 0.55, 0.75, 0.95]},
              # The class_weight dicts below are keyed by the labels 0 and 1 only.
              'SGDClassifier': {'loss' : ['hinge', 'log', 'modified_huber', 'squared_hinge', 'perceptron'],
                            'penalty' : ['l1', 'l2', 'elasticnet'],
                            'alpha' : np.logspace(-4, 4, 25),
                            'learning_rate' : ['constant', 'optimal', 'invscaling', 'adaptive'],
                            'class_weight' : [{1:0.5, 0:0.5}, {1:0.4, 0:0.6}, {1:0.6, 0:0.4}, {1:0.7, 0:0.3}],
                            'eta0' : [1, 10, 100]},
              'LinearSVC': {'penalty': ['l1', 'l2'],
                            'loss': ['hinge', 'squared_hinge'],
                            'class_weight': [None, 'balanced']}
              }

In [6]:
# Evaluate every model on every target column, on the raw and the PCA-reduced
# MALDI features, before and after hyperparameter tuning.
# Author's note (translated): all metrics are computed in cross-validation
# together with their standard deviation; those are the figures used in the thesis.
RESULT_COLUMNS = ['Target', 'Model', 'Accuracy CV', 'St. Dev. CV',
                  'Precision CV', 'Recall CV', 'F1-Score CV', 'Accuracy']

# Per-target result tables, keyed by target column name.
score_target = {}
# Empty frame with the result schema; the actual results live in `score_target`.
metrics_df = pd.DataFrame(columns=RESULT_COLUMNS)

scori = ['accuracy', 'recall_weighted', 'precision_weighted', 'f1_weighted']
skfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
X = maldi


def _evaluate_model(estimator, X_fit, y_fit, X_holdout, y_holdout, scoring, cv):
    """Cross-validate ``estimator`` on the training split, then score it on the hold-out split.

    The estimator is fitted in place on ``(X_fit, y_fit)`` after cross-validation.

    Returns
    -------
    dict
        Mean CV accuracy, its standard deviation across folds, mean weighted
        precision / recall / F1 in CV, and the hold-out accuracy, keyed by the
        corresponding result-table column names.
    """
    cv_scores = cross_validate(estimator=estimator, X=X_fit, y=y_fit,
                               scoring=scoring, cv=cv, n_jobs=N_JOBS, verbose=0)
    estimator.fit(X_fit, y_fit)
    holdout_accuracy = accuracy_score(y_holdout, estimator.predict(X_holdout))
    return {'Accuracy CV': cv_scores['test_accuracy'].mean(),
            'St. Dev. CV': cv_scores['test_accuracy'].std(),
            'Precision CV': cv_scores['test_precision_weighted'].mean(),
            'Recall CV': cv_scores['test_recall_weighted'].mean(),
            'F1-Score CV': cv_scores['test_f1_weighted'].mean(),
            'Accuracy': holdout_accuracy}


for str_target, target in targets.items():
    for column in target.columns:
        y = target[column]
        # The BernoulliNB class_prior candidate must have one entry per class of this target.
        n_classes = np.unique(y)
        param_grid['BernoulliNB']['class_prior'] = [None, [0.1,] * len(n_classes)]

        # Split the data into training and testing sets.
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
        X_pca_train, X_pca_test = dimensionality_reduction(X_train, X_test, n_components=0.95)
        # Suffix appended to the model name -> (training features, hold-out features).
        dataframes = {'': (X_train, X_test),
                      '_PCA': (X_pca_train, X_pca_test)}

        # Collect plain records and build the table once per target.
        rows = []
        for name, model in models.items():
            for pca, (X_fit, X_holdout) in dataframes.items():
                # Baseline: default hyperparameters. Work on a clone so the
                # shared estimator stored in `models` is never fitted.
                baseline = _evaluate_model(clone(model), X_fit, y_train,
                                           X_holdout, y_test, scori, skfold)
                rows.append({'Target': column, 'Model': name + pca, **baseline})

                # Hyperparameter tuning; the best configuration is chosen by CV accuracy.
                rs = RandomizedSearchCV(estimator=clone(model),
                                        param_distributions=param_grid[name],
                                        scoring=scori, refit="accuracy", cv=skfold,
                                        n_jobs=N_JOBS, random_state=RANDOM_STATE, verbose=0)
                rs.fit(X_fit, y_train)

                # Tuned model, evaluated exactly like the baseline.
                tuned = _evaluate_model(rs.best_estimator_, X_fit, y_train,
                                        X_holdout, y_test, scori, skfold)
                rows.append({'Target': column, 'Model': name + '_Best' + pca, **tuned})

        print('\n')
        score_target[column] = pd.DataFrame(rows, columns=RESULT_COLUMNS)
        display(score_target[column])

(123, 12)




Unnamed: 0,Target,Model,Accuracy CV,St. Dev. CV,Precision CV,Recall CV,F1-Score CV,Accuracy
0,Eritromicina,LogisticRegression,0.512,0.009798,0.26224,0.512,0.346807,0.645161
1,Eritromicina,LogisticRegression_Best,0.585333,0.05891,0.591633,0.585333,0.573894,0.677419
2,Eritromicina,LogisticRegression_PCA,0.512,0.009798,0.26224,0.512,0.346807,0.645161
3,Eritromicina,LogisticRegression_Best_PCA,0.601333,0.065755,0.609313,0.601333,0.588431,0.677419
4,Eritromicina,Ridge,0.512,0.009798,0.26224,0.512,0.346807,0.645161
5,Eritromicina,Ridge_Best,0.544,0.085548,0.551427,0.544,0.512923,0.645161
6,Eritromicina,Ridge_PCA,0.512,0.009798,0.26224,0.512,0.346807,0.645161
7,Eritromicina,Ridge_Best_PCA,0.527667,0.078878,0.535263,0.527667,0.493348,0.677419
8,Eritromicina,DecisionTree,0.568333,0.070616,0.567603,0.568333,0.565447,0.677419
9,Eritromicina,DecisionTree_Best,0.552,0.0449,0.552359,0.552,0.549742,0.580645


(123, 12)




Unnamed: 0,Target,Model,Accuracy CV,St. Dev. CV,Precision CV,Recall CV,F1-Score CV,Accuracy
0,Tetraciclina,LogisticRegression,0.512,0.009798,0.26224,0.512,0.346807,0.354839
1,Tetraciclina,LogisticRegression_Best,0.657,0.093205,0.65819,0.657,0.656712,0.645161
2,Tetraciclina,LogisticRegression_PCA,0.512,0.009798,0.26224,0.512,0.346807,0.354839
3,Tetraciclina,LogisticRegression_Best_PCA,0.641333,0.054195,0.649498,0.641333,0.635592,0.741935
4,Tetraciclina,Ridge,0.512,0.009798,0.26224,0.512,0.346807,0.354839
5,Tetraciclina,Ridge_Best,0.649333,0.066312,0.676567,0.649333,0.630442,0.548387
6,Tetraciclina,Ridge_PCA,0.512,0.009798,0.26224,0.512,0.346807,0.354839
7,Tetraciclina,Ridge_Best_PCA,0.641,0.075994,0.659199,0.641,0.624002,0.548387
8,Tetraciclina,DecisionTree,0.658667,0.065679,0.666379,0.658667,0.653976,0.580645
9,Tetraciclina,DecisionTree_Best,0.61,0.038528,0.62308,0.61,0.602571,0.548387


(123, 12)




Unnamed: 0,Target,Model,Accuracy CV,St. Dev. CV,Precision CV,Recall CV,F1-Score CV,Accuracy
0,Gentamicina,LogisticRegression,0.642333,0.013482,0.412774,0.642333,0.502528,0.548387
1,Gentamicina,LogisticRegression_Best,0.725,0.096598,0.723419,0.725,0.715936,0.548387
2,Gentamicina,LogisticRegression_PCA,0.642333,0.013482,0.412774,0.642333,0.502528,0.548387
3,Gentamicina,LogisticRegression_Best_PCA,0.700333,0.070572,0.69514,0.700333,0.694135,0.580645
4,Gentamicina,Ridge,0.642333,0.013482,0.412774,0.642333,0.502528,0.548387
5,Gentamicina,Ridge_Best,0.642333,0.013482,0.412774,0.642333,0.502528,0.548387
6,Gentamicina,Ridge_PCA,0.642333,0.013482,0.412774,0.642333,0.502528,0.548387
7,Gentamicina,Ridge_Best_PCA,0.642333,0.013482,0.412774,0.642333,0.502528,0.548387
8,Gentamicina,DecisionTree,0.553667,0.076306,0.548213,0.553667,0.545336,0.677419
9,Gentamicina,DecisionTree_Best,0.666,0.071931,0.659126,0.666,0.660952,0.612903


(123, 12)




Unnamed: 0,Target,Model,Accuracy CV,St. Dev. CV,Precision CV,Recall CV,F1-Score CV,Accuracy
0,Clindamicina,LogisticRegression,0.748,0.014697,0.55972,0.748,0.640246,0.677419
1,Clindamicina,LogisticRegression_Best,0.861333,0.034098,0.861394,0.861333,0.856164,0.709677
2,Clindamicina,LogisticRegression_PCA,0.748,0.014697,0.55972,0.748,0.640246,0.677419
3,Clindamicina,LogisticRegression_Best_PCA,0.878,0.025417,0.878135,0.878,0.871653,0.709677
4,Clindamicina,Ridge,0.748,0.014697,0.55972,0.748,0.640246,0.677419
5,Clindamicina,Ridge_Best,0.748,0.014697,0.55972,0.748,0.640246,0.677419
6,Clindamicina,Ridge_PCA,0.748,0.014697,0.55972,0.748,0.640246,0.677419
7,Clindamicina,Ridge_Best_PCA,0.748,0.014697,0.55972,0.748,0.640246,0.677419
8,Clindamicina,DecisionTree,0.788333,0.031903,0.806341,0.788333,0.793727,0.709677
9,Clindamicina,DecisionTree_Best,0.822333,0.063154,0.819904,0.822333,0.812433,0.548387


(123, 12)




Unnamed: 0,Target,Model,Accuracy CV,St. Dev. CV,Precision CV,Recall CV,F1-Score CV,Accuracy
0,Enrofloxacin,LogisticRegression,0.674667,0.006532,0.455218,0.674667,0.543619,0.645161
1,Enrofloxacin,LogisticRegression_Best,0.691,0.019877,0.679472,0.691,0.624831,0.677419
2,Enrofloxacin,LogisticRegression_PCA,0.674667,0.006532,0.455218,0.674667,0.543619,0.645161
3,Enrofloxacin,LogisticRegression_Best_PCA,0.699333,0.039336,0.701265,0.699333,0.676408,0.677419
4,Enrofloxacin,Ridge,0.674667,0.006532,0.455218,0.674667,0.543619,0.645161
5,Enrofloxacin,Ridge_Best,0.674667,0.006532,0.455218,0.674667,0.543619,0.645161
6,Enrofloxacin,Ridge_PCA,0.674667,0.006532,0.455218,0.674667,0.543619,0.645161
7,Enrofloxacin,Ridge_Best_PCA,0.674667,0.006532,0.455218,0.674667,0.543619,0.645161
8,Enrofloxacin,DecisionTree,0.592333,0.074904,0.597304,0.592333,0.584457,0.612903
9,Enrofloxacin,DecisionTree_Best,0.748,0.029257,0.767296,0.748,0.711137,0.645161


(123, 12)




Unnamed: 0,Target,Model,Accuracy CV,St. Dev. CV,Precision CV,Recall CV,F1-Score CV,Accuracy
0,ANT(6)-Ia,LogisticRegression,0.829333,0.014967,0.688018,0.829333,0.752035,0.806452
1,ANT(6)-Ia,LogisticRegression_Best,0.886333,0.046409,0.91145,0.886333,0.892391,0.774194
2,ANT(6)-Ia,LogisticRegression_PCA,0.829333,0.014967,0.688018,0.829333,0.752035,0.806452
3,ANT(6)-Ia,LogisticRegression_Best_PCA,0.886333,0.046409,0.91145,0.886333,0.892391,0.774194
4,ANT(6)-Ia,Ridge,0.829333,0.014967,0.688018,0.829333,0.752035,0.806452
5,ANT(6)-Ia,Ridge_Best,0.829333,0.014967,0.688018,0.829333,0.752035,0.806452
6,ANT(6)-Ia,Ridge_PCA,0.829333,0.014967,0.688018,0.829333,0.752035,0.806452
7,ANT(6)-Ia,Ridge_Best_PCA,0.829333,0.014967,0.688018,0.829333,0.752035,0.806452
8,ANT(6)-Ia,DecisionTree,0.838333,0.054934,0.857362,0.838333,0.839553,0.677419
9,ANT(6)-Ia,DecisionTree_Best,0.853667,0.049153,0.855793,0.853667,0.853318,0.709677


(123, 12)




Unnamed: 0,Target,Model,Accuracy CV,St. Dev. CV,Precision CV,Recall CV,F1-Score CV,Accuracy
0,ErmB,LogisticRegression,0.813333,0.01633,0.661778,0.813333,0.729697,0.83871
1,ErmB,LogisticRegression_Best,0.845667,0.029356,0.820657,0.845667,0.818704,0.806452
2,ErmB,LogisticRegression_PCA,0.813333,0.01633,0.661778,0.813333,0.729697,0.83871
3,ErmB,LogisticRegression_Best_PCA,0.838,0.042667,0.857279,0.838,0.844295,0.774194
4,ErmB,Ridge,0.813333,0.01633,0.661778,0.813333,0.729697,0.83871
5,ErmB,Ridge_Best,0.813333,0.01633,0.661778,0.813333,0.729697,0.83871
6,ErmB,Ridge_PCA,0.813333,0.01633,0.661778,0.813333,0.729697,0.83871
7,ErmB,Ridge_Best_PCA,0.813333,0.01633,0.661778,0.813333,0.729697,0.83871
8,ErmB,DecisionTree,0.732,0.05891,0.723304,0.732,0.724467,0.709677
9,ErmB,DecisionTree_Best,0.838,0.042667,0.797447,0.838,0.799072,0.741935


(123, 12)




Unnamed: 0,Target,Model,Accuracy CV,St. Dev. CV,Precision CV,Recall CV,F1-Score CV,Accuracy
0,lsaE,LogisticRegression,0.796667,0.004082,0.634694,0.796667,0.706512,0.741935
1,lsaE,LogisticRegression_Best,0.894333,0.042158,0.903894,0.894333,0.877488,0.709677
2,lsaE,LogisticRegression_PCA,0.796667,0.004082,0.634694,0.796667,0.706512,0.741935
3,lsaE,LogisticRegression_Best_PCA,0.902333,0.055534,0.910748,0.902333,0.901863,0.741935
4,lsaE,Ridge,0.796667,0.004082,0.634694,0.796667,0.706512,0.741935
5,lsaE,Ridge_Best,0.796667,0.004082,0.634694,0.796667,0.706512,0.741935
6,lsaE,Ridge_PCA,0.796667,0.004082,0.634694,0.796667,0.706512,0.741935
7,lsaE,Ridge_Best_PCA,0.796667,0.004082,0.634694,0.796667,0.706512,0.741935
8,lsaE,DecisionTree,0.852667,0.073971,0.866379,0.852667,0.853953,0.612903
9,lsaE,DecisionTree_Best,0.902333,0.055534,0.903131,0.902333,0.90163,0.741935


(123, 12)




Unnamed: 0,Target,Model,Accuracy CV,St. Dev. CV,Precision CV,Recall CV,F1-Score CV,Accuracy
0,tetM,LogisticRegression,0.845667,0.014892,0.715374,0.845667,0.775022,0.709677
1,tetM,LogisticRegression_Best,0.821667,0.053914,0.827968,0.821667,0.8226,0.548387
2,tetM,LogisticRegression_PCA,0.845667,0.014892,0.715374,0.845667,0.775022,0.709677
3,tetM,LogisticRegression_Best_PCA,0.862,0.06621,0.850954,0.862,0.850724,0.645161
4,tetM,Ridge,0.845667,0.014892,0.715374,0.845667,0.775022,0.709677
5,tetM,Ridge_Best,0.845667,0.014892,0.715374,0.845667,0.775022,0.709677
6,tetM,Ridge_PCA,0.845667,0.014892,0.715374,0.845667,0.775022,0.709677
7,tetM,Ridge_Best_PCA,0.845667,0.014892,0.715374,0.845667,0.775022,0.709677
8,tetM,DecisionTree,0.804,0.044091,0.782786,0.804,0.789785,0.677419
9,tetM,DecisionTree_Best,0.854,0.059569,0.844901,0.854,0.843144,0.741935


(123, 12)




Unnamed: 0,Target,Model,Accuracy CV,St. Dev. CV,Precision CV,Recall CV,F1-Score CV,Accuracy
0,tetO,LogisticRegression,0.732,0.014697,0.53604,0.732,0.618817,0.774194
1,tetO,LogisticRegression_Best,0.804667,0.040075,0.835831,0.804667,0.765402,0.741935
2,tetO,LogisticRegression_PCA,0.732,0.014697,0.53604,0.732,0.618817,0.774194
3,tetO,LogisticRegression_Best_PCA,0.764667,0.057091,0.757881,0.764667,0.752804,0.741935
4,tetO,Ridge,0.732,0.014697,0.53604,0.732,0.618817,0.774194
5,tetO,Ridge_Best,0.732,0.014697,0.53604,0.732,0.618817,0.774194
6,tetO,Ridge_PCA,0.732,0.014697,0.53604,0.732,0.618817,0.774194
7,tetO,Ridge_Best_PCA,0.732,0.014697,0.53604,0.732,0.618817,0.774194
8,tetO,DecisionTree,0.772333,0.0543,0.773264,0.772333,0.770198,0.645161
9,tetO,DecisionTree_Best,0.796,0.054392,0.818613,0.796,0.776561,0.741935


(123, 12)




Unnamed: 0,Target,Model,Accuracy CV,St. Dev. CV,Precision CV,Recall CV,F1-Score CV,Accuracy
0,mf3,LogisticRegression,0.666667,0.014606,0.444658,0.666667,0.533426,0.709677
1,mf3,LogisticRegression_Best,0.764667,0.088282,0.76503,0.764667,0.752295,0.83871
2,mf3,LogisticRegression_PCA,0.666667,0.014606,0.444658,0.666667,0.533426,0.709677
3,mf3,LogisticRegression_Best_PCA,0.756667,0.118523,0.743488,0.756667,0.745725,0.83871
4,mf3,Ridge,0.666667,0.014606,0.444658,0.666667,0.533426,0.709677
5,mf3,Ridge_Best,0.666667,0.014606,0.444658,0.666667,0.533426,0.709677
6,mf3,Ridge_PCA,0.666667,0.014606,0.444658,0.666667,0.533426,0.709677
7,mf3,Ridge_Best_PCA,0.666667,0.014606,0.444658,0.666667,0.533426,0.709677
8,mf3,DecisionTree,0.682333,0.083876,0.697574,0.682333,0.683707,0.709677
9,mf3,DecisionTree_Best,0.697667,0.122373,0.728784,0.697667,0.68487,0.580645


(123, 12)




Unnamed: 0,Target,Model,Accuracy CV,St. Dev. CV,Precision CV,Recall CV,F1-Score CV,Accuracy
0,subspecies,LogisticRegression,0.539333,0.180815,0.333741,0.539333,0.402574,0.322581
1,subspecies,LogisticRegression_Best,0.967333,0.01635,0.971194,0.967333,0.967367,1.0
2,subspecies,LogisticRegression_PCA,0.539333,0.180815,0.333741,0.539333,0.402574,0.322581
3,subspecies,LogisticRegression_Best_PCA,0.951333,0.015684,0.958394,0.951333,0.951113,1.0
4,subspecies,Ridge,0.539333,0.180815,0.333741,0.539333,0.402574,0.322581
5,subspecies,Ridge_Best,0.822,0.068162,0.864683,0.822,0.79426,0.903226
6,subspecies,Ridge_PCA,0.539333,0.180815,0.333741,0.539333,0.402574,0.322581
7,subspecies,Ridge_Best_PCA,0.822,0.068162,0.864683,0.822,0.79426,0.903226
8,subspecies,DecisionTree,0.934333,0.042447,0.945598,0.934333,0.934198,0.967742
9,subspecies,DecisionTree_Best,0.918,0.052211,0.929159,0.918,0.918666,0.967742


(123, 12)




Unnamed: 0,Target,Model,Accuracy CV,St. Dev. CV,Precision CV,Recall CV,F1-Score CV,Accuracy
0,ST,LogisticRegression,0.13,0.015166,0.01713,0.13,0.030224,0.129032
1,ST,LogisticRegression_Best,0.283667,0.064674,0.278111,0.283667,0.273128,0.516129
2,ST,LogisticRegression_PCA,0.13,0.015166,0.01713,0.13,0.030224,0.129032
3,ST,LogisticRegression_Best_PCA,0.323333,0.106916,0.281306,0.323333,0.284383,0.387097
4,ST,Ridge,0.13,0.015166,0.01713,0.13,0.030224,0.129032
5,ST,Ridge_Best,0.146333,0.019675,0.036926,0.146333,0.04626,0.129032
6,ST,Ridge_PCA,0.13,0.015166,0.01713,0.13,0.030224,0.129032
7,ST,Ridge_Best_PCA,0.146333,0.019675,0.036926,0.146333,0.04626,0.129032
8,ST,DecisionTree,0.17,0.080774,0.171917,0.17,0.162898,0.258065
9,ST,DecisionTree_Best,0.22,0.043982,0.100683,0.22,0.130903,0.16129
