# ML (sklearn) with UGA Covid dataset

In [1]:
# Classification with various learners and various datasets

from Library.Import import *
from Library.Utilities import svm_classifier, MLP, XGB, GP 
from Library.Utilities import bayes_classifier, decision_tree_classifier
from Library.Utilities import LeaveXout, read_XY
from sklearn.metrics import r2_score, accuracy_score, f1_score, matthews_corrcoef

# ---------------------------------------------------------------------------
# Experiment set-up: where the data lives and how the evaluation is run.
# ---------------------------------------------------------------------------
DIRECTORY = './Dataset_input/Covid/UGA/'
dataset = 'ALL'  # ALL or ALL_selected_features or MEDIUM
filename = f'{DIRECTORY}{dataset}'

# Seed NumPy's global generator so repeated runs of this cell are comparable.
seed = 1
np.random.seed(seed)
niter = 5
selection = 3  # if > 0 remove feature
verbose = False

scoring_function = accuracy_score
feature, X, y = read_XY(filename, scaling='XY')
xfold = 20

# Evaluate each learner with the same data, scorer and settings.
for learner in (svm_classifier, XGB, MLP):

    # NOTE(review): feature[:-1] drops the last entry of `feature`;
    # presumably that entry names the target column -- confirm in read_XY.
    acc_avr, acc_dev, F = LeaveXout(
        X,
        y.ravel(),
        feature[:-1],
        learner=learner,
        scoring_function=scoring_function,
        xfold=xfold,
        niter=niter,
        selection=selection,
        verbose=verbose,
    )

    # One summary line per learner; the three pieces are emitted back to back.
    print(
        f'{dataset} Size: {len(F)} Method: {learner.__name__} Score: {scoring_function.__name__} ',
        f'Acc: {acc_avr:.3f}±{acc_dev:.3f} ',
        f'for {xfold}-fold-CV and {niter} iter',
        sep='',
    )

    if not selection:
        continue

    # Render F as text and strip the square brackets from it.
    # F is rebound to that string, as before.
    F = np.array2string(F).translate(str.maketrans('', '', '[]'))
    print(f'Selected features:\n{F}')


ALL Size: 13 Method: svm_classifier Score: accuracy_score Acc: 0.823±0.015 for 20-fold-CV and 5 iter
Selected features:
'HILIC-M195' 'C18-M89' 'C18-M318' 'C18-M326' 'C18-M74' 'C18-M417'
 'C18-M142' 'HILIC-M98' 'C18-M305' 'HILIC-M113' 'C18-M347' 'C18-M388'
 'HILIC-M91'
ALL Size: 8 Method: XGB Score: accuracy_score Acc: 0.860±0.020 for 20-fold-CV and 5 iter
Selected features:
'C18-M387' 'HILIC-M195' 'C18-M326' 'C18-M74' 'C18-M142' 'C18-M62'
 'HILIC-M185' 'HILIC-M91'
ALL Size: 11 Method: MLP Score: accuracy_score Acc: 0.844±0.023 for 20-fold-CV and 5 iter
Selected features:
'C18-M89' 'C18-M318' 'C18-M326' 'C18-M417' 'C18-M142' 'C18-M377'
 'C18-M286' 'C18-M382' 'C18-M47' 'C18-M388' 'HILIC-M91'


ALL Size: 13 Method: svm_classifier Score: accuracy_score Acc: 0.823±0.015 for 20-fold-CV and 5 iter
Selected features:
'HILIC-M195' 'C18-M89' 'C18-M318' 'C18-M326' 'C18-M74' 'C18-M417'
 'C18-M142' 'HILIC-M98' 'C18-M305' 'HILIC-M113' 'C18-M347' 'C18-M388'
 'HILIC-M91'
ALL Size: 8 Method: XGB Score: accuracy_score Acc: 0.860±0.020 for 20-fold-CV and 5 iter
Selected features:
'C18-M387' 'HILIC-M195' 'C18-M326' 'C18-M74' 'C18-M142' 'C18-M62'
 'HILIC-M185' 'HILIC-M91'
ALL Size: 11 Method: MLP Score: accuracy_score Acc: 0.844±0.023 for 20-fold-CV and 5 iter
Selected features:
'C18-M89' 'C18-M318' 'C18-M326' 'C18-M417' 'C18-M142' 'C18-M377'
 'C18-M286' 'C18-M382' 'C18-M47' 'C18-M388' 'HILIC-M91'
