# Reservoir Computing for Covid dataset

## Conventional reservoir: predict patient condition using provided patient MS data

In [None]:
# Running a Cobra reservoir in predictive mode with a test set
# Cobra is directly run with KOs instead of an AMN (no training set required)
# Results with no feature selection: 
# KO trpA Method: bayes_classifier Acc: 0.819±0.025 for 10-fold-CV and 3 iter
# Results with selection:
# KO trpA Phenotype Method: bayes_classifier Acc: 0.955±0.015 for 10-fold-CV and 3 iter

import cobra
from Library.Import import *
from Library.Utilities import bayes_classifier, LeaveXout, read_XY
from Library.Utilities import best_accuracy_threshold
from Library.Build_Dataset import TrainingSet, create_medium_run_cobra
from sklearn.metrics import r2_score, accuracy_score, f1_score, matthews_corrcoef

# Input folder for the Covid experiment and a fixed seed for NumPy's global
# random generator.
DIRECTORY = './Dataset_input/Covid/'
seed = 1
np.random.seed(seed)

# File stems derived from the experiment name: the Cobra model, the medium
# description and the file the parameter set is saved to.
name = 'Covid'
cobrafile = f'{DIRECTORY}{name}_duplicated'
mediumfile = f'{DIRECTORY}{name}'
trainingfile = f'{DIRECTORY}{name}_train'

# Build the parameter set in FBA mode and save it immediately; the KO loop
# further down reloads this file at the start of every iteration.
parameter = TrainingSet(
    cobraname=cobrafile,
    mediumname=mediumfile,
    method='FBA',
)
parameter.save(trainingfile)


# Patient MS data used as the test set.
# NOTE(review): nY=1 presumably means "one target column" and scaling='X'
# presumably rescales the X block only - confirm in Library.Utilities.read_XY.
testfile = f'{DIRECTORY}UGA/MEDIUM'
feature, X_test, y_test_true = read_XY(testfile, nY=1, scaling='X')

# Cross-validation settings handed to LeaveXout in the KO loop.
xfold, niter = 10, 3
# Classifier applied to the full phenotype vector.
learner = bayes_classifier
# Feature-reduction setting for Leave X out; 0 means no selection.
selection = 0
scoring_function = accuracy_score
verbose = False

# Knock-out panel: one 'WT  ' entry followed by the name of every gene in the
# model.
# NOTE(review): 'WT  ' presumably matches no gene, so that run behaves as the
# wild type - confirm in create_medium_run_cobra.
KOs = ['WT  '] + [gene.name for gene in parameter.model.genes]
for ko in KOs:
    # Reload the saved parameter set at the start of every iteration.
    parameter.load(trainingfile)

    # Run Cobra (FBA) on the patient data with this single entry as the KO.
    y_pred, X = create_medium_run_cobra(parameter.model,
                                        parameter.objective,
                                        parameter.medium,
                                        X_test, method='FBA', scaler=1,
                                        genekos=[ko], verbose=verbose)

    # Label the columns with reaction ids, zero every value below 1e-3
    # (negative values included) and drop the columns that are zero for all
    # rows, keeping the labels aligned with the remaining columns.
    # NOTE(review): assumes X has one column per model reaction, in the order
    # of parameter.model.reactions - confirm.
    feature = [r.id for r in parameter.model.reactions]
    X[X < 1e-3] = 0
    zero_columns = np.argwhere(np.all(X == 0, axis=0)).flatten()
    X = np.delete(X, zero_columns, axis=1)
    feature = np.delete(feature, zero_columns, axis=0)

    # Values reported when no column survives the filter above.
    acc_avr, acc_dev = 0.5, 0
    if X.shape[1]:
        # Score based on phenotype (X vector)
        acc_avr, acc_dev, feature = LeaveXout(X, y_test_true.ravel(), feature,
                                              learner=learner,
                                              scoring_function=scoring_function,
                                              xfold=xfold, niter=niter,
                                              selection=selection,
                                              verbose=verbose)

    # Printing
    print(f'KO {ko} Phenotype Size: {X.shape[1]:4d} Method: {learner.__name__} Score: {scoring_function.__name__} '
          f'Acc: {acc_avr:.3f}±{acc_dev:.3f} '
          f'for {xfold}-fold-CV and {niter} iter')
    if selection:
        # List the feature labels returned by LeaveXout, without brackets.
        F = np.array2string(feature).replace('[','').replace(']','')
        print(f'Selected features: {F}')
        

## Physical reservoir: predict patient condition using KO strains growth curves

In [None]:
# Predict Patient condition using all KO GR and ODMax

from Library.Import import *
from Library.Utilities import svm_classifier, MLP, XGB, GP 
from Library.Utilities import bayes_classifier, decision_tree_classifier
from Library.Utilities import LeaveXout, read_XY
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score

# Dataset location.  Problem names noted in the original cell:
#   Mild_vs_Severe_Rand_All50_formated
#   Covid_Patient_Condition_all_features
DIRECTORY = './'
problempath = f'{DIRECTORY}Dataset_input/Covid/'
problem = 'Mild_vs_Severe_Rand_All50_formated'
filename = f'{problempath}{problem}'

# Cross-validation and learner settings.
# NOTE(review): the original comment on xfold read "5 = LOO"; the recorded
# output below reports a (600, 114) X matrix, for which 5 folds would not be
# leave-one-out - confirm what LeaveXout does with xfold.
xfold = 5
niter = 3
learner = MLP
scoring_function = accuracy_score
# NOTE(review): the meaning of selection=-1 is defined by LeaveXout - confirm.
selection = -1

# Load the data and keep only the first target column.
feature, X, y = read_XY(filename)
y = y[:, 0]

# Train X, y for classification.  r2_avr / r2_dev receive the first two values
# returned by LeaveXout for the chosen scoring_function (accuracy_score here,
# despite the r2_ prefix).
cv_options = dict(learner=learner,
                  scoring_function=scoring_function,
                  xfold=xfold, niter=niter,
                  selection=selection,
                  verbose=True)
r2_avr, r2_dev, feature = LeaveXout(X, y.ravel(), feature, **cv_options)

# Report.  NOTE(review): the listing omits the last entry of `feature` while
# the printed count is len(feature) - confirm which one is intended.
listed = feature[:-1]
F = np.array2string(listed).replace('[','').replace(']','')
print(f'{problem} Size: {X.shape} Method: {learner.__name__} '
      f'Acc: {r2_avr:.3f}±{r2_dev:.3f} '
      f'for {xfold}-fold-CV and {niter} iter\n'
      f'Selected features {len(feature)}: {F}')

Mild_vs_Severe_Rand_All50_formated Size: (600, 114) Method: XGB Acc: 0.998±0.002 for 5-fold-CV and 3 iter
Mild_vs_Severe_Rand_All50_formated Size: (600, 114) Method: MLP Acc: 0.981±0.005 for 5-fold-CV and 3 iter
Mild_vs_Severe_Rand_All50_formated Size: (600, 114) Method: decision_tree_classifier Acc: 0.957±0.007 for 5-fold-CV and 3 iter
Mild_vs_Severe_Rand_All50_formated Size: (600, 114) Method: bayes_classifier Acc: 0.668±0.010 for 5-fold-CV and 3 iter
Mild_vs_Severe_Rand_All50_formated Size: (600, 114) Method: svm_classifier Acc: 0.537±0.014 for 5-fold-CV and 3 iter