# Reservoir Computing for Covid dataset

## Conventional reservoir: predict patient condition using provided patient MS data

In [None]:
# Running a Cobra reservoir in predictive mode with a test set
# Cobra is run directly with KOs instead of an AMN (no training set required)
# Results with no feature selection: 
# KO trpA Method: bayes_classifier Acc: 0.819±0.025 for 10-fold-CV and 3 iter
# results with selection:
# KO trpA Phenotype Method: bayes_classifier Acc: 0.955±0.015 for 10-fold-CV and 3 iter

import cobra
from Library.Import import *
from Library.Utilities import bayes_classifier, LeaveXout, read_XY
from Library.Utilities import best_accuracy_threshold
from Library.Build_Dataset import TrainingSet, create_medium_run_cobra
from sklearn.metrics import r2_score, accuracy_score, f1_score, matthews_corrcoef

# ---- Reproducibility and file locations ----
DIRECTORY = './Dataset_input/Covid/'
seed = 1
np.random.seed(seed)

# All paths are built from DIRECTORY and the dataset base name.
name = 'Covid'
cobrafile = DIRECTORY + name + '_duplicated'
mediumfile = DIRECTORY + name
trainingfile = DIRECTORY + name + '_train'
testfile = DIRECTORY + 'UGA/MEDIUM'

# ---- Cobra model ----
# Build the parameter object in FBA mode and save it under `trainingfile`;
# the KO loop below reloads that file at the start of every iteration.
parameter = TrainingSet(cobraname=cobrafile, mediumname=mediumfile, method='FBA')
parameter.save(trainingfile)

# ---- MS data ----
# read_XY returns the feature names, the inputs and the labels
# (nY=1: presumably a single label column -- TODO confirm in Library.Utilities).
feature, X_test, y_test_true = read_XY(testfile, nY=1, scaling='X')

# ---- Classifier configuration ----
xfold, niter = 10, 3                # cross-validation folds, number of repetitions
learner = bayes_classifier          # classify based on all phenotypes
selection = 0                       # feature reduction during Leave-X-out; 0 = no selection
verbose = False
scoring_function = accuracy_score

# Strains to screen: the 'WT  ' entry first, then one knock-out per gene of the model.
KOs = ['WT  '] + [gene.name for gene in parameter.model.genes]
# Entries of X below this value are clamped to zero before all-zero columns are dropped.
flux_threshold = 1e-3
for ko in KOs:
    # Reload the saved parameters at every iteration
    # (presumably so that the previous knock-out does not carry over -- TODO confirm).
    parameter.load(trainingfile)
    # Run cobra to get growth rate for X_test with the current knock-out
    y_pred, X = create_medium_run_cobra(parameter.model,
                                        parameter.objective,
                                        parameter.medium,
                                        X_test, method='FBA', scaler=1,
                                        genekos=[ko], verbose=verbose)

    # One feature name per model reaction
    # (assumes the columns of X follow parameter.model.reactions -- TODO confirm).
    feature = [r.id for r in parameter.model.reactions]
    # NOTE(review): this comparison also zeroes negative entries; confirm that X
    # is non-negative, otherwise abs(X) < flux_threshold may have been intended.
    X[X < flux_threshold] = 0
    # Drop the columns that are zero for every sample, together with their names.
    zero_columns = np.argwhere(np.all(X == 0, axis=0)).flatten()
    X = np.delete(X, zero_columns, axis=1)
    feature = np.delete(feature, zero_columns, axis=0)
    # Fallback score reported when no column survives the filtering above.
    acc_avr, acc_dev = 0.5, 0
    if X.shape[1]:
        # Accuracy based on phenotype (X vector)
        acc_avr, acc_dev, feature = LeaveXout(X, y_test_true.ravel(), feature,
                                              learner=learner,
                                              scoring_function=scoring_function,
                                              xfold=xfold, niter=niter,
                                              selection=selection, verbose=verbose)

    # Printing (X.shape[1] is the number of columns kept after zero-column removal)
    print(f'KO {ko} Phenotype Size: {X.shape[1]:4d} Method: {learner.__name__} Score: {scoring_function.__name__} '
          f'Acc: {acc_avr:.3f}±{acc_dev:.3f} '
          f'for {xfold}-fold-CV and {niter} iter')
    if selection:
        F = np.array2string(feature).replace('[','').replace(']','')
        print(f'Selected features: {F}')
        

## Physical reservoir: predict patient condition using KO strains growth curves

In [1]:
# Predict Patient condition using growth curves

import glob
from Library.Import    import *
from Library.Utilities import svm_classifier, MLP, XGB, GP 
from Library.Utilities import bayes_classifier, decision_tree_classifier
from Library.Utilities import LeaveXout, read_XY
from sklearn.metrics   import mean_squared_error, accuracy_score

DIRECTORY = './'
# Number of cross-validation folds. This is NOT leave-one-out: LOO requires
# xfold equal to the number of samples (e.g. 37 for His_mild_severe).
xfold = 5
niter = 3       # number of repetitions of the cross-validation
learner = XGB
scoring_function = accuracy_score
selection = 3   # forwarded to LeaveXout; a value > 0 also prints the selected features

pattern = f'{DIRECTORY}Dataset_input/Covid/His_mild_severe.csv'
files = glob.glob(pattern)
if not files:
    # Without this message a wrong path makes the cell a silent no-op.
    print(f'No input file matches {pattern}')

for filename in files:
    # read_XY is given the path without its extension.
    filename = os.path.splitext(filename)[0]
    feature, X, y = read_XY(filename)
    y = y[:,0]  # keep the first label column only
    # Train X, y for classification
    start_time = time.time()
    acc_avr, acc_dev, feature = LeaveXout(X, y.ravel(), feature,
                                          learner=learner,
                                          scoring_function=scoring_function,
                                          xfold=xfold, niter=niter,
                                          selection=selection,
                                          verbose=True)
    delta_time = time.time() - start_time
    print(f'{filename} Size: {X.shape} Method: {learner.__name__} '
          f'Acc: {acc_avr:.2f}±{acc_dev:.2f} '
          f'(for {xfold}-fold-CV and {niter} iter) '
          f'number of features: {len(feature)} cpu time {delta_time:.2f}\n')
    if selection > 0:
        # Only format the feature list when it is actually reported.
        F = np.array2string(feature).replace('[','').replace(']','')
        print(f'Selected features: {F}')

Size: 161 Remove: 2.988888889 Score: 0.631±0.025
Size: 160 Remove: 2.655555556 Score: 0.640±0.013
Size: 159 Remove: 3.155555556 Score: 0.649±0.000
Size: 158 Remove: 3.322222222 Score: 0.658±0.013
Size: 157 Remove: 4.322222222 Score: 0.685±0.013
Size: 156 Remove: 0.155555556 Score: 0.685±0.013
Size: 155 Remove: 0.322222222 Score: 0.685±0.013
Size: 154 Remove: 0.488888889 Score: 0.685±0.013
Size: 153 Remove: 0.655555556 Score: 0.685±0.013
Size: 152 Remove: 0.822222222 Score: 0.685±0.013
Size: 151 Remove: 0.988888889 Score: 0.685±0.013
Size: 150 Remove: 1.155555556 Score: 0.685±0.013
Size: 149 Remove: 1.322222222 Score: 0.685±0.013
Size: 148 Remove: 1.488888889 Score: 0.685±0.013
Size: 147 Remove: 1.655555556 Score: 0.685±0.013
Size: 146 Remove: 1.822222222 Score: 0.685±0.013
Size: 145 Remove: 1.988888889 Score: 0.685±0.013
Size: 144 Remove: 2.155555556 Score: 0.685±0.013
Size: 143 Remove: 2.322222222 Score: 0.685±0.013
Size: 142 Remove: 2.488888889 Score: 0.685±0.013
Size: 141 Remove: 2.

Feature selection starting from all features

- MG1655
./Dataset_input/Covid/MG1655_mild_severe Size: (73, 48) Method: decision_tree_classifier Acc: 0.786±0.039 (for 5-fold-CV and 3 iter) number of features: 5 cpu time 9.46
./Dataset_input/Covid/MG1655_mild_severe Size: (73, 48) Method: XGB Acc: 0.820±0.025 (for 5-fold-CV and 3 iter) number of features: 5 cpu time 437.49
./Dataset_input/Covid/MG1655_mild_severe Size: (73, 48) Method: XGB Acc: 0.821±0.029 (for 5-fold-CV and 5 iter) number of features: 5 cpu time 724.39 Selected features: '0.655555556' '1.155555556' '2.655555556' '3.488888889' 
./Dataset_input/Covid/MG1655_mild_severe Size: (73, 48) Method: XGB Acc: 0.820±0.025 (for 5-fold-CV and 3 iter) number of features: 5 cpu time 490.00
Selected features: '0.655555556' '1.155555556' '2.655555556' '3.488888889'
./Dataset_input/Covid/MG1655_mild_severe Size: (73, 48) Method: XGB Acc: 0.820±0.025 (for 5-fold-CV and 3 iter) number of features: 5 cpu time 495.13
Selected features: '0.655555556' '1.155555556' '2.655555556' '3.488888889' 'y_true'
./Dataset_input/Covid/MG1655_mild_severe_smooth Size: (73, 48) Method: XGB Acc: 0.737±0.042 (for 5-fold-CV and 3 iter) number of features: 10 cpu time (73, 48)  Selected features: '0.155555556' '0.655555556' '1.822222222' '2.488888889' '3.488888889' '3.822222222' '4.488888889' '5.322222222' '5.655555556' 

- His
./Dataset_input/Covid/His_mild_severe Size: (37, 162) Method: XGB Acc: 0.754±0.051 (for 5-fold-CV and 3 iter) number of features: 7 cpu time 5601.59 Selected features: '4.822222222' '16.98888889' '19.98888889' '20.15555556' '21.48888889' '25.82222222'
./Dataset_input/Covid/His_mild_severe Size: (37, 162) Method: XGB Acc: 0.75±0.05 (for 5-fold-CV and 3 iter) number of features: 7 cpu time 5887.87 Selected features: '4.822222222' '16.98888889' '19.98888889' '20.15555556' '21.48888889' '25.82222222' 
./Dataset_input/Covid/His_mild_severe Size: (37, 162) Method: GP Acc: 0.528±0.073 (for 5-fold-CV and 3 iter) number of features: 24 cpu time 858.68
./Dataset_input/Covid/His_mild_severe Size: (37, 162) Method: decision_tree_classifier Acc: 0.668±0.083 (for 5-fold-CV and 3 iter) number of features: 6 cpu time 125.92 Selected features: '4.822222222' '20.15555556' '24.65555556' '24.98888889' '26.65555556'

OLD RUNS

Select features using random forest first
./Dataset_input/Covid/His_mild_severe Size: (37, 162) Method: XGB Acc: 0.690±0.047 (for 5-fold-CV and 5 iter) number of features: 13 cpu time 327.08
./Dataset_input/Covid/His_mild_severe Size: (37, 162) Method: GP Acc: 0.562±0.067 (for 5-fold-CV and 3 iter) number of features: 14 cpu time 30.23
./Dataset_input/Covid/MG1655_mild_severe Size: (73, 48) Method: GP Acc: 0.544±0.047 (for 5-fold-CV and 3 iter) number of features: 12 cpu time 73.64
./Dataset_input/Covid/MG1655_mild_severe Size: (73, 48) Method: XGB Acc: 0.734±0.037 (for 5-fold-CV and 3 iter) number of features: 8 cpu time 168.91
./Dataset_input/Covid/MG1655_mild_severe Size: (73, 48) Method: MLP Acc: 0.480±0.056 (for 5-fold-CV and 3 iter) number of features: 3 cpu time 41.85
./Dataset_input/Covid/MG1655_mild_severe Size: (73, 48) Method: bayes_classifier Acc: 0.698±0.035 (for 5-fold-CV and 3 iter) number of features: 7 cpu time 2.41
./Dataset_input/Covid/MG1655_mild_severe Size: (73, 48) Method: svm_classifier Acc: 0.411±0.040 (for 5-fold-CV and 3 iter) number of features: 29 cpu time 2.88
./Dataset_input/Covid/MG1655_mild_severe Size: (73, 48) Method: decision_tree_classifier Acc: 0.788±0.040 (for 5-fold-CV and 3 iter) number of features: 4 cpu time 3.36
./Dataset_input/Covid/MG1655_mild_severe Size: (73, 48) Method: decision_tree_classifier Acc: 0.769±0.038 (for 5-fold-CV and 3 iter) number of features: 5 cpu time 3.35
./Dataset_input/Covid/MG1655_mild_severe Size: (73, 48) Method: decision_tree_classifier Acc: 0.786±0.039 (for 5-fold-CV and 3 iter) number of features: 5 cpu time 9.46
./Dataset_input/Covid/MG1655_mild_severe Size: (176, 48) Method: XGB Acc: 0.626±0.028 (for 5-fold-CV and 3 iter) number of features: 49 cpu time 6.07
./Dataset_input/Covid/MG1655_mild_severe Size: (176, 48) Method: MLP Acc: 0.519±0.030 (for 5-fold-CV and 3 iter) number of features: 49 cpu time 0.83
./Dataset_input/Covid/MG1655_mild_severe Size: (176, 48) Method: svm_classifier Acc: 0.543±0.013 (for 5-fold-CV and 3 iter) number of features: 49 cpu time 0.13


GC_smooth_mean_dev_mild_severe_20 Size: (240, 114) Method: MLP Acc: 0.900±0.014 (for 5-fold-CV and 3 iter) number of features: 115 cpu time 113.57
GC_smooth_mean_dev_mild_severe_20 Size: (240, 114) Method: MLP Acc: 0.900±0.012 (for 5-fold-CV and 3 iter) number of features: 115 cpu time 58.89
GC_smooth_mean_dev_mild_severe_20 Size: (240, 114) Method: XGB Acc: 0.984±0.005 (for 5-fold-CV and 3 iter) number of features: 115 cpu time 4.46
GC_smooth_mean_dev_mild_severe_20 Size: (240, 114) Method: GP Acc: 1.000±0.000 (for 5-fold-CV and 3 iter) number of features: 115 cpu time 8270.01
GC_smooth_mean_dev_mild_severe_20 Size: (240, 114) Method: GP Acc: 1.000±0.000 (for 5-fold-CV and 3 iter) number of features: 115 cpu time 6.23

Mild_vs_Severe_20 Size: (240, 114) Method: XGB Acc: 0.970±0.007 (for 5-fold-CV and 3 iter) number of features: 115 cpu time 5.12
Mild_vs_Severe_tyr_20 Size: (40, 114) Method: XGB Acc: 0.958±0.024 (for 5-fold-CV and 3 iter) number of features: 115 cpu time 1.88
Mild_vs_Severe_phe_20 Size: (40, 114) Method: XGB Acc: 0.964±0.017 (for 5-fold-CV and 3 iter) number of features: 115 cpu time 2.08
Mild_vs_Severe_leu_20 Size: (40, 114) Method: XGB Acc: 0.968±0.013 (for 5-fold-CV and 3 iter) number of features: 115 cpu time 2.00
Mild_vs_Severe_gly_20 Size: (40, 114) Method: XGB Acc: 0.970±0.014 (for 5-fold-CV and 3 iter) number of features: 115 cpu time 1.87
Mild_vs_Severe_his_20 Size: (40, 114) Method: XGB Acc: 0.904±0.042 (for 5-fold-CV and 3 iter) number of features: 115 cpu time 1.92
Mild_vs_Severe_met_20 Size: (40, 114) Method: XGB Acc: 0.941±0.022 (for 5-fold-CV and 3 iter) number of features: 115 cpu time 2.29

Mild_vs_Severe_met_20 Size: (40, 114) Method: XGB Acc: 0.941±0.022 (for 5-fold-CV and 3 iter) number of features: 115 cpu time 2.24
Mild_vs_Severe_met0.1_20 Size: (40, 114) Method: XGB Acc: 0.968±0.013 (for 5-fold-CV and 3 iter) number of features: 115 cpu time 1.93
Mild_vs_Severe_met10_20 Size: (40, 114) Method: XGB Acc: 0.820±0.061 (for 5-fold-CV and 3 iter) number of features: 115 cpu time 2.37
Mild_vs_Severe_met50_20 Size: (40, 114) Method: XGB Acc: 0.788±0.062 (for 5-fold-CV and 3 iter) number of features: 115 cpu time 1.96

Mild_vs_Severe_met0.1_20 Size: (40, 114) Method: MLP Acc: 1.000±0.000 (for 5-fold-CV and 3 iter) number of features: 115 cpu time 14.59
Mild_vs_Severe_met_20 Size: (40, 114) Method: MLP Acc: 1.000±0.000 (for 5-fold-CV and 3 iter) number of features: 115 cpu time 14.37
Mild_vs_Severe_met2_20 Size: (40, 114) Method: MLP Acc: 0.633±0.033 (for 5-fold-CV and 3 iter) number of features: 115 cpu time 8.67
Mild_vs_Severe_met10_20 Size: (40, 114) Method: MLP Acc: 0.585±0.050 (for 5-fold-CV and 3 iter) number of features: 115 cpu time 9.11
Mild_vs_Severe_met50_20 Size: (40, 114) Method: MLP Acc: 0.460±0.057 (for 5-fold-CV and 3 iter) number of features: 115 cpu time 4.86

In [2]:
# Predict Patient condition using growth curves with selected features

import glob
from Library.Import    import *
from Library.Utilities import svm_classifier, MLP, XGB, GP 
from Library.Utilities import bayes_classifier, decision_tree_classifier
from Library.Utilities import LeaveXout, read_XY
from sklearn.metrics   import mean_squared_error, accuracy_score

DIRECTORY = './'
# Number of cross-validation folds. This is NOT leave-one-out: LOO requires
# xfold equal to the number of samples (e.g. 100 for MG1655_neg_pos_12).
xfold = 5
niter = 5       # number of repetitions of the cross-validation
scoring_function = accuracy_score
# Forwarded to LeaveXout; the meaning of a negative value is defined there
# (TODO confirm). With selection <= 0 no feature list is printed below.
selection = -1
# Every learner is evaluated on the same data. (The former `learner = XGB`
# assignment was dead code: the loop variable below overwrote it.)
learners = [svm_classifier, MLP, XGB, GP, bayes_classifier, decision_tree_classifier]

pattern = f'{DIRECTORY}Dataset_input/Covid/MG1655_neg_pos_12.csv'
files = glob.glob(pattern)
if not files:
    # Without this message a wrong path makes the cell a silent no-op.
    print(f'No input file matches {pattern}')

for filename in files:
    # read_XY is given the path without its extension.
    filename = os.path.splitext(filename)[0]
    feature, X, y = read_XY(filename)
    y = y[:,0]  # keep the first label column only
    for learner in learners:
        # Train X, y for classification
        start_time = time.time()
        # The returned feature list goes into its own variable: overwriting
        # `feature` here would hand the previous learner's output to the next
        # learner instead of the names read from the file.
        acc_avr, acc_dev, selected = LeaveXout(X, y.ravel(), feature,
                                               learner=learner,
                                               scoring_function=scoring_function,
                                               xfold=xfold, niter=niter,
                                               selection=selection,
                                               verbose=True)
        # NOTE(review): a score below 0.5 is mirrored to 1 - score, i.e. reported
        # as if the predicted labels were inverted. This makes the printed
        # accuracy optimistic; confirm that this is intended.
        acc_avr = 1 - acc_avr if acc_avr < 0.5 else acc_avr
        delta_time = time.time() - start_time
        print(f'{filename} Size: {X.shape} Method: {learner.__name__} '
              f'Acc: {acc_avr:.3f}±{acc_dev:.3f} '
              f'(for {xfold}-fold-CV and {niter} iter) '
              f'number of features: {len(selected)} cpu time {delta_time:.2f}')
        if selection > 0:
            # Only format the feature list when it is actually reported.
            F = np.array2string(selected).replace('[','').replace(']','')
            print(f'Selected features: {F}')


./Dataset_input/Covid/MG1655_neg_pos_12 Size: (100, 12) Method: XGB Acc: 0.90±0.03 (for 5-fold-CV and 5 iter) number of features: 13 cpu time 4.80



./Dataset_input/Covid/MG1655_neg_pos_12 Size: (100, 12) Method: XGB Acc: 0.899±0.025 (for 5-fold-CV and 5 iter) number of features: 13 cpu time 5.67
./Dataset_input/Covid/MG1655_neg_pos_12 Size: (100, 12) Method: XGB Acc: 0.908±0.018 (for 10-fold-CV and 5 iter) number of features: 13 cpu time 70.16
./Dataset_input/Covid/MG1655_neg_pos_12 Size: (100, 12) Method: XGB Acc: 0.900±0.000 (for 100-fold-CV and 5 iter) number of features: 13 cpu time 743.71 = LOO

./Dataset_input/Covid/MG1655_mild_severe_4 Size: (73, 4) Method: XGB Acc: 0.821±0.029 (for 5-fold-CV and 5 iter) number of features: 5 cpu time 46.91
./Dataset_input/Covid/MG1655_mild_severe_4 Size: (73, 4) Method: XGB Acc: 0.845±0.020 (for 10-fold-CV and 5 iter) number of features: 5 cpu time 97.30
./Dataset_input/Covid/MG1655_mild_severe_4 Size: (73, 4) Method: XGB Acc: 0.863±0.000 (for 73-fold-CV and 5 iter) number of features: 5 cpu time 731.14 = LOO

./Dataset_input/Covid/His_mild_severe_6 Size: (37, 6) Method: XGB Acc: 0.748±0.048 (for 5-fold-CV and 5 iter) number of features: 7 cpu time 32.05
./Dataset_input/Covid/His_mild_severe_6 Size: (37, 6) Method: XGB Acc: 0.766±0.031 (for 10-fold-CV and 5 iter) number of features: 7 cpu time 66.17
./Dataset_input/Covid/His_mild_severe_6 Size: (37, 6) Method: XGB Acc: 0.784±0.000 (for 37-fold-CV and 5 iter) number of features: 7 cpu time 254.33 = LOO