**Import packages**

In [1]:
# python version. 3.8.10
from sklearn.model_selection import train_test_split
from sklearn.linear_model import MultiTaskElasticNetCV
from sklearn import preprocessing
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

**Import & Preprocess data**

* The X and y training data are extracted from the journal article.
* The simulated flux data is imported and preprocessed (standardized) to form the training features (X_train).
* The y data covers three antibiotics: AMP, CIP, and GENT.

In [2]:
# Load the feature matrix and the target table. Both CSV files are read with
# cp949 encoding and use their 'iJO1366 Reaction' column as the row index.
X_data = pd.read_csv('X_train.csv', encoding='cp949', index_col='iJO1366 Reaction')
y_data = pd.read_csv('y_train.csv', encoding='cp949', index_col='iJO1366 Reaction')

# Full target matrix used for multi-task training.
y_train = y_data.values

# Per-antibiotic target vectors, one column each.
# Column order AMP, CIP, GENT is assumed from the notebook text and the labels
# used when exporting the coefficients -- confirm against y_train.csv.
y_train_AMP = y_train[:, 0]
y_train_CIP = y_train[:, 1]
# Fixed: the original `[:2]` sliced the first two ROWS (all columns) instead of
# selecting the third column.
y_train_GENT = y_train[:, 2]

# Standardize every feature column to zero mean and unit variance.
X_train_scaled = preprocessing.StandardScaler().fit_transform(X_data)

**Machine learning with ElasticNet regression**

Hyperparameter setting

In [None]:
# Hyperparameters for the cross-validated multi-task elastic net.
# Parameter reference:
# https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.MultiTaskElasticNetCV.html

# Passed as `random_state`. Per the scikit-learn docs this only matters when
# selection='random' -- verify for the installed version.
random_seed = 0

# Passed as `n_alphas`: number of alpha values along the regularization path
# (a computed grid, not random draws).
alpha = 100

# Passed as `tol`: convergence tolerance of the optimizer (not an iteration cap;
# the iteration limit is the separate `max_iter` parameter).
tolerance = 1e-6

# Passed as `cv`: number of cross-validation folds.
cross_val = 50

# Passed as `n_jobs`: number of CPUs used during cross-validation.
# NOTE(review): the scikit-learn docs state this is only used when several
# l1_ratio values are given -- with a single ratio it may have no effect.
cpu_amount = 4

# Passed as `l1_ratio`: candidate L1/L2 mixing ratios; when several are given,
# the best one is selected by cross-validation.
# NOTE(review): the original note recommended 0.01 (the journal does not state
# the exact ratio), but the list holds 0.015 -- confirm which is intended.
l1_ratio_list = [0.015]


Training the model

In [4]:

# Configure the cross-validated multi-task elastic net from the hyperparameter
# variables defined in the previous cell.
MTEN = MultiTaskElasticNetCV(
    l1_ratio=l1_ratio_list,
    n_alphas=alpha,
    tol=tolerance,
    cv=cross_val,
    n_jobs=cpu_amount,
    random_state=random_seed,
)

# Fit all targets jointly on the standardized features.
# fit() returns the estimator itself, so trained_MTEN and MTEN are the same object.
trained_MTEN = MTEN.fit(X_train_scaled, y_train)

Extract & filter the coefficients of the trained model.
Save to CSV file

In [7]:
# Build a table of the fitted coefficients: coef_ is labelled with one row per
# antibiotic target and one column per input feature, then transposed so that
# each feature is a row and each antibiotic a column.
processed_data = pd.DataFrame(
    data=trained_MTEN.coef_,
    index=['AMP IC50', 'CIP IC50', 'GENT IC50'],
    columns=X_data.columns,
).T

# Write the feature-by-antibiotic coefficient table to disk.
processed_data.to_csv('whitebox_coef.csv')