In [None]:
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import DataLoader, Dataset
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import torch.nn as nn
from src.nn_model.amn_qp import *
from src.utils.import_data import *
from src.utils.import_GEM import *
from src.utils.training import *
from src.utils.plots import *
import matplotlib.pyplot as plt


In [None]:
torch.manual_seed(10)

# step 1: build/load and split model
model = import_GEM(filename='GEMs/iAF1260_split_FBA_reduction.xml', split=True)

# step 2: initialize object GEM which will have all the matrices as attributes
model = GEM(model=model)

# step 3: manually define the matrices for bounded fluxes and reference fluxes
input_ub = ['R_EX_glc__D_e_rev']

#reference = model.reactions # whole flux distribution
reference = ["R_BIOMASS_Ec_iAF1260_core_59p81M"] # growth rate

# step 4 : extract matrices Pin, Pred, S
S, Pin, Pref = model.build_GEM_matrices(input_ub, reference)

data = pd.read_csv('FBA_simulated_data_reservoir_filtered.csv').drop(columns=["ko_gene"])
#input training
X = data[["R_EX_glc__D_e_rev_ub"]]

#target training
y = data[reference]
print(y.shape)

#train and test split
x_train, x_test, y_train, y_test = train_test_split(X.values.astype(np.float32), y.values.astype(np.float32), test_size=0.25, random_state=1)
#print(y_test)'


In [None]:
from hydra import initialize, compose
from omegaconf import OmegaConf

# Initialize Hydra
initialize(config_path="conf", job_name="notebook")

# Compose and load the configuration
cfg = compose(config_name="config")


In [2]:
len(['R_GLCptspp', 'R_PGI_fwd', 'R_PFK', 'R_FBA_fwd', 'R_TPI_fwd', 'R_PGK_rev', 'R_GAPD_fwd', 'R_ENO_fwd', 'R_PGM_rev', 'R_PYK', 'R_PDH', 'R_G6PDH2r_fwd', 'R_PGL', 'R_GND', 'R_RPE_fwd', 'R_RPI_rev', 'R_TKT1_fwd', 'R_TALA_fwd', 'R_TKT2_fwd', 'R_CS', 'R_ACONTb_fwd', 'R_ACONTa_fwd', 'R_ICDHyr_fwd', 'R_SUCOAS_rev', 'R_AKGDH', 'R_SUCDi', 'R_FUM_fwd', 'R_MDH_fwd', 'R_PPC', 'R_ME2', 'R_ICL', 'R_MALS', 'R_ACKr_rev', 'R_PTAr_fwd', 'R_LDH_D_rev', 'R_ACALD_rev', 'R_ALCD2x_rev', 'R_EX_glc__D_e_rev', 'R_EX_o2_e_rev', 'R_EX_co2_e_fwd', 'R_EX_etoh_e', 'R_EX_ac_e', 'R_EX_lac__D_e', 'R_EX_succ_e', 'R_EX_pyr_e', 'R_EX_for_e', 'R_BIOMASS_Ec_iAF1260_core_59p81M'])

47

In [23]:
device = "cpu"


# create a torch dataset with this structure (X,Y,Vin)
train_data = CustomTensorDataset(data=(x_train, y_train, x_train,))
test_data  = CustomTensorDataset(data=(x_test, y_test, x_test))

#create dataloader for batches
train_loader = DataLoader(train_data, shuffle = True, batch_size=5)
test_loader = DataLoader(test_data, batch_size=5)

# initialize AMN_QP module
amn_qp = AMN_QP(input_size=1, hidden_size=200, output_size=25, drop_rate=0.25, hyper_params=hyper_params, model=model).to(device)
# initzialize mechanistic loss
criterion = MechanisticLoss(model, l1_constant=1, hyper_params=hyper_params)

# define optimizer for backpropagation
optimizer = torch.optim.Adam(amn_qp.parameters(), lr=1e-3)




In [15]:
# train step for each epoch
epochs = 2

train_losses = []
for epoch in range(epochs):
    tr_loss = train_step(amn_qp, criterion, optimizer, train_loader)
    train_losses.append(tr_loss['loss'])
    print(tr_loss['losses'])

Vref_pred = np.matmul(np.array(tr_loss['Vref_pred']), model.Pref.T) 
Vref_true = tr_loss['Vref_true']  
# Create a list of epochs or iterations for the x-axis
epochs = list(range(1, len(train_losses) + 1))

# Plot the training loss
plt.plot(epochs, train_losses, label='Train Loss', color='blue')
plt.xlabel('Epochs or Iterations')
plt.ylabel('Loss')
plt.title('Training Loss Over Time')
plt.legend()
plt.grid(True)
plt.show()

R2 = R_squared(Vref_true, Vref_pred, mode='R', plot=True)
print(R2)


AttributeError: 'dict' object has no attribute 'model'

In [None]:
te_loss = test_step(amn_qp, criterion, test_loader)

Vref_pred_te = np.matmul(np.array(te_loss['Vref_pred']), model.Pref.T)  
Vref_true_te = te_loss['Vref_true'] 

Q2 = R_squared(Vref_true_te, Vref_pred_te, mode='Q', plot=True)
print(Q2)