In [1]:
from scripts.grid_generators import *
from scripts.neural_networks import *
from models.MavelliPURE import *

from tensorflow.keras.losses import MeanSquaredLogarithmicError


import numpy as np
import pandas as pd
import os
from scipy.integrate import odeint
import matplotlib.pyplot as plt


In [2]:
Grid_Path = "./datasets/grids/"
Plot_Path = "./Plots/"


In [3]:
TargetSpecies = {
                     "NTP" : {"Look_Up": "NTP", "initial_condition_vector_index" : 0, "max_conc_mM" : 1500},
                     "tRNA" : {"Look_Up" : "T", "initial_condition_vector_index" : 6, "max_conc_mM" : 1.9},
                     "Amino Acids" : {"Look_Up" : "A", "initial_condition_vector_index" : 7, "max_conc_mM" : 300},
                     "Creatine_Phosphate" : {"Look_Up" : "CP", "initial_condition_vector_index" : 8, "max_conc_mM" : 20000},
                     "TL Enzymes" : {"Look_Up" : "CTL", "initial_condition_vector_index" : 10, "max_conc_mM" : 2.2}
}

# iterate over TargetSpecies and store that max concs in a list which is converted to a np.array
max_concs_array = []
for key in TargetSpecies:
    max_concs_array.append(TargetSpecies[key]["max_conc_mM"])
    
max_concs_array = np.array(max_concs_array)

In [4]:
PermissiblePercentagesOfMaxConcs = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
grid_size = 30
NumOfTargetSpecies = len(TargetSpecies)

In [5]:
initialgrid, array_to_avoid = generate_initial_grid(grid_size, max_concs_array, PermissiblePercentagesOfMaxConcs, NumOfTargetSpecies)

initialgrid_df = pd.DataFrame(initialgrid, columns=TargetSpecies.keys())

initialgrid_df.to_csv(Grid_Path+"initial_grid_mM.csv", index=None)

# iterate over initial grid

In [6]:

keysVar = ['NTP','NXP','nt','Ppi','ATr','a','T','A','CP','C','CTL']
valuesVar = [1500,0,0,0,0,0,1.9,300,20000,0,2.2]

initial_concs_dict = dict(zip(keysVar, valuesVar))


In [7]:
TMAX = 6*60*60 # sec
NSTEPS = TMAX

In [8]:

endpoint_protein_concentrations = []

for composition in initialgrid:
    
    original_concentrations = list(initial_concs_dict.values())
    updated_concentrations = original_concentrations.copy()
    
    for new_conc, key in zip(composition, TargetSpecies):
        
        
        index = TargetSpecies[key]["initial_condition_vector_index"]
        
        updated_concentrations[index] = new_conc
        
    
    sol, time = solvePURE(TMAX, NSTEPS, updated_concentrations)
    
    # last time point and just get the polymerised protein at index 5
    endpoint_protein_concentrations.append(sol[-1,:][5])
    
initialgrid_df["Predicted Final Protein"] = endpoint_protein_concentrations


# Train Neural Networks

In [9]:

# Select the input data using the TargetSpecies.keys
TargetSpeciesKeys = list(TargetSpecies.keys())
x_train = initialgrid_df[TargetSpeciesKeys].values


# produces np array of 1D
y_train = initialgrid_df["Predicted Final Protein"].values
y_train  = np.expand_dims(y_train, axis=1)



In [10]:
input_nodes = len(TargetSpecies)
num_output_nodes = 1

In [11]:
model = define_model(input_nodes, num_output_nodes)

model.compile(optimizer='rmsprop',
              loss=MeanSquaredLogarithmicError(),
              metrics=['accuracy'])


model.fit(x_train, y_train, epochs=10)



In [13]:
random_grid_size = 1000
simulate_input, array_to_avoid = generate_random_grid(array_to_avoid, max_concs_array, random_grid_size, NumOfTargetSpecies, PermissiblePercentagesOfMaxConcs)

print(simulate_input.shape)
print(type(simulate_input))
print(simulate_input[0])

(1000, 5)
<class 'numpy.ndarray'>
[1.5e+02 5.7e-01 2.4e+02 1.4e+04 4.4e-01]


In [14]:
predictions = model.predict(simulate_input)
predictions = predictions.reshape(-1)


simulate_input_preds = pd.DataFrame(simulate_input, columns = TargetSpeciesKeys)
simulate_input_preds["Predicted Final Protein"] = predictions
simulate_input_preds



In [17]:
simulate_input_preds = simulate_input_preds.sort_values(by ='Predicted Final Protein', ascending=False)
simulate_input_preds.reset_index(drop=True, inplace=True)
simulate_input_preds

Unnamed: 0,NTP,tRNA,Amino Acids,Creatine_Phosphate,TL Enzymes,Predicted Final Protein
0,1050.0,1.71,30.0,2000.0,1.10,2.365553
1,900.0,0.19,60.0,2000.0,0.22,-1.882362
2,900.0,0.76,60.0,2000.0,0.22,-1.905523
3,900.0,1.14,60.0,2000.0,1.54,-2.087475
4,900.0,1.52,60.0,2000.0,1.54,-2.106075
...,...,...,...,...,...,...
995,150.0,1.14,240.0,20000.0,1.98,-519.876526
996,150.0,1.90,240.0,20000.0,0.88,-519.911438
997,150.0,1.52,270.0,20000.0,1.32,-522.962891
998,150.0,1.14,270.0,20000.0,0.88,-523.029419


In [18]:
Top_performing_predictions = simulate_input_preds.iloc[:100,:].copy()
Top_performing_predictions.head

<bound method NDFrame.head of        NTP  tRNA  Amino Acids  Creatine_Phosphate  TL Enzymes  \
0   1050.0  1.71         30.0              2000.0        1.10   
1    900.0  0.19         60.0              2000.0        0.22   
2    900.0  0.76         60.0              2000.0        0.22   
3    900.0  1.14         60.0              2000.0        1.54   
4    900.0  1.52         60.0              2000.0        1.54   
..     ...   ...          ...                 ...         ...   
95   300.0  0.95         90.0              2000.0        0.66   
96   450.0  0.19        210.0              2000.0        1.98   
97  1350.0  1.71        300.0              4000.0        0.88   
98  1050.0  0.38        150.0              4000.0        1.10   
99  1050.0  1.33        150.0              4000.0        0.44   

    Predicted Final Protein  
0                  2.365553  
1                 -1.882362  
2                 -1.905523  
3                 -2.087475  
4                 -2.106075  
..       