# Analyzing synthetically generated models

In [13]:
import tellurium as te
import teUtils as tu
import os
import re
import csv
import pandas as pd
import numpy as np
from tqdm import tqdm

In [8]:
# Root folder of the model set being analysed (trailing slash is required,
# because paths below are built by plain string concatenation).
FOLDER_NAME = '10sp/'

# Where the generated perturbation / noise CSV files are written.
DATA_FOLDER = f'{FOLDER_NAME}generated_data/'

# Perturbation sizes applied to each enzyme parameter, in percent.
PERTURBATION_LEVELS = [10, 50]

# Multiplicative noise amplitudes applied to the generated data, in percent.
NOISE_LEVEL = [10, 50]

## Generating perturbation datasets 

In [3]:
# Create the directory that will hold the generated data.
# makedirs(..., exist_ok=True) keeps this cell re-runnable: it does not raise
# when the folder already exists and also creates missing parent folders.
os.makedirs(DATA_FOLDER, exist_ok=True)

In [11]:
# Read passlist.txt into a list with one entry per line; trailing whitespace
# (including the newline) is stripped from every entry.
passlist_path = FOLDER_NAME + "passlist.txt"
with open(passlist_path) as handle:
    passlist = list(map(str.rstrip, handle))

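Generate a perturbation dataset for every model in `FOLDER_NAME + 'sbml/'`: for each perturbation level, each enzyme parameter is scaled down and up by that percentage and the resulting state is written to a CSV file in `DATA_FOLDER`.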

In [14]:
# Value written in place of an external (boundary) metabolite that is exactly 0.
# NOTE(review): the original line `exMet_values[exMet_values != 0] = 1e-6` ran on a
# plain Python list, so the comparison collapsed to the single bool True and only
# index 1 was overwritten (or IndexError was raised and silently swallowed when
# fewer than two boundary species existed). Replacing zeros is the presumed
# intent -- confirm against the downstream analysis.
EXTERNAL_ZERO_FLOOR = 1e-6


def _steady_state_row(r, e_list, exMet, n_pad, enable_conserved_moieties=False):
    """Simulate the model, solve for steady state and build one CSV data row.

    Parameters
    ----------
    r : model object returned by ``te.loads``
    e_list : list of str
        Ids of the enzyme parameters whose current values are recorded.
    exMet : list of str
        Ids of the boundary species whose current values are recorded.
    n_pad : int
        Number of padding enzyme columns; each is written as 1.
    enable_conserved_moieties : bool
        When True, ``r.conservedMoietyAnalysis`` is switched on after the
        time-course simulation and before the steady-state solve (the order
        used for the unperturbed base case).

    Returns
    -------
    list
        ``enzymes + padding + external metabolites + concentrations + fluxes``,
        in the same column order as the CSV header.
    """
    # Last row of a long time course, with the first column dropped
    # (presumably time under the default selections -- TODO confirm).
    spConc = list(r.simulate(0, 1000000)[-1])[1:]
    if enable_conserved_moieties:
        r.conservedMoietyAnalysis = True
    r.steadyState()

    enzymes = [r.getValue(e) for e in e_list]
    exMet_values = [
        EXTERNAL_ZERO_FLOOR if value == 0 else value
        for value in (r.getValue(m) for m in exMet)
    ]
    fluxes = list(r.getReactionRates())

    return enzymes + [1] * n_pad + exMet_values + spConc + fluxes


# (model file, perturbation level, error) for every run that aborted part-way.
failed_models = []

for pl in PERTURBATION_LEVELS:
    # Multiplicative factors for a downward and an upward perturbation.
    pertLevel = pl / 100
    perturbation_level = [1 - pertLevel, 1 + pertLevel]

    for modelPath in tqdm(os.listdir(FOLDER_NAME + 'sbml/')):
        r = te.loads(FOLDER_NAME + 'sbml/' + modelPath)

        exMet = r.getBoundarySpeciesIds()
        inMet = r.getFloatingSpeciesIds()
        fluxIDs = ['flux_' + str(i) for i in range(len(r.getReactionRates()))]
        e_list = [i for i in r.getGlobalParameterIds() if 'E' in i]
        # One enzyme column per reaction: when the model declares fewer enzyme
        # parameters than reactions, the missing ones are added as padding
        # columns numbered from len(e_list) upwards.
        pad_list = ['E' + str(i) for i in range(len(e_list), len(fluxIDs))]

        header = e_list + pad_list + exMet + inMet + fluxIDs

        # Second token of the file name once split on '_', '|' or '.'.
        modelNo = re.split(r'[_|.]', modelPath)[1]

        with open(DATA_FOLDER + f'data_{modelNo}_pt{pl}.csv', 'w', encoding='UTF8', newline='') as f:
            writer = csv.writer(f)
            writer.writerow(header)

            try:
                # base case
                writer.writerow(
                    _steady_state_row(r, e_list, exMet, len(pad_list),
                                      enable_conserved_moieties=True)
                )

                # perturbed cases: one row per (enzyme, direction)
                for params in e_list:
                    for level in perturbation_level:
                        r.resetToOrigin()
                        r.setValue(params, level * r.getValue(params))
                        writer.writerow(
                            _steady_state_row(r, e_list, exMet, len(pad_list))
                        )
            except Exception as err:
                # Best effort: a model that fails to simulate or reach steady state
                # keeps whatever rows were already written and the loop moves on.
                # The failure is recorded instead of being silently discarded;
                # KeyboardInterrupt is no longer swallowed.
                failed_models.append((modelPath, pl, repr(err)))

if failed_models:
    print(f'{len(failed_models)} model/perturbation run(s) failed; their CSV files are incomplete:')
    for failed_path, failed_pl, failed_err in failed_models:
        print(f'  {failed_path} (pt{failed_pl}): {failed_err}')


100%|██████████| 40/40 [00:05<00:00,  6.97it/s]
100%|██████████| 40/40 [00:05<00:00,  6.93it/s]


## Adding Noise

In [15]:
# Fixed seed so that the noised datasets are reproducible across kernel restarts.
NOISE_SEED = 0
noise_rng = np.random.default_rng(NOISE_SEED)

# Use only the noise-free perturbation CSVs as input. Without this filter a
# re-run of the cell would pick up the '*_n10.csv' files written by a previous
# run and produce '*_n10_n10.csv', and so on. Sorting fixes the processing
# order, which is required for the seeded noise to be reproducible.
already_noised = re.compile(r'_n\d+\.csv$')
datafiles = sorted(
    name for name in os.listdir(DATA_FOLDER)
    if name.endswith('.csv') and not already_noised.search(name)
)

for f in datafiles:
    df = pd.read_csv(DATA_FOLDER + f)

    for nl in NOISE_LEVEL:
        noiseLevel = nl / 100
        # Independent multiplicative factor in [1 - noiseLevel, 1 + noiseLevel)
        # for every cell of the table.
        noise = noise_rng.uniform(low=1 - noiseLevel, high=1 + noiseLevel, size=df.shape)
        noised_data = df.multiply(noise)
        fileName = f.replace('.csv', f'_n{nl}.csv')
        # NOTE(review): to_csv writes the row index as an extra leading column,
        # which the noise-free files do not have. Left unchanged because later
        # cells may rely on it -- consider index=False if they do not.
        noised_data.to_csv(DATA_FOLDER + fileName)

## Plotting analysis of models