# E. coli data from UCSD

In [1]:
from BFAIR.mfa.INCA import INCA_script
import pandas as pd
import numpy as np
import time
import ast
import matlab.engine
import sys
import escher
from BFAIR.mfa.INCA import INCA_reimport

#### Import the data

In [2]:
# Helper object used below to filter the inputs and to generate the MATLAB script.
INCA_script_example = INCA_script()

# Raw input tables. The measured fragments/MS data, tracers and measured fluxes
# have to be limited to one experiment before the script is generated.
input_files = {
    'atomMappingReactions': 'data/MFA_modelInputsData/data_stage02_isotopomer_atomMappingReactions2.csv',
    'modelReaction': 'data/MFA_modelInputsData/data_stage02_isotopomer_modelReactions.csv',
    'atomMappingMetabolite': 'data/MFA_modelInputsData/data_stage02_isotopomer_atomMappingMetabolites.csv',
    'measuredFluxes': 'data/MFA_modelInputsData/data_stage02_isotopomer_measuredFluxes.csv',
    'experimentalMS': 'data/MFA_modelInputsData/data-1604345289079.csv',
    'tracer': 'data/MFA_modelInputsData/data_stage02_isotopomer_tracers.csv',
}
# Files are read in the order in which they are listed above.
input_tables = {table: pd.read_csv(path) for table, path in input_files.items()}

atomMappingReactions_data_I = input_tables['atomMappingReactions']
modelReaction_data_I = input_tables['modelReaction']
atomMappingMetabolite_data_I = input_tables['atomMappingMetabolite']
measuredFluxes_data_I = input_tables['measuredFluxes']
experimentalMS_data_I = input_tables['experimentalMS']
tracer_I = input_tables['tracer']

#### Exclude data for irrelevant experiments and models

In [3]:
# The model-level tables are restricted to one model id / mapping id.
selected_model = 'ecoli_RL2013_02'
limit_model = INCA_script_example.limit_to_one_model
atomMappingReactions_data_I = limit_model(atomMappingReactions_data_I, 'mapping_id', selected_model)
modelReaction_data_I = limit_model(modelReaction_data_I, 'model_id', selected_model)
atomMappingMetabolite_data_I = limit_model(atomMappingMetabolite_data_I, 'mapping_id', selected_model)
measuredFluxes_data_I = limit_model(measuredFluxes_data_I, 'model_id', selected_model)

# Fluxes, fragments and tracers are additionally restricted to one experiment.
selected_experiment = 'WTEColi_113C80_U13C20_01'
limit_experiment = INCA_script_example.limit_to_one_experiment
measuredFluxes_data_I = limit_experiment(measuredFluxes_data_I, 'experiment_id', selected_experiment)
experimentalMS_data_I = limit_experiment(experimentalMS_data_I, 'experiment_id', selected_experiment)
tracer_I = limit_experiment(tracer_I, 'experiment_id', selected_experiment)

#### Generate the MATLAB script

Save it in your working directory. The first argument of the `runner_script_generator` function will name your future .mat file

In [5]:
# Build the INCA MATLAB script from the six filtered input tables.
script = INCA_script_example.script_generator(
    modelReaction_data_I,
    atomMappingReactions_data_I,
    atomMappingMetabolite_data_I,
    measuredFluxes_data_I,
    experimentalMS_data_I,
    tracer_I
)
# Save the generated script under the name "testscript".
INCA_script_example.save_INCA_script(script, "testscript")
# Generate the runner script. 'TestFile' is presumably the name of the .mat file that
# the run produces (a TestFile.mat is re-imported further down) — TODO confirm.
# NOTE(review): the meaning of the second argument (10) is not visible here; check the BFAIR docs.
runner = INCA_script_example.runner_script_generator('TestFile', 10)
# Save the runner; the script name must match the one used for save_INCA_script above.
INCA_script_example.save_runner_script(runner=runner, scriptname="testscript")

There is no stoichiometry given for: ATPM
There is no stoichiometry given for: Ec_Biomass_INCA
There is no stoichiometry given for: EX_nh4_LPAREN_e_RPAREN_
There is no stoichiometry given for: EX_o2_LPAREN_e_RPAREN_
There is no stoichiometry given for: EX_so4_LPAREN_e_RPAREN_
There is no stoichiometry given for: FADR_NADH_CYTBD_HYD_ATPS4r
There is no stoichiometry given for: NADH_CYTBD_HYD_ATPS4r
There is no stoichiometry given for: NADTRHD_THD2pp
There is no stoichiometry given for: NADTRHD_THD2pp_reverse


#### Provide the path to your INCA installation, your working directory and the name of the previously generated MATLAB script

In [6]:
# Machine-specific location of the INCA installation; every user has to change this.
INCA_base_directory = "/Users/krv114/Documents/inca2.1" # ADD YOUR BASE DIRECTORY HERE, e.g. "/Users/Username/Documents/INCAv2.1"
# %pwd is an IPython magic: the notebook's current working directory is used as the script folder.
script_folder = %pwd
# Has to match the name under which the script was saved in the previous cell.
matlab_script = "testscript"
# Runner name = script name + "_runner"; presumably the name save_runner_script
# writes the runner under — TODO confirm.
runner_script = matlab_script + "_runner"

#### INCA will be started and your script run in MATLAB. This will produce the .mat file specified above

In [7]:
# Start INCA in MATLAB and run the saved script followed by its runner.
# The recorded output below is the optimiser's iteration log printed during the run.
INCA_script_example.run_INCA_in_MATLAB(INCA_base_directory, script_folder, matlab_script, runner_script)


                                         Directional 
 Iteration      Residual     Step-size    derivative        Lambda
     0       1.03956e+06
     1            714586         0.177      -8.3e+05       0.51936
     2            279261         0.397     -4.12e+05       0.51936
     3            277543       0.00705     -1.21e+05       0.51936
     4            275559        0.0327     -2.98e+04       0.51936
     5            274050        0.0437     -1.69e+04       0.51936
     6            272838        0.0382     -1.55e+04       0.51936
     7            271557        0.0445     -1.41e+04       0.51936
     8            271411        0.0055     -1.32e+04       0.51936
     9            270725        0.0815     -4.03e+03       0.51936
    10            270060        0.0981     -3.22e+03       0.51936
    11            269576         0.112     -2.03e+03       0.51936
    12            269528        0.0136     -1.76e+03       0.51936
    13            269392        0.0453     -1.47e

#### Re-import the generated flux data

In [8]:
# INCA result file to re-import.
# NOTE(review): the runner above was generated with the name 'TestFile' and run from the
# working directory, while this path points into data/MFA_modelInputsData/ — presumably a
# stored copy of the result; confirm it is the file that was just produced.
filename = 'data/MFA_modelInputsData/TestFile.mat'
# MS input table passed to the re-import functions as simulation information.
simulation_info = pd.read_csv('data/MFA_modelInputsData/Re-import/experimentalMS_data_I.csv')
# Same identifier as the experiment id the inputs were limited to.
simulation_id = 'WTEColi_113C80_U13C20_01'

In [9]:
# Helper object providing the re-import functions.
reimport_data = INCA_reimport()
# Succession of functions; later steps consume the results of earlier ones, so the order matters.
# File-level information about the .mat file.
info = reimport_data.extract_file_info(filename)
# Two flags describing the simulation type, derived from the MS input table.
parallel, non_stationary = reimport_data.det_simulation_type(simulation_info)
# m and f are presumably the model and the fit stored in the .mat file — TODO confirm.
m, f = reimport_data.data_extraction(filename)
model_info = reimport_data.extract_model_info(m)
simulationParameters = reimport_data.extract_sim_params(simulation_id, info, m, filename)
# Base statistics of the fit; reused by all sort_* calls below.
fittedData = reimport_data.extract_base_stats(f, simulation_id, info)
# Fitted measurements, split into fluxes and MS fragments.
f_mnt_info = reimport_data.get_fit_info(f)
fittedMeasuredFluxes, fittedMeasuredFragments = reimport_data.sort_fit_info(f_mnt_info, simulation_info, fittedData)
# Residuals of the fitted measurements, split the same way.
f_mnt_res_info = reimport_data.get_residuals_info(f, simulation_info)
fittedMeasuredFluxResiduals, fittedMeasuredFragmentResiduals = reimport_data.sort_residual_info(f_mnt_res_info, simulation_info, fittedData)
# Fitted parameters, split into fluxes and fragments.
f_par_info = reimport_data.get_fitted_parameters(f, simulation_info)
fittedFluxes, fittedFragments = reimport_data.sort_parameter_info(f_par_info, simulation_info, fittedData)

In [10]:
# Display the table of fitted fluxes (last expression of the cell is rendered as output).
fittedFluxes

Unnamed: 0,simulation_id,simulation_dateAndTime,rxn_id,flux,flux_stdev,flux_lb,flux_ub,flux_units,fit_alf,fit_chi2s,fit_cor,fit_cov,free,used_,comment_
0,WTEColi_113C80_U13C20_01,2022-02-01 09:18:43,26dap_DASH_MSYN,2.295040e-01,0.002608,0.224392,0.234616,mmol*gDCW-1*hr-1,0.05,,,,False,True,
1,WTEColi_113C80_U13C20_01,2022-02-01 09:18:43,ACONTa_ACONTb,2.074886e+00,16996.864976,1.185984,1000.000000,mmol*gDCW-1*hr-1,0.05,,,,False,True,
2,WTEColi_113C80_U13C20_01,2022-02-01 09:18:43,ACONTa_ACONTb_reverse,8.690514e-07,15432.592032,0.000000,28.927600,mmol*gDCW-1*hr-1,0.05,,,,True,True,
3,WTEColi_113C80_U13C20_01,2022-02-01 09:18:43,AKGDH,1.423617e-01,7673.615592,0.000000,1.919800,mmol*gDCW-1*hr-1,0.05,,,,False,True,
4,WTEColi_113C80_U13C20_01,2022-02-01 09:18:43,ALATA_L,3.435520e-01,0.003904,0.335900,0.351204,mmol*gDCW-1*hr-1,0.05,,,,False,True,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
92,WTEColi_113C80_U13C20_01,2022-02-01 09:18:43,TPI,2.175603e+00,25620.656341,0.000000,1000.000000,mmol*gDCW-1*hr-1,0.05,,,,False,True,
93,WTEColi_113C80_U13C20_01,2022-02-01 09:18:43,TPI_reverse,8.689299e-07,24739.593649,0.000000,1000.000000,mmol*gDCW-1*hr-1,0.05,,,,True,True,
94,WTEColi_113C80_U13C20_01,2022-02-01 09:18:43,TrpSYN,3.801600e-02,0.000432,0.037169,0.038863,mmol*gDCW-1*hr-1,0.05,,,,False,True,
95,WTEColi_113C80_U13C20_01,2022-02-01 09:18:43,TyrSYN,9.222400e-02,0.001048,0.090170,0.094278,mmol*gDCW-1*hr-1,0.05,,,,False,True,


# Published data 1

In [32]:
from BFAIR.mfa.INCA import INCA_script
import pandas as pd
import numpy as np
import time
import ast
import matlab.engine
import sys
import escher
from BFAIR.mfa.INCA import INCA_reimport
from BFAIR.parsers import modelReactions_file_parser, atomMapping_reactions2_file_parser, atom_mapping_metabolites_file_parser

In [33]:
# Parse the model reactions of the Antoniewicz 2015 E. coli model from the spreadsheet.
model_file = '/Users/krv114/Documents/GitHub/AutoFlow-OmicsDataHandling/docs/examples/Literature data/E. coli - Antoniewicz 2015/Model.xlsx'
# Headers of the spreadsheet columns holding the reaction ids and the equations.
column_names = dict(
    reaction_id_col_name="Reaction ID",
    equation_col_name="Equations (Carbon atom transition)",
)
imported_reactions = modelReactions_file_parser(model_file, 'ecoli_Antoniewicz2015', **column_names)

In [34]:
# Parse the atom mapping of the reactions from the same model spreadsheet.
model_file = '/Users/krv114/Documents/GitHub/AutoFlow-OmicsDataHandling/docs/examples/Literature data/E. coli - Antoniewicz 2015/Model.xlsx'
# Headers of the spreadsheet columns holding the reaction ids and the equations.
column_names = dict(
    reaction_id_col_name="Reaction ID",
    equation_col_name="Equations (Carbon atom transition)",
)
imported_atom_mapping = atomMapping_reactions2_file_parser(model_file, 'ecoli_Antoniewicz2015', **column_names)

In [35]:
# Parse the atom mapping of the metabolites from the same model spreadsheet.
model_file = '/Users/krv114/Documents/GitHub/AutoFlow-OmicsDataHandling/docs/examples/Literature data/E. coli - Antoniewicz 2015/Model.xlsx'
# Headers of the spreadsheet columns holding the reaction ids and the equations.
column_names = dict(
    reaction_id_col_name="Reaction ID",
    equation_col_name="Equations (Carbon atom transition)",
)
imported_atoms_metabolites = atom_mapping_metabolites_file_parser(model_file, 'ecoli_Antoniewicz2015', **column_names)

In [36]:
# One row per tracer component. Experiments that use a mixture of two tracers
# appear twice, once per component, with the component's share in `ratio`.
tracer_rows = [
    # (experiment_id, met_name, met_atompositions, met_elements, ratio)
    ('[1,2]Glc', '[1,2]Glc', '{1,2}', '{C,C}', '1'),
    ('[2,3]Glc', '[2,3]Glc', '{2,3}', '{C,C}', '1'),
    ('[4,5,6]Glc', '[4,5,6]Glc', '{4,5,6}', '{C,C,C}', '1'),
    ('[2,3,4,5,6]Glc', '[2,3,4,5,6]Glc', '{2,3,4,5,6}', '{C,C,C,C,C}', '1'),
    ('[1] + [4,5,6]Glc (1:1)', '[1]Glc', '{1}', '{C}', '0.5'),
    ('[1] + [4,5,6]Glc (1:1)', '[4,5,6]Glc', '{4,5,6}', '{C,C,C}', '0.5'),
    ('[1] + [U]Glc (1:1)', '[1]Glc', '{1}', '{C}', '0.5'),
    ('[1] + [U]Glc (1:1)', '[U]Glc', '{1,2,3,4,5,6}', '{C,C,C,C,C,C}', '0.5'),
    ('[1] + [U]Glc (4:1)', '[1]Glc', '{1}', '{C}', '0.8'),
    ('[1] + [U]Glc (4:1)', '[U]Glc', '{1,2,3,4,5,6}', '{C,C,C,C,C,C}', '0.2'),
    ('20% [U]Glc', '20% [U]Glc', '{1,2,3,4,5,6}', '{C,C,C,C,C,C}', '0.2'),
    ('[1]Glc', '[1]Glc', '{1}', '{C}', '1'),
    ('[2]Glc', '[2]Glc', '{2}', '{C}', '1'),
    ('[3]Glc', '[3]Glc', '{3}', '{C}', '1'),
    ('[4]Glc', '[4]Glc', '{4}', '{C}', '1'),
    ('[5]Glc', '[5]Glc', '{5}', '{C}', '1'),
    ('[6]Glc', '[6]Glc', '{6}', '{C}', '1'),
]
# Every tracer enters the model through the same metabolite, 'Gluc.ext'.
tracer_info = pd.DataFrame(
    [
        (experiment, 'Gluc.ext', name, positions, elements, share)
        for experiment, name, positions, elements, share in tracer_rows
    ],
    columns=['experiment_id', 'met_id', 'met_name', 'met_atompositions', 'met_elements', 'ratio'],
)

In [37]:
# Measured flux of reaction v70, identical for every experiment.
# There is exactly one row per experiment. The tracer table lists mixture experiments
# once per tracer component, but repeating them here would give the same flux
# measurement twice for a single experiment.
flux_experiments = [
    '[1,2]Glc', '[2,3]Glc', '[4,5,6]Glc', '[2,3,4,5,6]Glc',
    '[1] + [4,5,6]Glc (1:1)', '[1] + [U]Glc (1:1)', '[1] + [U]Glc (4:1)',
    '20% [U]Glc', '[1]Glc', '[2]Glc', '[3]Glc', '[4]Glc', '[5]Glc', '[6]Glc'
]
n_flux_experiments = len(flux_experiments)
measured_fluxes_info = pd.DataFrame.from_dict({
    'experiment_id': flux_experiments,
    'model_id': ['ecoli_Antoniewicz2015'] * n_flux_experiments,
    'rxn_id': ['v70'] * n_flux_experiments,
    'flux_average': ['0.72'] * n_flux_experiments,
    'flux_stdev': ['0.02'] * n_flux_experiments,
    'flux_lb': ['0.70'] * n_flux_experiments,
    'flux_ub': ['0.74'] * n_flux_experiments,
}, orient='columns')

In [38]:
# The MDV table is semicolon-separated and uses decimal commas.
mdv_file = '/Users/krv114/Documents/GitHub/AutoFlow-OmicsDataHandling/docs/examples/Literature data/E. coli - Antoniewicz 2015/MDV_mod.csv'
experimental_mdvs = pd.read_csv(mdv_file, sep=';', decimal=',')
# Keep only the first six characters of every fragment label.
fragment_labels = experimental_mdvs['Frag'].tolist()
experimental_mdvs['Frag'] = [label[:6] for label in fragment_labels]

In [53]:
# Reshape the wide MDV table (columns from index 2 on are experiments, rows are the
# entries of each fragment's MDV) into one record per experiment and fragment.
# List-like values are encoded as strings of the form '{a,b,c}'.
experiment_ids = []
met_ids = []
fragment_ids = []
time_points = []
intensity_normalized_averages = []
intensity_normalized_stdevs = []
met_atompositions = []
met_elements = []

fragment_column = experimental_mdvs.columns[1]
experiments = list(experimental_mdvs.columns[2:])
# Unique fragments in order of first appearance. Unlike list(set(...)) this gives the
# same row order on every run, and it is computed once instead of once per experiment.
fragments = list(dict.fromkeys(experimental_mdvs[fragment_column]))

for experiment in experiments:
    for fragment in fragments:
        fragment_rows = experimental_mdvs[experimental_mdvs[fragment_column] == fragment]
        mdv_list = fragment_rows[experiment].to_list()
        # An MDV with n entries is annotated with n-1 atoms, numbered from 0.
        n_atoms = len(mdv_list) - 1

        experiment_ids.append(experiment)
        fragment_ids.append(fragment)
        # First metabolite listed for this fragment.
        met_ids.append(fragment_rows['Met'].iloc[0])
        time_points.append('0')
        # Values are divided by 100 (presumably given in percent — TODO confirm).
        intensity_normalized_averages.append(
            '{' + ','.join(str(mdv_value / 100) for mdv_value in mdv_list) + '}'
        )
        # No measured errors are available here; a fixed 0.01 is used for every entry.
        intensity_normalized_stdevs.append('{' + ','.join(['0.01'] * len(mdv_list)) + '}')
        met_atompositions.append('{' + ','.join(str(position) for position in range(n_atoms)) + '}')
        met_elements.append('{' + ','.join('C' for _ in range(n_atoms)) + '}')


In [55]:
# Collect the per-fragment record lists built above into one table.
mdvs = pd.DataFrame(dict(
    experiment_id=experiment_ids,
    met_id=met_ids,
    fragment_id=fragment_ids,
    time_point=time_points,
    met_atompositions=met_atompositions,
    met_elements=met_elements,
    intensity_normalized_average=intensity_normalized_averages,
    intensity_normalized_stdev=intensity_normalized_stdevs,
))

In [56]:
# measured fragments/MS data, tracers and measured fluxes should be limited to one experiment

# Use the parsed / manually built tables as script-generator inputs. Each one takes the
# place of the corresponding CSV table loaded in the first example.
atomMappingReactions_data_I = imported_atom_mapping # atom mapping of the reactions
modelReaction_data_I = imported_reactions # model reactions
atomMappingMetabolite_data_I = imported_atoms_metabolites # atom mapping of the metabolites
measuredFluxes_data_I = measured_fluxes_info # measured fluxes
experimentalMS_data_I = mdvs # measured MS fragments (MDVs)
tracer_I = tracer_info # tracers

In [57]:
INCA_script_test1 = INCA_script()

# The model-level tables are restricted to one model id / mapping id.
selected_model = 'ecoli_Antoniewicz2015'
limit_model = INCA_script_test1.limit_to_one_model
atomMappingReactions_data_I = limit_model(atomMappingReactions_data_I, 'mapping_id', selected_model)
modelReaction_data_I = limit_model(modelReaction_data_I, 'model_id', selected_model)
atomMappingMetabolite_data_I = limit_model(atomMappingMetabolite_data_I, 'mapping_id', selected_model)
measuredFluxes_data_I = limit_model(measuredFluxes_data_I, 'model_id', selected_model)

# Fluxes, fragments and tracers are additionally restricted to one experiment.
selected_experiment = '[1,2]Glc'
limit_experiment = INCA_script_test1.limit_to_one_experiment
measuredFluxes_data_I = limit_experiment(measuredFluxes_data_I, 'experiment_id', selected_experiment)
experimentalMS_data_I = limit_experiment(experimentalMS_data_I, 'experiment_id', selected_experiment)
tracer_I = limit_experiment(tracer_I, 'experiment_id', selected_experiment)

In [58]:
# Build the INCA MATLAB script from the six filtered input tables.
script = INCA_script_test1.script_generator(
    modelReaction_data_I,
    atomMappingReactions_data_I,
    atomMappingMetabolite_data_I,
    measuredFluxes_data_I,
    experimentalMS_data_I,
    tracer_I
)
# Machine-specific location of the INCA installation; every user has to change this.
INCA_base_directory = "/Users/krv114/Documents/inca2.1" # ADD YOUR BASE DIRECTORY HERE, e.g. "/Users/Username/Documents/INCAv2.1"
# %pwd is an IPython magic: the notebook's current working directory is used as the script folder.
script_folder = %pwd
# Base name shared by the saved script and its runner.
matlab_script = "coli_lit"
runner_script = matlab_script + "_runner"
INCA_script_test1.save_INCA_script(script, matlab_script)
# 'Coli_lit' is presumably the name of the resulting .mat file — TODO confirm. The 10 shows up
# as the second argument of `estimate(m,10)` in the generated runner (see the recorded traceback below).
runner = INCA_script_test1.runner_script_generator('Coli_lit', 10)
INCA_script_test1.save_runner_script(runner=runner, scriptname=matlab_script)

There is no stoichiometry given for: v61
There is no stoichiometry given for: v62
There is no stoichiometry given for: v63
There is no stoichiometry given for: v64
There is no stoichiometry given for: v67
There is no stoichiometry given for: v68
There is no stoichiometry given for: v69
There is no stoichiometry given for: v70


In [59]:
# Start INCA in MATLAB and run the saved script followed by its runner.
# NOTE(review): the output recorded for this cell is a MatlabExecutionError raised inside
# INCA's estimate() ("Index in position 1 exceeds array bounds. Index must not exceed 4."),
# so this example does not run to completion with the inputs built above.
INCA_script_test1.run_INCA_in_MATLAB(INCA_base_directory, script_folder, matlab_script, runner_script)

Index in position 1 exceeds array bounds. Index must not exceed 4.

Error in response

Error in response

Error in lof

Error in nlsqcon

Error in multistart

Error in estimate

Error in coli_lit_runner (line 1)
f=estimate(m,10);



MatlabExecutionError: 
  File /Users/krv114/Documents/inca2.1/idtools/response.p, line 0, in response

  File /Users/krv114/Documents/inca2.1/idtools/response.p, line 0, in response

  File /Users/krv114/Documents/inca2.1/idtools/lof.p, line 0, in lof

  File /Users/krv114/Documents/inca2.1/idtools/levmarq/nlsqcon.p, line 0, in nlsqcon

  File /Users/krv114/Documents/inca2.1/idtools/multistart.p, line 0, in multistart

  File /Users/krv114/Documents/inca2.1/driver/estimate.p, line 0, in estimate

  File /Users/krv114/Documents/GitHub/AutoFlow-OmicsDataHandling/docs/examples/coli_lit_runner.m, line 1, in coli_lit_runner
Index in position 1 exceeds array bounds. Index must not exceed 4.


# Published data - Cupriavidus necator

In [58]:
from BFAIR.mfa.INCA import INCA_script
import pandas as pd
import numpy as np
import time
import ast
import matlab.engine
import sys
import escher
from BFAIR.mfa.INCA import INCA_reimport
from BFAIR.parsers import modelReactions_file_parser, atomMapping_reactions2_file_parser, atom_mapping_metabolites_file_parser

#### Import using parsers

In [59]:
# All three parsers read the same spreadsheet, model id and column headers.
model_file = '/Users/krv114/Documents/GitHub/AutoFlow-OmicsDataHandling/docs/examples/Literature data/Cupriavidus necator  Alagesan 2017/reactions_2nd.xlsx'
parser_model_id = 'Cupriavidus_necator2017'
column_names = dict(
    reaction_id_col_name="Reaction ID",
    equation_col_name="Equations (Carbon atom transition)",
)

# Model reactions, atom mapping of the reactions, atom mapping of the metabolites.
imported_reactions = modelReactions_file_parser(model_file, parser_model_id, **column_names)
imported_atom_mapping = atomMapping_reactions2_file_parser(model_file, parser_model_id, **column_names)
imported_atoms_metabolites = atom_mapping_metabolites_file_parser(model_file, parser_model_id, **column_names)

#### Manual set up of additional information

In [60]:
# One row per experiment; each experiment uses a single tracer (ratio '1').
tracer_rows = [
    # (experiment_id, met_id, met_name, met_atompositions, met_elements, ratio)
    ('D-[1-13C]fructose', 'F6P.ext', 'D-[1-13C]fructose', '{1}', '{C}', '1'),
    ('[1,2-13C]glycerol', 'GLY.ext', '[1,2-13C]glycerol', '{1,2}', '{C,C}', '1'),
    ('[1,2-13C]glycerolandCO2', 'GLY.ext', '[1,2-13C]glycerolandCO2', '{1,2}', '{C,C}', '1'),
]
tracer_info = pd.DataFrame(
    tracer_rows,
    columns=['experiment_id', 'met_id', 'met_name', 'met_atompositions', 'met_elements', 'ratio'],
)

In [61]:
# One measured flux per experiment: the fructose experiment uses reaction ex_1, both
# glycerol experiments use ex_2. The flux value and its bounds are the same for all three.
flux_rows = [
    # (experiment_id, rxn_id)
    ('D-[1-13C]fructose', 'ex_1'),
    ('[1,2-13C]glycerol', 'ex_2'),
    ('[1,2-13C]glycerolandCO2', 'ex_2'),
]
measured_fluxes_info = pd.DataFrame(
    [
        (experiment, 'Cupriavidus_necator2017', reaction, '1', '0.01', '0.99', '1.01')
        for experiment, reaction in flux_rows
    ],
    columns=['experiment_id', 'model_id', 'rxn_id', 'flux_average', 'flux_stdev', 'flux_lb', 'flux_ub'],
)

#### Reshape the measured MDVs (TODO: this step should become its own parser)

In [62]:
# Read the measured MDV table; the file is semicolon-separated and uses decimal commas.
experimental_mdvs = pd.read_csv('/Users/krv114/Documents/GitHub/AutoFlow-OmicsDataHandling/docs/examples/Literature data/Cupriavidus necator  Alagesan 2017/MDVs.csv', sep=';', decimal=',')

In [63]:
# Reshape the wide MDV table into one record per experiment and fragment.
# Columns 2, 4 and 6 hold the measured values of the three experiments; the matching
# standard deviations are read from the columns named 'stdev_' + experiment.
# List-like values are encoded as strings of the form '{a,b,c}'.
experiment_ids = []
met_ids = []
fragment_ids = []
time_points = []
intensity_normalized_averages = []
intensity_normalized_stdevs = []
met_atompositions = []
met_elements = []
sample_name_abbreviation = []

fragment_column = experimental_mdvs.columns[1]
experiments = [experimental_mdvs.columns[2], experimental_mdvs.columns[4], experimental_mdvs.columns[6]]
# Unique fragments in order of first appearance. Unlike list(set(...)) this gives the
# same row order on every run, and it is computed once instead of once per experiment.
fragments = list(dict.fromkeys(experimental_mdvs[fragment_column]))

for experiment in experiments:
    for fragment in fragments:
        fragment_rows = experimental_mdvs[experimental_mdvs[fragment_column] == fragment]
        mdv_list = fragment_rows[experiment].to_list()
        stdev_list = fragment_rows['stdev_' + experiment].to_list()
        # An MDV with n entries is annotated with n-1 atoms, numbered from 0.
        n_atoms = len(mdv_list) - 1

        experiment_ids.append(experiment)
        fragment_ids.append(fragment)
        # First metabolite listed for this fragment.
        met_ids.append(fragment_rows['Met'].iloc[0])
        time_points.append('0')
        intensity_normalized_averages.append('{' + ','.join(str(mdv_value) for mdv_value in mdv_list) + '}')
        intensity_normalized_stdevs.append('{' + ','.join(str(stdev_value) for stdev_value in stdev_list) + '}')
        sample_name_abbreviation.append('')
        met_atompositions.append('{' + ','.join(str(position) for position in range(n_atoms)) + '}')
        met_elements.append('{' + ','.join('C' for _ in range(n_atoms)) + '}')

# Collect the record lists into one table.
mdvs = pd.DataFrame({
    'experiment_id': experiment_ids,
    'met_id': met_ids,
    'fragment_id': fragment_ids,
    'time_point': time_points,
    'met_atompositions': met_atompositions,
    'met_elements': met_elements,
    'sample_name_abbreviation': sample_name_abbreviation,
    'intensity_normalized_average': intensity_normalized_averages,
    'intensity_normalized_stdev': intensity_normalized_stdevs,
})

In [64]:
# measured fragments/MS data, tracers and measured fluxes should be limited to one experiment

# Hand the parsed model tables to the names used by the script generator ...
modelReaction_data_I, atomMappingReactions_data_I, atomMappingMetabolite_data_I = (
    imported_reactions,
    imported_atom_mapping,
    imported_atoms_metabolites,
)
# ... and do the same for the manually assembled experiment tables.
measuredFluxes_data_I, experimentalMS_data_I, tracer_I = (
    measured_fluxes_info,
    mdvs,
    tracer_info,
)

In [65]:
INCA_script_test2 = INCA_script()

# The model-level tables are restricted to one model id / mapping id.
selected_model = 'Cupriavidus_necator2017'
limit_model = INCA_script_test2.limit_to_one_model
atomMappingReactions_data_I = limit_model(atomMappingReactions_data_I, 'mapping_id', selected_model)
modelReaction_data_I = limit_model(modelReaction_data_I, 'model_id', selected_model)
atomMappingMetabolite_data_I = limit_model(atomMappingMetabolite_data_I, 'mapping_id', selected_model)
measuredFluxes_data_I = limit_model(measuredFluxes_data_I, 'model_id', selected_model)

# Fluxes, fragments and tracers are additionally restricted to one experiment.
selected_experiment = '[1,2-13C]glycerol'
limit_experiment = INCA_script_test2.limit_to_one_experiment
measuredFluxes_data_I = limit_experiment(measuredFluxes_data_I, 'experiment_id', selected_experiment)
experimentalMS_data_I = limit_experiment(experimentalMS_data_I, 'experiment_id', selected_experiment)
tracer_I = limit_experiment(tracer_I, 'experiment_id', selected_experiment)

In [67]:
# Build the INCA MATLAB script from the six filtered input tables.
script = INCA_script_test2.script_generator(
    modelReaction_data_I,
    atomMappingReactions_data_I,
    atomMappingMetabolite_data_I,
    measuredFluxes_data_I,
    experimentalMS_data_I,
    tracer_I
)
# Machine-specific location of the INCA installation; every user has to change this.
INCA_base_directory = "/Users/krv114/Documents/inca2.1" # ADD YOUR BASE DIRECTORY HERE, e.g. "/Users/Username/Documents/INCAv2.1"
# %pwd is an IPython magic: the notebook's current working directory is used as the script folder.
script_folder = %pwd
# Base name shared by the saved script and its runner.
matlab_script = "c_necator_lit"
runner_script = matlab_script + "_runner"
INCA_script_test2.save_INCA_script(script, matlab_script)
# 'C_necator' is presumably the name of the resulting .mat file (a C_necator.mat is
# re-imported further down) — TODO confirm.
runner = INCA_script_test2.runner_script_generator('C_necator', 10)
INCA_script_test2.save_runner_script(runner=runner, scriptname=matlab_script)

There is no stoichiometry given for: R72


In [68]:
# Start INCA in MATLAB and run the saved script followed by its runner.
# The recorded output below is the optimiser's iteration log printed during the run.
INCA_script_test2.run_INCA_in_MATLAB(INCA_base_directory, script_folder, matlab_script, runner_script)


                                         Directional 
 Iteration      Residual     Step-size    derivative        Lambda
     0       1.67571e+07
     1        1.6755e+07       7.2e-05     -1.48e+07      0.936766
     2        5.6792e+06           0.5     -7.38e+06      0.936766
     3       5.66816e+06       0.00159     -3.48e+06      0.936766
     4       5.66741e+06      0.000113     -3.32e+06      0.936766
     5       5.66103e+06       0.00103     -3.09e+06      0.936766
     6       5.65824e+06      0.000484     -2.88e+06      0.936766
     7       5.65707e+06      0.000221     -2.66e+06      0.936766
     8       5.64994e+06       0.00146     -2.43e+06      0.936766
     9       5.64878e+06      0.000263      -2.2e+06      0.936766
    10       5.64807e+06      0.000179     -1.98e+06      0.936766
    11       5.64039e+06       0.00218     -1.76e+06      0.936766
    12       5.62837e+06       0.00392     -1.53e+06      0.936766
    13       5.62005e+06       0.00319      -1.3e

In [69]:
# INCA result file to re-import (machine-specific absolute path; adapt before running).
filename = '/Users/krv114/Documents/GitHub/AutoFlow-OmicsDataHandling/docs/examples/C_necator.mat'
# The MS table already limited to one experiment doubles as the simulation information.
simulation_info = experimentalMS_data_I
# Same identifier as the experiment id the inputs were limited to.
simulation_id = '[1,2-13C]glycerol'

In [70]:
# Helper object providing the re-import functions used in the next cell.
reimport_data = INCA_reimport()

In [71]:
# Succession of functions
info = reimport_data.extract_file_info(filename)
parallel, non_stationary = reimport_data.det_simulation_type(simulation_info)
m, f = reimport_data.data_extraction(filename)
model_info = reimport_data.extract_model_info(m)
simulationParameters = reimport_data.extract_sim_params(simulation_id, info, m, filename)
fittedData = reimport_data.extract_base_stats(f, simulation_id, info)
f_mnt_info = reimport_data.get_fit_info(f)
fittedMeasuredFluxes, fittedMeasuredFragments = reimport_data.sort_fit_info(f_mnt_info, simulation_info, fittedData)
f_mnt_res_info = reimport_data.get_residuals_info(f, simulation_info)
fittedMeasuredFluxResiduals, fittedMeasuredFragmentResiduals = reimport_data.sort_residual_info(f_mnt_res_info, simulation_info, fittedData)
f_par_info = reimport_data.get_fitted_parameters(f, simulation_info)
fittedFluxes, fittedFragments = reimport_data.sort_parameter_info(f_par_info, simulation_info, fittedData)

No fluxes found
No MS data found
No MS data found
No MS data found
No MS data found
No MS data found
No MS data found
No MS data found
No MS data found
No MS data found
No MS data found
No MS data found
No MS data found
No MS data found
No MS data found
No MS data found
No MS data found
No MS data found
No MS data found
No MS data found
No MS data found
No MS data found
No MS data found
No MS data found
Error retriving information, check input files
Error retriving information, check input files
Error retriving information, check input files
Error retriving information, check input files
Error retriving information, check input files
Error retriving information, check input files
Error retriving information, check input files
Error retriving information, check input files
Error retriving information, check input files
Error retriving information, check input files
Error retriving information, check input files
Error retriving information, check input files
Error retriving information, 

In [72]:
# Display the table of fitted fluxes (last expression of the cell is rendered as output).
# NOTE(review): in the recorded output most flux_stdev values are of the order 1e4 while the
# bounds are 0-1000, which suggests these fluxes are poorly determined — confirm before use.
fittedFluxes

Unnamed: 0,simulation_id,simulation_dateAndTime,rxn_id,flux,flux_stdev,flux_lb,flux_ub,flux_units,fit_alf,fit_chi2s,fit_cor,fit_cov,free,used_,comment_
0,"[1,2-13C]glycerol",2022-07-19 17:49:11,ex_1,0.003783,29109.513454,0.00,1000.00,mmol*gDCW-1*hr-1,0.05,,,,False,True,
1,"[1,2-13C]glycerol",2022-07-19 17:49:11,ex_2,1.000000,0.009999,0.99,1.01,mmol*gDCW-1*hr-1,0.05,,,,False,True,
2,"[1,2-13C]glycerol",2022-07-19 17:49:11,R1,1.000000,0.009999,0.99,1.01,mmol*gDCW-1*hr-1,0.05,,,,False,True,
3,"[1,2-13C]glycerol",2022-07-19 17:49:11,R10,0.000001,13827.879584,0.00,1000.00,mmol*gDCW-1*hr-1,0.05,,,,True,True,
4,"[1,2-13C]glycerol",2022-07-19 17:49:11,R11,0.096487,55336.052499,0.00,1000.00,mmol*gDCW-1*hr-1,0.05,,,,False,True,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
69,"[1,2-13C]glycerol",2022-07-19 17:49:11,R70,0.016546,13760.527501,0.00,1000.00,mmol*gDCW-1*hr-1,0.05,,,,False,True,
70,"[1,2-13C]glycerol",2022-07-19 17:49:11,R71,0.198882,47736.125077,0.00,1000.00,mmol*gDCW-1*hr-1,0.05,,,,False,True,
71,"[1,2-13C]glycerol",2022-07-19 17:49:11,R72,0.000009,45886.182486,0.00,1000.00,mmol*gDCW-1*hr-1,0.05,,,,False,True,
72,"[1,2-13C]glycerol",2022-07-19 17:49:11,R8,0.006147,16599.930210,0.00,1000.00,mmol*gDCW-1*hr-1,0.05,,,,False,True,


# Published data - Neural stem cells and astrocytes

In [27]:
from BFAIR.mfa.INCA import INCA_script
import pandas as pd
import numpy as np
import time
import ast
import matlab.engine
import sys
import escher
from BFAIR.mfa.INCA import INCA_reimport
from BFAIR.parsers import modelReactions_file_parser, atomMapping_reactions2_file_parser, atom_mapping_metabolites_file_parser

#### Import using parsers

In [28]:
# Published model workbook. The path is relative to the notebook's working directory
# (docs/examples, like the relative 'data/...' paths used for the E. coli example),
# so the notebook is no longer tied to one user's home folder.
neural_model_file = 'Literature data/Astrocytes and neural stem cells/Model.xlsx'
neural_model_id = 'Neural2016'
# All three parsers read the same two columns of the workbook.
parser_columns = dict(
    reaction_id_col_name="Reaction ID",
    equation_col_name="Equations (Carbon atom transition)",
)

# Model reactions, reaction atom mappings and metabolite atom mappings, each
# tagged with the model id 'Neural2016'.
imported_reactions = modelReactions_file_parser(
    neural_model_file, neural_model_id, **parser_columns
)
imported_atom_mapping = atomMapping_reactions2_file_parser(
    neural_model_file, neural_model_id, **parser_columns
)
imported_atoms_metabolites = atom_mapping_metabolites_file_parser(
    neural_model_file, neural_model_id, **parser_columns
)

#### Manual setup of additional information

In [29]:
# One tracer row per experiment; both experiments use '[1-13C]glucose' on the
# metabolite 'Glc.ext' (atom position 1, element C) with ratio '1'.
# All values are kept as strings.
tracer_info = pd.DataFrame.from_dict(
    dict(
        experiment_id=['Neural', 'Astrocytes'],
        met_id=['Glc.ext'] * 2,
        met_name=['[1-13C]glucose'] * 2,
        met_atompositions=['{1}'] * 2,
        met_elements=['{C}'] * 2,
        ratio=['1'] * 2,
    ),
    orient='columns',
)

In [30]:
# Measured fluxes and their standard deviations for the 'Neural' experiment, kept as
# strings. Lower/upper bounds are flux -/+ one standard deviation, again as strings.
flux_neural = ['256.3', '237.8', '-2.6', '-1.8', '2.8', '-0.6', '2.4', '26.1', '9.0',  '-0.3', '3.3', '3.0', '0.8', '3.7', '3.9', '12.9']
flux_stdev_neural = ['1.3', '10.1', '0.7', '0.4', '1.4', '0.3', '1.1', '6.5', '1.4', '0.1', '1.9', '2.2', '0.4', '2.2', '2.3', '5.1']
flux_lb_neural = [
    str(float(flux) - float(stdev))
    for flux, stdev in zip(flux_neural, flux_stdev_neural)
]
flux_ub_neural = [
    str(float(flux) + float(stdev))
    for flux, stdev in zip(flux_neural, flux_stdev_neural)
]

# Same construction for the 'Astrocytes' experiment.
flux_astro = ['175.7', '348.6', '-11.9', '-10.5', '4.1', '-1.6', '10.4', '3.4', '-0.9', '-1.7', '-6.6', '1.9', '1.5', '-2.1', '6.6', '48.0']
flux_stdev_astro = ['17.9', '9.6', '5.8', '3.4', '1.7', '0.3', '2.2', '1.0', '0.6', '0.3', '3.1', '0.3', '0.7', '0.8', '0.9', '4.4']
flux_lb_astro = [
    str(float(flux) - float(stdev))
    for flux, stdev in zip(flux_astro, flux_stdev_astro)
]
flux_ub_astro = [
    str(float(flux) + float(stdev))
    for flux, stdev in zip(flux_astro, flux_stdev_astro)
]

In [31]:
# Measured-flux table for model 'Neural2016': the 16 'Neural' rows come first,
# followed by the 16 'Astrocytes' rows. The flux columns are the string lists
# built in the previous cell, concatenated in the same order.
measured_fluxes_info = pd.DataFrame.from_dict(
    {
        'experiment_id': ['Neural'] * 16 + ['Astrocytes'] * 16,
        'model_id': ['Neural2016'] * 32,
        'rxn_id': [
            # reactions measured in the 'Neural' experiment
            'R45', 'R46', 'R52', 'R51', 'R54', 'R53', 'R55', 'R50', 'R47', 'R56', 'R62', 'R57', 'R63', 'R58', 'R59', 'R48',
            # reactions measured in the 'Astrocytes' experiment
            'R45', 'R46', 'R52', 'R51', 'R54', 'R53', 'R50', 'R65', 'R60', 'R66', 'R47', 'R63', 'R58', 'R61', 'R49', 'R48',
        ],
        'flux_average': flux_neural + flux_astro,
        'flux_stdev': flux_stdev_neural + flux_stdev_astro,
        'flux_lb': flux_lb_neural + flux_lb_astro,
        'flux_ub': flux_ub_neural + flux_ub_astro,
    },
    orient='columns',
)

#### Reshape the experimental MDVs (TODO: move this step into its own parser)

In [32]:
# Semicolon-separated file with decimal commas. The path is relative to the
# notebook's working directory (docs/examples) instead of a user-specific
# absolute path, matching the relative 'data/...' paths of the E. coli example.
experimental_mdvs = pd.read_csv(
    'Literature data/Astrocytes and neural stem cells/MDVs.csv',
    sep=';',
    decimal=',',
)

In [33]:
# Reshape the wide MDV table into one row per (experiment column, fragment).
# Layout of `experimental_mdvs` as used below: a 'Met' column, the fragment id in
# the second column, and one column per experiment/time point from the third
# column on; the rows sharing a fragment id hold that fragment's MDV entries.
experiment_ids = []
met_ids = []
fragment_ids = []
time_points = []
intensity_normalized_averages = []
intensity_normalized_stdevs = []
met_atompositions = []
met_elements = []
sample_name_abbreviation = []

# Column-name markers, tested in this order ('_033' has to be tried before '_3').
time_point_markers = (('_033', '0.33'), ('_3', '3'), ('_12', '12'), ('_24', '24'))
experiment_markers = (('N_', 'Neural'), ('A_', 'Astrocytes'))
# No standard deviations are read from the file; every MDV entry gets this value.
assumed_stdev = str(0.01)

fragment_column = experimental_mdvs[experimental_mdvs.columns[1]]
experiments = list(experimental_mdvs.columns[2:])
# De-duplicate in order of first appearance. Iterating a set of strings yields a
# different order from run to run, which made the row order of `mdvs`
# irreproducible; the list is also loop-invariant, so it is built only once.
fragments = list(dict.fromkeys(fragment_column.to_list()))

for experiment in experiments:
    time_point = next(
        (label for marker, label in time_point_markers if marker in experiment), None
    )
    experiment_id = next(
        (label for marker, label in experiment_markers if marker in experiment), None
    )
    if time_point is None or experiment_id is None:
        # Such a column used to leave the lists with unequal lengths, and the
        # problem only surfaced as a length error when building the DataFrame.
        raise ValueError(
            "Cannot derive time point and experiment from MDV column '"
            + str(experiment) + "'"
        )

    for fragment in fragments:
        fragment_rows = experimental_mdvs[fragment_column == fragment]
        mdv_list = fragment_rows[experiment].to_list()
        # An MDV with n entries is described by n - 1 atom positions (0 .. n-2),
        # all of element C.
        n_atoms = max(len(mdv_list) - 1, 0)

        fragment_ids.append(fragment)
        # First metabolite listed for this fragment (deterministic choice).
        met_ids.append(fragment_rows['Met'].to_list()[0])
        sample_name_abbreviation.append('')
        intensity_normalized_averages.append(
            '{' + ','.join(str(value) for value in mdv_list) + '}'
        )
        intensity_normalized_stdevs.append(
            '{' + ','.join([assumed_stdev] * len(mdv_list)) + '}'
        )
        met_atompositions.append(
            '{' + ','.join(str(position) for position in range(n_atoms)) + '}'
        )
        met_elements.append('{' + ','.join(['C'] * n_atoms) + '}')
        time_points.append(time_point)
        experiment_ids.append(experiment_id)


mdvs = pd.DataFrame.from_dict({
    'experiment_id': experiment_ids,
    'met_id': met_ids,
    'fragment_id': fragment_ids,
    'time_point': time_points,
    'met_atompositions': met_atompositions,
    'met_elements': met_elements,
    'sample_name_abbreviation': sample_name_abbreviation,
    'intensity_normalized_average': intensity_normalized_averages,
    'intensity_normalized_stdev': intensity_normalized_stdevs,
}, orient='columns')

In [34]:
# Hand the literature tables over to the generic *_data_I / tracer_I names that are
# filtered and passed to the INCA script generator in the following cells.
# Model-level tables:
modelReaction_data_I = imported_reactions
atomMappingReactions_data_I = imported_atom_mapping
atomMappingMetabolite_data_I = imported_atoms_metabolites
# Experiment-level tables; measured fluxes, MS fragments and tracers still have to
# be limited to one experiment.
tracer_I = tracer_info
measuredFluxes_data_I = measured_fluxes_info
experimentalMS_data_I = mdvs

In [35]:
INCA_script_test3 = INCA_script()
neural_model_id = 'Neural2016'
neural_experiment_id = 'Neural'

# Restrict the model-level tables to the 'Neural2016' model / mapping id.
atomMappingReactions_data_I = INCA_script_test3.limit_to_one_model(
    atomMappingReactions_data_I, 'mapping_id', neural_model_id
)
modelReaction_data_I = INCA_script_test3.limit_to_one_model(
    modelReaction_data_I, 'model_id', neural_model_id
)
atomMappingMetabolite_data_I = INCA_script_test3.limit_to_one_model(
    atomMappingMetabolite_data_I, 'mapping_id', neural_model_id
)
measuredFluxes_data_I = INCA_script_test3.limit_to_one_model(
    measuredFluxes_data_I, 'model_id', neural_model_id
)

# Restrict fluxes, MS fragments and tracers to the 'Neural' experiment.
measuredFluxes_data_I = INCA_script_test3.limit_to_one_experiment(
    measuredFluxes_data_I, 'experiment_id', neural_experiment_id
)
experimentalMS_data_I = INCA_script_test3.limit_to_one_experiment(
    experimentalMS_data_I, 'experiment_id', neural_experiment_id
)
tracer_I = INCA_script_test3.limit_to_one_experiment(
    tracer_I, 'experiment_id', neural_experiment_id
)

In [36]:
script = INCA_script_test3.script_generator(
    modelReaction_data_I,
    atomMappingReactions_data_I,
    atomMappingMetabolite_data_I,
    measuredFluxes_data_I,
    experimentalMS_data_I,
    tracer_I
)
INCA_base_directory = "/Users/krv114/Documents/inca2.1" # ADD YOUR BASE DIRECTORY HERE, e.g. "/Users/Username/Documents/INCAv2.1"
script_folder = %pwd
matlab_script = "neural_lit"
runner_script = matlab_script + "_runner"
INCA_script_test3.save_INCA_script(script, matlab_script)
runner = INCA_script_test3.runner_script_generator('Neural', 10)
INCA_script_test3.save_runner_script(runner=runner, scriptname=matlab_script)

In [38]:
# Run the saved script and its runner in MATLAB from `script_folder`, using the INCA
# installation at `INCA_base_directory`.
# NOTE(review): the saved output of this cell is a MatlabExecutionError raised at
# neural_lit.m line 520 (assignment to data_ms(1).idvs.id), so the run did not finish.
INCA_script_test3.run_INCA_in_MATLAB(INCA_base_directory, script_folder, matlab_script, runner_script)

Error using deal (line 37)
The number of outputs should match the number of inputs.

Error in base/subsasgn>subsa

Error in base/subsasgn

Error in base/subsasgn

Error in base/subsasgn

Error in base/subsasgn

Error in base/subsasgn

Error in base/subsasgn

Error in neural_lit (line 520)
m.expts(1).data_ms(1).idvs.id(1,2) = {'3PG_0_1_Neural'};



MatlabExecutionError: 
  File /Applications/MATLAB/MATLAB_R2021b.app/toolbox/matlab/datatypes/deal.m, line 37, in deal

  File /Users/krv114/Documents/inca2.1/class/@base/subsasgn.p, line 0, in subsa

  File /Users/krv114/Documents/inca2.1/class/@base/subsasgn.p, line 0, in subsasgn

  File /Users/krv114/Documents/inca2.1/class/@base/subsasgn.p, line 0, in subsasgn

  File /Users/krv114/Documents/inca2.1/class/@base/subsasgn.p, line 0, in subsasgn

  File /Users/krv114/Documents/inca2.1/class/@base/subsasgn.p, line 0, in subsasgn

  File /Users/krv114/Documents/inca2.1/class/@base/subsasgn.p, line 0, in subsasgn

  File /Users/krv114/Documents/inca2.1/class/@base/subsasgn.p, line 0, in subsasgn

  File /Users/krv114/Documents/GitHub/AutoFlow-OmicsDataHandling/docs/examples/neural_lit.m, line 520, in neural_lit
The number of outputs should match the number of inputs.


In [None]:
# Inputs for re-importing the results of the 'Neural' simulation.
simulation_id = 'Neural'
# The (already filtered) MS data table doubles as the simulation info.
simulation_info = experimentalMS_data_I
# NOTE(review): absolute, user-specific path to the results file; adjust it when
# running on another machine.
filename = '/Users/krv114/Documents/GitHub/AutoFlow-OmicsDataHandling/docs/examples/Neural.mat'

In [None]:
# Object whose methods extract and sort the fit results in the re-import pipeline.
reimport_data = INCA_reimport()

In [None]:
# Re-import pipeline: most calls consume the results of earlier ones, so keep this order.
# NOTE(review): this cell has no execution count and its saved output only reports
# "No fluxes found" / "No MS data found" / "Error retriving information" messages;
# re-run it once the MATLAB step has produced the results file.

# Information about the results file itself.
info = reimport_data.extract_file_info(filename)
# Two simulation-type flags, determined from the simulation info table.
parallel, non_stationary = reimport_data.det_simulation_type(simulation_info)
# Two objects read from the results file; `m` is used for the model/parameter steps
# and `f` for all fit-related steps below.
m, f = reimport_data.data_extraction(filename)
model_info = reimport_data.extract_model_info(m)
simulationParameters = reimport_data.extract_sim_params(simulation_id, info, m, filename)
# Base statistics of the fit; reused by the three sort_* calls.
fittedData = reimport_data.extract_base_stats(f, simulation_id, info)
# Fit information, split into measured fluxes and measured fragments.
f_mnt_info = reimport_data.get_fit_info(f)
fittedMeasuredFluxes, fittedMeasuredFragments = reimport_data.sort_fit_info(f_mnt_info, simulation_info, fittedData)
# Residuals, split the same way.
f_mnt_res_info = reimport_data.get_residuals_info(f, simulation_info)
fittedMeasuredFluxResiduals, fittedMeasuredFragmentResiduals = reimport_data.sort_residual_info(f_mnt_res_info, simulation_info, fittedData)
# Fitted parameters, split into fluxes and fragments; `fittedFluxes` is displayed next.
f_par_info = reimport_data.get_fitted_parameters(f, simulation_info)
fittedFluxes, fittedFragments = reimport_data.sort_parameter_info(f_par_info, simulation_info, fittedData)

No fluxes found
No MS data found
No MS data found
No MS data found
No MS data found
No MS data found
No MS data found
No MS data found
No MS data found
No MS data found
No MS data found
No MS data found
No MS data found
No MS data found
No MS data found
No MS data found
No MS data found
No MS data found
No MS data found
No MS data found
No MS data found
No MS data found
No MS data found
No MS data found
Error retriving information, check input files
Error retriving information, check input files
Error retriving information, check input files
Error retriving information, check input files
Error retriving information, check input files
Error retriving information, check input files
Error retriving information, check input files
Error retriving information, check input files
Error retriving information, check input files
Error retriving information, check input files
Error retriving information, check input files
Error retriving information, check input files
Error retriving information, 

In [None]:
# Show the fitted-flux table as the cell's rich output.
# NOTE(review): the saved output lists simulation_id '[1,2-13C]glycerol' rather than
# 'Neural' and the cell has no execution count, so it looks stale. Re-run to confirm.
fittedFluxes

Unnamed: 0,simulation_id,simulation_dateAndTime,rxn_id,flux,flux_stdev,flux_lb,flux_ub,flux_units,fit_alf,fit_chi2s,fit_cor,fit_cov,free,used_,comment_
0,"[1,2-13C]glycerol",2022-07-16 11:02:13,ex_1,1.000000e-07,5.393810e+04,0.00,1000.00,mmol*gDCW-1*hr-1,0.05,,,,False,True,
1,"[1,2-13C]glycerol",2022-07-16 11:02:13,ex_2,1.000000e+00,9.994977e-03,0.99,1.01,mmol*gDCW-1*hr-1,0.05,,,,False,True,
2,"[1,2-13C]glycerol",2022-07-16 11:02:13,R1,1.000000e+00,9.994977e-03,0.99,1.01,mmol*gDCW-1*hr-1,0.05,,,,False,True,
3,"[1,2-13C]glycerol",2022-07-16 11:02:13,R10,1.377663e-06,1.643564e+04,0.00,1000.00,mmol*gDCW-1*hr-1,0.05,,,,True,True,
4,"[1,2-13C]glycerol",2022-07-16 11:02:13,R11,9.877318e-02,4.700457e+04,0.00,1000.00,mmol*gDCW-1*hr-1,0.05,,,,False,True,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
69,"[1,2-13C]glycerol",2022-07-16 11:02:13,R70,1.761577e-02,1.286131e+04,0.00,1000.00,mmol*gDCW-1*hr-1,0.05,,,,False,True,
70,"[1,2-13C]glycerol",2022-07-16 11:02:13,R71,1.920038e-01,4.246336e+04,0.00,1000.00,mmol*gDCW-1*hr-1,0.05,,,,False,True,
71,"[1,2-13C]glycerol",2022-07-16 11:02:13,R72,2.831328e-03,3.350636e+04,0.00,1000.00,mmol*gDCW-1*hr-1,0.05,,,,False,True,
72,"[1,2-13C]glycerol",2022-07-16 11:02:13,R8,1.000000e-07,1.169627e-11,0.00,1000.00,mmol*gDCW-1*hr-1,0.05,,,,False,True,
