In [1]:
import pandas as pd
import numpy as np
import cobra
import re

from troppo.omics.readers.generic import TabularReader
from troppo.methods_wrappers import ModelBasedWrapper, ReconstructionWrapper
from troppo.omics.integration import ContinuousScoreIntegrationStrategy
from troppo.methods.reconstruction.gimme import GIMME, GIMMEProperties

The wrappers.external_wrappers module will be deprecated in a future release in favour of the wrappers module. 
    Available ModelObjectReader classes can still be loaded using cobamp.wrappers.<class>. An appropriate model 
    reader can also be created using the get_model_reader function on cobamp.wrappers


In [2]:
patt = re.compile('__COBAMPGPRDOT__[0-9]{1}')
replace_alt_transcripts = lambda x: patt.sub('', x)

In [None]:
# def replace_alt_transcripts(x):
#     # remove COBAMP transcript suffixes like __COBAMPGPRDOT__12
#     x = re.sub(r'__COBAMPGPRDOT__\d+', '', x)
#     # remove leading underscores before ENSG IDs
#     x = re.sub(r'_(?=ENSG)', '', x)
#     # insert missing spaces around 'and'/'or' if theyâ€™re glued to gene IDs
#     x = re.sub(r'(?i)([A-Z0-9])(?=and)', r'\1 ', x)
#     x = re.sub(r'(?i)(and)(?=[A-Z0-9])', r'\1 ', x)
#     x = re.sub(r'(?i)([A-Z0-9])(?=or)', r'\1 ', x)
#     x = re.sub(r'(?i)(or)(?=[A-Z0-9])', r'\1 ', x)
#     # ensure lowercase boolean operators and single spaces
#     x = re.sub(r'\bAND\b', 'and', x)
#     x = re.sub(r'\bOR\b', 'or', x)
#     x = re.sub(r'\s+', ' ', x).strip()
#     return x

In [3]:
# load model and expression data
model = cobra.io.read_sbml_model('/home/biodata/aman/Human-GEM/model/Human-GEM.xml')
expression_data = pd.read_csv('/home/biodata/aman/data_processed/expression_data_hippocampus-development-human-brain-10XV2_annotated_gencode.csv', index_col=0)

In [4]:
print(expression_data.head())

expression_data_transposed = expression_data.T

print("\nTransposed shape:", expression_data_transposed.shape)
print(expression_data_transposed.head())

omics_container = TabularReader(path_or_df=expression_data_transposed, 
                                nomenclature='gene',
                                omics_type='transcriptomics').to_containers()

single_sample = omics_container[0]

                 expression
gencode_id                 
ENSG00000223972    0.000000
ENSG00000227232    0.372200
ENSG00000278267    0.014134
ENSG00000243485    0.047114
ENSG00000284332    0.000000

Transposed shape: (1, 35159)
gencode_id  ENSG00000223972  ENSG00000227232  ENSG00000278267  \
expression              0.0           0.3722         0.014134   

gencode_id  ENSG00000243485  ENSG00000284332  ENSG00000237613  \
expression         0.047114              0.0              0.0   

gencode_id  ENSG00000240361  ENSG00000186092  ENSG00000233750  \
expression              0.0              0.0         0.315663   

gencode_id  ENSG00000222623  ...  ENSG00000210184  ENSG00000210191  \
expression              0.0  ...              0.0         0.014134   

gencode_id  ENSG00000198786  ENSG00000198695  ENSG00000210194  \
expression         304.5536        20.494543         0.164899   

gencode_id  ENSG00000198727  ENSG00000210195  ENSG00000210196  \
expression        1548.0729         2.289735

In [5]:
model_wrapper = ReconstructionWrapper(model=model, ttg_ratio=9999,
                                      gpr_gene_parse_function=replace_alt_transcripts)

data_map = single_sample.get_integrated_data_map(model_reader=model_wrapper.model_reader,
                                                 and_func=min, or_func=sum)



In [6]:
list(data_map.get_scores().items())[:10]

[('MAR03905', 104.84724735899998),
 ('MAR03907', 109.34191),
 ('MAR04097', 6.2048993),
 ('MAR04099', 17.5169444),
 ('MAR04108', 6.2048993),
 ('MAR04133', 6.2048993),
 ('MAR04281', 553.81909),
 ('MAR04388', 554.247826408),
 ('MAR04283', 1.6725430700000001),
 ('MAR08357', 240.054694)]

In [7]:
def score_apply(reaction_map_scores):
    return {k:0  if v is None else v for k, v in reaction_map_scores.items()}

continuous_integration = ContinuousScoreIntegrationStrategy(score_apply=score_apply)
scores = continuous_integration.integrate(data_map=data_map)

In [8]:
model = model_wrapper.model_reader.model

# search for biomass in reaction names
for r in model.reactions:
    if 'med' in r.name.lower() or 'growth' in r.name.lower():
        print(r.id, r.name)

MAR04357 Exchange of neuromedin N
MAR04359 Exchange of neuromedin N(1-4)
MAR09927 Transport of Creatine Phosphate, Extracellular (Assumed Diffusion)
MAR11802 Export Mediated by ATP Binding Cassette (ABC) Transporters
MAR11924 Assumed Passive Diffusion into Extracellular Space
MAR11928 Assumed Passive Diffusion into Extracellular Space
MAR11931 Assumed Passive Diffusion into Extracellular Space
MAR11935 Assumed Passive Diffusion into Extracellular Space
MAR11937 Assumed Passive Diffusion into Extracellular Space
MAR11939 Assumed Passive Diffusion into Extracellular Space
MAR11941 Assumed Passive Diffusion into Extracellular Space
MAR11943 Assumed Passive Diffusion into Extracellular Space
MAR11944 Assumed Passive Diffusion into Extracellular Space
MAR11946 Assumed Passive Diffusion into Extracellular Space
MAR11950 Assumed Passive Diffusion into Extracellular Space
MAR11955 Assumed Passive Diffusion into Extracellular Space
MAR11960 Assumed Passive Diffusion into Extracellular Space
MAR

In [None]:
# print(model_wrapper.model_reader.r_ids)

In [9]:
# Get the index of the biomass reaction in the model. This will be used as objective for the GIMME algorithm.
idx_objective = model_wrapper.model_reader.r_ids.index('MAR13082')
idx_objective

12725

In [10]:
# Create the properties for the GIMME algorithm.
properties = GIMMEProperties(exp_vector=[v for k, v in scores.items()], obj_frac=0.1, objectives=[{idx_objective: 1}],
                             preprocess=True, flux_threshold=0.25, solver='CPLEX',
                             reaction_ids= model_wrapper.model_reader.r_ids, metabolite_ids=model_wrapper.model_reader.m_ids)

# Run the GIMME algorithm.
gimme = GIMME(S=model_wrapper.S, lb=model_wrapper.lb, ub=model_wrapper.ub, properties=properties)

gimme_run = gimme.run()

In [11]:
gimme_run

[0,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29,
 30,
 31,
 32,
 33,
 34,
 35,
 36,
 37,
 38,
 39,
 40,
 41,
 42,
 43,
 44,
 45,
 46,
 47,
 48,
 49,
 50,
 51,
 52,
 53,
 54,
 55,
 56,
 57,
 58,
 59,
 60,
 61,
 62,
 63,
 64,
 67,
 68,
 69,
 70,
 71,
 72,
 73,
 74,
 76,
 79,
 80,
 81,
 83,
 84,
 86,
 87,
 88,
 89,
 90,
 91,
 92,
 93,
 94,
 95,
 96,
 97,
 98,
 99,
 101,
 104,
 105,
 107,
 108,
 109,
 110,
 111,
 112,
 113,
 115,
 116,
 118,
 119,
 121,
 122,
 123,
 124,
 125,
 126,
 127,
 128,
 129,
 130,
 131,
 135,
 136,
 137,
 138,
 139,
 140,
 141,
 142,
 143,
 144,
 145,
 146,
 147,
 148,
 149,
 150,
 151,
 152,
 153,
 155,
 156,
 157,
 158,
 159,
 160,
 161,
 162,
 163,
 164,
 165,
 166,
 168,
 169,
 172,
 173,
 174,
 175,
 176,
 177,
 178,
 179,
 180,
 181,
 182,
 183,
 184,
 185,
 186,
 187,
 188,
 189,
 190,
 191,
 192,
 193,
 194,
 195,
 196,
 197,
 198,
 199,
 200,
 201,
 202,
 203,
 204,
 205

In [12]:
print(type(gimme_run))
print(gimme_run)

<class 'list'>
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 67, 68, 69, 70, 71, 72, 73, 74, 76, 79, 80, 81, 83, 84, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 101, 104, 105, 107, 108, 109, 110, 111, 112, 113, 115, 116, 118, 119, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 168, 169, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 238, 239, 240, 2

In [13]:
pd.DataFrame(gimme_run)

Unnamed: 0,0
0,0
1,1
2,2
3,3
4,4
...,...
7290,12964
7291,12965
7292,12968
7293,12969


In [14]:
model_GIMME_final = model.copy()
r_ids = [r.id for r in model_GIMME_final.reactions]
to_remove_ids = [r_ids[r] for r in np.where(gimme_run==0)[0]]
model_GIMME_final.remove_reactions(to_remove_ids,True)

  to_remove_ids = [r_ids[r] for r in np.where(gimme_run==0)[0]]


In [15]:
model_GIMME_final

0,1
Name,HumanGEM
Memory address,7a96ef4c7eb0
Number of metabolites,8455
Number of reactions,12971
Number of genes,2887
Number of groups,148
Objective expression,1.0*MAR13082 - 1.0*MAR13082_reverse_11d67
Compartments,"Cytosol, Extracellular, Lysosome, Endoplasmic reticulum, Mitochondria, Peroxisome, Golgi apparatus, Nucleus, Inner mitochondria"


In [17]:
properties

objectives = [{12725: 1}]
exp_vector = [104.84724735899998, 109.34191, 6.2048993, 17.5169444, 6.2048993, 6.2048993, 553.81909, 554.247826408, 1.6725430700000001, 240.054694, 141.030721, 141.030721, 63.815762502, 203.93718833399998, 0, 385.1795896, 219.739182167, 171.40033, 73.285656, 219.739182167, 73.285656, 1301.125370836, 310.075342502, 0.659594413, 43.65102, 297.51477, 59.019568924999994, 31.382564000000002, 33.342499556, 16.805525, 6.671327, 33.00799, 56.494263, 33.342499556, 0.89987534, 25.545149950000003, 33.48384, 0.8951638860000001, 10.5063969, 17.187146578, 13.413324278, 9.865648, 32.7535763, 16.5888, 16.5888, 32.7535763, 3.184899078, 3.184899078, 3.184899078, 3.184899078, 3.184899078, 3.184899078, 3.184899078, 3.184899078, 3.184899078, 3.184899078, 3.184899078, 3.184899078, 3.184899078, 7.15659949, 18.068176, 3.184899078, 3.184899078, 7.15659949, 18.068176, 0.17903279, 0.009422778, 100.03221, 70.939384, 20.52281, 45.526153, 16.5888, 16.5888, 25.234198268000004, 47.8253109, 0

In [16]:
for i in properties["core"]:
    print(len(i))

Exception: core has not been registered as a mandatory or optional property.

In [None]:
# variable will contain the indices of the reactions that should be kept in the final model. 

In [None]:
print(type(gimme_run))

<class 'list'>


In [None]:
pd.DataFrame(gimme_run).to_csv('gimme_output_tinit.tsv', sep='\t')

---

In [None]:
# model.reactions
# len(model.reactions)

In [None]:
selected_reactions = [model.reactions[i] for i in gimme_run]
selected_reactions

[<Reaction MAR03905 at 0x7a8aca2d3e50>,
 <Reaction MAR03907 at 0x7a8aca2d0130>,
 <Reaction MAR04097 at 0x7a8aca194b20>,
 <Reaction MAR04099 at 0x7a8aca194550>,
 <Reaction MAR04108 at 0x7a8aca194040>,
 <Reaction MAR04133 at 0x7a8aca195630>,
 <Reaction MAR04281 at 0x7a8aca1943a0>,
 <Reaction MAR04388 at 0x7a8aca195330>,
 <Reaction MAR04283 at 0x7a8aca196380>,
 <Reaction MAR08357 at 0x7a8aca196ad0>,
 <Reaction MAR04379 at 0x7a8aca197190>,
 <Reaction MAR04301 at 0x7a8aca1977f0>,
 <Reaction MAR04355 at 0x7a8aca197ac0>,
 <Reaction MAR04358 at 0x7a8aca197d30>,
 <Reaction MAR04363 at 0x7a8aca196350>,
 <Reaction MAR04365 at 0x7a8aca1bc7c0>,
 <Reaction MAR04368 at 0x7a8aca1bd480>,
 <Reaction MAR04370 at 0x7a8aca1bd990>,
 <Reaction MAR04371 at 0x7a8aca1bdb70>,
 <Reaction MAR04372 at 0x7a8aca1bd360>,
 <Reaction MAR04373 at 0x7a8aca1be830>,
 <Reaction MAR04375 at 0x7a8aca1be530>,
 <Reaction MAR04377 at 0x7a8aca1bec50>,
 <Reaction MAR04381 at 0x7a8aca1bf5b0>,
 <Reaction MAR04391 at 0x7a8aca1bf400>,


---

In [None]:
ctx_model = model.copy()

selected_ids = [r.id for r in selected_reactions]
to_remove = [r for r in ctx_model.reactions if r.id not in selected_ids]

ctx_model.remove_reactions(to_remove, remove_orphans=True)

ctx_model

  warn("need to pass in a list")


0,1
Name,HumanGEM
Memory address,7a8b1cf573a0
Number of metabolites,6687
Number of reactions,7599
Number of genes,2826
Number of groups,148
Objective expression,1.0*MAR13082 - 1.0*MAR13082_reverse_11d67
Compartments,"Cytosol, Lysosome, Endoplasmic reticulum, Extracellular, Mitochondria, Peroxisome, Golgi apparatus, Nucleus, Inner mitochondria"


In [None]:
len(model.reactions), len(ctx_model.reactions)

(12971, 7599)

In [None]:
# export
cobra.io.write_sbml_model(ctx_model, "gimme_context_specific_model.xml")

In [None]:
solution = ctx_model.optimize()
print(solution.objective_value)

14.390743271937275


In [None]:
ctx_model.summary()

Metabolite,Reaction,Flux,C-Number,C-Flux
MAM01291e,MAR00566,0.00229,22,0.00%
MAM01362e,MAR00568,0.2232,20,0.08%
MAM01741e,MAR00571,0.01216,22,0.00%
MAM01689e,MAR00573,1.352,22,0.51%
MAM00094e,MAR00575,0.007832,22,0.00%
MAM01696e,MAR00576,0.04431,20,0.02%
MAM02648e,MAR00577,0.0237,20,0.01%
MAM02675e,MAR00617,0.02654,16,0.01%
MAM01432e,MAR00618,0.0008244,26,0.00%
MAM02456e,MAR00620,0.0008244,17,0.00%

Metabolite,Reaction,Flux,C-Number,C-Flux
MAM03127e,MAR01964,-80.79,40,60.57%
MAM03037e,MAR08386,-64.81,5,6.07%
MAM01115e,MAR09234,-12.55,20,4.71%
MAM03118e,MAR09437,-22.83,4,1.71%
MAM03971e,MAR10024,-14.39,0,0.00%
MAM03596e,MAR10553,-3.968,14,1.04%
MAM03597e,MAR10554,-0.1537,17,0.05%
MAM04011e,MAR10688,-65.66,21,25.85%


In [None]:
gimme.sol.__dict__['_Solution__value_map']

OrderedDict([('MAR03905', 0.0),
             ('MAR03907', 0.0),
             ('MAR04097', 0.0),
             ('MAR04099', 0.0),
             ('MAR04108', 0.0),
             ('MAR04133', 0.0),
             ('MAR04281', 0.0),
             ('MAR04388', 9.334853748074465),
             ('MAR04283', 0.0),
             ('MAR08357', 0.0),
             ('MAR04379', 0.0),
             ('MAR04301', 0.0),
             ('MAR04355', 0.0),
             ('MAR04358', 0.0),
             ('MAR04360', 0.0),
             ('MAR04363', 6.878879337791976),
             ('MAR04365', 6.878879337791976),
             ('MAR04368', 6.850503675413943),
             ('MAR04370', 0.0),
             ('MAR04371', 0.008452324963669305),
             ('MAR04372', 0.0),
             ('MAR04373', 6.842051350450274),
             ('MAR04375', 2.8236621547375385),
             ('MAR04377', 2.770533254965903),
             ('MAR04381', 0.835533362273009),
             ('MAR04391', 3.6740653870990654),
             ('MAR04394

In [None]:
fluxes = gimme.sol.__dict__['_Solution__value_map']