In [1]:
import pandas as pd
import numpy as np
import cobra
import re

from troppo.omics.readers.generic import TabularReader
from troppo.methods_wrappers import ModelBasedWrapper, ReconstructionWrapper
from troppo.omics.integration import ContinuousScoreIntegrationStrategy
from troppo.methods.reconstruction.tINIT import tINIT, tINITProperties

#parsing rule 
# patt = re.compile('__COBAMPGPRDOT__[0-9]{1}')
# def replace_alt_transcripts(x):
#     if not isinstance(x, str):
#         return x
#     # 1. remove internal COBAMP placeholders
#     x = patt.sub('', x)
#     # 2. remove unwanted leading underscores before ENSG IDs
#     x = re.sub(r'_(?=ENSG[0-9]+)', '', x)
#     # 3. add missing spaces around boolean ops (fix 'andENSG'/'ENSGorENSG')
#     x = re.sub(r'(?i)([A-Za-z0-9])(?=(and|or))', r'\1 ', x)
#     x = re.sub(r'(?i)(and|or)(?=[A-Za-z0-9])', r'\1 ', x)
#     # 4. normalize to lowercase and clean whitespace
#     x = x.replace('AND', 'and').replace('OR', 'or')
#     x = re.sub(r'\s+', ' ', x).strip()
#     return x

# Placeholder token found in gene identifiers of parsed GPR rules, followed by
# a numeric suffix (presumably cobamp's stand-in for the '.' of an
# alternative-transcript/version suffix — confirm against cobamp).
# `[0-9]+` consumes the whole suffix; the previous `[0-9]{1}` removed only the
# first digit, so a suffix such as `__COBAMPGPRDOT__12` left a stray '2' fused
# onto the gene ID.
patt = re.compile(r'__COBAMPGPRDOT__[0-9]+')


def replace_alt_transcripts(x):
    """Remove every ``__COBAMPGPRDOT__<digits>`` marker from a gene/GPR string.

    Parameters
    ----------
    x : str
        Gene identifier or GPR rule text that may contain the marker.

    Returns
    -------
    str
        ``x`` with all markers (and their numeric suffixes) removed. Values
        that are not strings are returned unchanged, mirroring the guard in
        the commented-out variant of this parser.
    """
    if not isinstance(x, str):
        return x
    return patt.sub('', x)

The wrappers.external_wrappers module will be deprecated in a future release in favour of the wrappers module. 
    Available ModelObjectReader classes can still be loaded using cobamp.wrappers.<class>. An appropriate model 
    reader can also be created using the get_model_reader function on cobamp.wrappers


In [2]:
# load model and expression data
# NOTE(review): both locations are absolute, machine-specific paths; consider
# deriving them from a configurable data directory so the notebook is portable.
MODEL_SBML_PATH = '/home/biodata/aman/Human-GEM/model/Human-GEM.xml'
EXPRESSION_CSV_PATH = '/home/biodata/aman/data_processed/expression_data_hippocampus-development-human-brain-10XV2_annotated_gencode.csv'

# Genome-scale model parsed from SBML.
model = cobra.io.read_sbml_model(MODEL_SBML_PATH)
# Expression table; the first CSV column becomes the row index.
expression_data = pd.read_csv(EXPRESSION_CSV_PATH, index_col=0)

In [3]:
# Display the loaded expression table (indexed by the CSV's first column).
expression_data

Unnamed: 0_level_0,expression
gencode_id,Unnamed: 1_level_1
ENSG00000223972,0.000000
ENSG00000227232,0.372200
ENSG00000278267,0.014134
ENSG00000243485,0.047114
ENSG00000284332,0.000000
...,...
ENSG00000198727,1548.072900
ENSG00000210195,2.289735
ENSG00000210196,0.829204
ENSG00000275215,0.000000


In [4]:
# Preview the table in its original orientation (one row per gene).
print(expression_data.head())

# Flip the frame so the lone 'expression' column becomes a single sample row
# and every gene becomes a column.
expression_data_transposed = expression_data.transpose()

print("\nTransposed shape:", expression_data_transposed.shape)
print(expression_data_transposed.head())

# Hand the sample-by-gene frame to troppo's tabular reader.
tabular_reader = TabularReader(
    path_or_df=expression_data_transposed,
    nomenclature='gene',
    omics_type='transcriptomics',
)
omics_container = tabular_reader.to_containers()

# The transposed frame holds exactly one row, so use the first container.
single_sample = omics_container[0]

                 expression
gencode_id                 
ENSG00000223972    0.000000
ENSG00000227232    0.372200
ENSG00000278267    0.014134
ENSG00000243485    0.047114
ENSG00000284332    0.000000

Transposed shape: (1, 35159)
gencode_id  ENSG00000223972  ENSG00000227232  ENSG00000278267  \
expression              0.0           0.3722         0.014134   

gencode_id  ENSG00000243485  ENSG00000284332  ENSG00000237613  \
expression         0.047114              0.0              0.0   

gencode_id  ENSG00000240361  ENSG00000186092  ENSG00000233750  \
expression              0.0              0.0         0.315663   

gencode_id  ENSG00000222623  ...  ENSG00000210184  ENSG00000210191  \
expression              0.0  ...              0.0         0.014134   

gencode_id  ENSG00000198786  ENSG00000198695  ENSG00000210194  \
expression         304.5536        20.494543         0.164899   

gencode_id  ENSG00000198727  ENSG00000210195  ENSG00000210196  \
expression        1548.0729         2.289735

In [5]:
# Rich display of the transposed (sample x gene) table.
# NOTE(review): expression_data_transposed is already a DataFrame (it is the
# transpose of one), so the pd.DataFrame(...) wrapper looks redundant.
pd.DataFrame(expression_data_transposed)

gencode_id,ENSG00000223972,ENSG00000227232,ENSG00000278267,ENSG00000243485,ENSG00000284332,ENSG00000237613,ENSG00000240361,ENSG00000186092,ENSG00000233750,ENSG00000222623,...,ENSG00000210184,ENSG00000210191,ENSG00000198786,ENSG00000198695,ENSG00000210194,ENSG00000198727,ENSG00000210195,ENSG00000210196,ENSG00000275215,ENSG00000278233
expression,0.0,0.3722,0.014134,0.047114,0.0,0.0,0.0,0.0,0.315663,0.0,...,0.0,0.014134,304.5536,20.494543,0.164899,1548.0729,2.289735,0.829204,0.0,0.0


In [6]:
# Wrap the cobra model for troppo. The wrapper later supplies the model reader,
# the stoichiometric matrix S and the lb/ub flux bounds used by tINIT.
# NOTE(review): ttg_ratio=9999 is an unexplained magic number — confirm its
# meaning in the troppo/cobamp docs and record why this value was chosen.
# NOTE(review): gpr_gene_parse_function presumably post-processes gene names
# found in GPR rules; verify against the wrapper's documentation.
model_wrapper = ReconstructionWrapper(model=model, ttg_ratio=9999,
                                      gpr_gene_parse_function=replace_alt_transcripts)



In [7]:
# Project the sample's gene-level values onto the model through its reader.
# NOTE(review): and_func/or_func presumably decide how gene values combine
# across AND/OR in GPR rules (min for AND, sum for OR here) — confirm in troppo.
data_map = single_sample.get_integrated_data_map(model_reader=model_wrapper.model_reader,
                                                 and_func=min, or_func=sum)

In [8]:
def score_apply(reaction_map_scores):
    """Return a new dict mirroring the input, with every ``None`` value set to 0.

    Keys and all non-``None`` values are carried over untouched.
    """
    cleaned_scores = {}
    for reaction_id, score in reaction_map_scores.items():
        if score is None:
            cleaned_scores[reaction_id] = 0
        else:
            cleaned_scores[reaction_id] = score
    return cleaned_scores

# Build the integration strategy with score_apply as its post-processing hook,
# then turn the data map into the `scores` mapping (reaction ID -> score, as
# shown when `scores` is displayed).
continuous_integration = ContinuousScoreIntegrationStrategy(score_apply=score_apply)
scores = continuous_integration.integrate(data_map=data_map)

In [9]:

# Inspect the reaction scores.
# NOTE(review): this dumps the entire dict into the notebook output; showing a
# small slice or summary statistics would keep the saved notebook readable.
scores

{'MAR03905': 104.84724735899998,
 'MAR03907': 109.34191,
 'MAR04097': 6.2048993,
 'MAR04099': 17.5169444,
 'MAR04108': 6.2048993,
 'MAR04133': 6.2048993,
 'MAR04281': 553.81909,
 'MAR04388': 554.247826408,
 'MAR04283': 1.6725430700000001,
 'MAR08357': 240.054694,
 'MAR04379': 141.030721,
 'MAR04301': 141.030721,
 'MAR04355': 63.815762502,
 'MAR04358': 203.93718833399998,
 'MAR04360': 0,
 'MAR04363': 385.1795896,
 'MAR04365': 219.739182167,
 'MAR04368': 171.40033,
 'MAR04370': 73.285656,
 'MAR04371': 219.739182167,
 'MAR04372': 73.285656,
 'MAR04373': 1301.125370836,
 'MAR04375': 310.075342502,
 'MAR04377': 0.659594413,
 'MAR04381': 43.65102,
 'MAR04391': 297.51477,
 'MAR04394': 59.019568924999994,
 'MAR04396': 31.382564000000002,
 'MAR04521': 33.342499556,
 'MAR06412': 16.805525,
 'MAR07745': 6.671327,
 'MAR07747': 33.00799,
 'MAR08360': 56.494263,
 'MAR08652': 33.342499556,
 'MAR08757': 0.89987534,
 'MAR03989': 25.545149950000003,
 'MAR04122': 33.48384,
 'MAR04837': 0.8951638860000001

In [10]:
# Reaction IDs that are passed to tINIT as essential.
essential_reaction_ids = ['MAR13082']
# Alternative ID list kept from the original cell:
#['MAR04413', 'MAR09931', 'MAR09932', 'MAR12140','MAR10024', 'MAR10062', 'MAR10063', 'MAR10064','MAR10065', 'MAR13082']

# tINIT takes positions rather than IDs, so look each ID up in the reader's
# reaction ID list.
reader_reaction_ids = model_wrapper.model_reader.r_ids
essential_reactions = [reader_reaction_ids.index(rid) for rid in essential_reaction_ids]

# Create the properties for the tINIT algorithm.
# NOTE(review): the scores are passed in dict insertion order; this assumes
# that order matches the wrapper's reaction order — confirm.
properties = tINITProperties(
    reactions_scores=list(scores.values()),
    solver='CPLEX',
    essential_reactions=essential_reactions,
)

# Run the tINIT algorithm.
tinit = tINIT(
    S=model_wrapper.S,
    lb=model_wrapper.lb,
    ub=model_wrapper.ub,
    properties=properties,
)

model_tinit = tinit.run()



Version identifier: 22.1.1.0 | 2022-11-28 | 9160aff4d
CPXPARAM_Read_DataCheck                          1
CPXPARAM_LPMethod                                1
CPXPARAM_QPMethod                                1
CPXPARAM_Simplex_Tolerances_Optimality           1e-08
CPXPARAM_Simplex_Tolerances_Feasibility          1e-08
CPXPARAM_MIP_Tolerances_MIPGap                   0.001
CPXPARAM_MIP_Tolerances_Integrality              1.0000000000000001e-09
Tried aggregator 3 times.
MIP Presolve eliminated 1290 rows and 2311 columns.
MIP Presolve modified 22 coefficients.
Aggregator did 1117 substitutions.
Reduced MIP has 24993 rows, 61699 columns, and 129920 nonzeros.
Reduced MIP has 18512 binaries, 6 generals, 0 SOSs, and 0 indicators.
Presolve time = 0.16 sec. (162.31 ticks)
Probing fixed 37 vars, tightened 295 bounds.
Probing time = 0.11 sec. (12.23 ticks)
Tried aggregator 2 times.
Detecting symmetries...
MIP Presolve eliminated 287 rows and 767 columns.
Aggregator did 2 substitutions.
Reduced MIP h

---

In [11]:
# model.reactions
# len(model.reactions)

In [12]:
# The tINIT result is used as a collection of reaction positions. Those
# positions belong to the wrapper's reaction index space (the same space in
# which `essential_reactions` was built from `model_reader.r_ids`), so each one
# is translated to a reaction ID first and then resolved on the cobra model by
# ID. This no longer relies on `model.reactions` happening to share the
# wrapper's ordering.
reader_reaction_ids = model_wrapper.model_reader.r_ids
selected_reactions = [
    model.reactions.get_by_id(reader_reaction_ids[i])
    for i in model_tinit.flatten().tolist()
]
len(selected_reactions)

11598

---

In [13]:
# Work on a copy so the full model stays intact for later comparison.
ctx_model = model.copy()

selected_ids = [r.id for r in selected_reactions]
# Set for O(1) membership tests; checking against the list made the filter
# below quadratic in the number of reactions.
selected_id_set = set(selected_ids)
to_remove = [r for r in ctx_model.reactions if r.id not in selected_id_set]

# Drop every reaction tINIT did not keep, along with metabolites/genes that
# become orphaned as a result (remove_orphans=True).
ctx_model.remove_reactions(to_remove, remove_orphans=True)

ctx_model

  warn("need to pass in a list")


0,1
Name,HumanGEM
Memory address,754e391fa050
Number of metabolites,7853
Number of reactions,11598
Number of genes,2558
Number of groups,148
Objective expression,1.0*MAR13082 - 1.0*MAR13082_reverse_11d67
Compartments,"Cytosol, Extracellular, Endoplasmic reticulum, Mitochondria, Peroxisome, Golgi apparatus, Lysosome, Nucleus, Inner mitochondria"


In [14]:
# Reaction counts: (full model, context-specific model).
len(model.reactions), len(ctx_model.reactions)

(12971, 11598)

In [14]:
# export
# Writes the context-specific model as SBML; the path is relative, so the file
# lands in the kernel's current working directory.
cobra.io.write_sbml_model(ctx_model, "tinit_context_specific_model.xml")

In [15]:
# Optimize the context-specific model for its current objective and report the
# optimum.
# NOTE(review): the recorded output is 0.0, i.e. no flux through the objective
# in the extracted model — worth investigating before using the exported file.
solution = ctx_model.optimize()
print(solution.objective_value)

0.0


In [15]:
# Debug inspection of the tINITProperties object passed to tINIT.
properties

reactions_scores = [104.84724735899998, 109.34191, 6.2048993, 17.5169444, 6.2048993, 6.2048993, 553.81909, 554.247826408, 1.6725430700000001, 240.054694, 141.030721, 141.030721, 63.815762502, 203.93718833399998, 0, 385.1795896, 219.739182167, 171.40033, 73.285656, 219.739182167, 73.285656, 1301.125370836, 310.075342502, 0.659594413, 43.65102, 297.51477, 59.019568924999994, 31.382564000000002, 33.342499556, 16.805525, 6.671327, 33.00799, 56.494263, 33.342499556, 0.89987534, 25.545149950000003, 33.48384, 0.8951638860000001, 10.5063969, 17.187146578, 13.413324278, 9.865648, 32.7535763, 16.5888, 16.5888, 32.7535763, 3.184899078, 3.184899078, 3.184899078, 3.184899078, 3.184899078, 3.184899078, 3.184899078, 3.184899078, 3.184899078, 3.184899078, 3.184899078, 3.184899078, 3.184899078, 7.15659949, 18.068176, 3.184899078, 3.184899078, 7.15659949, 18.068176, 0.17903279, 0.009422778, 100.03221, 70.939384, 20.52281, 45.526153, 16.5888, 16.5888, 25.234198268000004, 47.8253109, 0.009422778, 1241.050

In [18]:
# Debug inspection: lower flux bounds handed to tINIT.
# NOTE(review): execution counts around these inspection cells are out of
# order; restart the kernel and run all cells before sharing the notebook.
model_wrapper.lb

array([    0.,     0.,     0., ..., -1000., -1000., -1000.])

In [19]:
# Debug inspection: upper flux bounds handed to tINIT.
model_wrapper.ub

array([1000., 1000., 1000., ..., 1000., 1000., 1000.])

In [20]:
# Debug inspection: stoichiometric matrix handed to tINIT.
model_wrapper.S

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [16]:
# Flux summary of the context-specific model.
# NOTE(review): both tables in the recorded output are empty.
ctx_model.summary()

Metabolite,Reaction,Flux,C-Number,C-Flux

Metabolite,Reaction,Flux,C-Number,C-Flux


---

In [17]:
# Number of genes in the full (unreduced) model.
len(model.genes)

2887

In [18]:
# Collect the identifier of every gene in the full model and report how many
# there are.
genes = [gene.id for gene in model.genes]
gene_count = len(genes)
print(gene_count)

2887
