In [1]:
import pandas as pd
import numpy as np
import cobra
import re
import os

from troppo.omics.readers.generic import TabularReader
from troppo.methods_wrappers import ModelBasedWrapper, ReconstructionWrapper
from troppo.omics.integration import ContinuousScoreIntegrationStrategy
from troppo.methods.reconstruction.imat import IMAT, IMATProperties

The wrappers.external_wrappers module will be deprecated in a future release in favour of the wrappers module. 
    Available ModelObjectReader classes can still be loaded using cobamp.wrappers.<class>. An appropriate model 
    reader can also be created using the get_model_reader function on cobamp.wrappers


In [2]:
from cobamp.core.linear_systems import get_default_solver

print("COBAMP default solver:", get_default_solver())

COBAMP default solver: CPLEX


In [3]:
# parsing rule 
patt = re.compile('__COBAMPGPRDOT__[0-9]{1}') # e.g __COBAMPGPRDOT__2
replace_alt_transcripts = lambda x: patt.sub('', x) #  empty string (prune)

In [4]:
# load model and expression data
model = cobra.io.read_sbml_model('/home/biodata/aman/Human-GEM/model/Human-GEM.xml')
expression_data = pd.read_csv('/home/biodata/aman/data/data_processed/Brain_fibroblasts_gencode.csv', index_col=0)

In [5]:
expression_data

Unnamed: 0_level_0,expression
gencode_id,Unnamed: 1_level_1
ENSG00000223972,0.000000
ENSG00000227232,0.000000
ENSG00000278267,0.000000
ENSG00000243485,0.000000
ENSG00000284332,0.000000
...,...
ENSG00000198727,3104.131300
ENSG00000210195,5.930706
ENSG00000210196,0.000000
ENSG00000275215,0.000000


In [6]:
print(expression_data.head())

expression_data_transposed = expression_data.T

print("\nTransposed shape:", expression_data_transposed.shape)
print(expression_data_transposed.head())

omics_container = TabularReader(path_or_df=expression_data_transposed, 
                                nomenclature='gene',
                                omics_type='transcriptomics').to_containers()

single_sample = omics_container[0]

                 expression
gencode_id                 
ENSG00000223972         0.0
ENSG00000227232         0.0
ENSG00000278267         0.0
ENSG00000243485         0.0
ENSG00000284332         0.0

Transposed shape: (1, 35159)
gencode_id  ENSG00000223972  ENSG00000227232  ENSG00000278267  \
expression              0.0              0.0              0.0   

gencode_id  ENSG00000243485  ENSG00000284332  ENSG00000237613  \
expression              0.0              0.0              0.0   

gencode_id  ENSG00000240361  ENSG00000186092  ENSG00000233750  \
expression              0.0              0.0         1.186141   

gencode_id  ENSG00000222623  ...  ENSG00000210184  ENSG00000210191  \
expression              0.0  ...              0.0              0.0   

gencode_id  ENSG00000198786  ENSG00000198695  ENSG00000210194  \
expression        587.13983        36.770374         1.186141   

gencode_id  ENSG00000198727  ENSG00000210195  ENSG00000210196  \
expression        3104.1313         5.930706

In [7]:
model_wrapper = ReconstructionWrapper(model=model, ttg_ratio=9999,
                                      gpr_gene_parse_function=replace_alt_transcripts)

data_map = single_sample.get_integrated_data_map(model_reader=model_wrapper.model_reader,
                                                 and_func=min, or_func=sum)



In [8]:
def score_apply(reaction_map_scores):
    return {k:0  if v is None else v for k, v in reaction_map_scores.items()}

continuous_integration = ContinuousScoreIntegrationStrategy(score_apply=score_apply)
scores = continuous_integration.integrate(data_map=data_map)

In [9]:
scores

{'MAR03905': 117.4279756,
 'MAR03907': 102.00814,
 'MAR04097': 9.489129,
 'MAR04099': 34.398093,
 'MAR04108': 9.489129,
 'MAR04133': 9.489129,
 'MAR04281': 659.49447,
 'MAR04388': 659.49447,
 'MAR04283': 40.3288011,
 'MAR08357': 303.652144,
 'MAR04379': 148.267643,
 'MAR04301': 148.267643,
 'MAR04355': 7.116847,
 'MAR04358': 169.61818,
 'MAR04360': 0,
 'MAR04363': 230.111375,
 'MAR04365': 164.8736215,
 'MAR04368': 97.26357,
 'MAR04370': 49.817929,
 'MAR04371': 164.8736215,
 'MAR04372': 49.817929,
 'MAR04373': 1117.3449411,
 'MAR04375': 329.747247,
 'MAR04377': 1.1861411,
 'MAR04381': 48.631786,
 'MAR04391': 397.35727,
 'MAR04394': 32.0258113,
 'MAR04396': 77.099178,
 'MAR04521': 91.33287,
 'MAR06412': 5.930706,
 'MAR07745': 15.419835,
 'MAR07747': 53.37635,
 'MAR08360': 75.91303,
 'MAR08652': 91.33287,
 'MAR08757': 0.0,
 'MAR03989': 26.095105099999998,
 'MAR04122': 56.934776,
 'MAR04837': 7.116847,
 'MAR05395': 30.8396695,
 'MAR05396': 36.7703755,
 'MAR09727': 17.792118,
 'MAR05397': 7

In [10]:
reaction_ids = model_wrapper.model_reader.r_ids
exp_vector = np.array([scores[rid] for rid in reaction_ids])
exp_vector

array([117.4279756, 102.00814  ,   9.489129 , ...,   3.5584235,
         3.5584235,   3.5584235])

In [11]:
os.environ["COBAMP_SOLVER"] = "CPLEX"

In [12]:
import optlang
from optlang.cplex_interface import Model as CPLEXModel

optlang.config = {'solver': 'cplex'}

import os
os.environ["OPTLANG_DEFAULT_SOLVER"] = "CPLEX"

In [13]:
print("Available solvers:", optlang.available_solvers)

Available solvers: {'GUROBI': False, 'GLPK': True, 'MOSEK': False, 'CPLEX': True, 'COINOR_CBC': False, 'SCIPY': True, 'OSQP': False, 'HIGHS': False}


In [14]:
# Create the properties for the IMAT algorithm.
properties = IMATProperties(exp_vector=exp_vector, exp_thresholds=(25,75))

# Run the iMAT algorithm.
imat = IMAT(S=model_wrapper.S, lb=model_wrapper.lb, ub=model_wrapper.ub, properties=properties)

model_imat = imat.run()

In [None]:
properties

exp_vector = [0 0 0 ... 0 0 0]
exp_thresholds = (25, 75)
tolerance = 1e-08
epsilon = 1

In [None]:
pd.DataFrame(model_imat)

Unnamed: 0,0


In [None]:
# variable will contain the indices of the reactions that should be kept in the final model. 

In [None]:
print(type(model_imat))

<class 'numpy.ndarray'>


In [None]:
pd.DataFrame(model_imat).to_csv('troppo_output_imat.tsv', sep='\t')

---

In [None]:
# model.reactions
# len(model.reactions)

In [None]:
selected_reactions = [model.reactions[i] for i in model_imat.flatten().tolist()]
selected_reactions

[]

---

In [None]:
ctx_model = model.copy()

selected_ids = [r.id for r in selected_reactions]
to_remove = [r for r in ctx_model.reactions if r.id not in selected_ids]

ctx_model.remove_reactions(to_remove, remove_orphans=True)

ctx_model

  warn("need to pass in a list")


0,1
Name,HumanGEM
Memory address,793f6aa33010
Number of metabolites,0
Number of reactions,0
Number of genes,0
Number of groups,148
Objective expression,0
Compartments,


In [None]:
len(model.reactions), len(ctx_model.reactions)

(12971, 0)

In [None]:
# export
cobra.io.write_sbml_model(ctx_model, "tinit_context_specific_model.xml")