In [2]:
import pandas as pd
import numpy as np
import cobra
import re
import os

from troppo.omics.readers.generic import TabularReader
from troppo.methods_wrappers import ModelBasedWrapper, ReconstructionWrapper
from troppo.omics.integration import ContinuousScoreIntegrationStrategy
from troppo.methods.reconstruction.imat import IMAT, IMATProperties

The wrappers.external_wrappers module will be deprecated in a future release in favour of the wrappers module. 
    Available ModelObjectReader classes can still be loaded using cobamp.wrappers.<class>. An appropriate model 
    reader can also be created using the get_model_reader function on cobamp.wrappers


In [3]:
# parsing rule 
patt = re.compile('__COBAMPGPRDOT__[0-9]{1}') # e.g __COBAMPGPRDOT__2
replace_alt_transcripts = lambda x: patt.sub('', x) #  empty string (prune)

In [4]:
# load model and expression data
model = cobra.io.read_sbml_model('/home/biodata/aman/Human-GEM/model/Human-GEM.xml')
expression_data = pd.read_csv('/home/biodata/aman/expression_data_ensembl.csv', index_col=0)

In [5]:
expression_data

Unnamed: 0_level_0,expression
gene,Unnamed: 1_level_1
ENSG00000223972,0.000000
ENSG00000227232,0.015341
ENSG00000278267,0.000000
ENSG00000243485,0.000000
ENSG00000284332,0.000000
...,...
ENSG00000198727,9560.073000
ENSG00000210195,14.681607
ENSG00000210196,2.561994
ENSG00000275215,0.000000


In [4]:
print(expression_data.head())

expression_data_transposed = expression_data.T

print("\nTransposed shape:", expression_data_transposed.shape)
print(expression_data_transposed.head())

omics_container = TabularReader(path_or_df=expression_data_transposed, 
                                nomenclature='gene',
                                omics_type='transcriptomics').to_containers()

single_sample = omics_container[0]

                 expression
gene                       
ENSG00000223972    0.000000
ENSG00000227232    0.015341
ENSG00000278267    0.000000
ENSG00000243485    0.000000
ENSG00000284332    0.000000

Transposed shape: (1, 35160)
gene        ENSG00000223972  ENSG00000227232  ENSG00000278267  \
expression              0.0         0.015341              0.0   

gene        ENSG00000243485  ENSG00000284332  ENSG00000237613  NaN  \
expression              0.0              0.0              0.0  0.0   

gene        ENSG00000240361  ENSG00000186092  ENSG00000233750  ...  \
expression              0.0              0.0         0.046024  ...   

gene        ENSG00000210184  ENSG00000210191  ENSG00000198786  \
expression              0.0         0.061365        856.93335   

gene        ENSG00000198695  ENSG00000210194  ENSG00000198727  \
expression        129.11223         1.488104         9560.073   

gene        ENSG00000210195  ENSG00000210196  ENSG00000275215  ENSG00000278233  
expression        

In [16]:
model_wrapper = ReconstructionWrapper(model=model, ttg_ratio=9999,
                                      gpr_gene_parse_function=replace_alt_transcripts)

data_map = single_sample.get_integrated_data_map(model_reader=model_wrapper.model_reader,
                                                 and_func=min, or_func=sum)

In [6]:
def score_apply(reaction_map_scores):
    return {k:0  if v is None else v for k, v in reaction_map_scores.items()}

continuous_integration = ContinuousScoreIntegrationStrategy(score_apply=score_apply)
scores = continuous_integration.integrate(data_map=data_map)

In [7]:
scores

{'MAR03905': 78.286562788,
 'MAR03907': 44.24426,
 'MAR04097': 31.771797,
 'MAR04099': 98.04613599999999,
 'MAR04108': 31.771797,
 'MAR04133': 31.771797,
 'MAR04281': 506.59981,
 'MAR04388': 507.765747424,
 'MAR04283': 5.73763936,
 'MAR08357': 225.6702612,
 'MAR04379': 277.10958,
 'MAR04301': 277.10958,
 'MAR04355': 188.00741567,
 'MAR04358': 353.61657154,
 'MAR04360': 0,
 'MAR04363': 674.694227,
 'MAR04365': 196.491128,
 'MAR04368': 154.47135642,
 'MAR04370': 134.68111499999998,
 'MAR04371': 196.491128,
 'MAR04372': 134.68111499999998,
 'MAR04373': 1215.1982752000001,
 'MAR04375': 802.45645567,
 'MAR04377': 0.85911185,
 'MAR04381': 136.00047,
 'MAR04391': 305.41425,
 'MAR04394': 113.80163007,
 'MAR04396': 30.728589,
 'MAR04521': 29.82345269,
 'MAR06412': 20.081738,
 'MAR07745': 3.804638,
 'MAR07747': 39.304367,
 'MAR08360': 46.637497,
 'MAR08652': 29.82345269,
 'MAR08757': 11.996882,
 'MAR03989': 108.3554822,
 'MAR04122': 10.508779,
 'MAR04837': 8.913284529999999,
 'MAR05395': 29.6853

In [8]:
reaction_ids = model_wrapper.model_reader.r_ids
exp_vector = np.array([scores[rid] for rid in reaction_ids])
exp_vector

array([78.28656279, 44.24426   , 31.771797  , ...,  7.962126  ,
        7.962126  ,  7.962126  ])

In [18]:
os.environ["COBAMP_SOLVER"] = "CPLEX"

In [23]:
# Create the properties for the IMAT algorithm.
properties = IMATProperties(exp_vector=exp_vector, exp_thresholds=(25,75)) #solver='CPLEX'

# Run the iMAT algorithm.
imat = IMAT(S=model_wrapper.S, lb=model_wrapper.lb, ub=model_wrapper.ub, properties=properties)

model_imat = imat.run()

Solution was not optimal


In [None]:
pd.DataFrame(model_imat)

Unnamed: 0,0
0,0
1,1
2,5
3,6
4,9
...,...
9732,12964
9733,12967
9734,12968
9735,12969


In [None]:
# variable will contain the indices of the reactions that should be kept in the final model. 

In [None]:
print(type(model_imat))

<class 'numpy.ndarray'>


In [None]:
pd.DataFrame(model_imat).to_csv('troppo_output_imat.tsv', sep='\t')

---

In [None]:
# model.reactions
# len(model.reactions)

In [None]:
selected_reactions = [model.reactions[i] for i in model_imat.flatten().tolist()]
selected_reactions

[<Reaction MAR03905 at 0x7509955abe20>,
 <Reaction MAR03907 at 0x7509955a8070>,
 <Reaction MAR04133 at 0x75099546d600>,
 <Reaction MAR04281 at 0x75099546c370>,
 <Reaction MAR08357 at 0x75099546eaa0>,
 <Reaction MAR04379 at 0x75099546f160>,
 <Reaction MAR04301 at 0x75099546f7c0>,
 <Reaction MAR04355 at 0x75099546fa90>,
 <Reaction MAR04358 at 0x75099546fd00>,
 <Reaction MAR04360 at 0x750995490310>,
 <Reaction MAR04363 at 0x7509955abfd0>,
 <Reaction MAR04365 at 0x750995490790>,
 <Reaction MAR04368 at 0x750995491450>,
 <Reaction MAR04371 at 0x750995491b40>,
 <Reaction MAR04373 at 0x750995492800>,
 <Reaction MAR04375 at 0x750995492500>,
 <Reaction MAR04381 at 0x750995493580>,
 <Reaction MAR04391 at 0x7509954933d0>,
 <Reaction MAR07745 at 0x7509954b8d90>,
 <Reaction MAR08360 at 0x7509954b8e50>,
 <Reaction MAR08652 at 0x7509954b95a0>,
 <Reaction MAR03989 at 0x7509954ba1a0>,
 <Reaction MAR05395 at 0x7509954ba290>,
 <Reaction MAR05396 at 0x7509954bb3a0>,
 <Reaction MAR05397 at 0x7509954bbfd0>,


---

In [None]:
ctx_model = model.copy()

selected_ids = [r.id for r in selected_reactions]
to_remove = [r for r in ctx_model.reactions if r.id not in selected_ids]

ctx_model.remove_reactions(to_remove, remove_orphans=True)

ctx_model

0,1
Name,HumanGEM
Memory address,7509154a0760
Number of metabolites,7792
Number of reactions,9737
Number of genes,2174
Number of groups,148
Objective expression,0
Compartments,"Cytosol, Extracellular, Endoplasmic reticulum, Mitochondria, Peroxisome, Golgi apparatus, Lysosome, Nucleus, Inner mitochondria"


In [None]:
len(model.reactions), len(ctx_model.reactions)

(12971, 9737)

In [None]:
# export
cobra.io.write_sbml_model(ctx_model, "tinit_context_specific_model.xml")