# Network of metabolic reactions

The human-GEM was downaloaded from: https://github.com/SysBioChalmers/Human-GEM

    J. L. Robinson, P. Kocabas, H. Wang, P.-E. Cholley, et al. An atlas of human metabolism. Sci. Signal. 13, eaaz1482 (2020). doi:10.1126/scisignal.aaz1482



In [1]:
import pandas as pd 
import networkx as nx
import re

In [2]:
data = pd.read_csv("Human-GEM.txt", sep="\t")
data

Unnamed: 0,Rxn name,Formula,Gene-reaction association,LB,UB,Objective
0,MAR03905,MAM01796c + MAM02552c -> MAM01249c + MAM02039...,ENSG00000147576 or ENSG00000172955 or ENSG0000...,0.0,1000.0,0.0
1,MAR03907,MAM01796c + MAM02554c -> MAM01249c + MAM02039...,ENSG00000117448,0.0,1000.0,0.0
2,MAR04097,MAM01252c + MAM01371c + MAM01597c -> MAM01261...,ENSG00000131069,0.0,1000.0,0.0
3,MAR04099,MAM01252m + MAM01371m + MAM01597m -> MAM01261...,ENSG00000111058 or ENSG00000154930,0.0,1000.0,0.0
4,MAR04108,MAM01257c + MAM01597c -> MAM01261c + MAM01334...,ENSG00000131069,0.0,1000.0,0.0
...,...,...,...,...,...,...
12990,MAR20168,MAM00209c -> MAM02393m,ENSG00000144182,0.0,1000.0,0.0
12991,MAR20169,MAM01412c + MAM02040c -> MAM01410c + MAM01597...,ENSG00000172497,0.0,1000.0,0.0
12992,MAR20170,MAM02040c + MAM02644c -> MAM01597c + MAM02039...,ENSG00000097021,0.0,1000.0,0.0
12993,MAR20171,MAM01650c + MAM02040c -> MAM01597c + MAM01648...,ENSG00000097021,0.0,1000.0,0.0


## Create a dictionary of reactions reactants and products

In [3]:
separators = re.compile(r'\s*(->|<=>)\s*')

mar_reactants_dict = {}
mar_products_dict = {}

for index, row in data.iterrows():
    rxn_id = row['Rxn name']
    
    reactants, sep, products = re.split(separators, row['Formula'])
    
    mar_reactants_dict[rxn_id] = reactants.split(' + ')
    
    mar_products_dict[rxn_id] = products.split(' + ')



In [4]:
mar_reactants_dict

{'MAR03905': ['MAM01796c', 'MAM02552c'],
 'MAR03907': ['MAM01796c', 'MAM02554c'],
 'MAR04097': ['MAM01252c', 'MAM01371c', 'MAM01597c'],
 'MAR04099': ['MAM01252m', 'MAM01371m', 'MAM01597m'],
 'MAR04108': ['MAM01257c', 'MAM01597c'],
 'MAR04133': ['MAM01252c', 'MAM01371c', 'MAM02039c'],
 'MAR04281': ['MAM02039x', 'MAM02553x', 'MAM02819x'],
 'MAR04388': ['MAM02039c', 'MAM02553c', 'MAM02819c'],
 'MAR04283': ['MAM01249c', 'MAM02040c', 'MAM02554c'],
 'MAR08357': ['MAM01249m', 'MAM02040m', 'MAM02552m'],
 'MAR04379': ['MAM01371c', 'MAM01845c'],
 'MAR04301': ['MAM01845c', 'MAM03130c'],
 'MAR04355': ['MAM01690c', 'MAM01785c'],
 'MAR04358': ['MAM01285c', 'MAM02039c', 'MAM02696c'],
 'MAR04360': ['MAM02040c', 'MAM02475c', 'MAM02554c'],
 'MAR04363': ['MAM00674c'],
 'MAR04365': ['MAM00674c'],
 'MAR04368': ['MAM00247c', 'MAM01285c'],
 'MAR04370': ['MAM00247c', 'MAM02040c'],
 'MAR04371': ['MAM00247c'],
 'MAR04372': ['MAM00569c', 'MAM02040c'],
 'MAR04373': ['MAM00247c', 'MAM02039c', 'MAM02553c'],
 'MAR04

In [5]:
mar_products_dict

{'MAR03905': ['MAM01249c', 'MAM02039c', 'MAM02553c '],
 'MAR03907': ['MAM01249c', 'MAM02039c', 'MAM02555c '],
 'MAR04097': ['MAM01261c', 'MAM01334c', 'MAM02759c '],
 'MAR04099': ['MAM01261m', 'MAM01334m', 'MAM02759m '],
 'MAR04108': ['MAM01261c', 'MAM01334c', 'MAM02039c '],
 'MAR04133': ['MAM01257c', 'MAM02759c '],
 'MAR04281': ['MAM02403x', 'MAM02552x '],
 'MAR04388': ['MAM02403c', 'MAM02552c '],
 'MAR04283': ['MAM01252c', '2 MAM02039c', 'MAM02555c '],
 'MAR08357': ['MAM01252m', '2 MAM02039m', 'MAM02553m '],
 'MAR04379': ['MAM01285c', 'MAM01841c', 'MAM02039c '],
 'MAR04301': ['MAM01841c', 'MAM02039c', 'MAM03106c '],
 'MAR04355': ['MAM02883c '],
 'MAR04358': ['MAM01371c', 'MAM02819c '],
 'MAR04360': ['2 MAM02039c', 'MAM02555c', 'MAM02819c '],
 'MAR04363': ['MAM02040c', 'MAM02696c '],
 'MAR04365': ['MAM00913c '],
 'MAR04368': ['MAM00913c', 'MAM01371c '],
 'MAR04370': ['MAM00913c', 'MAM02039c', 'MAM02751c '],
 'MAR04371': ['MAM00569c', 'MAM02039c '],
 'MAR04372': ['MAM00913c', 'MAM02751c

## Create the network of metabolic reactions

Directed edge list of reactions: a directed edge is created between two reactions when the product of the first reaction is used in the reactants of the second reaction.

In [6]:
edge_list = []

# Iterate through reaction IDs
for reaction_id2, products2 in mar_products_dict.items():
    for reaction_id1, reactants1 in mar_reactants_dict.items():
        common_metabolites = set(reactants1) & set(products2)
        if common_metabolites:
            edge_list.append((reaction_id2, reaction_id1, list(common_metabolites)))


In [7]:
edge_list

[('MAR03905', 'MAR04133', ['MAM02039c']),
 ('MAR03905', 'MAR04388', ['MAM02039c']),
 ('MAR03905', 'MAR04283', ['MAM01249c']),
 ('MAR03905', 'MAR04358', ['MAM02039c']),
 ('MAR03905', 'MAR04373', ['MAM02039c']),
 ('MAR03905', 'MAR08757', ['MAM02039c']),
 ('MAR03905', 'MAR03944', ['MAM02039c']),
 ('MAR03905', 'MAR04774', ['MAM02039c']),
 ('MAR03905', 'MAR08766', ['MAM02039c']),
 ('MAR03905', 'MAR08767', ['MAM02039c']),
 ('MAR03905', 'MAR04386', ['MAM02039c']),
 ('MAR03905', 'MAR04387', ['MAM02039c']),
 ('MAR03905', 'MAR04400', ['MAM02039c']),
 ('MAR03905', 'MAR04401', ['MAM02039c']),
 ('MAR03905', 'MAR08768', ['MAM02039c']),
 ('MAR03905', 'MAR04590', ['MAM02039c']),
 ('MAR03905', 'MAR04592', ['MAM02039c']),
 ('MAR03905', 'MAR04595', ['MAM02039c']),
 ('MAR03905', 'MAR08341', ['MAM02039c']),
 ('MAR03905', 'MAR08342', ['MAM02039c']),
 ('MAR03905', 'MAR08352', ['MAM02039c']),
 ('MAR03905', 'MAR08353', ['MAM02039c']),
 ('MAR03905', 'MAR06537', ['MAM02039c']),
 ('MAR03905', 'MAR01568', ['MAM012

In [8]:
edges = pd.DataFrame(edge_list, columns=['Reaction1', 'Reaction2', 'CommonMetabolites'])

edges[['Reaction1', 'Reaction2']].to_csv('multiplex/3/mar_mar.tsv', sep='\t', index=False, header=False)


## Create a dictionary of gene-reaction associations

In [9]:
mar_gene_dict = {}
for index, row in data.iterrows():
    rxn_id = row['Rxn name']
    
    if isinstance(row['Gene-reaction association'], str):
        # Extract all strings starting with ENSG and ending with a space
        genes = re.findall(r'\bENSG[0-9]+\b', row['Gene-reaction association'])
        
        mar_gene_dict[rxn_id] = set(genes)

mar_gene_dict

{'MAR03905': {'ENSG00000147576',
  'ENSG00000172955',
  'ENSG00000180011',
  'ENSG00000187758',
  'ENSG00000196344',
  'ENSG00000196616',
  'ENSG00000197894',
  'ENSG00000198099',
  'ENSG00000248144'},
 'MAR03907': {'ENSG00000117448'},
 'MAR04097': {'ENSG00000131069'},
 'MAR04099': {'ENSG00000111058', 'ENSG00000154930'},
 'MAR04108': {'ENSG00000131069'},
 'MAR04133': {'ENSG00000131069'},
 'MAR04281': {'ENSG00000111716', 'ENSG00000134333'},
 'MAR04388': {'ENSG00000111716',
  'ENSG00000134333',
  'ENSG00000166796',
  'ENSG00000166800',
  'ENSG00000171989'},
 'MAR04283': {'ENSG00000006534',
  'ENSG00000108602',
  'ENSG00000132746',
  'ENSG00000184254'},
 'MAR08357': {'ENSG00000072210',
  'ENSG00000111275',
  'ENSG00000137124',
  'ENSG00000143149',
  'ENSG00000164904'},
 'MAR04379': {'ENSG00000067057', 'ENSG00000141959', 'ENSG00000152556'},
 'MAR04301': {'ENSG00000067057', 'ENSG00000141959', 'ENSG00000152556'},
 'MAR04355': {'ENSG00000109107', 'ENSG00000136872'},
 'MAR04358': {'ENSG0000006

Convert ensembl ids to gene symbol

In [10]:
genes = pd.read_csv("genes.tsv", sep="\t") # Downloaded from Human-GEM
genes

Unnamed: 0,genes,geneENSTID,geneENSPID,geneUniProtID,geneSymbols,geneEntrezID,geneNames,geneAliases,compartments,compDataSource
0,ENSG00000000419,ENST00000466152;ENST00000371582;ENST0000068304...,ENSP00000507119;ENSP00000360638;ENSP0000050698...,O60762,DPM1,8813,dolichyl-phosphate mannosyltransferase subunit...,CDGIE;MPDS,Endoplasmic reticulum,SwissProt
1,ENSG00000001036,ENST00000002165;ENST00000451668;ENST00000367585,ENSP00000002165;ENSP00000398119,Q9BTY2,FUCA2,2519,alpha-L-fucosidase 2,dJ20N2.5;MGC1314,Extracellular,DeepLoc2
2,ENSG00000001084,ENST00000509541;ENST00000650454;ENST0000061692...,ENSP00000495056;ENSP00000497574;ENSP0000048275...,P48506,GCLC,2729,glutamate-cysteine ligase catalytic subunit,GCS;GLCL;GLCLC,Cytosol;Nucleus,CellAtlas
3,ENSG00000001630,ENST00000003100;ENST00000450723;ENST0000042286...,ENSP00000003100;ENSP00000406757;ENSP00000394268,Q16850,CYP51A1,1595,cytochrome P450 family 51 subfamily A member 1,CP51;CYP51;CYPL1;LDM;P450-14DM;P450L1,Endoplasmic reticulum,SwissProt;CellAtlas
4,ENSG00000002549,ENST00000226299;ENST00000618908;ENST0000050849...,ENSP00000226299;ENSP00000481000;ENSP0000047602...,P28838,LAP3,51056,leucine aminopeptidase 3,LAP;LAPEP;PEPS,Nucleus;Cytosol,SwissProt;CellAtlas
...,...,...,...,...,...,...,...,...,...,...
2884,ENSG00000114120,ENST00000324194;ENST00000446041;ENST0000050742...,ENSP00000320688;ENSP00000401938;ENSP0000042147...,Q96CQ1,SLC25A36,55186,solute carrier family 25 member 36,,,
2885,ENSG00000159445,ENST00000368814.8;ENST00000489410.1;ENST000004...,ENSP00000357804.3;ENSP00000433304.1;ENSP000004...,Q5T1C6,THEM4,117145,thioesterase superfamily member 4,CTMP,Cytosol;Inner mitochondria,SwissProt
2886,ENSG00000196407,ENST00000368817.10;ENST00000453881.2,ENSP00000357807.4;ENSP00000406809.2,Q8N1Q8,THEM5,284486,thioesterase superfamily member 5,ACOT15,Inner mitochondria,SwissProt
2887,ENSG00000013306,ENST00000377095;ENST00000225308;ENST0000059019...,ENSP00000366299;ENSP00000225308;ENSP0000046768...,Q9BZJ4,SLC25A39,51629,solute carrier family 25 member 39,,,


In [11]:
ensg_to_symbol = dict(zip(genes['genes'], genes['geneSymbols']))

mapped_mar_gene_dict = {mar_id: [ensg_to_symbol.get(ensg, ensg) for ensg in engrn] for mar_id, engrn in mar_gene_dict.items()}
mapped_mar_gene_dict

{'MAR03905': ['ADH5',
  'ADH1A',
  'ADHFE1',
  'ADH1C',
  'ADH1B',
  'ADH7',
  'ADH4',
  'ZADH2',
  'ADH6'],
 'MAR03907': ['AKR1A1'],
 'MAR04097': ['ACSS2'],
 'MAR04099': ['ACSS3', 'ACSS1'],
 'MAR04108': ['ACSS2'],
 'MAR04133': ['ACSS2'],
 'MAR04281': ['LDHA', 'LDHB'],
 'MAR04388': ['LDHAL6B', 'LDHAL6A', 'LDHC', 'LDHB', 'LDHA'],
 'MAR04283': ['ALDH3B1', 'ALDH1A3', 'ALDH3A1', 'ALDH3B2'],
 'MAR08357': ['ALDH3A2', 'ALDH9A1', 'ALDH2', 'ALDH7A1', 'ALDH1B1'],
 'MAR04379': ['PFKM', 'PFKL', 'PFKP'],
 'MAR04301': ['PFKM', 'PFKL', 'PFKP'],
 'MAR04355': ['ALDOB', 'ALDOC'],
 'MAR04358': ['PKM', 'PKLR'],
 'MAR04360': ['ZADH2'],
 'MAR04363': ['ENO2', 'ENO3', 'ENO1'],
 'MAR04365': ['BPGM', 'PGAM1', 'PGAM4', 'PGAM2'],
 'MAR04368': ['PGK2', 'PGK1', 'CRISP3'],
 'MAR04370': ['ACYP1', 'ACYP2'],
 'MAR04371': ['BPGM', 'PGAM1', 'PGAM4', 'PGAM2'],
 'MAR04372': ['ACYP1', 'ACYP2'],
 'MAR04373': ['GAPDH', 'GAPDHS'],
 'MAR04375': ['ALDOB', 'ALDOA', 'ALDOC'],
 'MAR04377': ['FBP2', 'FBP1'],
 'MAR04381': ['GPI'],
 '

In [12]:
bipartites = [(mar_id, gene) for mar_id, genes in mapped_mar_gene_dict.items() for gene in genes]

bipartites = pd.DataFrame(bipartites, columns=['MAR_ID', 'Gene'])
bipartites

Unnamed: 0,MAR_ID,Gene
0,MAR03905,ADH5
1,MAR03905,ADH1A
2,MAR03905,ADHFE1
3,MAR03905,ADH1C
4,MAR03905,ADH1B
...,...,...
25889,MAR20168,LIPT1
25890,MAR20169,ACOT12
25891,MAR20170,ACOT7
25892,MAR20171,ACOT7


In [13]:
bipartites.to_csv('bipartite/3_1.tsv', sep='\t', index=False, header=False)
