## Parsing the Small Molecule Regulatory Network

#### From https://www.ncbi.nlm.nih.gov/pubmed/28903046

The file mmc2.csv could not be opened with pandas. Opening it in Excel and saving it as .xlsx works, so the resulting Excel file is read with pandas instead.

In [41]:
import pandas as pd
import cameo

In [42]:
# Load the iJO1366 model through cameo; its metabolite and reaction ids are the
# targets that the SMRN identifiers are mapped onto further down.
model = cameo.load_model("iJO1366")

In [2]:
# Read the .xlsx re-export of mmc2 (path is relative to the notebook's directory).
df = pd.read_excel("../Data/Small_molecule_regulatory_network/mmc2.xlsx")

Only the metabolite identifiers, the reaction identifiers and the mode of regulation are needed.

In [13]:
# Keep only the three columns used downstream: metabolite id, reaction id, regulation mode.
full_smrn = df.loc[:, ["bigg.metabolite", "bigg.reaction", "Mode"]]

In [53]:
pair_columns = ["bigg.metabolite", "bigg.reaction"]
triple_columns = pair_columns + ["Mode"]

# Collapse repeated (metabolite, reaction, mode) rows into one row each.
print(len(full_smrn))
smrn = (
    full_smrn.reset_index()
    .groupby(triple_columns)
    .count()
    .reset_index()[triple_columns]
)
print(len(smrn))

# A (metabolite, reaction) pair that still occurs more than once is listed with
# more than one mode; such pairs are removed entirely.
interaction_counts = smrn.groupby(pair_columns).count().reset_index()
doublets = interaction_counts[interaction_counts["Mode"] > 1]
doublet_set = set(zip(doublets["bigg.metabolite"], doublets["bigg.reaction"]))

good_idx = [
    idx
    for idx, met, reac in zip(smrn.index, smrn["bigg.metabolite"], smrn["bigg.reaction"])
    if (met, reac) not in doublet_set
]

smrn = smrn.reindex(good_idx)

2760
1739


In [57]:
# Distinct SMRN metabolite and reaction identifiers that have to be matched to model ids.
metabolite_ids, reaction_ids = (
    set(smrn[column]) for column in ("bigg.metabolite", "bigg.reaction")
)

In [81]:
# NOTE(review): bare list literal with no side effects. These look like the SMRN
# metabolite ids that needed manual case fixes (compare the keys of metabolite_map);
# confirm, and consider generating the list from code instead of pasting it.
['acon_t', 'lipidads', 'apoacp', 'acon_c', 'ala_b', '26dap__m', 'metsox_s__L']

['acon_t', 'lipidads', 'apoacp', 'acon_c', 'ala_b', '26dap__m', 'metsox_s__L']

In [121]:
# NOTE(review): bare list literal with no side effects. These look like the SMRN
# reaction ids that needed manual case fixes (compare the keys of reaction_map);
# confirm, and consider generating the list from code instead of pasting it.
['no3r2bpp',
 'zn2abcpp',
 'icdhyr',
 'aoxsr2',
 'glyc3pabcpp',
 'fmnrx2',
 'argabcpp',
 'ptrcabcpp',
 'kabcpp',
 'aso3t8pp',
 'piuabcpp',
 'sulri',
 'mg2uabcpp',
 'atps4rpp',
 'treptspp']

['no3r2bpp',
 'zn2abcpp',
 'icdhyr',
 'aoxsr2',
 'glyc3pabcpp',
 'fmnrx2',
 'argabcpp',
 'ptrcabcpp',
 'kabcpp',
 'aso3t8pp',
 'piuabcpp',
 'sulri',
 'mg2uabcpp',
 'atps4rpp',
 'treptspp']

In [131]:
# Display the model's TREptspp reaction. Presumably a manual check of the exact id
# capitalisation needed for the 'treptspp' entry of reaction_map (TODO confirm).
model.reactions.TREptspp

0,1
Reaction identifier,TREptspp
Name,Trehalose transport via PEP:Pyr PTS (periplasm)
Memory address,0x0130e8c2ef0
Stoichiometry,"pep_c + tre_p --> pyr_c + tre6p_c  Phosphoenolpyruvate + Trehalose --> Pyruvate + Alpha,alpha'-Trehalose 6-phosphate"
GPR,b2417 and b2415 and b2416 and b4240
Lower bound,0.0
Upper bound,1000.0


In [144]:
# Hand-curated metabolite ids that the automatic lookup is not asked to resolve.
# Keys are the all-lower-case SMRN ids; values are the model ids without the
# compartment suffix. Every key is exactly the lower-cased value, so the mapping
# is derived from the correctly capitalised ids.
metabolite_map = {
    bigg_id.lower(): bigg_id
    for bigg_id in (
        "acon_T",
        "lipidAds",
        "apoACP",
        "acon_C",
        "ala_B",
        "26dap__M",
        "metsox_S__L",
    )
}

# Hand-curated reaction ids whose capitalisation the automatic rules (upper-case the
# id, then lower-case a trailing "PP" or the last character) cannot reproduce.
# Keys are the all-lower-case SMRN ids; values are the ids used by the model.
reaction_map = {
    'no3r2bpp': "NO3R2bpp",
    'zn2abcpp': "ZN2abcpp",
    'icdhyr': "ICDHyr",
    'aoxsr2': "AOXSr2",
    # Must be the 3-phosphate id: "GLYC2Pabcpp" is a different reaction id, and the
    # later membership check only tests that the id exists, so it cannot catch a mix-up.
    'glyc3pabcpp': "GLYC3Pabcpp",
    'fmnrx2': "FMNRx2",
    'argabcpp': "ARGabcpp",
    'ptrcabcpp': "PTRCabcpp",
    'kabcpp': "Kabcpp",
    'aso3t8pp': "ASO3t8pp",
    'piuabcpp': "PIuabcpp",
    # NOTE(review): differs from the key by more than letter case — confirm intended.
    'sulri': "SULR",
    'mg2uabcpp': "MG2uabcpp",
    'atps4rpp': "ATPS4rpp",
    'treptspp': "TREptspp"
}

unknown_metabolites = []
unknown_reactions = []

# Resolve every SMRN metabolite id that has no hand-curated entry: restore the
# upper-case stereo marker, then look for the id in the model with a "_c", "_p"
# or "_e" suffix (in that order). The first hit is stored without its suffix.
for original_met_id in metabolite_ids:
    if original_met_id in metabolite_map:
        continue

    candidate = original_met_id
    for stereo_suffix in ("__l", "__d", "__r"):
        if candidate.endswith(stereo_suffix):
            candidate = candidate.replace(stereo_suffix, stereo_suffix.upper())

    matched = None
    for suffix in ("_c", "_p", "_e"):
        try:
            matched = model.metabolites.get_by_id(candidate + suffix)
        except KeyError:
            continue
        break

    if matched is None:
        unknown_metabolites.append(original_met_id)
    else:
        # Drop the two-character compartment suffix from the model id.
        metabolite_map[original_met_id] = matched.id[:-2]
        
# Resolve every SMRN reaction id that has no hand-curated entry. Two spellings are
# tried in order: the fully upper-cased id, then the upper-cased id with either a
# trailing "PP" or its last character lower-cased.
for original_reac_id in reaction_ids:
    if original_reac_id in reaction_map:
        continue

    upper_id = original_reac_id.upper()
    if upper_id.endswith("PP"):
        fallback_id = upper_id[:-2] + "pp"
    else:
        fallback_id = upper_id[:-1] + upper_id[-1].lower()

    for candidate in (upper_id, fallback_id):
        try:
            model.reactions.get_by_id(candidate)
        except KeyError:
            continue
        reaction_map[original_reac_id] = candidate
        break
    else:
        unknown_reactions.append(original_reac_id)

print(len(unknown_metabolites), "unknown metabolites out of", len(metabolite_ids))
print(len(unknown_reactions), "unknown reactions out of", len(reaction_ids))

0 unknown metabolites out of 323
0 unknown reactions out of 364


In [166]:
# Translate both id columns to model ids. The dicts' __getitem__ is used as the
# mapper so an id missing from a map raises KeyError.
converted_smrn = smrn.copy()
for column, id_map in (
    ("bigg.metabolite", metabolite_map),
    ("bigg.reaction", reaction_map),
):
    converted_smrn[column] = converted_smrn[column].map(id_map.__getitem__)

# Sanity checks: every reaction id must exist in the model, and every metabolite
# must have a "_c" form. The first offending id becomes the assertion message.
missing_reaction = next(
    (rid for rid in converted_smrn["bigg.reaction"] if rid not in model.reactions),
    None,
)
assert missing_reaction is None, missing_reaction

missing_metabolite = next(
    (mid for mid in converted_smrn["bigg.metabolite"] if mid + "_c" not in model.metabolites),
    None,
)
assert missing_metabolite is None, missing_metabolite

In [189]:
def reaction_compartments(reac_id):
    """Return the set of compartments of the metabolites in model reaction `reac_id`.

    The reaction is looked up with `model.reactions.get_by_id`, so an unknown id
    propagates whatever error that lookup raises.
    """
    reaction = model.reactions.get_by_id(reac_id)
    return {metabolite.compartment for metabolite in reaction.metabolites}

def generate_compartment_suffix(reac_id):
    """Choose the compartment suffix for a regulator of reaction `reac_id`.

    Returns "c" when any metabolite of the reaction is in compartment "c", and
    "p" when all of its metabolites are in compartment "p".

    Raises:
        RuntimeError: for any other compartment combination. The message names
            the reaction and the compartments found.
    """
    compartments = reaction_compartments(reac_id)
    if "c" in compartments:
        return "c"
    if compartments == {"p"}:
        return "p"
    raise RuntimeError(
        "Cannot choose a compartment suffix for reaction {!r}: its compartments {} "
        "neither include 'c' nor consist only of 'p'".format(
            reac_id, sorted(map(str, compartments))
        )
    )

In [190]:
# Scratch copy with an extra "comp" column holding each reaction's compartment set.
test_df = converted_smrn.assign(
    comp=converted_smrn["bigg.reaction"].map(reaction_compartments)
)

In [219]:
def add_compartment_to_metabolite(row):
    """Build the compartment-qualified metabolite id for one SMRN row.

    The suffix comes from `generate_compartment_suffix` for the row's reaction.
    If that suffix is "p" and the model has no such metabolite, the "_c" form
    is used instead and asserted to exist.
    """
    compartment = generate_compartment_suffix(row["bigg.reaction"])
    candidate = row["bigg.metabolite"] + "_" + compartment
    if compartment != "p" or candidate in model.metabolites:
        return candidate

    # Fall back from the "_p" id to the "_c" id.
    fallback = candidate[:-1] + "c"
    assert fallback in model.metabolites
    return fallback

# Add the compartment-qualified metabolite id for every row (row-wise apply).
converted_smrn["metabolite_id"] = converted_smrn.apply(
    add_compartment_to_metabolite, axis="columns"
)

In [221]:
# Every compartment-qualified metabolite id must exist in the model.
assert all(
    qualified_id in model.metabolites
    for qualified_id in converted_smrn["metabolite_id"]
)

In [222]:
# Metal ions do not participate in cellular metabolism and are thus not
# interesting in this project; rows regulated by these ids are filtered out.
metal_ions = [
    "zn2", "cu2", "ca2", "mg2", "mn2",
    "k", "hg2", "cobalt2", "fe2", "ni2",
    "cd2", "cl", "na1", "ag", "fe3",
]

In [223]:
# Drop every interaction whose regulating metabolite is one of the listed ions.
is_metal_ion = converted_smrn["bigg.metabolite"].isin(metal_ions)
reduced_smrn = converted_smrn[~is_metal_ion]

In [224]:
# Row counts per metabolite, largest "Mode" count first.
(
    reduced_smrn
    .groupby("bigg.metabolite")
    .count()
    .sort_values(by="Mode", ascending=False)
)

Unnamed: 0_level_0,bigg.reaction,Mode,metabolite_id
bigg.metabolite,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
atp,55,55,55
adp,42,42,42
amp,41,41,41
ppi,33,33,33
pi,27,27,27
cys__L,23,23,23
gtp,23,23,23
nadp,19,19,19
nadph,17,17,17
nadh,15,15,15


In [225]:
# Persist the filtered network without writing the DataFrame index as a column.
reduced_smrn.to_csv(
    "../Data/Small_molecule_regulatory_network/Reduced_smrn.csv",
    index=False,
)