In [1]:
import networkx as nx
import json
import itertools
import glob
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import copy
import re
import collections

Import compounds and reactions

In [2]:
reactions = dict()
for fname in glob.glob("../../BioXP/kegg/2018-09-25/reaction/*"):
    with open(fname) as f:
        reaction_json = json.load(f)[0]
        reactions[reaction_json["entry_id"]] = reaction_json

In [3]:
compounds = dict()
for fname in glob.glob("../../BioXP/kegg/2018-09-25/compound/*"):
    with open(fname) as f:
        compound_json = json.load(f)[0]
        compounds[compound_json["entry_id"]] = compound_json

Add `"elements"` key to `compounds` dict

In [4]:
for i in compounds:
    elements = re.findall(r"([A-Z][a-z]?)",compounds[i]['formula'])
    compounds[i]["elements"] = set(elements)

Identify reactions that spawn new elements

In [5]:
apparate_reactions = dict()
for r in reactions:
    
    elements_sub = set()
    elements_prod = set()
    
    if "substrates" in reactions[r]:
        for c in reactions[r]["substrates"]:
            if c in compounds:
                elements_sub = elements_sub.union(compounds[c]['elements'])
    
    if "products" in reactions[r]:
        for c in reactions[r]["products"]:
            if c in compounds:
                elements_prod = elements_prod.union(compounds[c]['elements'])
        
    if elements_sub != elements_prod:
        apparate_reactions[reactions[r]["entry_id"]] = list(elements_sub^elements_prod)
        print(reactions[r]["entry_id"],elements_sub^elements_prod)
#         print(reactions[r]["entry_id"],elements_sub^elements_prod,elements_sub,elements_prod)
        

R11305 {'P'}
R08431 {'O'}
R07547 {'R'}
R10685 {'S'}
R10390 {'S'}
R04146 {'C'}
R08535 {'O'}
R06317 {'O'}
R07443 {'H', 'O', 'C'}
R10152 {'S', 'Sn'}
R00914 {'P', 'S'}
R08427 {'O'}
R03142 {'C', 'N'}
R03841 {'H', 'P'}
R04886 {'C', 'N'}
R08519 {'P'}
R00083 {'C'}
R04613 {'R'}
R07341 {'P'}
R04644 {'R'}
R00312 {'C', 'Fe', 'H', 'O', 'N'}
R00168 {'P'}
R07838 {'O', 'S'}
R05106 {'R'}
R03656 {'R'}
R09467 {'O', 'C'}
R03986 {'P', 'C', 'N'}
R08299 {'O'}
R11470 {'S'}
R08578 {'R'}
R04808 {'S'}
R02876 {'H', 'C'}
R02460 {'H', 'O', 'C', 'N'}
R09851 {'Cl'}
R09144 {'Cl'}
R09847 {'R'}
R05188 {'R'}
R01348 {'R'}
R08885 {'N'}
R03660 {'R'}
R08779 {'R'}
R03836 {'O', 'R'}
R03135 {'C'}
R11671 {'P', 'N', 'R'}
R01734 {'O'}
R05777 {'P'}
R04977 {'H', 'C', 'Fe', 'O', 'N'}
R00172 {'P', 'Se'}
R02620 {'R'}
R08411 {'N'}
R00164 {'P', 'C', 'N', 'R'}
R08780 {'R'}
R06321 {'P', 'O'}
R01237 {'C', 'O'}
R00077 {'P'}
R04937 {'C'}
R08502 {'O', 'N'}
R06320 {'N'}
R08040 {'O', 'N'}
R00165 {'P'}
R03876 {'R'}
R03175 {'C', 'H', 'P', 'O', 'S'

In [6]:
apparate_reactions

{'R11305': ['P'],
 'R08431': ['O'],
 'R07547': ['R'],
 'R10685': ['S'],
 'R10390': ['S'],
 'R04146': ['C'],
 'R08535': ['O'],
 'R06317': ['O'],
 'R07443': ['H', 'O', 'C'],
 'R10152': ['S', 'Sn'],
 'R00914': ['P', 'S'],
 'R08427': ['O'],
 'R03142': ['C', 'N'],
 'R03841': ['H', 'P'],
 'R04886': ['C', 'N'],
 'R08519': ['P'],
 'R00083': ['C'],
 'R04613': ['R'],
 'R07341': ['P'],
 'R04644': ['R'],
 'R00312': ['C', 'Fe', 'H', 'O', 'N'],
 'R00168': ['P'],
 'R07838': ['O', 'S'],
 'R05106': ['R'],
 'R03656': ['R'],
 'R09467': ['O', 'C'],
 'R03986': ['P', 'C', 'N'],
 'R08299': ['O'],
 'R11470': ['S'],
 'R08578': ['R'],
 'R04808': ['S'],
 'R02876': ['H', 'C'],
 'R02460': ['H', 'O', 'C', 'N'],
 'R09851': ['Cl'],
 'R09144': ['Cl'],
 'R09847': ['R'],
 'R05188': ['R'],
 'R01348': ['R'],
 'R08885': ['N'],
 'R03660': ['R'],
 'R08779': ['R'],
 'R03836': ['O', 'R'],
 'R03135': ['C'],
 'R11671': ['P', 'N', 'R'],
 'R01734': ['O'],
 'R05777': ['P'],
 'R04977': ['H', 'C', 'Fe', 'O', 'N'],
 'R00172': ['P', 'S