# Dependencies

In [1]:
!pip install rdkit
from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem.Draw import rdMolDraw2D
from rdkit.Chem import rdChemReactions
from rdkit.Chem import Descriptors
from rdkit.Chem import AllChem
from rdkit import DataStructs
from rdkit.Chem import MolStandardize, inchi
from rdkit.Chem.MolStandardize import rdMolStandardize, tautomer
from rdkit import RDLogger

import numpy as np

from dataclasses import dataclass
from typing import List
from typing import Tuple

from IPython.display import display
from IPython.display import Image

import json

Collecting rdkit
  Downloading rdkit-2023.9.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (34.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m34.4/34.4 MB[0m [31m14.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: rdkit
Successfully installed rdkit-2023.9.5


# Access JSON Files through WGET

In [2]:
!wget --no-check-certificate 'https://docs.google.com/uc?export=download&id=1F0QBtbiGJ41MLtfZL1JkbHWE8u4TetA0' -O glycolysis_reactions.json
!wget --no-check-certificate 'https://docs.google.com/uc?export=download&id=1oAzBVsOx5WjrwONrP1RHDka0YMNameqF' -O unique_chemicals_glycolysis.json

--2024-04-26 00:46:27--  https://docs.google.com/uc?export=download&id=1F0QBtbiGJ41MLtfZL1JkbHWE8u4TetA0
Resolving docs.google.com (docs.google.com)... 142.251.180.139, 142.251.180.102, 142.251.180.101, ...
Connecting to docs.google.com (docs.google.com)|142.251.180.139|:443... connected.
HTTP request sent, awaiting response... 303 See Other
Location: https://drive.usercontent.google.com/download?id=1F0QBtbiGJ41MLtfZL1JkbHWE8u4TetA0&export=download [following]
--2024-04-26 00:46:27--  https://drive.usercontent.google.com/download?id=1F0QBtbiGJ41MLtfZL1JkbHWE8u4TetA0&export=download
Resolving drive.usercontent.google.com (drive.usercontent.google.com)... 209.85.145.132, 2607:f8b0:4001:c1e::84
Connecting to drive.usercontent.google.com (drive.usercontent.google.com)|209.85.145.132|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2111 (2.1K) [application/octet-stream]
Saving to: ‘glycolysis_reactions.json’


2024-04-26 00:46:27 (25.1 MB/s) - ‘glycolysis_reactions.

# Chemical/Reaction Class Construction and JSON Parser

In [3]:
@dataclass(frozen=True)
class Chemical:
    """Immutable record describing one chemical species.

    ``frozen=True`` makes instances read-only and hashable by field value, which
    is what allows Chemical objects to be used as dictionary keys (the synthesis
    code maps Chemical -> shell). Every field defaults to the empty string.
    """
    UNIQUE_ID: str = ""  # identifier; populated from the JSON "Chemical ID" field
    COMMON_NAME: str = ""  # display name; populated from the JSON "Common Name" field
    SMILES: str = ""  # SMILES string; populated from the JSON "SMILES" field
    INCHI: str = ""  # InChI string; populated from the JSON "InChI" field

@dataclass(frozen=True)
class Reaction:
    """Immutable record of one reaction: an ID plus its substrate and product chemicals.

    Substrates and products are tuples rather than lists so that, together with
    ``frozen=True``, a Reaction is hashable and can be used as a dictionary key
    (the synthesis code maps Reaction -> shell). This relies on every contained
    Chemical being hashable as well.
    """
    UNIQUE_ID: str  # identifier; populated from the JSON "Reaction ID" field
    SUBSTRATES: Tuple[Chemical, ...]  # chemicals that must all be available for the reaction to fire
    PRODUCTS: Tuple[Chemical, ...]  # chemicals that become available once the reaction fires


In [4]:
# --- Chemicals: JSON records -> {Chemical ID: Chemical} -----------------------
with open('unique_chemicals_glycolysis.json', 'r') as file:
    chemicals_data = json.load(file)

chemicals_dict = {}
for chem in chemicals_data:
    chemicals_dict[chem['Chemical ID']] = Chemical(
        UNIQUE_ID=chem['Chemical ID'],
        COMMON_NAME=chem['Common Name'],
        SMILES=chem['SMILES'],
        INCHI=chem['InChI'],
    )

# --- Reactions: JSON records -> [Reaction, ...] --------------------------------
with open('glycolysis_reactions.json', 'r') as file:
    reactions_data = json.load(file)

# Substrate/product entries are Chemical IDs; each is resolved to the shared
# Chemical object in chemicals_dict (an unknown ID raises KeyError).
reactions_list = [
    Reaction(
        SUBSTRATES=tuple(chemicals_dict[sub] for sub in reaction['Substrates']),
        PRODUCTS=tuple(chemicals_dict[prod] for prod in reaction['Products']),
        UNIQUE_ID=reaction['Reaction ID'],
    )
    for reaction in reactions_data
]

# reactions_list now holds Reaction objects and chemicals_dict holds Chemical objects.


In [5]:
# Sanity check: number of parsed reactions, then number of parsed chemicals (one per line).
print(len(reactions_list), len(chemicals_dict), sep="\n")

10
18


# Populating Native Chemicals

In [6]:
# IDs of the chemicals treated as available from the start.
native_ids = ["Glucopyranose", "ATP", "NAD", "Pi"]

# Resolve each ID to its Chemical object, preserving the order of native_ids.
# IDs that are not present in chemicals_dict are skipped without an error.
native_chemicals = [
    chemicals_dict[chem_id] for chem_id in native_ids if chem_id in chemicals_dict
]

# native_chemicals now holds the Chemical objects for the IDs that were found.


In [7]:
# Show the resolved native Chemical objects (dataclass repr, all four fields each).
print(native_chemicals)

[Chemical(UNIQUE_ID='Glucopyranose', COMMON_NAME='D-glucopyranose', SMILES='C([C@@H]1([C@H]([C@@H]([C@H](C(O1)O)O)O)O))O', INCHI='InChI=1S/C6H14O6/c7-1-3(9)5(11)6(12)4(10)2-8/h3-12H,1-2H2/t3-,5-,6-/m1/s1'), Chemical(UNIQUE_ID='ATP', COMMON_NAME='ATP', SMILES='C(OP(=O)([O-])OP(=O)([O-])OP(=O)([O-])[O-])[C@H]3(O[C@@H](N1(C2(\\C(\\N=C/1)=C(N)/N=C\\N=2)))[C@H](O)[C@H](O)3)', INCHI='InChI=1S/C10H16N5O13P3/c11-8-5-9(13-2-12-8)15(3-14-5)10-7(17)6(16)4(26-10)1-25-30(21,22)28-31(23,24)27-29(18,19)20/h2-4,6-7,10,16-17H,1H2,(H,21,22)(H,23,24)(H2,11,12,13)(H2,18,19,20)/t4-,6-,7-,10-/m1/s1'), Chemical(UNIQUE_ID='NAD', COMMON_NAME='NAD<sup>+</sup>', SMILES='C5(/C(/C(N)=O)=C\\C=C/[N+](/[C@@H]4(O[C@H](COP(OP(OC[C@H]3(O[C@@H](N1(C2(\\C(\\N=C/1)=C(N)/N=C\\N=2)))[C@H](O)[C@H](O)3))(=O)[O-])(=O)[O-])[C@@H](O)[C@@H](O)4))=5)', INCHI='InChI=1S/C21H28N7O14P2/c22-17-12-19(25-7-24-17)28(8-26-12)21-16(32)14(30)11(41-21)6-39-44(36,37)42-43(34,35)38-5-10-13(29)15(31)20(40-10)27-3-1-2-9(4-27)18(23)33/h1-4,7-8,10-1

# Writing the Reaction Operator (RO) List

In [8]:
# Reaction operators, written as reaction SMARTS ("reactant>>product"); each entry is
# later parsed with rdChemReactions.ReactionFromSmarts.
# The single operator here matches an O-C-C-C-C-C-C-O-H chain (atom maps 1-8 plus an
# explicit hydrogen on O:8) and, in the product, replaces that hydrogen with a CH3 group.
# NOTE(review): the explicit [H] in the pattern is presumably why substrates get
# Chem.AddHs applied before the operator is run - confirm.
RO_list = ["[O:1][C:2][C:3][C:4][C:5][C:6][C:7][O:8][H]>>[O:1][C:2][C:3][C:4][C:5][C:6][C:7][O:8][C]([H])([H])([H])"]

In [9]:
# Sanity check: number of reaction operators defined in RO_list.
print(len(RO_list))

1


# Original Object Matching Synthesis

In [10]:
# Result container returned by Synthesizer.run()
class HyperGraph:
    """Plain holder for the mappings produced by a synthesis run.

    Attributes (all start as empty dicts):
        reaction_to_shell: reaction -> shell (depth level at which it was enabled).
        chemical_to_shell: chemical -> shell (depth level at which it became available).
        chemical_to_cascade: reserved for additional data; not filled in by this class.
        chemical_to_pathway: reserved for additional data; not filled in by this class.
    """

    def __init__(self):
        # Each instance gets its own four independent dictionaries.
        self.reaction_to_shell = dict()
        self.chemical_to_shell = dict()
        self.chemical_to_cascade = dict()
        self.chemical_to_pathway = dict()

# Synthesizer class handling the synthesis algorithm
class Synthesizer:
    """Shell-by-shell expansion over a fixed list of known reactions.

    Native chemicals form shell 0. On every expansion step, each reaction that
    has not been placed yet and whose substrates all already have a shell is
    assigned to the current shell; any of its products without a shell are
    assigned to that same shell. Expansion stops at the first step that enables
    no reaction.
    """

    def __init__(self):
        self.curr_shell = 0
        self.all_reactions = []  # Reaction objects supplied to run()
        self.all_chemicals = {}  # mapping supplied to run(); stored, but not read by the expansion
        self.chemical_to_shell = {}  # Chemical object -> shell in which it first became available
        self.reaction_to_shell = {}  # Reaction object -> shell in which it was enabled

    # Main method to run the synthesis process
    def run(self, reactions_list, chemicals_dict, native_chemicals):
        """Expand from the native chemicals until no further reaction is enabled.

        Args:
            reactions_list: iterable of hashable reaction objects exposing
                ``SUBSTRATES`` and ``PRODUCTS`` (iterables of hashable chemicals).
            chemicals_dict: mapping of known chemicals; only stored on the instance.
            native_chemicals: chemicals placed in shell 0.

        Returns:
            A HyperGraph whose ``chemical_to_shell`` and ``reaction_to_shell``
            hold the result of this run.

        Side effects:
            Prints the shell number after every successful expansion step.
        """
        self.curr_shell = 0
        self.all_reactions = reactions_list
        self.all_chemicals = chemicals_dict

        # Start every run from fresh dictionaries. Without this, a second run()
        # on the same instance would inherit the previous run's shells and would
        # also mutate the dictionaries held by the previously returned HyperGraph.
        self.chemical_to_shell = {}
        self.reaction_to_shell = {}

        # Log native chemicals into shell 0
        for native_chem in native_chemicals:
            self.chemical_to_shell[native_chem] = 0

        # Expand the synthesis process shell by shell
        while self._expand_once():
            print(self.curr_shell)

        # Compile results in a HyperGraph object
        output = HyperGraph()
        output.chemical_to_shell = self.chemical_to_shell
        output.reaction_to_shell = self.reaction_to_shell
        return output

    def _expand_once(self):
        """Advance to the next shell and place every newly enabled reaction in it.

        Returns:
            True if at least one reaction was enabled in this step, else False.
            ``curr_shell`` is incremented in either case.
        """
        self.curr_shell += 1

        # Collect first, apply afterwards: products created in this step must not
        # enable further reactions within the same step.
        enabled_reactions = []
        for reaction in self.all_reactions:
            # Skip reactions that were already placed in an earlier shell
            if reaction in self.reaction_to_shell:
                continue

            # Every substrate must already have a shell strictly below the current one
            substrates_available = all(
                self.chemical_to_shell.get(substrate) is not None
                and self.chemical_to_shell[substrate] < self.curr_shell
                for substrate in reaction.SUBSTRATES
            )
            if substrates_available:
                enabled_reactions.append(reaction)

        # Record the enabled reactions and give first-seen products the current shell
        for reaction in enabled_reactions:
            self.reaction_to_shell[reaction] = self.curr_shell
            for product in reaction.PRODUCTS:
                if product not in self.chemical_to_shell:
                    self.chemical_to_shell[product] = self.curr_shell

        return bool(enabled_reactions)

# Run the object-matching synthesis on the parsed glycolysis data
synthesizer = Synthesizer()
hypergraph = synthesizer.run(
    reactions_list=reactions_list,
    chemicals_dict=chemicals_dict,
    native_chemicals=native_chemicals,
)

# Uncomment to dump the full mappings:
#print(hypergraph.chemical_to_shell)
#print(hypergraph.reaction_to_shell)

# Summary: how many chemicals and reactions were assigned a shell
size = len(hypergraph.chemical_to_shell)
rxn_size = len(hypergraph.reaction_to_shell)
print(f"Total Size of Chemical to Shell: {size}")
print(f"Total Size of Reaction to Shell: {rxn_size}")

1
2
3
4
5
6
7
8
9
Total Size of Chemical to Shell: 18
Total Size of Reaction to Shell: 10


# New RO + Object Matching Synthesis

In [18]:
# HyperGraph: result container for a synthesis run.
# (Same definition as the HyperGraph declared earlier in this notebook; re-declaring it
# here replaces the earlier class object with an equivalent one.)
class HyperGraph:
    """Holds the mappings produced by a synthesis run; every mapping starts empty.

    Attributes:
        reaction_to_shell: reaction -> shell (depth level at which it was enabled).
        chemical_to_shell: chemical -> shell (depth level at which it became available).
        chemical_to_cascade: reserved for additional data; not filled in by this class.
        chemical_to_pathway: reserved for additional data; not filled in by this class.
    """

    def __init__(self):
        self.reaction_to_shell, self.chemical_to_shell = dict(), dict()
        self.chemical_to_cascade, self.chemical_to_pathway = dict(), dict()

# Synthesizer class handling the synthesis algorithm
# (re-declares Synthesizer; this version adds reaction-operator expansion)
class Synthesizer:
    """Shell-by-shell expansion using known reactions plus reaction operators.

    Native chemicals form shell 0. Each expansion step does two things:

    1. Every known reaction that has not been placed yet and whose substrates
       all already have a shell is assigned to the current shell, and its
       first-seen products are assigned to that same shell.
    2. Every reaction operator in the notebook-level ``RO_list`` is applied to
       each chemical of the previous shell; every resulting product molecule is
       registered as a new ``Chemical`` named ``compound_<n>`` in the current shell.

    Expansion stops at the first step in which neither part produces anything.

    NOTE(review): operator products are not de-duplicated by structure, so the
    same molecule reached twice is registered as two separate compounds.
    """

    def __init__(self):
        self.curr_shell = 0
        self.all_reactions = []  # Reaction objects supplied to run()
        self.all_chemicals = {}  # Chemical ID -> Chemical (inputs plus generated compounds)
        self.chemical_to_shell = {}  # Chemical object -> shell in which it first became available
        self.reaction_to_shell = {}  # Reaction object -> shell in which it was enabled
        self._compound_counter = 1  # next number to try for a generated "compound_<n>" ID

    # Main method to run the synthesis process
    def run(self, reactions_list, chemicals_dict, native_chemicals):
        """Expand from the native chemicals until a step produces nothing new.

        Args:
            reactions_list: iterable of hashable reaction objects exposing
                ``SUBSTRATES`` and ``PRODUCTS``.
            chemicals_dict: mapping of Chemical ID -> Chemical. It is copied, so
                generated compounds are added to ``self.all_chemicals`` only and
                the caller's dictionary is left untouched.
            native_chemicals: chemicals placed in shell 0.

        Returns:
            A HyperGraph whose ``chemical_to_shell`` and ``reaction_to_shell``
            hold the result of this run.

        Side effects:
            Prints the shell number after every successful expansion step.
        """
        self.curr_shell = 0
        self.all_reactions = reactions_list
        # Work on a copy: inserting generated compounds into the caller's dict
        # would leak state into other cells and into later runs.
        self.all_chemicals = dict(chemicals_dict)

        # Start every run from fresh state so that a second run() on the same
        # instance neither inherits old shells nor mutates an earlier result.
        self.chemical_to_shell = {}
        self.reaction_to_shell = {}
        self._compound_counter = 1

        # Log native chemicals into shell 0
        for native_chem in native_chemicals:
            self.chemical_to_shell[native_chem] = 0

        # Expand the synthesis process shell by shell
        while self._expand_once():
            print(self.curr_shell)

        # Compile results in a HyperGraph object
        output = HyperGraph()
        output.chemical_to_shell = self.chemical_to_shell
        output.reaction_to_shell = self.reaction_to_shell
        return output

    def _expand_once(self):
        """Advance to the next shell; return True if anything was added to it."""
        self.curr_shell += 1
        # Both phases must always run, so evaluate them before combining the flags.
        reactions_fired = self._enable_known_reactions()
        operators_fired = self._apply_reaction_operators()
        return reactions_fired or operators_fired

    def _enable_known_reactions(self):
        """Place every newly enabled known reaction (and its new products) in the current shell."""
        # Collect first, apply afterwards: products created in this step must not
        # enable further reactions within the same step.
        enabled_reactions = []
        for reaction in self.all_reactions:
            if reaction in self.reaction_to_shell:
                continue
            substrates_available = all(
                self.chemical_to_shell.get(substrate) is not None
                and self.chemical_to_shell[substrate] < self.curr_shell
                for substrate in reaction.SUBSTRATES
            )
            if substrates_available:
                enabled_reactions.append(reaction)

        for reaction in enabled_reactions:
            self.reaction_to_shell[reaction] = self.curr_shell
            for product in reaction.PRODUCTS:
                if product not in self.chemical_to_shell:
                    self.chemical_to_shell[product] = self.curr_shell

        return bool(enabled_reactions)

    def _apply_reaction_operators(self):
        """Apply each operator in RO_list to the previous shell's chemicals.

        Returns:
            True if any operator returned at least one product set.
        """
        # Snapshot the previous shell up front; all_chemicals grows while we iterate.
        previous_shell = self.curr_shell - 1
        frontier = [
            chemical
            for chemical in list(self.all_chemicals.values())
            if self.chemical_to_shell.get(chemical) == previous_shell
        ]
        if not frontier:
            return False

        # Parse each reaction SMARTS once per step instead of once per substrate.
        operators = [rdChemReactions.ReactionFromSmarts(ro) for ro in RO_list]

        produced = False
        for substrate in frontier:
            mol = Chem.MolFromInchi(substrate.INCHI)
            if mol is None:
                # The InChI could not be parsed; there is nothing to apply operators to.
                continue
            mol = Chem.AddHs(mol)

            for operator in operators:
                product_sets = operator.RunReactants([mol])
                if product_sets:
                    produced = True
                for product_set in product_sets:
                    for product_mol in product_set:
                        product_smiles = Chem.MolToSmiles(product_mol)
                        product_inchi = Chem.MolToInchi(product_mol)
                        new_chemical = Chemical(
                            self._next_compound_id(), "", product_smiles, product_inchi
                        )
                        self.all_chemicals[new_chemical.UNIQUE_ID] = new_chemical
                        self.chemical_to_shell[new_chemical] = self.curr_shell

        return produced

    def _next_compound_id(self):
        """Return a "compound_<n>" ID that is not yet a key of all_chemicals."""
        # The counter lives on the instance so IDs stay unique across shells
        # (a per-step counter would reuse "compound_1" in every shell).
        while True:
            candidate = f"compound_{self._compound_counter}"
            self._compound_counter += 1
            if candidate not in self.all_chemicals:
                return candidate


# Run the reaction-operator + object-matching synthesis on the parsed glycolysis data
synthesizer = Synthesizer()
hypergraph = synthesizer.run(
    reactions_list=reactions_list,
    chemicals_dict=chemicals_dict,
    native_chemicals=native_chemicals,
)

# Uncomment to dump the full mappings:
#print(hypergraph.chemical_to_shell)
#print(hypergraph.reaction_to_shell)

# Summary: how many chemicals and reactions were assigned a shell
size = len(hypergraph.chemical_to_shell)
rxn_size = len(hypergraph.reaction_to_shell)
print(f"Total Size of Chemical to Shell: {size}")
print(f"Total Size of Reaction to Shell: {rxn_size}")

1
2
3
4
5
6
7
8
9
Total Size of Chemical to Shell: 23
Total Size of Reaction to Shell: 10






In [12]:
# List every chemical that was assigned a shell, in insertion order:
# one line per chemical with its shell number, unique ID and InChI.
shell_by_chemical = hypergraph.chemical_to_shell
for chemical in shell_by_chemical:
    shell = shell_by_chemical[chemical]
    print(f"#{shell}, ID: {chemical.UNIQUE_ID}, InChI: {chemical.INCHI}")

#0, ID: Glucopyranose, InChI: InChI=1S/C6H14O6/c7-1-3(9)5(11)6(12)4(10)2-8/h3-12H,1-2H2/t3-,5-,6-/m1/s1
#0, ID: ATP, InChI: InChI=1S/C10H16N5O13P3/c11-8-5-9(13-2-12-8)15(3-14-5)10-7(17)6(16)4(26-10)1-25-30(21,22)28-31(23,24)27-29(18,19)20/h2-4,6-7,10,16-17H,1H2,(H,21,22)(H,23,24)(H2,11,12,13)(H2,18,19,20)/t4-,6-,7-,10-/m1/s1
#0, ID: NAD, InChI: InChI=1S/C21H28N7O14P2/c22-17-12-19(25-7-24-17)28(8-26-12)21-16(32)14(30)11(41-21)6-39-44(36,37)42-43(34,35)38-5-10-13(29)15(31)20(40-10)27-3-1-2-9(4-27)18(23)33/h1-4,7-8,10-11,13-16,20-21,29-33H,5-6,23H2,(H3-,22,24,25,34,35,36,37)/t10-,11-,13-,14-,15-,16-,20-,21-/m1/s1
#0, ID: Pi, InChI: InChI=1S/H3O4P/c1-5(2,3)4/h(H3,1,2,3,4)
#1, ID: GLC-6-P, InChI: InChI=1S/C6H13O9P/c7-1-3(8)5(10)6(11)4(9)2-15-16(12,13)14/h1,3-6,8-11H,2H2,(H2,12,13,14)/t3?,4-,5-,6-/m1/s1
#1, ID: ADP, InChI: InChI=1S/C10H15N5O10P2/c11-8-5-9(13-2-12-8)15(3-14-5)10-7(17)6(16)4(24-10)1-23-27(21,22)25-26(18,19)20/h2-4,6-7,10,16-17H,1H2,(H,21,22)(H2,11,12,13)(H2,18,19,20)/t4-,6-,7-