In [3]:
class Node:
    """
    One atom of a molecular graph, together with its adjacency information.

    Attributes:
        node_id: Integer identifier of the node.
        data: String identifying the element of the atom.
        neighbors: Adjacent Node objects in the graph.
        edge_types: Bond types for the adjacent Node objects.
    """

    def __init__(self, node_id: int, data: str = ""):
        """
        Create a node with no neighbors.

        Args:
            node_id: Integer identifier of the node.
            data: String identifying the element of the atom (defaults to "").
        """
        self.node_id, self.data = node_id, data

        # Adjacency is stored as two parallel lists: edge_types[i] is the bond
        # type of the edge leading to neighbors[i]. Fresh lists are created per
        # instance so nodes never share adjacency state.
        self.edge_types: "list" = list()
        self.neighbors: "list[Node]" = list()

class Graph:
    """
    Molecular graph holding one Node object per atom of the molecule.

    Attributes:
        nodes: Dictionary mapping each node ID (an integer) to its Node object.
    """

    def __init__(self):
        """Create an empty graph with no nodes."""
        # node ID (int) -> associated Node object
        self.nodes: "dict" = dict()

In [4]:
!pip install rdkit-pypi

Collecting rdkit-pypi
  Downloading rdkit_pypi-2022.9.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.9 kB)
Downloading rdkit_pypi-2022.9.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (29.4 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 29.4/29.4 MB 57.4 MB/s eta 0:00:00
Installing collected packages: rdkit-pypi
Successfully installed rdkit-pypi-2022.9.5


In [5]:
from rdkit.Chem import RWMol
from rdkit.Chem import Atom, BondType

def graph_to_mol(graph: Graph) -> RWMol:
    """
    Build an editable RDKit molecule (RWMol) from a Graph instance.

    Each entry of ``graph.nodes`` becomes one atom, created from ``node.data``
    (assumed to hold an atomic symbol such as 'C', 'O' or 'N'). Each
    (neighbor, edge type) pair listed on a node becomes one bond, unless a
    bond between the two atoms is already present in the molecule.

    Args:
        graph: Graph whose ``nodes`` dict maps node IDs to Node objects.

    Returns:
        The RWMol containing the atoms and bonds created from the graph.
    """
    molecule = RWMol()

    # Pass 1: create one atom per node and record the RDKit atom index that
    # each node ID received.
    atom_index = {
        key: molecule.AddAtom(Atom(entry.data))
        for key, entry in graph.nodes.items()
    }

    # Pass 2: walk every adjacency list and create the bonds.
    for key, entry in graph.nodes.items():
        begin = atom_index[key]
        for other, kind in zip(entry.neighbors, entry.edge_types):
            end = atom_index[other.node_id]
            # An edge listed on both of its endpoints is only added once:
            # skip it when the molecule already has a bond between the atoms.
            if molecule.GetBondBetweenAtoms(begin, end) is not None:
                continue
            molecule.AddBond(begin, end, int_bond_type_to_rdkit(kind))

    return molecule

def int_bond_type_to_rdkit(bond_type: int):
    """
    Translate an integer bond type into the matching RDKit BondType.

    Args:
        bond_type: 1 for a single, 2 for a double, 3 for a triple bond.

    Returns:
        BondType.SINGLE, BondType.DOUBLE or BondType.TRIPLE for 1, 2 or 3;
        BondType.UNSPECIFIED for any other value.
    """
    known_orders = dict(
        zip((1, 2, 3), (BondType.SINGLE, BondType.DOUBLE, BondType.TRIPLE))
    )
    try:
        return known_orders[bond_type]
    except KeyError:
        # Not one of the recognised integer codes.
        return BondType.UNSPECIFIED

In [6]:
# Example usage: build a small molecular graph by hand and convert it to RDKit
graph = Graph()

# Create nodes (atoms)
node1 = Node(node_id=1, data="C")  # Carbon atom
node2 = Node(node_id=2, data="O")  # Oxygen atom
node3 = Node(node_id=3, data="H")  # Hydrogen atom
node4 = Node(node_id=4, data="H")  # Hydrogen atom

# Add nodes to the graph, keyed by their node IDs
graph.nodes[node1.node_id] = node1
graph.nodes[node2.node_id] = node2
graph.nodes[node3.node_id] = node3
graph.nodes[node4.node_id] = node4

# Define neighbors and bond types. Each edge is listed on both of its endpoints.
# NOTE: this graph is NOT ethanol (CH3-CH2-OH): it contains a single carbon
# bonded to one oxygen and two explicit hydrogens, and nothing else.
node1.neighbors = [node2, node3, node4]  # Carbon connected to Oxygen and two Hydrogens
node1.edge_types = [1, 1, 1]  # All single bonds

node2.neighbors = [node1]  # Oxygen connected back to Carbon
node2.edge_types = [1]  # Single bond

node3.neighbors = [node1]  # Hydrogen connected to Carbon
node3.edge_types = [1]  # Single bond

node4.neighbors = [node1]  # Hydrogen connected to Carbon
node4.edge_types = [1]  # Single bond

# Convert the Graph to an RDKit Mol
mol = graph_to_mol(graph)

# Print the Mol as a SMILES string (for validation)
from rdkit.Chem import MolToSmiles
print(MolToSmiles(mol))  # Prints "[H]C([H])O": the explicitly added H atoms appear in the SMILES


[H]C([H])O
