In [1]:
from typing import List, Optional

from sqlmodel import Field, Relationship, Session, SQLModel, create_engine

# class MoleculeClassLink(SQLModel, table=True):
#     class_id: Optional[int] = Field(
#         default=None, foreign_key="class.id", primary_key=True
#     )
#     molecule_id: Optional[int] = Field(
#         default=None, foreign_key="molecule.id", primary_key=True
#     )

# class Class(SQLModel, table=True):
#     id: Optional[int] = Field(default = None, primary_key = True)
#     smarts: str = Field(index = True)
#     molecules: List['Molecule'] = Relationship(
#         back_populates='classes', link_model=MoleculeClassLink
#     )

class Molecule(SQLModel, table=True):
    """Table model for a molecule, optionally linked to the product it forms.

    The table references itself: ``product_id`` is a foreign key to another
    ``molecule`` row, ``product`` is the related row, and ``reactants`` is the
    reverse collection of rows whose ``product`` is this one.
    """

    # Primary key; defaults to None on a freshly constructed object
    id: Optional[int] = Field(primary_key=True, default=None)
    # SMILES string; nullable column, defaults to None
    smiles: Optional[str] = Field(nullable=True, default=None)
    # Required integer with no default; the loading loop later in this
    # notebook fills it with the enumerate position of the molecule
    index: int

    # Reaction logic: self-referential foreign key plus both relationship sides
    product_id: Optional[int] = Field(
        default=None, nullable=True, foreign_key="molecule.id"
    )
    # remote_side names the column on the "other" row of the self-join
    product: Optional["Molecule"] = Relationship(
        sa_relationship_kwargs={"remote_side": "Molecule.id"},
        back_populates="reactants",
    )
    reactants: List["Molecule"] = Relationship(back_populates="product")

# SQLite database file name and the SQLAlchemy-style URL built from it
sqlite_file_name = "database.db"
sqlite_url = f"sqlite:///{sqlite_file_name}"

# Engine shared by the Session blocks below; echo=False turns off SQL echoing
engine = create_engine(sqlite_url, echo=False)
# Create the tables registered on SQLModel.metadata (here: Molecule) on that engine
SQLModel.metadata.create_all(engine)

# with Session(engine) as session:
#     mol = Molecule()
#     session.add(mol)

In [2]:
import dgym as dg

# Relative path to the dgym data directory
path = '../../dgym-data'

# SDF file of Enamine building blocks, read through dgym's disk loader
building_blocks_sdf = f'{path}/Enamine_Building_Blocks_Stock_262336cmpd_20230630.sdf'
building_blocks = dg.datasets.disk_loader(building_blocks_sdf)

In [7]:
from itertools import islice

import rdkit
# `import rdkit` alone does not load the Chem subpackage; import it explicitly
# so this cell does not depend on another library having imported it first.
from rdkit import Chem
from tqdm.notebook import tqdm

# Upper bound on rows inserted by this cell: indices 0..100_000 inclusive,
# i.e. the same rows the previous `if idx == 100_000: break` cut-off produced.
MAX_MOLECULES = 100_001

# Progress-bar total reflects the rows actually processed, not the full file
n_to_insert = min(len(building_blocks), MAX_MOLECULES)

with Session(engine) as session:
    capped_blocks = islice(building_blocks, MAX_MOLECULES)
    for idx, rd_mol in enumerate(tqdm(capped_blocks, total=n_to_insert)):
        # Falsy entries (presumably molecules the loader could not parse --
        # TODO confirm) are stored with smiles=None rather than skipped.
        smiles = Chem.MolToSmiles(rd_mol) if rd_mol else None
        # `mol` stays bound to the ORM row (not the RDKit object) because
        # later cells in this notebook read it after the loop.
        mol = Molecule(index=idx, smiles=smiles)
        session.add(mol)
    # All pending rows are committed once, after the loop finishes
    session.commit()

  0%|          | 0/262336 [00:00<?, ?it/s]

In [19]:
    # Scratch preview of a Molecule built from the `idx` and `smiles`
    # variables left in the kernel namespace by the insertion loop.
    Molecule(index=idx, smiles=smiles)

Molecule(id=None, smiles='Nc1ccc(S(=O)(=O)F)cc1Br', index=88566, product_id=None)

In [4]:
# Attach a new reactant to whatever `mol` currently refers to in the kernel.
# NOTE(review): `index` is declared without a default on Molecule but is not
# supplied here -- confirm this row is given one before it is committed.
reactant = Molecule(smiles='Cc1ccccc1C(=O)Nc1nc(-c2cccc(O)c2)cs1')
mol.reactants.append(reactant)

In [49]:
from scikit_mol.conversions import SmilesToMolTransformer

# scikit-mol transformer instance, constructed with parallel=True.
# Presumably converts SMILES strings into molecule objects, going by the
# class name -- TODO confirm against the scikit-mol documentation.
mol_maker = SmilesToMolTransformer(parallel=True)