In [None]:
import os
from pathlib import Path
import warnings

from lightning import pytorch as pl
from lightning.pytorch.callbacks import ModelCheckpoint
import pandas as pd

from chemprop import data, featurizers, models, nn

In [None]:
from pathlib import Path

# Where to persist all your .sdf files.
# The historical location stays the default so existing runs are unaffected;
# set the TRANSITION_SDF_DIR environment variable to relocate the directory
# on another machine without editing this cell.
OUTPUT_SDF_DIR = Path(
    os.environ.get(
        "TRANSITION_SDF_DIR",
        "/home/calvin/code/chemprop_original/transition_sdf_files",
    )
)
# Create the directory (and any missing parents) up front; no-op if it exists.
OUTPUT_SDF_DIR.mkdir(parents=True, exist_ok=True)


In [None]:
import tempfile
from rdkit import Chem
from rdkit.Chem import SDWriter


from rdkit import Chem

def save_mol_to_sdf(
    mol: "Chem.Mol",
    formula: str,
    rxn: str,
    cfg: str,
    split: str = "train"
) -> Path:
    """Saves an RDKit Mol to a persistent SDF file in OUTPUT_SDF_DIR.

    The molecule is tagged with string properties before being written and
    the file name is derived from ``formula``, ``rxn`` and ``cfg`` only.

    Args:
        mol: Molecule to write. It is modified in place: the properties
            ``mol_type``, ``type``, ``reaction``, ``formula``, ``config`` and
            ``split`` are set on it.
        formula: Value stored in the ``formula`` property; first file name part.
        rxn: Value stored in the ``reaction`` property; second file name part.
        cfg: Value stored in the ``config`` property; third file name part.
        split: Value stored in the ``split`` property. It is NOT part of the
            file name, so the same (formula, rxn, cfg) written for two splits
            targets the same file.

    Returns:
        Path of the written file, ``OUTPUT_SDF_DIR / "{formula}__{rxn}__{cfg}.sdf"``.

    Raises:
        Whatever ``Chem.SDWriter`` or its ``write`` raises; the writer is
        closed before the exception propagates.
    """
    # embed properties
    properties = (
        ("mol_type", "ts"),
        ("type", "ts"),
        ("reaction", rxn),
        ("formula", formula),
        ("config", cfg),
        ("split", split),
    )
    for key, value in properties:
        mol.SetProp(key, value)

    # construct a deterministic filename
    filename = OUTPUT_SDF_DIR / f"{formula}__{rxn}__{cfg}.sdf"
    writer = Chem.SDWriter(str(filename))
    try:
        writer.write(mol)
    finally:
        # Always release the file handle, even when writing fails.
        writer.close()
    return filename


In [None]:
from rdkit.Chem import GetPeriodicTable


def get_expected_valence(atom):
    """Return the periodic table's default valence for the element of ``atom``.

    ``atom`` must expose ``GetAtomicNum()``; the result is whatever
    ``GetPeriodicTable().GetDefaultValence`` yields for that atomic number.
    """
    return GetPeriodicTable().GetDefaultValence(atom.GetAtomicNum())

In [5]:
from rdkit import Chem
import numpy as np


def add_bonds_by_distance(mol, atomic_numbers, positions, threshold_scale=1.2):
    """Adds bonds to an RDKit mol based on interatomic distances and covalent radii.

    Every atom pair (i, j) whose distance is below
    ``(r_i + r_j) * threshold_scale`` gets a SINGLE bond, unless the pair is
    already bonded.

    Args:
        mol: Editable molecule (must provide ``AddBond``, e.g. an ``RWMol``)
            carrying a conformer; modified in place.
        atomic_numbers: Atomic number of each atom, in atom-index order.
        positions: Unused; coordinates are read from the molecule's conformer.
            Kept so existing callers keep working.
        threshold_scale: Multiplier applied to the sum of the covalent radii.

    Returns:
        None. Bonds are added to ``mol`` as a side effect.

    Warns:
        RuntimeWarning: once, after the scan, if any bond could not be added
            (for example because ``mol`` is not editable). Failures are
            best-effort and never abort the scan.
    """
    from rdkit.Chem import rdchem
    from periodictable import elements  # pip install periodictable

    conf = mol.GetConformer()
    n_atoms = len(atomic_numbers)

    # Per-atom data is loop-invariant: fetch it once instead of once per pair.
    coords = [np.array(conf.GetAtomPosition(idx)) for idx in range(n_atoms)]
    radii = [elements[int(z)].covalent_radius for z in atomic_numbers]

    failures = []
    for i in range(n_atoms):
        for j in range(i + 1, n_atoms):
            dist = np.linalg.norm(coords[i] - coords[j])
            threshold = (radii[i] + radii[j]) * threshold_scale
            if dist >= threshold:
                continue
            try:
                # Skip pairs that are already bonded instead of relying on
                # the failure of a duplicate AddBond call.
                if mol.GetBondBetweenAtoms(i, j) is None:
                    mol.AddBond(i, j, order=rdchem.BondType.SINGLE)
            except Exception as exc:  # best-effort: record and keep going
                failures.append((i, j, exc))

    if failures:
        first_i, first_j, first_exc = failures[0]
        warnings.warn(
            f"add_bonds_by_distance: could not add {len(failures)} bond(s); "
            f"first failure for atoms ({first_i}, {first_j}): {first_exc!r}",
            RuntimeWarning,
            stacklevel=2,
        )

In [6]:
import numpy as np
from rdkit import Chem
from rdkit.Chem import rdMolTransforms
import random


def get_random_dihedral_with_angle(
    mol: "Chem.Mol", return_sincos: bool = True, rng=None
):
    """
    Returns a single randomly sampled dihedral tuple and its angle.

    Candidate dihedrals (i, j, k, l) are enumerated around every bond j-k,
    with i a neighbor of j (other than k) and l a neighbor of k (other than
    j). Candidates with i == l (three-membered rings) are skipped because
    they do not span four distinct atoms, as are candidates whose angle
    cannot be computed or is not finite.

    Parameters
    ----------
    mol : Chem.Mol
        Molecule with 3D coordinates (must have conformer).
    return_sincos : bool
        If True, returns (sin, cos). If False, returns angle in radians.
    rng : object, optional
        Source of randomness exposing ``choice(sequence)``, e.g. a seeded
        ``random.Random``. Defaults to the module-level ``random`` functions,
        which keeps the previous behaviour.

    Returns
    -------
    tuple[tuple[int, int, int, int], np.ndarray or float] or None
        Dihedral atom indices and the angle: a float32 array
        ``[sin, cos]`` when ``return_sincos`` is True, otherwise the angle in
        radians. Returns None if no valid dihedral found.
    """
    conf = mol.GetConformer()
    chooser = random if rng is None else rng
    dihedrals = []

    for bond in mol.GetBonds():
        j = bond.GetBeginAtomIdx()
        k = bond.GetEndAtomIdx()

        # neighbors excluding the bonded partner
        nbrs_j = [
            a.GetIdx() for a in mol.GetAtomWithIdx(j).GetNeighbors() if a.GetIdx() != k
        ]
        nbrs_k = [
            a.GetIdx() for a in mol.GetAtomWithIdx(k).GetNeighbors() if a.GetIdx() != j
        ]

        for i in nbrs_j:
            for l in nbrs_k:
                if i == l:
                    # i-j-k-i closes a three-membered ring: not a real torsion.
                    continue
                try:
                    angle_deg = rdMolTransforms.GetDihedralDeg(conf, i, j, k, l)
                except Exception:
                    # some atoms might be invalid (e.g., linear or overlapping)
                    continue
                angle_rad = np.deg2rad(angle_deg)
                if not np.isfinite(angle_rad):
                    # A NaN/inf angle would silently poison downstream targets.
                    continue
                if return_sincos:
                    angle = np.array(
                        [np.sin(angle_rad), np.cos(angle_rad)], dtype=np.float32
                    )
                else:
                    angle = angle_rad
                dihedrals.append(((i, j, k, l), angle))

    if not dihedrals:
        return None  # no valid dihedral
    return chooser.choice(dihedrals)

In [7]:
# Approximate single-bond lengths in Angstrom, keyed by "<symbol>_<symbol>".
# A symbol may carry a signed charge suffix (e.g. "N+1_O-1"). Lookups are
# expected to try both key orders, so each pair is stored once.
SINGLE_BOND_LENGTH = {
    "Br_Br": 2.29,
    # C-Br single bond. The table originally only had the "Br_Cr" spelling
    # below, so a ("C", "Br") lookup missed and fell back to the default.
    "Br_C": 1.94,
    # NOTE(review): "Br_Cr" looks like a typo for "Br_C" (1.94 A is the C-Br
    # length); kept so any caller relying on this key is unaffected.
    "Br_Cr": 1.94,
    "Br_H": 1.41,
    "C_C": 1.54,
    "C_Cl": 1.77,
    "C_F": 1.35,
    "C_H": 1.09,
    "C_I": 2.13,
    "C_N": 1.47,
    "C_O": 1.43,
    "C_P": 1.87,
    "C_S": 1.81,
    "C_Si": 1.86,
    "Cl_Cl": 1.99,
    "Cl_H": 1.27,
    "Cl_N": 1.75,
    "Cl_Si": 2.03,
    "Cl_P": 2.03,
    "Cl_S": 2.07,
    "F_F": 1.42,
    "F_H": 0.92,
    "F_P": 1.57,
    "F_S": 1.56,
    "F_Si": 1.56,
    "F_Xe": 1.90,
    "H_H": 1.0,
    "H_I": 1.61,
    "H_N": 1.04,
    "H_O": 0.96,
    "H_P": 1.42,
    "H_S": 1.34,
    "H_Si": 1.48,
    "I_I": 2.66,
    "N_N": 1.45,
    "N+1_N+1": 1.81,
    "N_O": 1.44,
    "N+1_O-1": 1.2,
    "O_O": 1.48,
    "O_P": 1.63,
    "O_S": 1.58,
    "O_Si": 1.66,
    "P_P": 2.21,
    "S_S": 2.05,
    "Si_Si": 2.35,
}


def get_single_bond_length(
    symbol_1: str,
    symbol_2: str,
    charge_1: int = 0,
    charge_2: int = 0,
) -> float:
    """
    Look up an approximate single bond length for a pair of elements.

    The pair is searched in ``SINGLE_BOND_LENGTH`` in both key orders. Charges
    only take part in the lookup when BOTH are non-zero; each symbol then gets
    a signed suffix (``"N"`` with ``1`` becomes ``"N+1"``, ``"O"`` with ``-1``
    becomes ``"O-1"``). If only one charge is non-zero, the plain symbols are
    used.

    Args:
        symbol_1 (str): Element symbol of the first atom.
        symbol_2 (str): Element symbol of the second atom.
        charge_1 (int, optional): Charge of the atom given by ``symbol_1``.
        charge_2 (int, optional): Charge of the atom given by ``symbol_2``.

    Returns: float
        The tabulated length in Angstrom, or 2.5 when neither key order is
        present in the table.
    """

    def _with_charge(symbol, charge):
        # Positive charges need an explicit "+"; negatives already print "-".
        sign = "+" if charge > 0 else ""
        return f"{symbol}{sign}{charge}"

    first, second = symbol_1, symbol_2
    if charge_1 and charge_2:
        first = _with_charge(symbol_1, charge_1)
        second = _with_charge(symbol_2, charge_2)

    # Try the given order first, then the reversed one.
    for key in ("_".join((first, second)), "_".join((second, first))):
        if key in SINGLE_BOND_LENGTH:
            return SINGLE_BOND_LENGTH[key]
    return 2.5

In [8]:
# def log_degree_zero_hydrogens(mol, context_str=None):
#     conf = mol.GetConformer()
#     for idx, atom in enumerate(mol.GetAtoms()):
#         if atom.GetSymbol() == "H" and atom.GetDegree() == 0:
#             pos = conf.GetAtomPosition(idx)
#             msg = (
#                 f"[DEG0-H] Atom idx={idx}, pos=({pos.x:.2f}, {pos.y:.2f}, {pos.z:.2f})"
#             )
#             if context_str:
#                 msg += f" | {context_str}"
#             print(msg)

In [9]:
# def resolve_floating_hydrogens(mol, atomic_numbers, positions, bond_length_table=None):
#     """
#     For degree-zero H atoms, assign to nearest heavy atom with open valence.

#     Args:
#         mol (Chem.RWMol): The molecule (should be editable, i.e. RWMol).
#         atomic_numbers (list[int]): Atomic numbers (Z) for each atom.
#         positions (np.ndarray): Shape (N, 3), 3D positions for each atom.
#         bond_length_table (dict): Optional; mapping like ('C', 'H') -> 1.09.

#     Returns:
#         fixed (int): Number of hydrogens fixed.
#     """
#     from rdkit.Chem import GetPeriodicTable

#     ptable = GetPeriodicTable()
#     N = len(atomic_numbers)
#     conf = mol.GetConformer()
#     fixed = 0

#     # Find degree-zero H atoms
#     deg0_H = [
#         i
#         for i, a in enumerate(mol.GetAtoms())
#         if a.GetSymbol() == "H" and a.GetDegree() == 0
#     ]
#     heavy_indices = [i for i, a in enumerate(mol.GetAtoms()) if a.GetSymbol() != "H"]

#     for hi in deg0_H:
#         h_pos = positions[hi]
#         # Collect distances to all heavy atoms
#         distances = []
#         for ci in heavy_indices:
#             c_atom = mol.GetAtomWithIdx(ci)
#             c_pos = positions[ci]
#             dist = np.linalg.norm(h_pos - c_pos)
#             # Get bond threshold for this pair
#             c_sym = c_atom.GetSymbol()
#             h_sym = "H"
#             if bond_length_table:
#                 key1, key2 = f"{c_sym}_{h_sym}", f"{h_sym}_{c_sym}"
#                 bond_length = bond_length_table.get(
#                     key1, bond_length_table.get(key2, 1.1)
#                 )
#             else:
#                 bond_length = 1.1  # default C–H
#             if dist < bond_length * 1.25:
#                 expected = ptable.GetDefaultValence(c_atom.GetAtomicNum())
#                 current = c_atom.GetExplicitValence()
#                 deficit = expected - current
#                 if deficit > 0:
#                     distances.append((dist, ci, deficit))

#         if distances:
#             # Sort by distance, take the closest heavy atom with open valence
#             distances.sort()
#             dist, ci, deficit = distances[0]
#             mol.AddBond(hi, ci, Chem.rdchem.BondType.SINGLE)
#             print(
#                 f"[FIXED] Bonded floating H (idx={hi}) to {mol.GetAtomWithIdx(ci).GetSymbol()} (idx={ci}) at d={dist:.2f}Å."
#             )
#             fixed += 1
#         else:
#             print(f"[UNFIXED] Floating H (idx={hi}) could not be attached.")

#     return fixed

In [10]:
# def build_rdkit_mol_from_h5_with_bonds(atomic_numbers, positions, threshold_scale=1.1):
#     """Create an RDKit mol with inferred bonds using a bond length reference table."""
#     from periodictable import elements

#     mol = Chem.RWMol()
#     atom_indices = []
#     symbols = [elements[int(z)].symbol for z in atomic_numbers]

#     for z in atomic_numbers:
#         atom = Chem.Atom(int(z))
#         idx = mol.AddAtom(atom)
#         atom_indices.append(idx)

#     # Distance-based bond addition using bond length table
#     N = len(atomic_numbers)
#     for i in range(N):
#         for j in range(i + 1, N):
#             pos_i = positions[i]
#             pos_j = positions[j]
#             dist = np.linalg.norm(pos_i - pos_j)

#             # Lookup single bond length for these two elements
#             l_ref = get_single_bond_length(symbols[i], symbols[j])
#             threshold = l_ref * threshold_scale

#             if symbols[i] == "H" or symbols[j] == "H":
#                 fudge = 1.2
#                 threshold = l_ref * fudge

#             if dist < threshold:
#                 try:
#                     mol.AddBond(i, j, Chem.rdchem.BondType.SINGLE)
#                 except Exception:
#                     pass

#     mol = mol.GetMol()  # Convert to readonly Mol

#     # Add conformer
#     conf = Chem.Conformer(len(atomic_numbers))
#     for i, pos in enumerate(positions):
#         conf.SetAtomPosition(i, Chem.rdGeometry.Point3D(*pos))
#     mol.AddConformer(conf, assignId=True)

#     return mol

In [11]:
# import h5py
# from pathlib import Path

# all_data = []


# def is_suspicious(mol):
#     for atom in mol.GetAtoms():
#         sym = atom.GetSymbol()
#         val = atom.GetExplicitValence()
#         if (
#             (sym == "C" and val > 4)
#             or (sym == "N" and val > 4)
#             or (sym == "O" and val > 2)
#         ):
#             return True
#     return False


# with h5py.File("/home/calvin/Downloads/Transition1x.h5", "r") as f:
#     for split in ["train"]:
#         for formula in f[split]:
#             for rxn in f[split][formula]:
#                 for cfg in f[split][formula][rxn]:
#                     try:
#                         cfg_group = f[split][formula][rxn][cfg]
#                         if not isinstance(cfg_group, h5py.Group):
#                             continue
#                         if "transition" not in cfg.lower():
#                             continue

#                         atomic_numbers = cfg_group["atomic_numbers"][()]
#                         positions_all = cfg_group["positions"][()]
#                         if len(positions_all.shape) != 3 or positions_all.shape[0] == 0:
#                             print(
#                                 f"[SKIP] positions shape invalid: {positions_all.shape}"
#                             )
#                             continue

#                         positions = positions_all[0]

#                         mol = build_rdkit_mol_from_h5_with_bonds(
#                             atomic_numbers, positions
#                         )
#                         resolve_floating_hydrogens(
#                             mol,
#                             atomic_numbers,
#                             positions,
#                             bond_length_table=SINGLE_BOND_LENGTH,
#                         )
#                         log_degree_zero_hydrogens(mol, f"{formula}/{rxn}/{cfg}")

#                         # Log explicit valence BEFORE sanitize
#                         for i, atom in enumerate(mol.GetAtoms()):
#                             try:
#                                 val = atom.GetExplicitValence()
#                             except Exception:
#                                 val = "ERR"
#                             print(
#                                 f"Atom {i}: {atom.GetSymbol()} degree={atom.GetDegree()} valence={val}"
#                             )

#                         # Sanitize
#                         problem = False
#                         try:
#                             Chem.SanitizeMol(mol)
#                         except Exception as e:
#                             print(f"[SanitizeFail] {formula}/{rxn}/{cfg}: {e}")
#                             problem = True

#                         if mol is None or mol.GetNumAtoms() < 4 or is_suspicious(mol):
#                             print(f"[SKIP or Suspicious] {formula}/{rxn}/{cfg}")
#                             for i, atom in enumerate(mol.GetAtoms()):
#                                 try:
#                                     val = atom.GetExplicitValence()
#                                 except Exception:
#                                     val = "ERR"
#                                 print(
#                                     f"  Atom {i}: {atom.GetSymbol()} degree={atom.GetDegree()} valence={val}"
#                                 )
#                             Chem.MolToMolFile(
#                                 mol, f"suspicious_{formula}_{rxn}_{cfg}.mol"
#                             )
#                             continue
#                         result = get_random_dihedral_with_angle(mol, return_sincos=True)
#                         if result is None:
#                             print(
#                                 f"[SKIP] No dihedral found: formula={formula}, rxn={rxn}, cfg={cfg}"
#                             )
#                             continue

#                         _, angle_vec = result
#                         sdf_path = save_mol_to_sdf(
#                             mol, formula, rxn, cfg, split
#                         )

#                         try:
#                             datapoint = data.MoleculeDatapoint.from_sdf(
#                                 sdf_path,
#                                 keep_h=True,
#                                 mol_type="ts",
#                                 sanitize=True,
#                                 y=angle_vec,
#                             )
#                             all_data.append(datapoint)
#                         except Exception as e:
#                             print(
#                                 f"[ERROR] Failed to parse SDF: {sdf_path} ({formula}/{rxn}/{cfg}) — {e}"
#                             )

#                     except Exception as e:
#                         print(f"[ERROR] Outer loop error at {formula}/{rxn}/{cfg}: {e}")

In [12]:
from pathlib import Path


# Rebuild the dataset from the SDF files already persisted in OUTPUT_SDF_DIR.
# Each file is parsed twice: once without a target to obtain a molecule from
# which a dihedral is sampled, then again with that dihedral attached as `y`.
all_data = []
for sdf_file in Path(OUTPUT_SDF_DIR).glob("*.sdf"):
    try:
        # First pass: no target yet, only needed for `dp_temp.mol`.
        dp_temp = data.MoleculeDatapoint.from_sdf(
            str(sdf_file),
            keep_h=True,
            mol_type="ts",
            sanitize=True
        )
        # The dihedral is picked at random, so the target attached to a given
        # file can differ between runs.
        result = get_random_dihedral_with_angle(dp_temp.mol, return_sincos=True)
        if result is None:
            print(f"[SKIP] No dihedral found: {sdf_file}")
            continue
        # Only the angle is kept; the atom indices of the dihedral are dropped.
        _, angle_vec = result
        # Second pass: same file and options, now with the target wrapped in a
        # one-element list.
        # NOTE(review): the shape `from_sdf` expects for `y` is not visible
        # here; confirm that `[angle_vec]` is what it wants.
        dp = data.MoleculeDatapoint.from_sdf(
            str(sdf_file),
            keep_h=True,
            mol_type="ts",
            sanitize=True,
            y=[angle_vec]
        )
        all_data.append(dp)
        
    except Exception as e:
        # Best-effort loading: report the file and move on.
        # NOTE(review): the recorded output shows many files failing here with
        # "'NoneType' object has no attribute 'GetProp'" right after RDKit
        # "Could not sanitize molecule" errors; presumably the reader yields
        # None for those files. Confirm inside `from_sdf`.
        print(f"[ERROR] Loading {sdf_file}: {e}")
# `all_data` now holds all your previously generated datapoints


[15:41:48] Explicit valence for atom # 13 H, 2, is greater than permitted
[15:41:48] ERROR: Could not sanitize molecule ending on line 37
[15:41:48] ERROR: Explicit valence for atom # 13 H, 2, is greater than permitted
[15:41:48] Explicit valence for atom # 1 N, 4, is greater than permitted
[15:41:48] ERROR: Could not sanitize molecule ending on line 32
[15:41:48] ERROR: Explicit valence for atom # 1 N, 4, is greater than permitted
[15:41:48] Explicit valence for atom # 10 H, 2, is greater than permitted
[15:41:48] ERROR: Could not sanitize molecule ending on line 30
[15:41:48] ERROR: Explicit valence for atom # 10 H, 2, is greater than permitted
[15:41:48] Explicit valence for atom # 5 N, 4, is greater than permitted
[15:41:48] ERROR: Could not sanitize molecule ending on line 31
[15:41:48] ERROR: Explicit valence for atom # 5 N, 4, is greater than permitted
[15:41:48] Explicit valence for atom # 7 H, 2, is greater than permitted
[15:41:48] ERROR: Could not sanitize molecule ending on

[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C5H9NO__rxn6289__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C4H8N2__rxn2231__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C4H7NO__rxn2640__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C4H6N2O__rxn6460__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C5H10N2__rxn8630__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C7H12__rxn9585__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/trans

[15:41:48] Explicit valence for atom # 13 H, 2, is greater than permitted
[15:41:48] ERROR: Could not sanitize molecule ending on line 34
[15:41:48] ERROR: Explicit valence for atom # 13 H, 2, is greater than permitted
[15:41:48] Explicit valence for atom # 2 N, 4, is greater than permitted
[15:41:48] ERROR: Could not sanitize molecule ending on line 26
[15:41:48] ERROR: Explicit valence for atom # 2 N, 4, is greater than permitted
[15:41:48] Explicit valence for atom # 11 H, 2, is greater than permitted
[15:41:48] ERROR: Could not sanitize molecule ending on line 37
[15:41:48] ERROR: Explicit valence for atom # 11 H, 2, is greater than permitted
[15:41:48] Explicit valence for atom # 13 H, 2, is greater than permitted
[15:41:48] ERROR: Could not sanitize molecule ending on line 39
[15:41:48] ERROR: Explicit valence for atom # 13 H, 2, is greater than permitted
[15:41:48] Explicit valence for atom # 5 H, 2, is greater than permitted
[15:41:48] ERROR: Could not sanitize molecule ending 

[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C5H8O__rxn1475__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C3H6N2__rxn2774__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C5H10O__rxn3189__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C5H10O2__rxn8720__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C3H7NO__rxn9035__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C6H12__rxn2831__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transit

[15:41:49] Explicit valence for atom # 13 H, 2, is greater than permitted
[15:41:49] ERROR: Could not sanitize molecule ending on line 32
[15:41:49] ERROR: Explicit valence for atom # 13 H, 2, is greater than permitted
[15:41:49] Explicit valence for atom # 2 N, 4, is greater than permitted
[15:41:49] ERROR: Could not sanitize molecule ending on line 31
[15:41:49] ERROR: Explicit valence for atom # 2 N, 4, is greater than permitted
[15:41:49] Explicit valence for atom # 15 H, 2, is greater than permitted
[15:41:49] ERROR: Could not sanitize molecule ending on line 41
[15:41:49] ERROR: Explicit valence for atom # 15 H, 2, is greater than permitted
[15:41:49] Explicit valence for atom # 4 N, 4, is greater than permitted
[15:41:49] ERROR: Could not sanitize molecule ending on line 31
[15:41:49] ERROR: Explicit valence for atom # 4 N, 4, is greater than permitted
[15:41:49] Explicit valence for atom # 6 H, 2, is greater than permitted
[15:41:49] ERROR: Could not sanitize molecule ending on

[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C4H7NO__rxn4759__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C7H10__rxn9488__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C4H6N2O__rxn8705__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C3H4N2__rxn3975__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C6H14__rxn4167__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C3H7N3__rxn1524__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transit

[15:41:49] Explicit valence for atom # 12 H, 2, is greater than permitted
[15:41:49] ERROR: Could not sanitize molecule ending on line 32
[15:41:49] ERROR: Explicit valence for atom # 12 H, 2, is greater than permitted
[15:41:49] Explicit valence for atom # 10 H, 2, is greater than permitted
[15:41:49] ERROR: Could not sanitize molecule ending on line 28
[15:41:49] ERROR: Explicit valence for atom # 10 H, 2, is greater than permitted
[15:41:49] Explicit valence for atom # 14 H, 2, is greater than permitted
[15:41:49] ERROR: Could not sanitize molecule ending on line 36
[15:41:49] ERROR: Explicit valence for atom # 14 H, 2, is greater than permitted
[15:41:49] Explicit valence for atom # 6 N, 4, is greater than permitted
[15:41:49] ERROR: Could not sanitize molecule ending on line 35
[15:41:49] ERROR: Explicit valence for atom # 6 N, 4, is greater than permitted
[15:41:49] Explicit valence for atom # 3 N, 4, is greater than permitted
[15:41:49] ERROR: Could not sanitize molecule ending 

[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C4H6O3__rxn5946__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C3H5N3__rxn2262__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C5H8O2__rxn8141__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C4H8N2O__rxn4882__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C4H7NO2__rxn8594__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C5H9N__rxn0056__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/trans

[15:41:49] Explicit valence for atom # 8 H, 2, is greater than permitted
[15:41:49] ERROR: Could not sanitize molecule ending on line 33
[15:41:49] ERROR: Explicit valence for atom # 8 H, 2, is greater than permitted
[15:41:49] Explicit valence for atom # 15 H, 2, is greater than permitted
[15:41:49] ERROR: Could not sanitize molecule ending on line 43
[15:41:49] ERROR: Explicit valence for atom # 15 H, 2, is greater than permitted
[15:41:49] Explicit valence for atom # 9 H, 2, is greater than permitted
[15:41:49] ERROR: Could not sanitize molecule ending on line 32
[15:41:49] ERROR: Explicit valence for atom # 9 H, 2, is greater than permitted
[15:41:49] Explicit valence for atom # 10 H, 2, is greater than permitted
[15:41:49] ERROR: Could not sanitize molecule ending on line 34
[15:41:49] ERROR: Explicit valence for atom # 10 H, 2, is greater than permitted
[15:41:49] Explicit valence for atom # 0 N, 4, is greater than permitted
[15:41:49] ERROR: Could not sanitize molecule ending on

[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C5H6O2__rxn7410__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C6H12O__rxn7103__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C4H7NO__rxn1041__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C5H7NO__rxn9555__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C4H4N2__rxn2373__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C2H3N3__rxn2805__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transi

[15:41:49] Explicit valence for atom # 11 H, 2, is greater than permitted
[15:41:49] ERROR: Could not sanitize molecule ending on line 29
[15:41:49] ERROR: Explicit valence for atom # 11 H, 2, is greater than permitted
[15:41:49] Explicit valence for atom # 9 H, 2, is greater than permitted
[15:41:49] ERROR: Could not sanitize molecule ending on line 37
[15:41:49] ERROR: Explicit valence for atom # 9 H, 2, is greater than permitted
[15:41:49] Explicit valence for atom # 4 H, 2, is greater than permitted
[15:41:49] ERROR: Could not sanitize molecule ending on line 20
[15:41:49] ERROR: Explicit valence for atom # 4 H, 2, is greater than permitted
[15:41:49] Explicit valence for atom # 14 H, 2, is greater than permitted
[15:41:49] ERROR: Could not sanitize molecule ending on line 34
[15:41:49] ERROR: Explicit valence for atom # 14 H, 2, is greater than permitted
[15:41:49] Explicit valence for atom # 6 N, 4, is greater than permitted
[15:41:49] ERROR: Could not sanitize molecule ending on

[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C5H6O__rxn2364__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C5H10O__rxn9316__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C2H4O2__rxn1314__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C4H8O3__rxn5704__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C3H7N3O__rxn6575__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C4H9NO__rxn1959__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transi

[15:41:50] Explicit valence for atom # 6 H, 2, is greater than permitted
[15:41:50] ERROR: Could not sanitize molecule ending on line 22
[15:41:50] ERROR: Explicit valence for atom # 6 H, 2, is greater than permitted
[15:41:50] Explicit valence for atom # 11 H, 2, is greater than permitted
[15:41:50] ERROR: Could not sanitize molecule ending on line 33
[15:41:50] ERROR: Explicit valence for atom # 11 H, 2, is greater than permitted
[15:41:50] Explicit valence for atom # 4 N, 4, is greater than permitted
[15:41:50] ERROR: Could not sanitize molecule ending on line 35
[15:41:50] ERROR: Explicit valence for atom # 4 N, 4, is greater than permitted
[15:41:50] Explicit valence for atom # 13 H, 2, is greater than permitted
[15:41:50] ERROR: Could not sanitize molecule ending on line 34
[15:41:50] ERROR: Explicit valence for atom # 13 H, 2, is greater than permitted
[15:41:50] Explicit valence for atom # 2 N, 4, is greater than permitted
[15:41:50] ERROR: Could not sanitize molecule ending on

[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C3H3NO2__rxn9310__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C5H7N__rxn0092__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C5H8N2__rxn8322__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C5H8O__rxn2661__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C4H7NO__rxn4627__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C4H8O__rxn2415__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transiti

[15:41:50] Explicit valence for atom # 5 N, 4, is greater than permitted
[15:41:50] ERROR: Could not sanitize molecule ending on line 31
[15:41:50] ERROR: Explicit valence for atom # 5 N, 4, is greater than permitted
[15:41:50] Explicit valence for atom # 6 H, 2, is greater than permitted
[15:41:50] ERROR: Could not sanitize molecule ending on line 34
[15:41:50] ERROR: Explicit valence for atom # 6 H, 2, is greater than permitted
[15:41:50] Explicit valence for atom # 2 N, 4, is greater than permitted
[15:41:50] ERROR: Could not sanitize molecule ending on line 32
[15:41:50] ERROR: Explicit valence for atom # 2 N, 4, is greater than permitted
[15:41:50] Explicit valence for atom # 7 H, 2, is greater than permitted
[15:41:50] ERROR: Could not sanitize molecule ending on line 31
[15:41:50] ERROR: Explicit valence for atom # 7 H, 2, is greater than permitted
[15:41:50] Explicit valence for atom # 12 H, 2, is greater than permitted
[15:41:50] ERROR: Could not sanitize molecule ending on li

[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C5H7N__rxn1606__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C5H8O__rxn4379__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C4H6N2O__rxn9867__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C4H7NO__rxn1239__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C5H8O__rxn1445__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C3H5NO__rxn4141__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transiti

[15:41:50] Explicit valence for atom # 2 N, 4, is greater than permitted
[15:41:50] ERROR: Could not sanitize molecule ending on line 23
[15:41:50] ERROR: Explicit valence for atom # 2 N, 4, is greater than permitted
[15:41:50] Explicit valence for atom # 0 N, 4, is greater than permitted
[15:41:50] ERROR: Could not sanitize molecule ending on line 21
[15:41:50] ERROR: Explicit valence for atom # 0 N, 4, is greater than permitted
[15:41:50] Explicit valence for atom # 8 H, 2, is greater than permitted
[15:41:50] ERROR: Could not sanitize molecule ending on line 25
[15:41:50] ERROR: Explicit valence for atom # 8 H, 2, is greater than permitted
[15:41:50] Explicit valence for atom # 6 N, 4, is greater than permitted
[15:41:50] ERROR: Could not sanitize molecule ending on line 25
[15:41:50] ERROR: Explicit valence for atom # 6 N, 4, is greater than permitted
[15:41:50] Explicit valence for atom # 1 N, 4, is greater than permitted
[15:41:50] ERROR: Could not sanitize molecule ending on lin

[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C3H3NO2__rxn4827__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C3H3NO__rxn2387__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C3H6O__rxn1630__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C4H3N3__rxn5526__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C5H8N2__rxn5504__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C2H3N3O__rxn0336__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/trans

[15:41:50] Explicit valence for atom # 9 H, 2, is greater than permitted
[15:41:50] ERROR: Could not sanitize molecule ending on line 27
[15:41:50] ERROR: Explicit valence for atom # 9 H, 2, is greater than permitted
[15:41:50] Explicit valence for atom # 8 H, 2, is greater than permitted
[15:41:50] ERROR: Could not sanitize molecule ending on line 26
[15:41:50] ERROR: Explicit valence for atom # 8 H, 2, is greater than permitted
[15:41:50] Explicit valence for atom # 15 H, 2, is greater than permitted
[15:41:50] ERROR: Could not sanitize molecule ending on line 37
[15:41:50] ERROR: Explicit valence for atom # 15 H, 2, is greater than permitted
[15:41:50] Explicit valence for atom # 15 H, 2, is greater than permitted
[15:41:50] ERROR: Could not sanitize molecule ending on line 36
[15:41:50] ERROR: Explicit valence for atom # 15 H, 2, is greater than permitted
[15:41:50] Explicit valence for atom # 8 H, 2, is greater than permitted
[15:41:50] ERROR: Could not sanitize molecule ending on

[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C5H10O__rxn3674__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C5H9NO__rxn8532__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C6H8O__rxn6627__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C4H5NO2__rxn8692__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C3H4N2O2__rxn5568__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C2H3N3__rxn3262__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/tran

[15:41:50] Explicit valence for atom # 10 H, 2, is greater than permitted
[15:41:50] ERROR: Could not sanitize molecule ending on line 39
[15:41:50] ERROR: Explicit valence for atom # 10 H, 2, is greater than permitted
[15:41:51] Explicit valence for atom # 3 N, 4, is greater than permitted
[15:41:51] ERROR: Could not sanitize molecule ending on line 29
[15:41:51] ERROR: Explicit valence for atom # 3 N, 4, is greater than permitted
[15:41:51] Explicit valence for atom # 9 H, 2, is greater than permitted
[15:41:51] ERROR: Could not sanitize molecule ending on line 36
[15:41:51] ERROR: Explicit valence for atom # 9 H, 2, is greater than permitted
[15:41:51] Explicit valence for atom # 18 H, 2, is greater than permitted
[15:41:51] ERROR: Could not sanitize molecule ending on line 43
[15:41:51] ERROR: Explicit valence for atom # 18 H, 2, is greater than permitted
[15:41:51] Explicit valence for atom # 9 H, 2, is greater than permitted
[15:41:51] ERROR: Could not sanitize molecule ending on

[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C5H10O2__rxn8187__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C4H6N2__rxn4286__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C6H8O__rxn6208__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C6H12O__rxn6260__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C5H9NO__rxn6282__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C5H7N__rxn8996__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transit

[15:41:51] Explicit valence for atom # 11 H, 2, is greater than permitted
[15:41:51] ERROR: Could not sanitize molecule ending on line 31
[15:41:51] ERROR: Explicit valence for atom # 11 H, 2, is greater than permitted
[15:41:51] Explicit valence for atom # 7 H, 2, is greater than permitted
[15:41:51] ERROR: Could not sanitize molecule ending on line 33
[15:41:51] ERROR: Explicit valence for atom # 7 H, 2, is greater than permitted
[15:41:51] Explicit valence for atom # 5 H, 2, is greater than permitted
[15:41:51] ERROR: Could not sanitize molecule ending on line 22
[15:41:51] ERROR: Explicit valence for atom # 5 H, 2, is greater than permitted
[15:41:51] Explicit valence for atom # 12 H, 2, is greater than permitted
[15:41:51] ERROR: Could not sanitize molecule ending on line 31
[15:41:51] ERROR: Explicit valence for atom # 12 H, 2, is greater than permitted
[15:41:51] Explicit valence for atom # 7 H, 2, is greater than permitted
[15:41:51] ERROR: Could not sanitize molecule ending on

[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C4H7NO__rxn4569__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C6H7N__rxn8409__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C2H3N3__rxn0300__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C4H8O__rxn1755__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C4H8__rxn3210__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C4H7NO__rxn3763__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition

[15:41:51] Explicit valence for atom # 12 H, 2, is greater than permitted
[15:41:51] ERROR: Could not sanitize molecule ending on line 44
[15:41:51] ERROR: Explicit valence for atom # 12 H, 2, is greater than permitted
[15:41:51] Explicit valence for atom # 10 H, 2, is greater than permitted
[15:41:51] ERROR: Could not sanitize molecule ending on line 32
[15:41:51] ERROR: Explicit valence for atom # 10 H, 2, is greater than permitted
[15:41:51] Explicit valence for atom # 0 N, 4, is greater than permitted
[15:41:51] ERROR: Could not sanitize molecule ending on line 27
[15:41:51] ERROR: Explicit valence for atom # 0 N, 4, is greater than permitted
[15:41:51] Explicit valence for atom # 11 H, 2, is greater than permitted
[15:41:51] ERROR: Could not sanitize molecule ending on line 36
[15:41:51] ERROR: Explicit valence for atom # 11 H, 2, is greater than permitted
[15:41:51] Explicit valence for atom # 14 H, 2, is greater than permitted
[15:41:51] ERROR: Could not sanitize molecule ending

[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C7H12__rxn5653__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C4H6N2O__rxn6647__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C4H5NO__rxn1670__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C5H9NO__rxn8439__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C5H10O__rxn9227__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C5H8N2__rxn6714__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transi

[15:41:51] Explicit valence for atom # 11 H, 2, is greater than permitted
[15:41:51] ERROR: Could not sanitize molecule ending on line 32
[15:41:51] ERROR: Explicit valence for atom # 11 H, 2, is greater than permitted
[15:41:51] Explicit valence for atom # 1 N, 4, is greater than permitted
[15:41:51] ERROR: Could not sanitize molecule ending on line 26
[15:41:51] ERROR: Explicit valence for atom # 1 N, 4, is greater than permitted
[15:41:51] Explicit valence for atom # 10 H, 2, is greater than permitted
[15:41:51] ERROR: Could not sanitize molecule ending on line 27
[15:41:51] ERROR: Explicit valence for atom # 10 H, 2, is greater than permitted
[15:41:51] Explicit valence for atom # 10 H, 2, is greater than permitted
[15:41:51] ERROR: Could not sanitize molecule ending on line 39
[15:41:51] ERROR: Explicit valence for atom # 10 H, 2, is greater than permitted
[15:41:51] Explicit valence for atom # 3 N, 4, is greater than permitted
[15:41:51] ERROR: Could not sanitize molecule ending 

[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C4H6N2O__rxn5684__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C3H6N2__rxn2150__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C4H6O__rxn0152__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C6H10O__rxn7616__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C5H7NO__rxn5584__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C6H10O__rxn9702__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transi

[15:41:51] Explicit valence for atom # 8 H, 2, is greater than permitted
[15:41:51] ERROR: Could not sanitize molecule ending on line 30
[15:41:51] ERROR: Explicit valence for atom # 8 H, 2, is greater than permitted
[15:41:51] Explicit valence for atom # 11 H, 2, is greater than permitted
[15:41:51] ERROR: Could not sanitize molecule ending on line 33
[15:41:51] ERROR: Explicit valence for atom # 11 H, 2, is greater than permitted
[15:41:51] Explicit valence for atom # 9 H, 2, is greater than permitted
[15:41:51] ERROR: Could not sanitize molecule ending on line 30
[15:41:51] ERROR: Explicit valence for atom # 9 H, 2, is greater than permitted
[15:41:52] Explicit valence for atom # 0 N, 4, is greater than permitted
[15:41:52] ERROR: Could not sanitize molecule ending on line 27
[15:41:52] ERROR: Explicit valence for atom # 0 N, 4, is greater than permitted
[15:41:52] Explicit valence for atom # 16 H, 2, is greater than permitted
[15:41:52] ERROR: Could not sanitize molecule ending on 

[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C4H5NO2__rxn5370__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C4H7NO2__rxn6242__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C4H6O2__rxn3372__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C3H5N3__rxn0738__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C6H10O__rxn7998__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C4H8O__rxn8982__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/trans

[15:41:52] Explicit valence for atom # 9 H, 2, is greater than permitted
[15:41:52] ERROR: Could not sanitize molecule ending on line 28
[15:41:52] ERROR: Explicit valence for atom # 9 H, 2, is greater than permitted
[15:41:52] Explicit valence for atom # 2 N, 4, is greater than permitted
[15:41:52] ERROR: Could not sanitize molecule ending on line 37
[15:41:52] ERROR: Explicit valence for atom # 2 N, 4, is greater than permitted
[15:41:52] Explicit valence for atom # 4 N, 4, is greater than permitted
[15:41:52] ERROR: Could not sanitize molecule ending on line 38
[15:41:52] ERROR: Explicit valence for atom # 4 N, 4, is greater than permitted
[15:41:52] Explicit valence for atom # 12 H, 2, is greater than permitted
[15:41:52] ERROR: Could not sanitize molecule ending on line 35
[15:41:52] ERROR: Explicit valence for atom # 12 H, 2, is greater than permitted
[15:41:52] Explicit valence for atom # 4 N, 4, is greater than permitted
[15:41:52] ERROR: Could not sanitize molecule ending on l

[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C4H8O3__rxn7844__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C5H7N__rxn1724__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C4H6O2__rxn4048__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C3H4N2O__rxn3015__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C3H6O__rxn1224__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C5H10O__rxn2435__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transit

[15:41:52] Explicit valence for atom # 0 N, 4, is greater than permitted
[15:41:52] ERROR: Could not sanitize molecule ending on line 27
[15:41:52] ERROR: Explicit valence for atom # 0 N, 4, is greater than permitted
[15:41:52] Explicit valence for atom # 10 H, 2, is greater than permitted
[15:41:52] ERROR: Could not sanitize molecule ending on line 40
[15:41:52] ERROR: Explicit valence for atom # 10 H, 2, is greater than permitted
[15:41:52] Explicit valence for atom # 12 H, 2, is greater than permitted
[15:41:52] ERROR: Could not sanitize molecule ending on line 41
[15:41:52] ERROR: Explicit valence for atom # 12 H, 2, is greater than permitted
[15:41:52] Explicit valence for atom # 11 H, 2, is greater than permitted
[15:41:52] ERROR: Could not sanitize molecule ending on line 33
[15:41:52] ERROR: Explicit valence for atom # 11 H, 2, is greater than permitted
[15:41:52] Explicit valence for atom # 10 H, 2, is greater than permitted
[15:41:52] ERROR: Could not sanitize molecule ending

[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C7H10__rxn8164__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C5H8O__rxn2135__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C5H4N2__rxn8795__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C4H8O2__rxn4797__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C4H7NO2__rxn6248__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C4H7NO2__rxn5529__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transi

[15:41:52] Explicit valence for atom # 10 H, 2, is greater than permitted
[15:41:52] ERROR: Could not sanitize molecule ending on line 32
[15:41:52] ERROR: Explicit valence for atom # 10 H, 2, is greater than permitted
[15:41:52] Explicit valence for atom # 10 H, 2, is greater than permitted
[15:41:52] ERROR: Could not sanitize molecule ending on line 31
[15:41:52] ERROR: Explicit valence for atom # 10 H, 2, is greater than permitted
[15:41:52] Explicit valence for atom # 6 N, 4, is greater than permitted
[15:41:52] ERROR: Could not sanitize molecule ending on line 29
[15:41:52] ERROR: Explicit valence for atom # 6 N, 4, is greater than permitted
[15:41:52] Explicit valence for atom # 9 H, 2, is greater than permitted
[15:41:52] ERROR: Could not sanitize molecule ending on line 33
[15:41:52] ERROR: Explicit valence for atom # 9 H, 2, is greater than permitted
[15:41:52] Explicit valence for atom # 11 H, 2, is greater than permitted
[15:41:52] ERROR: Could not sanitize molecule ending o

[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C4H5N3__rxn6092__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C5H8O__rxn4768__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C4H6O2__rxn1655__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C4H6N2O__rxn6459__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C3H5NO__rxn4658__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C6H10__rxn9192__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transit

[15:41:52] Explicit valence for atom # 5 N, 4, is greater than permitted
[15:41:52] ERROR: Could not sanitize molecule ending on line 25
[15:41:52] ERROR: Explicit valence for atom # 5 N, 4, is greater than permitted
[15:41:52] Explicit valence for atom # 15 H, 2, is greater than permitted
[15:41:52] ERROR: Could not sanitize molecule ending on line 39
[15:41:52] ERROR: Explicit valence for atom # 15 H, 2, is greater than permitted
[15:41:52] Explicit valence for atom # 1 N, 4, is greater than permitted
[15:41:52] ERROR: Could not sanitize molecule ending on line 38
[15:41:52] ERROR: Explicit valence for atom # 1 N, 4, is greater than permitted
[15:41:52] Explicit valence for atom # 3 N, 4, is greater than permitted
[15:41:52] ERROR: Could not sanitize molecule ending on line 27
[15:41:52] ERROR: Explicit valence for atom # 3 N, 4, is greater than permitted
[15:41:52] Explicit valence for atom # 3 N, 4, is greater than permitted
[15:41:52] ERROR: Could not sanitize molecule ending on l

[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C5H8O__rxn2446__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C4H6N2O__rxn6398__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C4H6N2O__rxn8700__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C3H6O__rxn2781__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C4H8N2__rxn3817__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C2H3N3O__rxn3844__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/trans

[15:41:53] Explicit valence for atom # 12 H, 2, is greater than permitted
[15:41:53] ERROR: Could not sanitize molecule ending on line 30
[15:41:53] ERROR: Explicit valence for atom # 12 H, 2, is greater than permitted
[15:41:53] Explicit valence for atom # 6 H, 2, is greater than permitted
[15:41:53] ERROR: Could not sanitize molecule ending on line 19
[15:41:53] ERROR: Explicit valence for atom # 6 H, 2, is greater than permitted
[15:41:53] Explicit valence for atom # 9 H, 2, is greater than permitted
[15:41:53] ERROR: Could not sanitize molecule ending on line 32
[15:41:53] ERROR: Explicit valence for atom # 9 H, 2, is greater than permitted
[15:41:53] Explicit valence for atom # 11 H, 2, is greater than permitted
[15:41:53] ERROR: Could not sanitize molecule ending on line 33
[15:41:53] ERROR: Explicit valence for atom # 11 H, 2, is greater than permitted
[15:41:53] Explicit valence for atom # 7 H, 2, is greater than permitted
[15:41:53] ERROR: Could not sanitize molecule ending on

[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C4H6N2O__rxn6025__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C4H7NO2__rxn5536__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C4H6O3__rxn7127__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C4H8O2__rxn4573__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C4H9NO__rxn9470__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C3H2N4__rxn6788__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/tran

[15:41:53] Explicit valence for atom # 10 H, 2, is greater than permitted
[15:41:53] ERROR: Could not sanitize molecule ending on line 30
[15:41:53] ERROR: Explicit valence for atom # 10 H, 2, is greater than permitted
[15:41:53] Explicit valence for atom # 2 N, 4, is greater than permitted
[15:41:53] ERROR: Could not sanitize molecule ending on line 38
[15:41:53] ERROR: Explicit valence for atom # 2 N, 4, is greater than permitted
[15:41:53] Explicit valence for atom # 9 H, 2, is greater than permitted
[15:41:53] ERROR: Could not sanitize molecule ending on line 33
[15:41:53] ERROR: Explicit valence for atom # 9 H, 2, is greater than permitted
[15:41:53] Explicit valence for atom # 0 N, 4, is greater than permitted
[15:41:53] ERROR: Could not sanitize molecule ending on line 35
[15:41:53] ERROR: Explicit valence for atom # 0 N, 4, is greater than permitted
[15:41:53] Explicit valence for atom # 9 H, 2, is greater than permitted
[15:41:53] ERROR: Could not sanitize molecule ending on l

[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C5H10O__rxn1489__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C2H4N4O__rxn8587__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C5H6O2__rxn7408__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C4H5NO__rxn2993__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C4H8__rxn2421__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transition_sdf_files/C2H4N4__rxn1822__transition_state.sdf: 'NoneType' object has no attribute 'GetProp'
[ERROR] Loading /home/calvin/code/chemprop_original/transit

[15:41:53] Explicit valence for atom # 15 H, 2, is greater than permitted
[15:41:53] ERROR: Could not sanitize molecule ending on line 37
[15:41:53] ERROR: Explicit valence for atom # 15 H, 2, is greater than permitted
[15:41:53] Explicit valence for atom # 3 N, 4, is greater than permitted
[15:41:53] ERROR: Could not sanitize molecule ending on line 37
[15:41:53] ERROR: Explicit valence for atom # 3 N, 4, is greater than permitted
[15:41:53] Explicit valence for atom # 8 H, 2, is greater than permitted
[15:41:53] ERROR: Could not sanitize molecule ending on line 37
[15:41:53] ERROR: Explicit valence for atom # 8 H, 2, is greater than permitted


In [13]:
all_data

[MoleculeDatapoint(mol=<rdkit.Chem.rdchem.Mol object at 0x708bbee12030>, y=[array([ 0.3984862 , -0.91717434], dtype=float32)], weight=1.0, gt_mask=None, lt_mask=None, x_d=None, x_phase=None, name='rxn0623_ts', V_f=None, E_f=None, V_d=None),
 MoleculeDatapoint(mol=<rdkit.Chem.rdchem.Mol object at 0x708bbee11c40>, y=[array([ 0.9893272 , -0.14571112], dtype=float32)], weight=1.0, gt_mask=None, lt_mask=None, x_d=None, x_phase=None, name='rxn6044_ts', V_f=None, E_f=None, V_d=None),
 MoleculeDatapoint(mol=<rdkit.Chem.rdchem.Mol object at 0x708bbee11af0>, y=[array([0.4788202, 0.877913 ], dtype=float32)], weight=1.0, gt_mask=None, lt_mask=None, x_d=None, x_phase=None, name='rxn0082_ts', V_f=None, E_f=None, V_d=None),
 MoleculeDatapoint(mol=<rdkit.Chem.rdchem.Mol object at 0x708bbee120a0>, y=[array([-0.04122122, -0.99915004], dtype=float32)], weight=1.0, gt_mask=None, lt_mask=None, x_d=None, x_phase=None, name='rxn8723_ts', V_f=None, E_f=None, V_d=None),
 MoleculeDatapoint(mol=<rdkit.Chem.rdche

In [14]:
# make_split_indices takes the RDKit Mol objects themselves (structure-based
# split types need them), so pull them out of the datapoints first.
mols = [datapoint.mol for datapoint in all_data]

# The call returns a 3-tuple (train, val, test); unpack it into separate names.
train_indices, val_indices, test_indices = data.make_split_indices(
    mols, "random", (0.8, 0.1, 0.1), seed=42
)

The return type of make_split_indices has changed in v2.1 - see help(make_split_indices)


In [15]:
# Build the train/val/test subsets of all_data from the index lists.
train_data, val_data, test_data = data.split_data_by_indices(
    all_data, train_indices, val_indices, test_indices
)

# Number of datapoints in the first entry of each subset (train, val, test);
# the requested proportions were 0.8 / 0.1 / 0.1.
tuple(len(subset[0]) for subset in (train_data, val_data, test_data))

(6749, 843, 845)

In [16]:
# One featurizer instance is shared by all three datasets.
featurizer = featurizers.SimpleMoleculeMolGraphFeaturizer()

# Wrap the first entry of each split in a MoleculeDataset, in train/val/test order.
train_dset, val_dset, test_dset = (
    data.MoleculeDataset(split[0], featurizer)
    for split in (train_data, val_data, test_data)
)

In [17]:
# Single worker-count setting shared by every loader so it can be tuned in one
# place (the train loader previously hard-coded its own value and ignored it).
num_workers = 6

# Training loader keeps build_dataloader's default shuffling behaviour.
train_loader = data.build_dataloader(train_dset, num_workers=num_workers)
# Validation and test loaders are built with shuffle=False.
val_loader = data.build_dataloader(val_dset, num_workers=num_workers, shuffle=False)
test_loader = data.build_dataloader(test_dset, num_workers=num_workers, shuffle=False)

In [18]:
# Bond message-passing module, configured with depth=4 and dropout=0.2.
mp = nn.BondMessagePassing(depth=4, dropout=0.2)

In [19]:
# Aggregation module: NormAggregation is the one in use. The MeanAggregation
# alternative is kept here, commented out, for reference:
# agg = nn.MeanAggregation()
norm_agg = nn.NormAggregation()

In [20]:
import torch
import torch.nn.functional as F
from chemprop.nn.metrics import ChempropMetric
from torch import Tensor


class CircVectorCosine(ChempropMetric):
    r"""
    Cosine-based loss for 2-D circular vectors.

    The output columns are read as consecutive pairs (each pair is treated as a
    (sin, cos) vector). For every pair it computes
        loss = 1 - cosine_similarity(pred_vector, true_vector)
    and multiplies it by the pair-level mask and the weights.
    """

    def __init__(self, task_weights=None):
        super().__init__(task_weights=task_weights)

    def _calc_unreduced_loss(
        self,
        preds: torch.Tensor,  # (B, out_dim)
        targets: torch.Tensor,  # (B, out_dim)
        mask: torch.Tensor,  # (B, out_dim)
        weights: torch.Tensor,  # (B,), (B, 1), (B, T) or (B, 2T)
        lt_mask: torch.Tensor,
        gt_mask: torch.Tensor,
    ) -> torch.Tensor:
        """Return the per-pair cosine loss.

        Parameters
        ----------
        preds, targets, mask
            Tensors with ``out_dim`` columns. Only the first ``2 * (out_dim // 2)``
            columns are used; a trailing odd column is ignored.
        weights
            Sample weights. Accepted layouts: one value per sample (``(B,)`` or
            ``(B, 1)``), one per pair (``(B, T)``), or one per column
            (``(B, 2T)``, averaged within each pair). Any other layout falls
            back to the mean of all weights.
        lt_mask, gt_mask
            Accepted for interface compatibility; not used here.

        Returns
        -------
        torch.Tensor
            Tensor of shape ``(B, T)`` with ``(1 - cos_sim) * pair_mask * weight``.
        """
        B, orig_dim = preds.shape

        # Number of complete (sin, cos) pairs and the columns they occupy.
        T = orig_dim // 2
        slice_width = 2 * T

        # Drop any trailing unpaired column, then group columns into pairs.
        preds = preds[:, :slice_width].view(B, T, 2)
        targets = targets[:, :slice_width].view(B, T, 2)
        mask = mask[:, :slice_width]

        # Rescale both vectors to unit length (all-zero vectors stay zero).
        preds = F.normalize(preds, dim=-1)
        targets = F.normalize(targets, dim=-1)

        # A pair counts only when both of its components are valid.
        comp_mask = mask.view(B, T, 2).all(dim=-1)  # (B, T)

        # ==== weight handling ====
        w = weights
        if w.numel() == B:
            # One weight per sample, given as (B,) or (B, 1): broadcast it over
            # every pair. Checking numel rather than ndim keeps (B, 1) weights
            # per-sample when T > 1 instead of collapsing them to a batch mean.
            w = w.reshape(B, 1).expand(B, T)
        elif w.ndim == 2 and w.numel() == B * T:
            w = w.reshape(B, T)
        elif w.ndim == 2 and w.numel() == B * slice_width:
            # One weight per column: average the two columns of each pair.
            w = w.reshape(B, T, 2).mean(dim=-1)
        else:
            # Unrecognised layout: fall back to a single global mean weight.
            # Expanding the 0-dim mean directly avoids re-wrapping a tensor in
            # torch.tensor(), which warns and detaches it from the graph.
            w = w.mean().expand(B, T)
        # =========================

        # Cosine similarity in [-1, 1], one value per pair.
        cos_sim = F.cosine_similarity(preds, targets, dim=-1)  # (B, T)

        # NOTE(review): upstream chemprop's ChempropMetric.update appears to
        # multiply the returned tensor by the sample weights, task weights and
        # mask again - confirm that applying mask and weights here is intended.
        loss = (1.0 - cos_sim) * comp_mask * w  # (B, T)

        return loss


class CosineAngleMetric(ChempropMetric):
    """Metric counterpart of the circular cosine loss.

    The unreduced values come straight from
    ``CircVectorCosine._calc_unreduced_loss``; the metric is reported under the
    alias ``mean_cosine_loss``.
    """

    def __init__(self, task_weights=None):
        super().__init__(task_weights=task_weights)

    @property
    def alias(self) -> str:
        """Name under which this metric is reported."""
        return "mean_cosine_loss"

    def _calc_unreduced_loss(
        self,
        preds: torch.Tensor,
        targets: torch.Tensor,
        mask: torch.Tensor,
        weights: torch.Tensor,
        lt_mask: torch.Tensor,
        gt_mask: torch.Tensor,
    ) -> torch.Tensor:
        """Delegate to CircVectorCosine so loss and metric share one computation."""
        shared_impl = CircVectorCosine._calc_unreduced_loss
        return shared_impl(self, preds, targets, mask, weights, lt_mask, gt_mask)


class AngularErrorMetric(ChempropMetric):
    """Absolute angular error between predicted and target 2-vectors.

    Columns are read as consecutive pairs, one pair per task. Each pair is
    turned into an angle with ``atan2(pair[0], pair[1])``; the error is the
    wrapped absolute difference between predicted and target angle, reported
    in degrees when ``in_degrees`` is true (the default) and in radians
    otherwise. The per-pair error is copied into both slots of the pair so
    the result keeps the ``(B, 2T)`` layout of the inputs.
    """

    def __init__(self, task_weights=None, in_degrees: bool = True):
        super().__init__(task_weights=task_weights)
        self.in_degrees = in_degrees

    @property
    def alias(self) -> str:
        """Name under which this metric is reported."""
        return "val_maae_deg"

    def _calc_unreduced_loss(
        self,
        preds: Tensor,  # (B, 2T)
        targets: Tensor,  # (B, 2T)
        mask: Tensor,  # (B, 2T)
        weights: Tensor,  # (B,) or (B,T) or (B,2T)
        lt_mask: Tensor,
        gt_mask: Tensor,
    ) -> Tensor:
        """Return the per-entry angular error with shape ``(B, 2T)``.

        ``weights``, ``lt_mask`` and ``gt_mask`` are accepted for interface
        compatibility and are not used here. Pairs whose mask is not valid in
        both slots yield exactly ``0``.
        """
        B, out_dim = preds.shape
        T = out_dim // 2
        slice_w = 2 * T

        # trim a possible odd trailing column, then group columns into pairs
        p = preds[:, :slice_w].view(B, T, 2)
        t = targets[:, :slice_w].view(B, T, 2)
        m = mask[:, :slice_w].view(B, T, 2)

        # a pair is valid only when both of its slots are valid
        vec_mask = m.all(dim=-1).bool()  # (B, T)

        # angles, difference, wrap into [-pi, pi), abs
        pred_ang = torch.atan2(p[..., 0], p[..., 1])
        true_ang = torch.atan2(t[..., 0], t[..., 1])
        diff = (pred_ang - true_ang + torch.pi) % (2 * torch.pi) - torch.pi
        err = diff.abs()  # (B, T)
        if self.in_degrees:
            err = err * 180.0 / torch.pi

        # Select instead of multiply: ``nan * 0`` is still ``nan``, so a
        # non-finite value in an invalid slot would otherwise leak through.
        err = torch.where(vec_mask, err, torch.zeros_like(err))  # (B, T)

        # tile each error into both slots of its pair -> (B, T, 2) -> (B, 2T)
        err2 = err.unsqueeze(-1).expand(B, T, 2)
        return err2.reshape(B, slice_w)

In [21]:
import torch
from torch import Tensor
from chemprop.nn.predictors import RegressionFFN, AngularRawFFN


class PeriodicRegressionFFN(RegressionFFN):
    """RegressionFFN whose output columns are read as consecutive 2-vectors.

    NOTE: the unit-length renormalisation step is currently disabled, so
    ``forward`` returns the raw network outputs unchanged (apart from a shape
    round-trip that requires an even number of columns).
    """

    def __init__(
        self,
        input_dim: int = 300,
        hidden_dim: int = 300,
        n_layers: int = 1,
        dropout: float = 0.0,
        activation: str = "relu",
        n_tasks: int = 1,
        criterion=None,
    ):
        # Every argument is handed to the parent constructor unchanged.
        parent_kwargs = dict(
            input_dim=input_dim,
            hidden_dim=hidden_dim,
            n_layers=n_layers,
            dropout=dropout,
            activation=activation,
            n_tasks=n_tasks,
            criterion=criterion,
        )
        super().__init__(**parent_kwargs)

    def forward(self, x: Tensor) -> Tensor:
        """Run the feed-forward network and return its raw 2-D output.

        The output width is whatever ``self.ffn`` produces
        (presumably 2 columns per angle; confirm against the parent class).
        """
        out = self.ffn(x)
        n_rows, width = out.shape
        n_pairs = width // 2

        # Group columns into (rows, pairs, 2). The view fails when ``width``
        # is odd, which is the only effect of this round-trip right now.
        pairs = out.view(n_rows, n_pairs, 2)

        # Renormalisation is intentionally switched off. To restore it:
        #   norms = torch.norm(pairs, dim=-1, keepdim=True).clamp(min=1e-6)
        #   pairs = pairs / norms

        # back to (rows, 2 * pairs)
        return pairs.view(n_rows, 2 * n_pairs)


# Candidate head 1: PeriodicRegressionFFN trained with CircVectorCosine.
# NOTE: `ffn` is rebound by the very next statement, so this instance is
# constructed and then discarded.
ffn = PeriodicRegressionFFN(
    input_dim=mp.output_dim,
    n_layers=3,
    dropout=0.2,
    n_tasks=2,
    criterion=CircVectorCosine(task_weights=[1.0, 1.0]),
)

# Candidate head 2: this is the value `ffn` holds once the cell has run.
ffn = AngularRawFFN(
    input_dim=mp.output_dim,
    hidden_dim=300,
    n_layers=2,
    dropout=0.10,
    n_tasks=1,
)


In [22]:
from chemprop.nn import metrics

# Only the first metric is used for training and early stopping; any number
# of additional metrics can be listed after it.
metric_list = [
    AngularErrorMetric(task_weights=[1.0, 1.0]),
    CosineAngleMetric(task_weights=[1.0, 1.0]),
]

In [23]:
# Rebinds `metric_list`: when cells run in order, this single-metric list
# replaces the two-metric list built in the previous cell.
metric_list = [nn.metrics.AngularCosine(lam=0.01)]

In [24]:
# Passed positionally as the fourth argument of `models.MPNN(...)` below.
batch_norm = True

In [25]:
# Assemble the model from objects built in earlier cells (`mp`, `norm_agg`,
# `ffn`, `batch_norm`, `metric_list`).
# init_lr == max_lr == final_lr with warmup_epochs=0: presumably a constant
# learning rate (TODO confirm against the MPNN scheduler).
# NOTE: a later cell replaces `configure_optimizers` on this instance, in
# which case these LR settings are presumably not what the optimiser uses.
dmpnn = models.MPNN(
    mp,
    norm_agg,
    ffn,
    batch_norm,
    metric_list,
    warmup_epochs=0,
    init_lr=3e-4,
    max_lr=3e-4,
    final_lr=3e-4,  # plain Adam
)

In [26]:
class LossHistoryCallback(pl.Callback):
    """Records the logged ``train_loss`` / ``val_loss`` after each epoch.

    Values are appended to ``train_losses`` and ``val_losses`` as plain
    Python floats, one entry per hook invocation.
    """

    def __init__(self):
        super().__init__()
        self.train_losses = []
        self.val_losses = []

    @staticmethod
    def _logged_value(trainer, key):
        """Fetch ``key`` from the trainer's callback metrics as a float."""
        return trainer.callback_metrics[key].cpu().item()

    def on_train_epoch_end(self, trainer, pl_module):
        self.train_losses.append(self._logged_value(trainer, "train_loss"))

    def on_validation_epoch_end(self, trainer, pl_module):
        self.val_losses.append(self._logged_value(trainer, "val_loss"))


# Shared instance; it is handed to the Trainer through `callbacks=[...]`.
loss_history = LossHistoryCallback()


class DebugGradCallback(pl.Callback):
    """Prints gradient norms and the learning rate for the first few steps.

    Also prints, once at global step 0, which values ``isfinite`` takes on
    the targets of the first training batch.
    """

    def __init__(self, first_n_steps: int = 3):
        self.first_n_steps = first_n_steps

    def on_after_backward(self, trainer, pl_module):
        """Lightning hook called after each backward pass."""
        if trainer.global_step >= self.first_n_steps:
            return

        print(f"\n— step {trainer.global_step} —")
        for name, p in pl_module.named_parameters():
            # skip frozen parameters and those that received no gradient
            if not p.requires_grad or p.grad is None:
                continue
            print(f"{name:40s} ‖grad‖={p.grad.norm().item():.3e}")

        # learning rate of the first parameter group of the first optimiser
        lr = trainer.optimizers[0].param_groups[0]["lr"]
        print(f"current LR = {lr:.3e}")

    def on_train_batch_start(self, trainer, pl_module, batch, batch_idx):
        """Inspect the targets of the very first batch."""
        if trainer.global_step != 0:
            return
        # targets are the fourth element from the end of the batch
        *_, targets, _w, _lt, _gt = batch
        print("target mask unique:", targets.isfinite().unique())

In [27]:
def _configure_optimizers(self):
    """Build an Adam optimiser with two LR groups plus a cosine schedule.

    Parameters whose name contains ``"predictor"`` form the head group
    (lr 3e-3); all remaining parameters form the core group (lr 3e-4).
    Weight decay is 1e-4 for both groups. The cosine schedule spans
    ``self.trainer.max_epochs`` and bottoms out at 1e-5.

    Returns a dict with the keys ``"optimizer"`` and ``"lr_scheduler"``.
    """
    named = list(self.named_parameters())
    head = [p for n, p in named if "predictor" in n]
    core = [p for n, p in named if "predictor" not in n]

    param_groups = [
        {'params': core, 'lr': 3e-4},
        {'params': head, 'lr': 3e-3},
    ]
    opt = torch.optim.Adam(param_groups, weight_decay=1e-4)

    # `self.trainer` is read at call time, i.e. once a trainer is attached.
    sched = torch.optim.lr_scheduler.CosineAnnealingLR(
        opt,
        T_max=self.trainer.max_epochs,
        eta_min=1e-5,
    )
    return {"optimizer": opt, "lr_scheduler": sched}

# Bind the function to this one instance through the descriptor protocol, so
# `dmpnn.configure_optimizers()` calls `_configure_optimizers(dmpnn)`.
# Only this instance is affected; the class itself is left untouched.
dmpnn.configure_optimizers = _configure_optimizers.__get__(dmpnn)

In [28]:
from lightning.pytorch.callbacks import ModelCheckpoint, EarlyStopping

# Checkpointing and early stopping watch the same logged validation key.
checkpoint_cb = ModelCheckpoint(
    monitor="val/angular-cos",
    mode="min",  # lower is better
    save_top_k=1,
    # Put the *monitored* value into the file name. The template used to
    # reference `val_maae_deg`, a key that is neither monitored here nor
    # produced by the active `metric_list`, so the file name never showed the
    # value that selected the checkpoint.
    # The key contains "/", so automatic "name=" insertion is disabled and
    # the labels are written out explicitly.
    filename="best-epoch={epoch:02d}-angular_cos={val/angular-cos:.4f}",
    auto_insert_metric_name=False,
)

earlystop_cb = EarlyStopping(
    monitor="val/angular-cos",
    mode="min",
    patience=10,  # stop if no improvement in 10 epochs
)

trainer = pl.Trainer(
    max_epochs=30,
    accelerator="auto",
    devices=1,
    callbacks=[checkpoint_cb, earlystop_cb, loss_history, DebugGradCallback()],
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


In [32]:
from torch.optim import Adam
from torch.nn.utils import clip_grad_norm_
import matplotlib.pyplot as plt

# Width of the encoder output; used as the input width of the new head.
mp_dim = dmpnn.message_passing.output_dim


# --- 2. predictor with no hard normalisation ------------------------------
class AngularFFNNoNorm(nn.AngularFFN):
    """AngularFFN variant that returns raw, un-normalised 2-vectors."""

    def forward(self, z):
        # Two-argument super(): deliberately skips AngularFFN.forward and
        # calls the next `forward` in the MRO after nn.AngularFFN.
        raw = super(nn.AngularFFN, self).forward(z)
        # one 2-vector per row: (len(z), 1, 2); no F.normalize applied
        return raw.view(len(z), 1, 2)

head = AngularFFNNoNorm(
    n_tasks=1,
    input_dim=mp_dim,
    hidden_dim=300,
    n_layers=2,
    dropout=0.0,
    activation="relu",
)
head.vector_target = True

# Optional: re-draw the final layer (last element of the last block) from a
# normal distribution so the initial outputs are not close to zero.
last_linear = head.ffn[-1][-1]
torch.nn.init.normal_(last_linear.weight, mean=0.0, std=0.5)
torch.nn.init.normal_(last_linear.bias, mean=0.0, std=0.1)

# Plug the new head into the model and make every encoder parameter trainable.
dmpnn.predictor = head
for p in dmpnn.message_passing.parameters():
    p.requires_grad = True

# Two parameter groups: the freshly initialised head gets a 10x larger LR
# than the encoder.
opt = Adam(
    [
        {"params": dmpnn.message_passing.parameters(), "lr": 3e-4},
        {"params": dmpnn.predictor.parameters(), "lr": 3e-3},
    ],
    weight_decay=1e-4,
)

lam = 0.01  # weight of the soft unit-length penalty
train_losses, val_losses = [], []

def angular_cos_loss(preds, targets, lam=0.01):
    """Cosine loss with a soft unit-length penalty on the predictions.

    Both inputs are treated as vectors along their last axis.

    Returns a 4-tuple ``(loss, cos_term, unit_pen, norms)``:
      * ``loss``     - mean of ``cos_term + unit_pen`` (scalar tensor)
      * ``cos_term`` - ``1 - cos(angle)`` per vector, cosine clamped just
                       inside (-1, 1)
      * ``unit_pen`` - ``lam * (‖pred‖ - 1)^2`` per vector
      * ``norms``    - ``‖pred‖`` per vector
    """
    eps = 1e-8  # keeps the division finite for zero-length vectors
    norms = preds.norm(dim=-1)

    dot = (preds * targets).sum(-1)
    denom = norms * targets.norm(dim=-1) + eps
    cos_delta = dot / denom

    cos_term = 1.0 - cos_delta.clamp(-1.0 + 1e-7, 1.0 - 1e-7)
    unit_pen = lam * (norms - 1.0).pow(2)

    # Diagnostics: report (but do not stop on) NaNs in either term.
    has_nan = torch.isnan(cos_term).any() or torch.isnan(unit_pen).any()
    if has_nan:
        print("NaNs detected!")

    total = (cos_term + unit_pen).mean()
    return total, cos_term, unit_pen, norms

def hybrid_loss(preds, targets, alpha=0.5, lam=0.01):
    """Blend of cosine loss and squared error, plus a unit-length penalty.

    Per vector (last axis):
        ``alpha * (1 - cos) + (1 - alpha) * ‖pred - target‖^2
          + lam * (‖pred‖ - 1)^2``
    The mean over all vectors is returned as a scalar tensor.
    """
    eps = 1e-8
    pred_norm = preds.norm(dim=-1)

    # direction term: 1 - cos(angle), cosine clamped just inside (-1, 1)
    dot = (preds * targets).sum(-1)
    cos_delta = dot / (pred_norm * targets.norm(dim=-1) + eps)
    cosine = 1.0 - cos_delta.clamp(-1.0 + 1e-7, 1.0 - 1e-7)

    # squared Euclidean distance, summed over the vector components
    mse = ((preds - targets) ** 2).sum(-1)

    # soft pull of the prediction length towards 1
    unit_pen = lam * (pred_norm - 1.0).pow(2)

    per_vector = alpha * cosine + (1 - alpha) * mse + unit_pen
    return per_vector.mean()



# Manual training loop: 30 epochs of hybrid_loss on train_loader, followed by
# a validation pass each epoch. Mean per-batch losses are appended to
# train_losses / val_losses.
# batch[:3] is passed to the model and batch[3] is used as the target
# (presumably bmg, V_d, X_d and Y; confirm against the dataloader).
for epoch in range(30):
    dmpnn.train()
    total_loss = 0

    for batch_idx, batch in enumerate(train_loader):
        opt.zero_grad()
        preds = dmpnn(*batch[:3])
        loss = hybrid_loss(preds, batch[3], alpha=0.5, lam=lam)

        loss.backward()
        # cap the global gradient norm before the optimiser step
        clip_grad_norm_(dmpnn.parameters(), max_norm=5.0)
        opt.step()
        total_loss += loss.item()
        # Diagnostics on the first batch of every epoch only: re-run the
        # forward pass AFTER the optimiser step and print the mean prediction
        # length. NOTE(review): the model is still in train mode here, so any
        # train-mode side effects of a forward pass (e.g. normalisation
        # statistics, dropout) presumably apply to this extra call as well.
        # The "post-init" label is printed every epoch, not just once.
        if batch_idx == 0:
            with torch.no_grad():
                preds = dmpnn(*batch[:3])
                print("post-init mean‖ŷ‖:", preds.norm(dim=-1).mean().item())
    # average of the per-batch losses (not weighted by batch size)
    train_loss = total_loss / len(train_loader)
    train_losses.append(train_loss)

    # Validation
    dmpnn.eval()
    with torch.no_grad():
        total_val_loss = 0
        for batch in val_loader:
            preds = dmpnn(*batch[:3])
            loss = hybrid_loss(preds, batch[3], alpha=0.5, lam=lam)
            total_val_loss += loss.item()
        val_loss = total_val_loss / len(val_loader)
        val_losses.append(val_loss)

    print(f"Epoch {epoch:2d} | Train loss: {train_loss:.4f} | Val loss: {val_loss:.4f}")


post-init mean‖ŷ‖: 8.011380195617676
Epoch  0 | Train loss: 1.8917 | Val loss: 1.0380
post-init mean‖ŷ‖: 0.49196380376815796
Epoch  1 | Train loss: 1.0519 | Val loss: 1.0239
post-init mean‖ŷ‖: 0.1278185099363327
Epoch  2 | Train loss: 1.0352 | Val loss: 1.0309
post-init mean‖ŷ‖: 0.09599997848272324
Epoch  3 | Train loss: 1.0331 | Val loss: 1.1419
post-init mean‖ŷ‖: 0.1310463696718216
Epoch  4 | Train loss: 1.0255 | Val loss: 1.0228
post-init mean‖ŷ‖: 0.08105972409248352
Epoch  5 | Train loss: 1.0068 | Val loss: 1.0531
post-init mean‖ŷ‖: 0.2314944565296173
Epoch  6 | Train loss: 1.0145 | Val loss: 1.0108
post-init mean‖ŷ‖: 0.15149596333503723
Epoch  7 | Train loss: 1.0317 | Val loss: 1.0174
post-init mean‖ŷ‖: 0.11548784375190735
Epoch  8 | Train loss: 1.0222 | Val loss: 1.0290
post-init mean‖ŷ‖: 0.11547130346298218
Epoch  9 | Train loss: 1.0130 | Val loss: 1.0051
post-init mean‖ŷ‖: 0.08722399175167084
Epoch 10 | Train loss: 1.0103 | Val loss: 1.0307
post-init mean‖ŷ‖: 0.1030368506908416

In [30]:
# Count all named parameters and list those that are both frozen and part of
# the message-passing module.
n_total = n_frozen = 0
for n, p in dmpnn.named_parameters():
    n_total += 1
    if p.requires_grad or "message_passing" not in n:
        continue
    print(f"✓ frozen: {n}")
    n_frozen += 1
print(f"{n_frozen}/{n_total} parameters are frozen (message-passing only)")

✓ frozen: message_passing.W_i.weight
✓ frozen: message_passing.W_h.weight
✓ frozen: message_passing.W_o.weight
✓ frozen: message_passing.W_o.bias
4/12 parameters are frozen (message-passing only)


In [None]:
# Disabled: 32-sample loader for over-fitting experiments.
# NOTE(review): later cells iterate over `subset_loader`; within this part of
# the notebook it is only defined in the commented-out lines below.
# subset_loader = data.build_dataloader(
#     torch.utils.data.Subset(train_dset, range(32)), batch_size=32, shuffle=True
# )
# Full Lightning training run on the regular train / validation loaders.
trainer.fit(dmpnn, train_loader, val_loader)


In [None]:
# Width of the encoder output; used as the input width of the new head.
mp_dim = dmpnn.message_passing.output_dim


# --- 2. predictor with no hard normalisation ------------------------------
# NOTE: this re-declares AngularFFNNoNorm with the same body as the earlier
# cell of the same name.
class AngularFFNNoNorm(nn.AngularFFN):
    """AngularFFN variant that returns raw, un-normalised 2-vectors."""

    def forward(self, z):
        # Two-argument super(): deliberately skips AngularFFN.forward and
        # calls the next `forward` in the MRO after nn.AngularFFN.
        raw = super(nn.AngularFFN, self).forward(z)
        # one 2-vector per row: (len(z), 1, 2); no F.normalize applied
        return raw.view(len(z), 1, 2)

head = AngularFFNNoNorm(
    n_tasks=1,
    input_dim=mp_dim,
    hidden_dim=300,
    n_layers=2,
    dropout=0.0,
    activation="relu",
)
head.vector_target = True

# Optional: re-draw the final layer (last element of the last block) from a
# normal distribution so the initial outputs are not close to zero.
last_linear = head.ffn[-1][-1]
torch.nn.init.normal_(last_linear.weight, mean=0.0, std=0.2)
torch.nn.init.normal_(last_linear.bias, mean=0.0, std=0.2)

# Plug the new head into the model.
dmpnn.predictor = head

# --- 3. tiny 32-sample subset --------------------------------------------


# --- 4. loss  -------------------------------------------------------------
def angular_cos_loss(preds, targets, lam=0.0):
    """Cosine loss with an optional soft unit-length penalty.

    Vectors are taken along the last axis. Returns a 2-tuple
    ``(loss, cosine_term)`` where ``cosine_term`` is ``1 - cos(angle)`` per
    vector (cosine clamped just inside (-1, 1)) and ``loss`` is the mean of
    ``cosine_term + lam * (‖pred‖ - 1)^2``.
    """
    eps = 1e-8  # keeps the division finite for zero-length vectors
    pred_norm = preds.norm(dim=-1)

    dot = (preds * targets).sum(-1)
    cos_delta = dot / (pred_norm * targets.norm(dim=-1) + eps)

    cosine_term = 1.0 - cos_delta.clamp(-1.0 + 1e-7, 1.0 - 1e-7)
    unit_pen = lam * (pred_norm - 1.0).pow(2)

    total = (cosine_term + unit_pen).mean()
    return total, cosine_term

# --- 5. optimiser: higher LR for the new head -----------------------------
# Encoder parameters keep the small LR; the new head trains 10x faster.
# No weight decay in this experiment.
opt = torch.optim.Adam(
    [
        {'params': dmpnn.message_passing.parameters(), 'lr': 3e-4},
        {'params': head.parameters(), 'lr': 3e-3},
    ],
    weight_decay=0.0,
)

# --- 6. train  ------------------------------------------------------------
# Over-fit loop: 200 passes over `subset_loader`, logging every 20th step.
# NOTE(review): `subset_loader` is only defined in commented-out code in this
# part of the notebook; make sure it exists before running this cell.
dmpnn.train()
step = 0
for epoch in range(200):
    for batch in subset_loader:
        opt.zero_grad()
        preds = dmpnn(*batch[:3])           # bmg, V_d, X_d  → model forward
        lam = 0.01            # start small (re-assigned every step; constant)
        loss, cos_term = angular_cos_loss(preds, batch[3], lam=lam)
        loss.backward()
        opt.step()

        # progress: total loss, mean prediction length, mean cosine term
        if step % 20 == 0:
            print(f"step {step:4d}  loss {loss.item():.4f}  "
                  f"mean‖ŷ‖ {preds.norm(dim=-1).mean():.3f}  "
                  f"1−cosΔθ {cos_term.mean().item():.3f}")
        step += 1


In [None]:
# 1  build a brand-new prediction head (the encoder inside `dmpnn` is reused)
head = nn.AngularFFN(
    n_tasks=1, input_dim=mp.output_dim,
    hidden_dim=300, n_layers=2, dropout=0.0, activation="relu"
)
head.vector_target = True      # keep (B,1,2)

# The final Linear layer is the last element of the last block, the same
# indexing the other cells use; `head.ffn[-1]` alone is the whole block.
last_linear = head.ffn[-1][-1]
torch.nn.init.normal_(last_linear.weight, mean=0.0, std=0.2)
torch.nn.init.normal_(last_linear.bias, mean=0.0, std=0.2)


# --- remove hard normalisation ---
def forward_no_norm(self, z):
    """Replacement `forward` that returns raw (un-normalised) 2-vectors."""
    # Two-argument super(): skip AngularFFN.forward and call the next
    # `forward` in the MRO after nn.AngularFFN.
    y = super(nn.AngularFFN, self).forward(z).view(len(z), 1, 2)
    return y                    # raw outputs


# NOTE: this patches the class itself, so every AngularFFN instance in the
# kernel is affected from here on.
nn.AngularFFN.forward = forward_no_norm

# Plug the head into the model; without this the optimiser's head group
# would hold parameters that the model never uses.
dmpnn.predictor = head


# 2  soft unit-length penalty
def loss_fn_cos(preds, targets):
    """Mean of `1 - cos(angle)` plus a fixed 0.01-weighted length penalty."""
    cos_delta = (preds * targets).sum(-1) / (
        preds.norm(dim=-1) * targets.norm(dim=-1) + 1e-8
    )
    cosine = 1 - cos_delta.clamp(-1+1e-7, 1-1e-7)   # shape (B,1)
    unit_pen = 0.01 * (preds.norm(dim=-1) - 1).pow(2)   # same shape
    return (cosine + unit_pen).mean()


λ_unit = 0.00          # start at 0 for over-fit test


def angular_cos_loss(preds, targets, lam=λ_unit):
    """Mean of `1 - cos(angle) + lam * (‖pred‖ - 1)^2` as a scalar tensor."""
    eps = 1e-8
    cos_delta = (preds * targets).sum(-1) / (
        preds.norm(dim=-1) * targets.norm(dim=-1) + eps
    )
    cosine_term = 1.0 - cos_delta.clamp(-1.0 + 1e-7, 1.0 - 1e-7)
    unit_pen    = lam * (preds.norm(dim=-1) - 1.0).pow(2)
    return (cosine_term + unit_pen).mean()


# 3  optimiser – per-group LR, no weight decay
opt = torch.optim.Adam([
    {'params': dmpnn.message_passing.parameters(), 'lr': 3e-4},
    {'params': head.parameters(),                   'lr': 3e-3},
])

# 4  train on the small loader for 200 epochs
# NOTE(review): `subset_loader` is only defined in commented-out code in this
# part of the notebook; make sure it exists before running this cell.
dmpnn.train()          # an earlier cell may have left the model in eval mode
step = 0
for epoch in range(200):
    for batch in subset_loader:
        opt.zero_grad()
        preds = dmpnn(*batch[:3])           # same call form as the other cells
        # loss  = loss_fn_cos(preds, batch[3])
        # loss = ((preds - batch[3]).pow(2).sum(-1)).mean()
        λ_unit = 0.00        # no penalty
        loss   = angular_cos_loss(preds, batch[3], lam=λ_unit)
        loss.backward()
        opt.step()
        print(f"epoch {epoch} loss {loss.item():.3f}")
        if step % 20 == 0:
            with torch.no_grad():
                # angular_cos_loss returns only the scalar, so recompute the
                # clamped mean cosine here for the log line
                cos_delta = (preds * batch[3]).sum(-1) / (
                    preds.norm(dim=-1) * batch[3].norm(dim=-1) + 1e-8
                )
                cos_delta = cos_delta.clamp(-1.0 + 1e-7, 1.0 - 1e-7)
                print(f"step {step:4d}  loss {loss.item():.4f}  "
                      f"mean‖ŷ‖ {preds.norm(dim=-1).mean():.3f}  "
                      f"first cosΔθ {cos_delta.mean().item():.3f}")
        step += 1


In [None]:
batch.bmg.y

In [None]:
import matplotlib.pyplot as plt

train_losses = loss_history.train_losses
# Drop the first validation entry: it was recorded during the sanity check
# that runs before the first training epoch.
val_losses = loss_history.val_losses[1:]

# The message is shown only when the assertion FAILS, so it must describe
# the mismatch.
assert len(train_losses) == len(val_losses), (
    f"Length mismatch: train={len(train_losses)}, val={len(val_losses)}"
)

epochs = range(1, len(train_losses) + 1)

plt.figure(figsize=(12,5))
plt.plot(epochs, train_losses, label='Train Loss', marker='o')
plt.plot(epochs, val_losses,   label='Validation Loss', marker='s')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Loss vs. Epoch')
# one tick per epoch, rotated for readability
plt.xticks(epochs, rotation=45)
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()


In [None]:
# Lightning training run on the regular train / validation loaders
# (same call as the earlier `trainer.fit` cell).
trainer.fit(
    dmpnn,
    train_loader,
    val_loader,
)

In [None]:
trainer.test(dmpnn, test_loader)

In [None]:
# Print the first batch of train_loader. `batch` stays bound after the loop
# and is inspected by later cells.
for batch in train_loader:
    print(batch)
    break

In [None]:
trainer.predict(dmpnn, train_loader)

In [None]:
# Show, for every parameter, whether it will receive gradients.
for name, param in dmpnn.named_parameters():
    trainable = param.requires_grad
    print(name, trainable)

In [None]:
print(dmpnn.training)  # Should be True

In [None]:
# Report the gradient norm of every parameter, or say that none is stored.
for name, param in dmpnn.named_parameters():
    if param.grad is None:
        print(f"{name} has no grad")
        continue
    print(f"{name} grad norm: {param.grad.norm().item()}")

In [None]:
# NOTE(review): `i` is assigned but not used in this cell.
i = 0
# Print the first batch of train_loader; `batch` stays bound for the cells
# below.
for batch in train_loader:
    print(batch)
    break

In [None]:
batch[0].V

In [None]:
batch[0].batch

In [None]:
# batch[0].batch maps each node to the index of its molecule, so the largest
# index + 1 is the number of molecules in the batch.
n_mols = batch[0].batch.max().item() + 1
for mol_idx in range(n_mols):
    mol_mask = batch[0].batch == mol_idx
    print(f"Mol {mol_idx} node features:")
    # rows of the node-feature matrix that belong to this molecule
    print(batch[0].V[mol_mask])

In [None]:
all_data[0].mol

In [None]:
# Hybridization and element symbol of atom index 1 in the first datapoint.
print(all_data[0].mol.GetAtomWithIdx(1).GetHybridization())
print(all_data[0].mol.GetAtomWithIdx(1).GetSymbol())

In [None]:
# Hybridization and element symbol of atoms 2 and 4 in the first datapoint.
for atom_idx in (2, 4):
    atom = all_data[0].mol.GetAtomWithIdx(atom_idx)
    print(atom.GetHybridization())
    print(atom.GetSymbol())

In [None]:
Chem.MolToSmiles(all_data[0].mol)

In [None]:
# One line per atom of the first datapoint: symbol, degree, hybridization.
for i in range(all_data[0].mol.GetNumAtoms()):
    atom = all_data[0].mol.GetAtomWithIdx(i)
    symbol = atom.GetSymbol()
    degree = atom.GetDegree()
    hybridization = atom.GetHybridization()
    print(
        f"Atom {i} ({symbol}): Degree={degree}, Hybridization={hybridization}"
    )

In [None]:
# One line per bond of the first datapoint: begin index - end index (type).
for bond in all_data[0].mol.GetBonds():
    begin_idx = bond.GetBeginAtomIdx()
    end_idx = bond.GetEndAtomIdx()
    print(f"{begin_idx} - {end_idx} ({bond.GetBondType()})")