In [None]:
import pandas as pd
import numpy as np
import copy
from typing import List
from rdkit import Chem
import numpy as np
from rdkit.Chem import Mol, AllChem
from cosymlib import Molecule, Geometry

In [4]:
def get_geo_symmetry(symmetry: str, coords: np.ndarray, atom_ids: np.ndarray,
                     bonds: List[List[int]], central_atom: int = 1):
    """Compute a cosymlib geometrical symmetry measure for one structure.

    Args:
        symmetry: Symmetry label handed to ``Geometry.get_symmetry_measure``
            (this notebook passes ``'Cs'``).
        coords: Atomic positions, one entry per atom. Any array-like is
            accepted; it is converted to nested Python lists for cosymlib.
        atom_ids: Element symbols, one per atom, in the same order as
            ``coords``. Any array-like (ndarray or plain list) is accepted.
        bonds: Connectivity as pairs of atom indices, forwarded unchanged.
            The sibling extractor in this notebook produces 1-based pairs.
        central_atom: Forwarded to ``Geometry.get_symmetry_measure``. The
            default of 1 is the value that used to be hard-coded here.
            NOTE(review): presumably a 1-based atom index where 0 means
            "no central atom" -- confirm against the cosymlib documentation.

    Returns:
        Whatever ``Geometry.get_symmetry_measure`` returns for ``symmetry``.
    """
    # np.asarray lets callers pass either ndarrays or plain lists; the previous
    # direct ``.tolist()`` call raised AttributeError for list inputs.
    geometry = Geometry(positions=np.asarray(coords).tolist(),
                        symbols=np.asarray(atom_ids).tolist(),
                        connectivity=bonds)

    return geometry.get_symmetry_measure(symmetry, central_atom=central_atom)

In [5]:
def get_3d_geometry_with_atom_type_identifiers(mol):
    """Extract coordinates, element symbols and bonds from a molecule.

    Args:
        mol: Molecule object exposing ``GetConformer``, ``GetNumAtoms``,
            ``GetAtoms`` and ``GetBonds`` (an RDKit ``Mol`` in this notebook).
            ``GetConformer()`` is called without arguments, so the molecule
            must already carry a conformer.

    Returns:
        A ``(coords, atom_ids, bonds)`` tuple:
            coords: ``np.ndarray`` built from ``GetAtomPosition(i)`` for every
                atom index ``i``.
            atom_ids: ``np.ndarray`` of the atoms' ``GetSymbol()`` strings, in
                atom order.
            bonds: sorted list of unique ``(low, high)`` atom-index pairs,
                shifted by +1 relative to the molecule's own indices.
                NOTE(review): the +1 is presumably because cosymlib expects
                1-based connectivity -- confirm.
    """
    conf = mol.GetConformer()
    coords = np.array([conf.GetAtomPosition(i) for i in range(mol.GetNumAtoms())])
    atom_ids = np.array([atom.GetSymbol() for atom in mol.GetAtoms()])

    # Walk the bond list once instead of visiting every bond from both of its
    # atoms; the set still guards against duplicate pairs.
    bonds = {
        tuple(sorted((bond.GetBeginAtomIdx() + 1, bond.GetEndAtomIdx() + 1)))
        for bond in mol.GetBonds()
    }

    # Sorted so the result does not depend on set iteration order.
    return coords, atom_ids, sorted(bonds)

In [None]:
def get_cs_symmetry_from_mol(mol: "Mol"):
    """Return the ``'Cs'`` symmetry measure of ``mol``, or ``None`` on failure.

    The molecule is converted with
    ``get_3d_geometry_with_atom_type_identifiers`` and the result is passed to
    ``get_geo_symmetry`` with the ``'Cs'`` label.

    Args:
        mol: Molecule to analyse.

    Returns:
        The value returned by ``get_geo_symmetry``, or ``None`` when either
        step raises an ordinary exception.
    """
    try:
        coords, atom_ids, bonds = get_3d_geometry_with_atom_type_identifiers(mol)
        return get_geo_symmetry('Cs', coords, atom_ids, bonds)
    except Exception:
        # Deliberately best-effort so one bad molecule does not abort a whole
        # DataFrame.apply. Catching Exception (not a bare except) lets
        # KeyboardInterrupt and SystemExit still stop a long run.
        return None

In [None]:
def chi_char(atom_table):
    """Normalised scalar-triple-product chirality characteristic of a chain.

    Consecutive rows of ``atom_table`` define separation vectors ``d_i``.
    The result is::

        sum_i (d_i x d_{i+1}) . d_{i+2}  /  sum_i |d_i| |d_{i+1}| |d_{i+2}|

    taken over every run of three consecutive separation vectors.

    Args:
        atom_table: Object indexable by ``'x'``, ``'y'`` and ``'z'`` (e.g. a
            DataFrame or a dict), each giving an equally long sequence of
            numeric coordinates in chain order.

    Returns:
        The characteristic as a float, or ``np.nan`` when there are fewer than
        three separation vectors (fewer than four points) or when the
        normalisation denominator is zero.
    """
    # One row per separation vector between consecutive points.
    seps = np.column_stack([
        np.diff(np.asarray(atom_table[axis], dtype=float))
        for axis in ('x', 'y', 'z')
    ])

    # A triple product needs three consecutive separation vectors.
    if len(seps) < 3:
        return np.nan

    first, second, third = seps[:-2], seps[1:-1], seps[2:]
    lengths = np.linalg.norm(seps, axis=1)

    # Normalisation denominator: product of the three lengths in each window.
    sum_dist = np.sum(lengths[:-2] * lengths[1:-1] * lengths[2:])

    # Numerator: scalar triple product (d_i x d_{i+1}) . d_{i+2} per window.
    res = np.sum(np.cross(first, second) * third)

    return res / sum_dist if sum_dist != 0 else np.nan

In [3]:
# Load the pickled test / train / validation splits, in that order.
# NOTE: unpickling can execute arbitrary code, so only load files from a
# trusted source.
test_final_RSA, train_final_RSA, validation_final_RSA = (
    pd.read_pickle(pickle_path)
    for pickle_path in (
        "test_final_RSA.pkl",
        "train_final_RSA.pkl",
        "validation_final_RSA.pkl",
    )
)

In [7]:
# Add a 'CCM' column to every split: the value get_cs_symmetry_from_mol returns
# for each molecule in 'rdkit_mol_cistrans_stereo' (None where it fails).
for split_df in (test_final_RSA, train_final_RSA, validation_final_RSA):
    split_df['CCM'] = split_df['rdkit_mol_cistrans_stereo'].apply(get_cs_symmetry_from_mol)

In [None]:
# Write the three split DataFrames to their *_final_CCM.pkl files.
for split_df, ccm_path in zip(
    (test_final_RSA, train_final_RSA, validation_final_RSA),
    ('test_final_CCM.pkl', 'train_final_CCM.pkl', 'validation_final_CCM.pkl'),
):
    split_df.to_pickle(ccm_path)