In [1]:
import pandas as pd

In [19]:
from rdkit import Chem
from rdkit.Chem.Scaffolds import MurckoScaffold
from rdkit.Chem import Draw

# Scaffold computation

In [3]:
# Read just the 'smiles' column of the xz-compressed ChEMBL table.
chembl = pd.read_csv(
    'data/chembl.tar.xz',
    usecols=['smiles'],
    compression='xz',
)

In [6]:
# Murcko scaffold SMILES of every ChEMBL molecule, with chirality included.
chembl['scaffold'] = chembl['smiles'].apply(
    lambda smi: MurckoScaffold.MurckoScaffoldSmiles(
        smiles=smi,
        includeChirality=True,
    )
)

In [7]:
# Read just the 'smiles' column of the xz-compressed MOSES table.
moses = pd.read_csv(
    'data/moses.tar.xz',
    usecols=['smiles'],
    compression='xz',
)

In [8]:
# Murcko scaffold SMILES of every MOSES molecule, with chirality included.
moses['scaffold'] = moses['smiles'].apply(
    lambda smi: MurckoScaffold.MurckoScaffoldSmiles(
        smiles=smi,
        includeChirality=True,
    )
)

# Rings

In [None]:
from chemicalgof.decompositer import SINGLEXOCYCLICPATT

# Compiled SMARTS queries keyed by their pattern string, filled on first use so
# that per-row calls over a whole dataset do not re-parse the same patterns.
_SMARTS_QUERIES = {}


def _smarts_query(smarts):
    """Return the compiled query molecule for `smarts`, parsing it only once."""
    query = _SMARTS_QUERIES.get(smarts)
    if query is None:
        query = _SMARTS_QUERIES[smarts] = Chem.MolFromSmarts(smarts)
    return query


def get_rings(mol):
    """Return the SMILES of the ring-containing fragments of a molecule.

    The molecule is cut at every bond matched by ``SINGLEXOCYCLICPATT``
    (presumably single exocyclic bonds, judging by the constant's name --
    confirm in ``chemicalgof.decompositer``). No dummy atoms are added at the
    cut points. Of the resulting fragments, only those containing at least one
    ring atom (SMARTS ``[R]``) are kept.

    Args:
        mol: RDKit molecule to decompose.

    Returns:
        Tuple of SMILES strings, one per ring-containing fragment, in the
        order produced by ``Chem.GetMolFrags``. Empty when the molecule has no
        ring atoms.

    Raises:
        ValueError: if ``mol`` is None (e.g. the SMILES it was built from
            could not be parsed).
    """
    if mol is None:
        raise ValueError('get_rings() received None instead of a molecule')

    # Each match is expected to be the pair of atoms joined by a bond to cut.
    cut_pairs = mol.GetSubstructMatches(_smarts_query(SINGLEXOCYCLICPATT))
    bonds = [mol.GetBondBetweenAtoms(*pair).GetIdx() for pair in cut_pairs]

    if bonds:
        pieces = Chem.FragmentOnBonds(mol, addDummies=False, bondIndices=bonds)
    else:
        # Nothing to cut: skip FragmentOnBonds, whose Python wrapper does not
        # accept an empty bond list ("empty bond indices"), and keep the
        # molecule as it is.
        pieces = mol

    ring_atom = _smarts_query('[R]')
    return tuple(
        Chem.MolToSmiles(frag)
        for frag in Chem.GetMolFrags(pieces, asMols=True)
        if frag.HasSubstructMatch(ring_atom)
    )

In [72]:
# Ring-fragment SMILES of each ChEMBL molecule, joined by single spaces.
chembl['rings'] = (
    chembl['smiles']
    .apply(lambda smi: get_rings(Chem.MolFromSmiles(smi)))
    .apply(' '.join)
)

In [73]:
# Ring-fragment SMILES of each MOSES molecule, joined by single spaces.
moses['rings'] = (
    moses['smiles']
    .apply(lambda smi: get_rings(Chem.MolFromSmiles(smi)))
    .apply(' '.join)
)

In [74]:
# Write both annotated tables as xz-compressed CSV, without the index column.
# NOTE(review): the '.tar.xz' suffix suggests a tar archive, but only xz
# compression is applied here -- confirm downstream readers expect that.
chembl.to_csv(
    'data/chembl_scaffolds_rings.tar.xz',
    index=False,
    compression='xz',
)
moses.to_csv(
    'data/moses_scaffolds_rings.tar.xz',
    index=False,
    compression='xz',
)