In [None]:
from rdkit.Chem import AllChem
from rdkit import Chem
from rdkit.Chem import Descriptors
from rdkit.ML.Descriptors import MoleculeDescriptors

from rdkit.Chem import Draw
from rdkit.Chem.Draw import IPythonConsole

from mordred import Calculator, descriptors

import pandas as pd

import numpy as np

In [None]:
def mordred_descriptors(data):
    """Compute 2D mordred descriptors for an iterable of SMILES strings.

    Every SMILES is parsed with RDKit, the mordred descriptor set is evaluated
    with ``ignore_3D=True`` (no 3D embedding is performed here), and the
    result is reduced to its numeric columns, cast to float32.  Columns whose
    variance is not strictly positive are dropped; a NaN variance also fails
    that comparison, so such columns are dropped as well.

    Parameters
    ----------
    data : iterable of str
        SMILES strings, one per molecule.

    Returns
    -------
    pandas.DataFrame
        The filtered descriptor table produced by ``Calculator.pandas`` for
        the parsed molecules; an empty frame when ``data`` is empty.

    Raises
    ------
    TypeError
        If an entry is not a string (e.g. a NaN from a missing CSV value).
    ValueError
        If RDKit cannot parse one or more of the SMILES strings.
    """
    smiles = list(data)
    if not smiles:
        # Nothing to featurize; skip the descriptor calculation entirely.
        return pd.DataFrame()

    mols = []
    unparsable = []
    for pos, smi in enumerate(smiles):
        if not isinstance(smi, str):
            raise TypeError(
                'SMILES entry at position {} is not a string: {!r}'.format(pos, smi)
            )
        mol = Chem.MolFromSmiles(smi)
        if mol is None:
            # MolFromSmiles reports a parse failure by returning None; collect
            # all of them so the error names every offending input at once.
            unparsable.append((pos, smi))
        mols.append(mol)

    if unparsable:
        raise ValueError(
            'RDKit could not parse {} SMILES (position, string): {}'.format(
                len(unparsable), unparsable
            )
        )

    calc = Calculator(descriptors, ignore_3D=True)
    df_desc = calc.pandas(mols)
    # Keep numeric columns only; non-numeric (object) columns are discarded.
    df_desc = df_desc.select_dtypes(include=np.number).astype('float32')
    # Drop constant columns (and columns whose variance is NaN).
    df_desc = df_desc.loc[:, df_desc.var() > 0.0]
    return df_desc

In [None]:
# Read the RCTA SMILES file as a single headerless column named 'SMILES'.
dataset = pd.read_csv(
    '../3_makeSmiles/Co_borylation_RCTAsmiles.can',
    header=None,
    names=['SMILES'],
)
# Tag every descriptor column with 'RCTA_' so it stays identifiable once the
# per-component tables are concatenated side by side.
descRCTA = mordred_descriptors(dataset['SMILES']).add_prefix('RCTA_')
descRCTA.shape

In [None]:
# Read the RCTB SMILES file as a single headerless column named 'SMILES'.
dataset = pd.read_csv(
    '../3_makeSmiles/Co_borylation_RCTBsmiles.can',
    header=None,
    names=['SMILES'],
)
# Tag every descriptor column with 'RCTB_' so it stays identifiable once the
# per-component tables are concatenated side by side.
descRCTB = mordred_descriptors(dataset['SMILES']).add_prefix('RCTB_')
descRCTB.shape

In [None]:
# Read the CAT SMILES file as a single headerless column named 'SMILES'.
dataset = pd.read_csv(
    '../3_makeSmiles/Co_borylation_CATsmiles.can',
    header=None,
    names=['SMILES'],
)
# Tag every descriptor column with 'CAT_' so it stays identifiable once the
# per-component tables are concatenated side by side.
descCAT = mordred_descriptors(dataset['SMILES']).add_prefix('CAT_')
descCAT.shape

In [None]:
# Read the PRO SMILES file as a single headerless column named 'SMILES'.
dataset = pd.read_csv(
    '../3_makeSmiles/Co_borylation_PROsmiles.can',
    header=None,
    names=['SMILES'],
)
# Tag every descriptor column with 'PRO_' so it stays identifiable once the
# per-component tables are concatenated side by side.
descPRO = mordred_descriptors(dataset['SMILES']).add_prefix('PRO_')
descPRO.shape

In [None]:
# Read the yield file as a single headerless column named 'yield'.
all_yields = pd.read_csv(
    '../3_makeSmiles/Co_borylation_yield.csv',
    header=None,
    names=['yield'],
)
all_yields.shape

In [None]:
# The descriptor tables and the yield table are combined row by row.
# pd.concat(axis=1) aligns on the index, so tables of different length would
# be silently padded with NaN rather than rejected; check the row counts first.
row_counts = {
    'RCTA': len(descRCTA),
    'RCTB': len(descRCTB),
    'CAT': len(descCAT),
    'PRO': len(descPRO),
    'yield': len(all_yields),
}
if len(set(row_counts.values())) != 1:
    raise ValueError(
        'Row counts differ between input tables: {}'.format(row_counts)
    )

# Descriptor-only table.
data = pd.concat([descRCTA, descRCTB, descCAT, descPRO], axis=1)
data.to_csv('Co_borylation_desc.csv', index=False)

# Same table with the 'yield' column appended as the last column.
data = pd.concat([data, all_yields], axis=1)
data.to_csv('Co_borylation_desc_yields.csv', index=False)