In [1]:
from rdkit.Chem import AllChem
from rdkit import Chem
from rdkit.Chem import Descriptors
from rdkit.ML.Descriptors import MoleculeDescriptors

import pandas as pd
import numpy as np

In [2]:
def desc_FP(data, radius=2, n_bits=256):
    """Featurise SMILES strings as Morgan fingerprint bits plus RDKit descriptors.

    Parameters
    ----------
    data : iterable of str
        SMILES strings, one molecule per element (e.g. a pandas Series).
    radius : int, default 2
        Radius passed to the Morgan fingerprint.
    n_bits : int, default 256
        Length of the folded Morgan bit vector.

    Returns
    -------
    pandas.DataFrame
        One row per input element, in input order, with a fresh positional
        index (the index of ``data`` is not carried over). The first
        ``n_bits`` columns are labelled ``0 .. n_bits - 1`` and hold the
        fingerprint bits; the remaining columns are named after every entry
        of ``Descriptors._descList``.

    Raises
    ------
    ValueError
        If an element is not a string or RDKit cannot parse it as SMILES.
        Rows are never dropped, so the output stays positionally aligned
        with the input.
    """
    calc = MoleculeDescriptors.MolecularDescriptorCalculator(
        [entry[0] for entry in Descriptors._descList]
    )
    desc_names = calc.GetDescriptorNames()

    fingerprints = []
    descriptor_rows = []

    for position, smi in enumerate(data):
        # MolFromSmiles signals a parse failure by returning None, and
        # non-string input (e.g. NaN from a blank CSV cell) is not valid
        # either; fail here with the offending row instead of deeper in RDKit.
        mol = Chem.MolFromSmiles(smi) if isinstance(smi, str) else None
        if mol is None:
            raise ValueError(
                f"desc_FP: could not parse SMILES at position {position}: {smi!r}"
            )

        # The fingerprint is taken from the molecule as parsed.
        bit_vect = AllChem.GetMorganFingerprintAsBitVect(mol, radius, nBits=n_bits)
        fingerprints.append(np.array(bit_vect))

        # Descriptors are evaluated on the molecule with explicit hydrogens
        # added. No conformer is generated at any point.
        mol_with_hs = Chem.AddHs(mol)
        descriptor_rows.append(calc.CalcDescriptors(mol_with_hs))

    # The explicit reshape keeps the n_bits columns even when `data` is empty.
    fp_matrix = np.array(fingerprints).reshape(len(fingerprints), n_bits)
    df_FP = pd.DataFrame(fp_matrix)
    df_desc = pd.DataFrame(descriptor_rows, columns=desc_names)

    return pd.concat([df_FP, df_desc], axis=1)

In [3]:
# RCTA component: the file is read without a header row into a single
# 'SMILES' column.
dataset = pd.read_csv(
    '../3_Ni_smiles/Ni_borylation_RCTAsmiles.can',
    header=None,
    names=['SMILES'],
)
# Featurise, then prefix every column so this component's columns remain
# distinguishable once the per-component tables are placed side by side.
descRCTA = desc_FP(dataset['SMILES']).add_prefix('RCTA_')
descRCTA.shape

(1896, 464)

In [4]:
# RCTB component: the file is read without a header row into a single
# 'SMILES' column.
dataset = pd.read_csv(
    '../3_Ni_smiles/Ni_borylation_RCTBsmiles.can',
    header=None,
    names=['SMILES'],
)
# Featurise, then prefix every column so this component's columns remain
# distinguishable once the per-component tables are placed side by side.
descRCTB = desc_FP(dataset['SMILES']).add_prefix('RCTB_')
descRCTB.shape

(1896, 464)

In [5]:
# CAT component: the file is read without a header row into a single
# 'SMILES' column.
dataset = pd.read_csv(
    '../3_Ni_smiles/Ni_borylation_CATsmiles.can',
    header=None,
    names=['SMILES'],
)
# Featurise, then prefix every column so this component's columns remain
# distinguishable once the per-component tables are placed side by side.
descCAT = desc_FP(dataset['SMILES']).add_prefix('CAT_')
descCAT.shape

(1896, 464)

In [6]:
# PRO component: the file is read without a header row into a single
# 'SMILES' column.
dataset = pd.read_csv(
    '../3_Ni_smiles/Ni_borylation_PROsmiles.can',
    header=None,
    names=['SMILES'],
)
# Featurise, then prefix every column so this component's columns remain
# distinguishable once the per-component tables are placed side by side.
descPRO = desc_FP(dataset['SMILES']).add_prefix('PRO_')
descPRO.shape

(1896, 464)

In [7]:
# ADD SMILES are kept as a raw string column named 'ADD'; they are not passed
# through desc_FP in this cell.
ADDsmiles = pd.read_csv(
    '../3_Ni_smiles/Ni_borylation_ADDsmiles.smi',
    header=None,
    names=['ADD'],
)
ADDsmiles.shape

(1896, 1)

In [8]:
# SOL SMILES are kept as a raw string column named 'SOL'; they are not passed
# through desc_FP in this cell.
SOLsmiles = pd.read_csv(
    '../3_Ni_smiles/Ni_borylation_SOLsmiles.smi',
    header=None,
    names=['SOL'],
)
SOLsmiles.shape

(1896, 1)

In [9]:
# Yield values: the file is read without a header row into a single 'yield'
# column.
all_yields = pd.read_csv(
    '../3_Ni_smiles/Ni_borylation_yield.csv',
    header=None,
    names=['yield'],
)
all_yields.shape

(1896, 1)

In [10]:
# Feature table: the four prefixed fingerprint/descriptor blocks followed by
# the raw ADD and SOL SMILES columns.
# (For a descriptor-only table, leave ADDsmiles and SOLsmiles out of this list.)
feature_blocks = [descRCTA, descRCTB, descCAT, descPRO, ADDsmiles, SOLsmiles]

# pd.concat(axis=1) aligns on the index and fills missing rows with NaN, so a
# table with a different number of rows would be written out silently
# misaligned. Refuse to continue instead.
row_counts = {len(block) for block in feature_blocks + [all_yields]}
if len(row_counts) != 1:
    raise ValueError(
        f'Input tables differ in row count: {sorted(row_counts)}'
    )

features = pd.concat(feature_blocks, axis=1)
features.to_csv('Ni_borylation_desc.csv', index=False)

# Same table with the 'yield' column appended as the last column.
data = pd.concat([features, all_yields], axis=1)
data.to_csv('Ni_borylation_desc_yields.csv', index=False)