In [1]:
from rdkit.Chem import AllChem
from rdkit import Chem

import pandas as pd
import numpy as np

In [2]:
def morgan_fingerprint(data):
    """Build a fingerprint table for an iterable of SMILES strings.

    Each SMILES is parsed with ``Chem.MolFromSmiles`` and fingerprinted with
    ``AllChem.RDKFingerprint``. No size or path-length arguments are passed,
    so the fingerprint length is whatever ``RDKFingerprint`` defaults to.

    NOTE(review): despite the function name, the active call is
    ``RDKFingerprint``, not a Morgan fingerprint. An earlier revision carried
    a commented-out ``AllChem.GetMorganFingerprintAsBitVect(mol, 2, 256)``
    call; confirm which fingerprint is actually intended.

    Parameters
    ----------
    data : iterable of str
        SMILES strings, one per molecule.

    Returns
    -------
    pandas.DataFrame
        One row per input SMILES (in input order) and one column per
        fingerprint position, with default integer row and column labels.

    Raises
    ------
    ValueError
        If a SMILES cannot be parsed (``Chem.MolFromSmiles`` returned
        ``None``). The message names the offending string and its position.
    """
    morganFP = []
    for position, smi in enumerate(data):
        mol = Chem.MolFromSmiles(smi)
        if mol is None:
            # Fail here with the offending input instead of letting None reach
            # the fingerprint call, which would fail with an unhelpful error.
            raise ValueError(
                f"Could not parse SMILES {smi!r} at position {position}"
            )
        fpts = AllChem.RDKFingerprint(mol)
        morganFP.append(np.array(fpts))
    return pd.DataFrame(np.array(morganFP))

In [3]:
# Read the RCTA SMILES file (single unnamed column) and fingerprint it;
# every resulting column is labelled with the 'RCTA_' prefix.
dataset = pd.read_csv(
    '../3_Co_makeSmiles/Co_borylation_RCTAsmiles.can',
    header=None,
    names=['SMILES'],
)
descRCTA = morgan_fingerprint(dataset['SMILES']).add_prefix('RCTA_')
descRCTA.shape

(938, 256)

In [4]:
# Read the RCTB SMILES file (single unnamed column) and fingerprint it;
# every resulting column is labelled with the 'RCTB_' prefix.
dataset = pd.read_csv(
    '../3_Co_makeSmiles/Co_borylation_RCTBsmiles.can',
    header=None,
    names=['SMILES'],
)
descRCTB = morgan_fingerprint(dataset['SMILES']).add_prefix('RCTB_')
descRCTB.shape

(938, 256)

In [5]:
# Read the CAT SMILES file (single unnamed column) and fingerprint it;
# every resulting column is labelled with the 'CAT_' prefix.
dataset = pd.read_csv(
    '../3_Co_makeSmiles/Co_borylation_CATsmiles.can',
    header=None,
    names=['SMILES'],
)
descCAT = morgan_fingerprint(dataset['SMILES']).add_prefix('CAT_')
descCAT.shape

(938, 256)

In [6]:
# Read the PRO SMILES file (single unnamed column) and fingerprint it;
# every resulting column is labelled with the 'PRO_' prefix.
dataset = pd.read_csv(
    '../3_Co_makeSmiles/Co_borylation_PROsmiles.can',
    header=None,
    names=['SMILES'],
)
descPRO = morgan_fingerprint(dataset['SMILES']).add_prefix('PRO_')
descPRO.shape

(938, 256)

In [7]:
# Load the ADD SMILES file as a one-column frame named 'ADD' (no header row).
ADDsmiles = pd.read_csv(
    '../3_Co_makeSmiles/Co_borylation_ADDsmiles.smi',
    header=None,
    names=['ADD'],
)
ADDsmiles.shape

(938, 1)

In [8]:
# Load the SOL SMILES file as a one-column frame named 'SOL' (no header row).
SOLsmiles = pd.read_csv(
    '../3_Co_makeSmiles/Co_borylation_SOLsmiles.smi',
    header=None,
    names=['SOL'],
)
SOLsmiles.shape

(938, 1)

In [9]:
# Load the yield file as a one-column frame named 'yield' (no header row).
all_yields = pd.read_csv(
    '../3_Co_makeSmiles/Co_borylation_yield.csv',
    header=None,
    names=['yield'],
)
all_yields.shape

(938, 1)

In [10]:
# Assemble the feature table column-wise: the four fingerprint blocks
# followed by the ADD and SOL columns.
# (For a fingerprint-only table, drop ADDsmiles and SOLsmiles from the list.)
feature_blocks = [descRCTA, descRCTB, descCAT, descPRO, ADDsmiles, SOLsmiles]
data = pd.concat(feature_blocks, axis=1)
data.to_csv('Co_borylation_morg.csv', index=False)

# Append the yield column and write the second file.
data = pd.concat([data, all_yields], axis=1)
data.to_csv('Co_borylation_morg_yields.csv', index=False)