In [65]:
import pandas as pd
import numpy as np

from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem import PandasTools
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem import AllChem
from rdkit.Chem import rdFingerprintGenerator
from rdkit import DataStructs
from rdkit.Chem import MACCSkeys
from rdkit.Chem.Fingerprints import FingerprintMols

from descriptastorus.descriptors.DescriptorGenerator import MakeGenerator
#https://github.com/bp-kelley/descriptastorus

from mordred import Calculator, descriptors

In [3]:
# Read both splits, using each file's `CASRN` column as the row index.
train, test = (
    pd.read_csv(csv_path, index_col='CASRN')
    for csv_path in (
        '../data/train_test_sets/train.csv',
        '../data/train_test_sets/test.csv',
    )
)

In [4]:
# Add an RDKit molecule column to each split, built from its `SMILES` column.
# `molCol` is spelled out (it is also the library default) because later
# cells read the molecules back from `ROMol`.
PandasTools.AddMoleculeColumnToFrame(train, smilesCol='SMILES', molCol='ROMol')
PandasTools.AddMoleculeColumnToFrame(test, smilesCol='SMILES', molCol='ROMol')

## ECFP6 Bits

In [8]:
# Morgan bit-vector fingerprints (radius 3) for every molecule in each split.
# nBits is spelled out (2048 is also the RDKit default) because the column
# labels built further down assume exactly 2048 positions.
train_ECFP6 = [AllChem.GetMorganFingerprintAsBitVect(mol, 3, nBits=2048) for mol in train['ROMol']]
test_ECFP6 = [AllChem.GetMorganFingerprintAsBitVect(mol, 3, nBits=2048) for mol in test['ROMol']]

In [165]:
# Expand each fingerprint into a plain Python list of its per-position values.
train_ecfp6_lists = list(map(list, train_ECFP6))
test_ecfp6_lists = list(map(list, test_ECFP6))

In [170]:
# Column labels 'Bit_0' ... 'Bit_2047', one per fingerprint position.
ECFP6_N_BITS = 2048
ecfp6_name = [f'Bit_{i}' for i in range(ECFP6_N_BITS)]

In [171]:
# One row per compound (indexed like the source split), one column per bit label.
train_ecfp6_df = pd.DataFrame(data=train_ecfp6_lists, columns=ecfp6_name, index=train.index)
test_ecfp6_df = pd.DataFrame(data=test_ecfp6_lists, columns=ecfp6_name, index=test.index)

In [172]:
# Sanity check: (rows, columns) of the train and test bit-fingerprint tables.
tuple(frame.shape for frame in (train_ecfp6_df, test_ecfp6_df))

((8221, 2048), (2849, 2048))

In [175]:
# Save both bit-fingerprint tables as CSV files.
train_ecfp6_df.to_csv(path_or_buf='../data/Descriptors/train_ecfp6_bits.csv')
test_ecfp6_df.to_csv(path_or_buf='../data/Descriptors/test_ecfp6_bits.csv')

## ECFP6 counts

In [184]:
# Hashed Morgan count fingerprints (radius 3) for every molecule in each split.
# nBits is spelled out (2048 is also the RDKit default) because the counts
# DataFrames reuse the 2048 `ecfp6_name` column labels.
train_ECFP6_counts = [AllChem.GetHashedMorganFingerprint(mol, 3, nBits=2048) for mol in train['ROMol']]
test_ECFP6_counts = [AllChem.GetHashedMorganFingerprint(mol, 3, nBits=2048) for mol in test['ROMol']]

In [185]:
# Expand each count fingerprint into a plain list of per-position values.
# NOTE: the double underscore in `test_ecfp6__counts_lists` is kept on purpose;
# the DataFrame cell that follows refers to that exact name.
train_ecfp6_counts_lists = list(map(list, train_ECFP6_counts))
test_ecfp6__counts_lists = list(map(list, test_ECFP6_counts))

In [186]:
# One row per compound (indexed like the source split), one column per bit label.
train_ecfp6_counts_df = pd.DataFrame(data=train_ecfp6_counts_lists, columns=ecfp6_name, index=train.index)
test_ecfp6_counts_df = pd.DataFrame(data=test_ecfp6__counts_lists, columns=ecfp6_name, index=test.index)

In [187]:
# Sanity check: (rows, columns) of the train and test count-fingerprint tables.
tuple(frame.shape for frame in (train_ecfp6_counts_df, test_ecfp6_counts_df))

((8221, 2048), (2849, 2048))

In [189]:
# Save both count-fingerprint tables as CSV files.
train_ecfp6_counts_df.to_csv(path_or_buf='../data/Descriptors/train_ecfp6_counts.csv')
test_ecfp6_counts_df.to_csv(path_or_buf='../data/Descriptors/test_ecfp6_counts.csv')

## MACCS keys

In [11]:
# MACCS key fingerprints for every molecule in each split.
train_maccs = list(map(MACCSkeys.GenMACCSKeys, train['ROMol']))
test_maccs = list(map(MACCSkeys.GenMACCSKeys, test['ROMol']))

In [159]:
# Expand each MACCS fingerprint into a plain Python list of its per-position values.
train_maccs_lists = list(map(list, train_maccs))
test_maccs_lists = list(map(list, test_maccs))

In [160]:
# Column labels 'Bit_0' ... 'Bit_166', one per MACCS fingerprint position.
MACCS_N_BITS = 167
maccs_name = [f'Bit_{i}' for i in range(MACCS_N_BITS)]

In [161]:
# One row per compound (indexed like the source split), one column per MACCS label.
train_maccs_df = pd.DataFrame(data=train_maccs_lists, columns=maccs_name, index=train.index)
test_maccs_df = pd.DataFrame(data=test_maccs_lists, columns=maccs_name, index=test.index)

In [162]:
# Sanity check: (rows, columns) of the train and test MACCS tables.
tuple(frame.shape for frame in (train_maccs_df, test_maccs_df))

((8221, 167), (2849, 167))

In [164]:
# Save both MACCS tables as CSV files.
train_maccs_df.to_csv(path_or_buf='../data/Descriptors/train_maccs.csv')
test_maccs_df.to_csv(path_or_buf='../data/Descriptors/test_maccs.csv')

## RDKit 2D descriptors (descriptastorus)

In [54]:
# RDKit 2D descriptors, computed from the SMILES strings via descriptastorus.
generator = MakeGenerator(("RDKit2D",))

# Element 0 of each result is dropped here; the DataFrame cell drops the
# matching first column name the same way (`rdkit2d_name[1:]`).
# NOTE(review): presumably that element is descriptastorus' "calculated OK"
# flag — confirm against the library docs.
train_rdkit2d = [row[1:] for row in map(generator.process, train['SMILES'])]
test_rdkit2d = [row[1:] for row in map(generator.process, test['SMILES'])]

In [133]:
# Descriptor names in the order reported by the generator; the second element
# of each (name, type) pair is not needed here.
rdkit2d_name = [name for name, _ in generator.GetColumns()]

In [137]:
# One row per compound (indexed like the source split). The first generator
# column name is skipped to match the rows, whose first element was dropped.
train_rdkit2d_df = pd.DataFrame(data=train_rdkit2d, columns=rdkit2d_name[1:], index=train.index)
test_rdkit2d_df = pd.DataFrame(data=test_rdkit2d, columns=rdkit2d_name[1:], index=test.index)

In [138]:
# Sanity check: (rows, columns) of the train and test RDKit 2D tables.
tuple(frame.shape for frame in (train_rdkit2d_df, test_rdkit2d_df))

((8221, 200), (2849, 200))

In [141]:
# Save both RDKit 2D descriptor tables as CSV files.
train_rdkit2d_df.to_csv(path_or_buf='../data/Descriptors/train_rdkit2d.csv')
test_rdkit2d_df.to_csv(path_or_buf='../data/Descriptors/test_rdkit2d.csv')

## Mordred descriptors

In [60]:
# Mordred descriptors for every molecule in each split.
# 3D descriptors are skipped: they can't be computed without an sdf or mol file.
mordred_calc = Calculator(descriptors, ignore_3D=True)

train_mordred = mordred_calc.pandas(list(train['ROMol']))
test_mordred = mordred_calc.pandas(list(test['ROMol']))

  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


In [62]:
# Sanity check: (rows, columns) of the raw train and test mordred tables.
tuple(frame.shape for frame in (train_mordred, test_mordred))

((8221, 1613), (2849, 1613))

In [119]:
# Remove non-numerical features.
#
# A descriptor is kept only if it is numeric in BOTH splits. Picking columns
# from the train dtypes alone would let a column that is numeric in train but
# object-typed in test slip into `test_mordred` (and into the saved test CSV).
# Column order follows the train frame, and both frames end up with exactly
# the same columns.
numeric_dtypes = ['float64', 'int64', 'float']
test_numeric_cols = set(test_mordred.select_dtypes(include=numeric_dtypes).columns)
shared_numeric_cols = [
    col
    for col in train_mordred.select_dtypes(include=numeric_dtypes).columns
    if col in test_numeric_cols
]
train_mordred = train_mordred[shared_numeric_cols]
test_mordred = test_mordred[shared_numeric_cols]
train_mordred.shape, test_mordred.shape

((8221, 1071), (2849, 1071))

In [120]:
# Preview the first row of the filtered test descriptors.
test_mordred.iloc[:1]

Unnamed: 0,ABC,ABCGG,nAcid,nBase,SpAbs_A,SpMax_A,SpDiam_A,SpAD_A,SpMAD_A,LogEE_A,VE1_A,VE2_A,VE3_A,VR1_A,VR2_A,nAromAtom,nAromBond,nAtom,nHeavyAtom,nSpiro,nBridgehead,nHetero,nH,nB,nC,nN,nO,nS,nP,nF,nCl,nBr,nI,nX,ATS0dv,ATS1dv,ATS2dv,ATS3dv,ATS4dv,ATS5dv,ATS6dv,ATS7dv,ATS8dv,ATS0d,ATS1d,ATS2d,ATS3d,ATS4d,ATS5d,ATS6d,ATS7d,ATS8d,ATS0Z,ATS1Z,ATS2Z,ATS3Z,ATS4Z,ATS5Z,ATS6Z,ATS7Z,ATS8Z,ATS0m,ATS1m,ATS2m,ATS3m,ATS4m,ATS5m,ATS6m,ATS7m,ATS8m,ATS0v,ATS1v,ATS2v,ATS3v,ATS4v,ATS5v,ATS6v,ATS7v,ATS8v,ATS0se,ATS1se,ATS2se,ATS3se,ATS4se,ATS5se,ATS6se,ATS7se,ATS8se,ATS0pe,ATS1pe,ATS2pe,ATS3pe,ATS4pe,ATS5pe,ATS6pe,ATS7pe,ATS8pe,ATS0are,ATS1are,ATS2are,ATS3are,ATS4are,ATS5are,ATS6are,ATS7are,ATS8are,ATS0p,ATS1p,ATS2p,ATS3p,ATS4p,ATS5p,ATS6p,ATS7p,ATS8p,ATS0i,ATS1i,ATS2i,ATS3i,ATS4i,ATS5i,ATS6i,ATS7i,ATS8i,AATS0dv,AATS1dv,AATS2dv,AATS0d,AATS1d,AATS2d,AATS0Z,AATS1Z,AATS2Z,AATS0m,AATS1m,AATS2m,AATS0v,AATS1v,AATS2v,AATS0se,AATS1se,AATS2se,AATS0pe,AATS1pe,AATS2pe,AATS0are,AATS1are,AATS2are,AATS0p,AATS1p,AATS2p,AATS0i,AATS1i,AATS2i,ATSC0c,ATSC1c,ATSC2c,ATSC3c,ATSC4c,ATSC5c,ATSC6c,ATSC7c,ATSC8c,ATSC0dv,ATSC1dv,ATSC2dv,ATSC3dv,ATSC4dv,ATSC5dv,ATSC6dv,ATSC7dv,ATSC8dv,ATSC0d,ATSC1d,ATSC2d,ATSC3d,ATSC4d,ATSC5d,ATSC6d,ATSC7d,ATSC8d,ATSC0Z,ATSC1Z,ATSC2Z,ATSC3Z,ATSC4Z,ATSC5Z,ATSC6Z,ATSC7Z,ATSC8Z,ATSC0m,ATSC1m,ATSC2m,ATSC3m,ATSC4m,ATSC5m,ATSC6m,ATSC7m,ATSC8m,ATSC0v,ATSC1v,ATSC2v,ATSC3v,ATSC4v,ATSC5v,ATSC6v,ATSC7v,ATSC8v,ATSC0se,ATSC1se,ATSC2se,ATSC3se,ATSC4se,ATSC5se,ATSC6se,ATSC7se,ATSC8se,ATSC0pe,ATSC1pe,ATSC2pe,ATSC3pe,ATSC4pe,ATSC5pe,ATSC6pe,ATSC7pe,ATSC8pe,ATSC0are,ATSC1are,ATSC2are,ATSC3are,ATSC4are,ATSC5are,ATSC6are,ATSC7are,ATSC8are,ATSC0p,ATSC1p,ATSC2p,ATSC3p,ATSC4p,ATSC5p,ATSC6p,ATSC7p,ATSC8p,ATSC0i,ATSC1i,ATSC2i,ATSC3i,ATSC4i,ATSC5i,ATSC6i,ATSC7i,ATSC8i,AATSC0c,AATSC1c,AATSC2c,AATSC0dv,AATSC1dv,AATSC2dv,AATSC0d,AATSC1d,AATSC2d,AATSC0Z,AATSC1Z,AATSC2Z,AATSC0m,AATSC1m,AATSC2m,AATSC0v,AATSC1v,AATSC2v,AATSC0se,AATSC1se,AATSC2se,AATSC0pe,AATSC1pe,AATSC2pe,AATSC0are,AATSC1are,AATSC2are,AATSC0p,
AATSC1p,AATSC2p,AATSC0i,AATSC1i,AATSC2i,MATS1c,MATS2c,MATS1Z,MATS2Z,MATS1m,MATS2m,MATS1v,MATS2v,MATS1se,MATS2se,MATS1pe,MATS2pe,MATS1are,MATS2are,MATS1p,MATS2p,MATS1i,MATS2i,GATS1c,GATS2c,GATS1Z,GATS2Z,GATS1m,GATS2m,GATS1v,GATS2v,GATS1se,GATS2se,GATS1pe,GATS2pe,GATS1are,GATS2are,GATS1p,GATS2p,GATS1i,GATS2i,BCUTc-1h,BCUTc-1l,BCUTdv-1h,BCUTdv-1l,BCUTd-1h,BCUTd-1l,BCUTZ-1h,BCUTZ-1l,BCUTm-1h,BCUTm-1l,BCUTv-1h,BCUTv-1l,BCUTse-1h,BCUTse-1l,BCUTpe-1h,BCUTpe-1l,BCUTare-1h,BCUTare-1l,BCUTp-1h,BCUTp-1l,BCUTi-1h,BCUTi-1l,BalabanJ,SpAbs_DzZ,SpMax_DzZ,SpDiam_DzZ,SpAD_DzZ,SpMAD_DzZ,LogEE_DzZ,SM1_DzZ,VE1_DzZ,VE2_DzZ,VE3_DzZ,VR1_DzZ,VR2_DzZ,SpAbs_Dzm,SpMax_Dzm,SpDiam_Dzm,SpAD_Dzm,SpMAD_Dzm,LogEE_Dzm,SM1_Dzm,VE1_Dzm,VE2_Dzm,VE3_Dzm,VR1_Dzm,VR2_Dzm,SpAbs_Dzv,SpMax_Dzv,SpDiam_Dzv,SpAD_Dzv,SpMAD_Dzv,LogEE_Dzv,SM1_Dzv,VE1_Dzv,VE2_Dzv,VE3_Dzv,VR1_Dzv,VR2_Dzv,SpAbs_Dzse,SpMax_Dzse,SpDiam_Dzse,SpAD_Dzse,SpMAD_Dzse,LogEE_Dzse,SM1_Dzse,VE1_Dzse,VE2_Dzse,VE3_Dzse,VR1_Dzse,VR2_Dzse,SpAbs_Dzpe,SpMax_Dzpe,SpDiam_Dzpe,SpAD_Dzpe,SpMAD_Dzpe,LogEE_Dzpe,SM1_Dzpe,VE1_Dzpe,VE2_Dzpe,VE3_Dzpe,VR1_Dzpe,VR2_Dzpe,SpAbs_Dzare,SpMax_Dzare,SpDiam_Dzare,SpAD_Dzare,SpMAD_Dzare,LogEE_Dzare,SM1_Dzare,VE1_Dzare,VE2_Dzare,VE3_Dzare,VR1_Dzare,VR2_Dzare,SpAbs_Dzp,SpMax_Dzp,SpDiam_Dzp,SpAD_Dzp,SpMAD_Dzp,LogEE_Dzp,SM1_Dzp,VE1_Dzp,VE2_Dzp,VE3_Dzp,VR1_Dzp,VR2_Dzp,SpAbs_Dzi,SpMax_Dzi,SpDiam_Dzi,SpAD_Dzi,SpMAD_Dzi,LogEE_Dzi,SM1_Dzi,VE1_Dzi,VE2_Dzi,VE3_Dzi,VR1_Dzi,VR2_Dzi,BertzCT,nBonds,nBondsO,nBondsS,nBondsD,nBondsT,nBondsA,nBondsM,nBondsKS,nBondsKD,RNCG,RPCG,C1SP1,C2SP1,C1SP2,C2SP2,C3SP2,C1SP3,C2SP3,C3SP3,C4SP3,FCSP3,Xch-3d,Xch-4d,Xch-5d,Xch-6d,Xch-7d,Xch-3dv,Xch-4dv,Xch-5dv,Xch-6dv,Xch-7dv,Xc-3d,Xc-4d,Xc-5d,Xc-6d,Xc-3dv,Xc-4dv,Xc-5dv,Xc-6dv,Xpc-4d,Xpc-5d,Xpc-6d,Xpc-4dv,Xpc-5dv,Xpc-6dv,Xp-1d,Xp-2d,Xp-3d,Xp-4d,Xp-5d,Xp-6d,Xp-7d,Xp-1dv,Xp-2dv,Xp-3dv,Xp-4dv,Xp-5dv,Xp-6dv,Xp-7dv,SZ,Sm,Sv,Sse,Spe,Sare,Sp,Si,MZ,Mm,Mv,Mse,Mpe,Mare,Mp,Mi,SpAbs_Dt,SpMax_Dt,SpDiam_Dt,SpAD_Dt,SpMAD_Dt,LogEE_Dt,SM1_Dt,VE1_Dt,VE2_Dt,VE3_Dt,VR1_Dt,VR2_
Dt,DetourIndex,SpAbs_D,SpMax_D,SpDiam_D,SpAD_D,SpMAD_D,LogEE_D,VE1_D,VE2_D,VE3_D,VR1_D,VR2_D,NsLi,NssBe,NssssBe,NssBH,NsssB,NssssB,NsCH3,NdCH2,NssCH2,NtCH,NdsCH,NaaCH,NsssCH,NddC,NtsC,NdssC,NaasC,NaaaC,NssssC,NsNH3,NsNH2,NssNH2,NdNH,NssNH,NaaNH,NtN,NsssNH,NdsN,NaaN,NsssN,NddsN,NaasN,NssssN,NsOH,NdO,NssO,NaaO,NsF,NsSiH3,NssSiH2,NsssSiH,NssssSi,NsPH2,NssPH,NsssP,NdsssP,NsssssP,NsSH,NdS,NssS,NaaS,NdssS,NddssS,NsCl,NsGeH3,NssGeH2,NsssGeH,NssssGe,NsAsH2,NssAsH,NsssAs,NsssdAs,NsssssAs,NsSeH,NdSe,NssSe,NaaSe,NdssSe,NddssSe,NsBr,NsSnH3,NssSnH2,NsssSnH,NssssSn,NsI,NsPbH3,NssPbH2,NsssPbH,NssssPb,SsLi,SssBe,SssssBe,SssBH,SsssB,SssssB,SsCH3,SdCH2,SssCH2,StCH,SdsCH,SaaCH,SsssCH,SddC,StsC,SdssC,SaasC,SaaaC,SssssC,SsNH3,SsNH2,SssNH2,SdNH,SssNH,SaaNH,StN,SsssNH,SdsN,SaaN,SsssN,SddsN,SaasN,SssssN,SsOH,SdO,SssO,SaaO,SsF,SsSiH3,SssSiH2,SsssSiH,SssssSi,SsPH2,SssPH,SsssP,SdsssP,SsssssP,SsSH,SdS,SssS,SaaS,SdssS,SddssS,SsCl,SsGeH3,SssGeH2,SsssGeH,SssssGe,SsAsH2,SssAsH,SsssAs,SsssdAs,SsssssAs,SsSeH,SdSe,SssSe,SaaSe,SdssSe,SddssSe,SsBr,SsSnH3,SssSnH2,SsssSnH,SssssSn,SsI,SsPbH3,SssPbH2,SsssPbH,SssssPb,ECIndex,ETA_alpha,AETA_alpha,ETA_shape_p,ETA_shape_y,ETA_shape_x,ETA_beta,AETA_beta,ETA_beta_s,AETA_beta_s,ETA_beta_ns,AETA_beta_ns,ETA_beta_ns_d,AETA_beta_ns_d,ETA_eta,AETA_eta,ETA_eta_L,AETA_eta_L,ETA_eta_R,AETA_eta_R,ETA_eta_RL,AETA_eta_RL,ETA_eta_F,AETA_eta_F,ETA_eta_FL,AETA_eta_FL,ETA_dAlpha_A,ETA_dAlpha_B,ETA_epsilon_1,ETA_epsilon_2,ETA_epsilon_3,ETA_epsilon_4,ETA_epsilon_5,ETA_dEpsilon_A,ETA_dEpsilon_B,ETA_dEpsilon_C,ETA_dEpsilon_D,ETA_dBeta,AETA_dBeta,ETA_psi_1,ETA_dPsi_A,ETA_dPsi_B,fragCpx,fMF,nHBAcc,nHBDon,IC0,IC1,IC2,IC3,IC4,IC5,TIC0,TIC1,TIC2,TIC3,TIC4,TIC5,SIC0,SIC1,SIC2,SIC3,SIC4,SIC5,BIC0,BIC1,BIC2,BIC3,BIC4,BIC5,CIC0,CIC1,CIC2,CIC3,CIC4,CIC5,MIC0,MIC1,MIC2,MIC3,MIC4,MIC5,ZMIC0,ZMIC1,ZMIC2,ZMIC3,ZMIC4,ZMIC5,FilterItLogS,VMcGowan,LabuteASA,PEOE_VSA1,PEOE_VSA2,PEOE_VSA3,PEOE_VSA4,PEOE_VSA5,PEOE_VSA6,PEOE_VSA7,PEOE_VSA8,PEOE_VSA9,PEOE_VSA10,PEOE_VSA11,PEOE_VSA12,PEOE_VSA13,SMR_VSA1,
SMR_VSA2,SMR_VSA3,SMR_VSA4,SMR_VSA5,SMR_VSA6,SMR_VSA7,SMR_VSA8,SMR_VSA9,SlogP_VSA1,SlogP_VSA2,SlogP_VSA3,SlogP_VSA4,SlogP_VSA5,SlogP_VSA6,SlogP_VSA7,SlogP_VSA8,SlogP_VSA9,SlogP_VSA10,SlogP_VSA11,EState_VSA1,EState_VSA2,EState_VSA3,EState_VSA4,EState_VSA5,EState_VSA6,EState_VSA7,EState_VSA8,EState_VSA9,EState_VSA10,VSA_EState1,VSA_EState2,VSA_EState3,VSA_EState4,VSA_EState5,VSA_EState6,VSA_EState7,VSA_EState8,VSA_EState9,MID,AMID,MID_h,AMID_h,MID_C,AMID_C,MID_N,AMID_N,MID_O,AMID_O,MID_X,AMID_X,MPC2,MPC3,MPC4,MPC5,MPC6,MPC7,MPC8,MPC9,MPC10,TMPC10,piPC1,piPC2,piPC3,piPC4,piPC5,piPC6,piPC7,piPC8,piPC9,piPC10,TpiPC10,apol,bpol,nRing,n3Ring,n4Ring,n5Ring,n6Ring,n7Ring,n8Ring,n9Ring,n10Ring,n11Ring,n12Ring,nG12Ring,nHRing,n3HRing,n4HRing,n5HRing,n6HRing,n7HRing,n8HRing,n9HRing,n10HRing,n11HRing,n12HRing,nG12HRing,naRing,n3aRing,n4aRing,n5aRing,n6aRing,n7aRing,n8aRing,n9aRing,n10aRing,n11aRing,n12aRing,nG12aRing,naHRing,n3aHRing,n4aHRing,n5aHRing,n6aHRing,n7aHRing,n8aHRing,n9aHRing,n10aHRing,n11aHRing,n12aHRing,nG12aHRing,nARing,n3ARing,n4ARing,n5ARing,n6ARing,n7ARing,n8ARing,n9ARing,n10ARing,n11ARing,n12ARing,nG12ARing,nAHRing,n3AHRing,n4AHRing,n5AHRing,n6AHRing,n7AHRing,n8AHRing,n9AHRing,n10AHRing,n11AHRing,n12AHRing,nG12AHRing,nFRing,n4FRing,n5FRing,n6FRing,n7FRing,n8FRing,n9FRing,n10FRing,n11FRing,n12FRing,nG12FRing,nFHRing,n4FHRing,n5FHRing,n6FHRing,n7FHRing,n8FHRing,n9FHRing,n10FHRing,n11FHRing,n12FHRing,nG12FHRing,nFaRing,n4FaRing,n5FaRing,n6FaRing,n7FaRing,n8FaRing,n9FaRing,n10FaRing,n11FaRing,n12FaRing,nG12FaRing,nFaHRing,n4FaHRing,n5FaHRing,n6FaHRing,n7FaHRing,n8FaHRing,n9FaHRing,n10FaHRing,n11FaHRing,n12FaHRing,nG12FaHRing,nFARing,n4FARing,n5FARing,n6FARing,n7FARing,n8FARing,n9FARing,n10FARing,n11FARing,n12FARing,nG12FARing,nFAHRing,n4FAHRing,n5FAHRing,n6FAHRing,n7FAHRing,n8FAHRing,n9FAHRing,n10FAHRing,n11FAHRing,n12FAHRing,nG12FAHRing,nRot,SLogP,SMR,TopoPSA(NO),TopoPSA,GGI1,GGI2,GGI3,GGI4,GGI5,GGI6,GGI7,GGI8,GGI9,GGI10,JGI1,JGI2,JGI3,JGI4,JGI5,JGI6,JGI7,JGI8,JGI
9,JGI10,JGT10,Diameter,Radius,MWC01,MWC02,MWC03,MWC04,MWC05,MWC06,MWC07,MWC08,MWC09,MWC10,TMWC10,SRW02,SRW03,SRW04,SRW05,SRW06,SRW07,SRW08,SRW09,SRW10,TSRW10,MW,AMW,WPath,WPol,Zagreb1,Zagreb2,mZagreb2
0,23.162435,18.322051,0,0,38.352025,2.405459,4.633113,38.352025,1.237162,4.31797,3.489499,0.112564,2.38116,2534.251108,81.750036,6,6,71,31,0,0,5,40,0,26,0,5,0,0,0,0,0,0,0,334.0,293.0,359.0,326.0,304.0,305.0,315.0,265.0,261.0,186.0,238.0,388.0,444.0,436.0,412.0,399.0,389.0,379.0,1296.0,1470.0,2109.0,2255.0,2165.0,2042.0,2040.0,1915.0,1739.0,5071.349711,5400.194312,7469.097873,7554.628723,7175.698468,6728.006682,6874.624535,6269.764255,5682.487945,13336.743108,17319.112156,25119.276534,28104.486253,27166.201585,25885.525612,25128.269697,24362.700584,22810.301338,531.550556,548.022608,998.154804,1445.133988,1505.710528,1463.009208,1386.496148,1386.323932,1342.357852,421.833,451.971,807.0306,1143.932,1183.7621,1148.09,1088.4711,1088.0806,1046.597,417.35,441.6,792.15,1129.91,1170.32,1135.76,1075.71,1076.21,1035.41,93.511936,123.352883,187.800604,219.954598,217.739128,207.447586,200.714774,197.120486,186.800316,11620.615768,10437.800324,20215.163123,31017.085622,33008.349905,32211.594363,30603.813583,30509.751737,30150.601342,4.704225,4.069444,2.699248,2.619718,3.305556,2.917293,18.253521,20.416667,15.857143,71.427461,75.002699,56.158631,187.841452,240.543224,188.866741,7.486628,7.611425,7.504923,5.94131,6.277375,6.067899,5.878169,6.133333,5.956015,1.31707,1.713234,1.412035,163.670645,144.969449,151.993708,1.100602,-0.563221,0.095637,-0.181894,0.167435,-0.14615,0.174182,-0.038466,-0.139212,209.549296,39.2317,-23.507836,-82.408649,-48.486015,-26.228328,23.601865,-25.503075,8.499702,33.661972,7.244198,-7.761159,-11.982543,-4.385638,1.219599,-4.724658,2.17199,9.955763,511.549296,-1.430272,-54.606427,-143.155128,-59.514184,-65.988891,65.249752,-84.094624,14.809562,2435.524004,-2.212449,-275.360242,-672.841753,-285.478936,-303.146019,302.024345,-400.573124,82.637659,3595.778728,134.64791,-848.062364,-654.202082,-433.8678,-103.614569,73.311493,-492.60365,465.589595,5.033997,-0.336104,0.645624,-1.60513,-0.160248,-1.352356,0.636742,-0.25217,-0.812706,7.575958,-0.437563,0.667947,-
2.581392,-0.469385,-1.921094,1.264184,-0.78212,-0.965553,7.909859,-0.495914,0.855475,-2.661871,-0.400442,-2.08144,1.235303,-0.669488,-1.139355,16.172994,0.830058,-4.424045,-1.963921,-1.687642,0.165423,-0.903561,-1.477526,2.541416,90.258227,4.749431,-24.812183,-9.684333,-8.771142,1.381203,-6.811027,-6.824659,14.215622,0.015501,-0.007823,0.000719,2.951399,0.544885,-0.176751,0.474112,0.100614,-0.058355,7.20492,-0.019865,-0.410575,34.303155,-0.030728,-2.070378,50.644771,1.87011,-6.376409,0.070901,-0.004668,0.004854,0.106704,-0.006077,0.005022,0.111406,-0.006888,0.006432,0.227789,0.011529,-0.033263,1.271243,0.065964,-0.186558,-0.504631,0.046388,-0.002757,-0.056985,-0.000896,-0.060355,0.036926,-0.125905,-0.06584,0.068466,-0.056955,0.047066,-0.061825,0.057736,0.050611,-0.146028,0.05189,-0.146752,1.438071,0.822106,1.041498,1.060247,1.04767,1.069237,1.187875,1.252388,0.889158,0.757339,0.891758,0.799409,0.884972,0.77618,1.256774,1.319229,1.269056,1.325658,0.42183,-0.49522,6.028299,0.988933,3.176609,0.977796,8.029168,5.694127,16.014641,11.705129,20.892484,14.700939,3.714268,2.440125,3.501349,2.244125,3.55528,2.194125,1.983194,0.741453,13.643101,10.954428,1.609205,434.320549,217.785275,332.952272,435.489904,14.048061,217.785275,1.25,5.481242,0.176814,2.832734,190.140688,6.133571,434.355445,217.800886,332.970898,435.521365,14.049076,217.800886,1.246328,5.48123,0.176814,2.832732,190.142894,6.133642,465.687644,231.846336,349.935819,464.078795,14.970284,231.846336,-1.994972,5.469037,0.176421,2.830505,192.115131,6.197262,434.39207,217.817272,332.990447,435.554384,14.050141,217.817272,1.242474,5.481217,0.176813,2.832729,190.145209,6.133716,433.906289,217.599947,332.731206,435.116436,14.036014,217.599947,1.293605,5.481383,0.176819,2.83276,190.1145,6.132726,432.625378,217.026974,332.048183,433.961783,13.998767,217.026974,1.428571,5.481816,0.176833,2.832839,190.033553,6.130115,499.695113,247.141821,368.910687,495.331023,15.97842,247.141821,-5.411471,5.453925,0.175933,2.827738,194.178934
,6.263837,437.980697,219.423184,334.909104,438.790519,14.154533,219.423184,0.865671,5.479964,0.176773,2.832501,190.372185,6.141038,657.020986,72,32,64,2,0,6,8,67,5,0.170327,0.112414,0,0,1,7,1,2,13,2,0,0.653846,0.0,0.0,0.078567,0.370308,0.49081,0.0,0.0,0.078567,0.213459,0.248417,1.933293,0.0,0.248006,0.0,1.024562,0.0,0.141622,0.0,3.332069,5.147289,6.344126,1.826576,2.558036,2.881092,14.863081,13.008172,9.39873,7.543503,5.174506,3.204995,2.146811,11.790358,9.091504,6.157417,4.470438,2.888509,1.772376,1.132953,39.333333,36.017068,40.410552,70.410051,67.254902,68.2,44.372287,80.352723,0.553991,0.507283,0.569163,0.991691,0.947252,0.960563,0.624962,1.131728,620.175919,310.08796,470.400915,620.175919,20.005675,310.08796,0.0,5.47568,0.176635,2.831718,190.263041,6.137517,4624,474.716762,237.358381,359.619606,474.716762,15.313444,237.358381,5.471835,0.176511,2.831016,191.49392,6.177223,0,0,0,0,0,0,2,0,9,0,2,5,6,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3.690163,0.0,5.970271,0.0,4.101567,10.140683,-1.474784,0.0,0.0,-0.163312,1.220699,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,31.181981,11.543842,5.122224,0.0,0.0,0,0,0,0,0,0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,0,0,0.0,0,0,0,0,953,14.666667,0.473118,0.159091,0.272727,0.0,26.0,0.83871,17.5,0.564516,8.5,0.274194,0.0,0.0,38.481445,1.241337,10.459523,0.337404,56.502375,1.822657,14.863081,0.479454,18.020931,0.58132,4.403558,0.14205,0.0,0.026882,0.528638,0.823656,0.436264,0.505485,0.777451,0.092375,0.023153,-0.069221,0.046205,-9.0,-0.290323,0.574413,0.139587,0.0,94.05,0.225352,5,3,1.266674,3.0288,4.507446,5.431162,5.62144,5.649609,89.933849,215.044787,320.028687,385.612495,399.12227,401.12227,0.205972,0.492508,0.732948,0.883152,0.914093,0.918673,0.202125,0.48331,0.71926,0.866658,0.897021,0.901516,4.883073,3.120947,1.642301,0.718585,0.528307,0.500138,11.157499,26.312655,33.144141,3
6.013677,36.627759,36.656154,112.231902,58.312218,35.615285,25.289142,23.599521,23.482627,-4.162475,363.33,186.69258,20.056445,4.794537,0.0,0.0,0.0,42.483876,82.613311,6.420822,24.415866,0.0,0.0,0.0,0.0,24.850982,0.0,0.0,11.835812,96.050735,0.0,48.047327,0.0,0.0,0.0,45.704753,15.952222,11.835812,70.777499,42.483876,0.0,0.0,0.0,0.0,0.0,18.311899,23.909084,38.52493,19.262465,5.563451,0.0,38.122596,18.208754,4.736863,20.114119,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,71.333333,61.604248,1.987234,8.993107,0.2901,52.611141,1.697134,0.0,0.0,8.993107,0.2901,0.0,0.0,41,47,56,58,53,53,54,57,59,541,3.637586,3.998201,4.259153,4.581134,4.835786,4.625585,4.685021,4.776494,4.907125,4.947429,6.921597,74.10172,42.73228,2,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,13,4.1864,122.4504,86.99,86.99,6.0,2.888889,2.201389,1.041111,0.836944,0.53483,0.309063,0.286226,0.273758,0.204069,0.1875,0.070461,0.056446,0.02892,0.024616,0.015281,0.00997,0.00987,0.008555,0.006184,0.417802,20,10,32.0,4.990433,5.777652,6.608001,7.416378,8.254529,9.076009,9.92,10.751607,11.600689,137.395298,4.174387,0.0,5.433722,2.397895,6.853299,4.844187,8.351611,6.985642,9.897721,79.938464,432.287574,6.088557,3531,39,146.0,161.0,7.083333


In [121]:
# Both splits must expose the same descriptor columns, in the same order.
train_mordred.columns.tolist() == test_mordred.columns.tolist()

True

In [122]:
# Give the mordred tables the same row index as the splits they were computed
# from (the right-hand side is evaluated first, then train is assigned before test).
train_mordred.index, test_mordred.index = train.index, test.index

In [139]:
# Sanity check: (rows, columns) of the train and test mordred tables before saving.
tuple(frame.shape for frame in (train_mordred, test_mordred))

((8221, 1071), (2849, 1071))

In [124]:
# Save both mordred descriptor tables as CSV files.
train_mordred.to_csv(path_or_buf='../data/Descriptors/train_mordred.csv')
test_mordred.to_csv(path_or_buf='../data/Descriptors/test_mordred.csv')