In [1]:
from rdkit import Chem
from mordred import Calculator, descriptors
import numpy as np
import pandas as pd

Read in the SMILES of the acids and generate Mordred descriptors for them

In [2]:
# Load the acid substrates: the first CSV column is used as the index,
# and that index is what gets turned into the list of SMILES strings.
acids_table = pd.read_csv("amide_smiles_substrates_acids.csv", index_col=0, header=0)
smiles_acids = acids_table.index.to_list()
print(f"{len(smiles_acids)} acid SMILES read in.")

66 acid SMILES read in.


In [3]:
# calculate the mordred descriptors
# Build the calculator with all (2D) descriptors once, up front: its construction
# does not depend on the compound, so repeating it per molecule is wasted work.
calc = Calculator(descriptors, ignore_3D=True)
mordred_list = []
for compound in smiles_acids:
    mol = Chem.MolFromSmiles(compound)
    # RDKit signals an unparsable SMILES by returning None instead of raising;
    # stop here and name the offending string rather than failing inside Mordred.
    if mol is None:
        raise ValueError(f"Could not parse acid SMILES: {compound!r}")
    descript = calc(mol)
    mordred_list.append(descript)
print("Finished calculating descriptors!")

Finished calculating descriptors!


In [4]:
# Assemble the descriptor table: one row per acid SMILES, one column per descriptor.
# Column labels are the string names of the descriptors registered in a calculator
# configured the same way as the one used for the calculation.
descriptor_names = [str(d) for d in Calculator(descriptors, ignore_3D=True).descriptors]
df_acids = pd.DataFrame(mordred_list, index=smiles_acids, columns=descriptor_names)
df_acids

Unnamed: 0,ABC,ABCGG,nAcid,nBase,SpAbs_A,SpMax_A,SpDiam_A,SpAD_A,SpMAD_A,LogEE_A,...,SRW10,TSRW10,MW,AMW,WPath,WPol,Zagreb1,Zagreb2,mZagreb1,mZagreb2
O=C(O)c1cc2ccccc2s1,9.330288,8.453234,1,0,15.365761,2.394250,4.587777,15.365761,1.280480,3.429425,...,9.289798,56.905860,178.008850,9.889381,188,15,62.0,72.0,3.944444,2.638889
O=C(O)c1ccco1,5.835194,6.019630,1,0,9.674988,2.210509,4.274586,9.674988,1.209374,2.984380,...,8.384119,48.703786,112.016044,9.334670,62,6,36.0,39.0,3.222222,1.861111
O=C(O)c1cccc(-c2ccccc2)c1,11.451608,9.788842,1,0,19.572548,2.331546,4.663092,19.572548,1.304837,3.627237,...,9.411892,46.715917,198.068080,7.922723,370,20,74.0,84.0,4.694444,3.388889
CC(C(=O)O)c1ccc(-c2ccccc2)c(F)c1,13.710828,11.699850,1,0,22.772427,2.381929,4.763858,22.772427,1.265135,3.804316,...,9.700085,50.858805,244.089958,7.873870,626,27,90.0,104.0,6.666667,4.027778
O=C(O)C(c1ccccc1)c1ccccc1,12.118275,10.687156,1,0,20.579396,2.358294,4.716589,20.579396,1.286212,3.686348,...,9.487593,48.009261,212.083730,7.574419,423,22,78.0,89.0,4.944444,3.666667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
CCCCOc1ccc(C(=O)O)cc1,10.077834,8.859579,1,0,17.187439,2.258502,4.517004,17.187439,1.227674,3.514325,...,9.016634,44.411164,194.094294,6.931939,358,16,62.0,67.0,5.333333,3.361111
O=C(O)c1ccc(-c2ccccc2)cc1,11.451608,9.568298,1,0,19.577709,2.322303,4.644606,19.577709,1.305181,3.627231,...,9.402035,46.702792,198.068080,7.922723,388,20,74.0,84.0,4.694444,3.388889
O=C(O)C1CCC(F)(F)CC1,8.274351,7.693441,1,0,12.527341,2.311476,4.622953,12.527341,1.138849,3.302522,...,9.182249,41.326257,164.064886,7.812614,154,13,54.0,59.0,5.284722,2.361111
O=C(O)c1ccncc1Cl,7.318357,7.069195,1,0,11.945822,2.267184,4.534368,11.945822,1.194582,3.197666,...,8.912069,39.310842,156.993056,11.213790,114,12,46.0,51.0,4.333333,2.305556


In [5]:
# Remove descriptor columns that hold missing / non-numerical data.
# A column is flagged when pandas stored it with the generic object dtype, or when
# at least one of its cells is a str; the notebook treats both as signs of a failed
# or missing descriptor value.
removed_columns = [
    name
    for name in df_acids.columns
    if df_acids[name].dtype == object
    or df_acids[name].apply(lambda value: isinstance(value, str)).any()
]

# Report which columns are about to be discarded
print(f"Removed {len(removed_columns)} columns containing string values (indicating errors and missing values):")
print(f"{removed_columns}")

# Discard them (the frame is modified in place) and show the result
df_acids.drop(columns=removed_columns, inplace=True)
df_acids

Removed 281 columns containing string values (indicating errors and missing values):
['AATS7dv', 'AATS8dv', 'AATS7d', 'AATS8d', 'AATS7s', 'AATS8s', 'AATS7Z', 'AATS8Z', 'AATS7m', 'AATS8m', 'AATS7v', 'AATS8v', 'AATS7se', 'AATS8se', 'AATS7pe', 'AATS8pe', 'AATS7are', 'AATS8are', 'AATS7p', 'AATS8p', 'AATS7i', 'AATS8i', 'AATSC7c', 'AATSC8c', 'AATSC7dv', 'AATSC8dv', 'AATSC7d', 'AATSC8d', 'AATSC7s', 'AATSC8s', 'AATSC7Z', 'AATSC8Z', 'AATSC7m', 'AATSC8m', 'AATSC7v', 'AATSC8v', 'AATSC7se', 'AATSC8se', 'AATSC7pe', 'AATSC8pe', 'AATSC7are', 'AATSC8are', 'AATSC7p', 'AATSC8p', 'AATSC7i', 'AATSC8i', 'MATS7c', 'MATS8c', 'MATS7dv', 'MATS8dv', 'MATS7d', 'MATS8d', 'MATS7s', 'MATS8s', 'MATS7Z', 'MATS8Z', 'MATS7m', 'MATS8m', 'MATS7v', 'MATS8v', 'MATS7se', 'MATS8se', 'MATS7pe', 'MATS8pe', 'MATS7are', 'MATS8are', 'MATS7p', 'MATS8p', 'MATS7i', 'MATS8i', 'GATS7c', 'GATS8c', 'GATS7dv', 'GATS8dv', 'GATS7d', 'GATS8d', 'GATS7s', 'GATS8s', 'GATS7Z', 'GATS8Z', 'GATS7m', 'GATS8m', 'GATS7v', 'GATS8v', 'GATS7se', 'GATS8s

Unnamed: 0,ABC,ABCGG,nAcid,nBase,SpAbs_A,SpMax_A,SpDiam_A,SpAD_A,SpMAD_A,LogEE_A,...,SRW10,TSRW10,MW,AMW,WPath,WPol,Zagreb1,Zagreb2,mZagreb1,mZagreb2
O=C(O)c1cc2ccccc2s1,9.330288,8.453234,1,0,15.365761,2.394250,4.587777,15.365761,1.280480,3.429425,...,9.289798,56.905860,178.008850,9.889381,188,15,62.0,72.0,3.944444,2.638889
O=C(O)c1ccco1,5.835194,6.019630,1,0,9.674988,2.210509,4.274586,9.674988,1.209374,2.984380,...,8.384119,48.703786,112.016044,9.334670,62,6,36.0,39.0,3.222222,1.861111
O=C(O)c1cccc(-c2ccccc2)c1,11.451608,9.788842,1,0,19.572548,2.331546,4.663092,19.572548,1.304837,3.627237,...,9.411892,46.715917,198.068080,7.922723,370,20,74.0,84.0,4.694444,3.388889
CC(C(=O)O)c1ccc(-c2ccccc2)c(F)c1,13.710828,11.699850,1,0,22.772427,2.381929,4.763858,22.772427,1.265135,3.804316,...,9.700085,50.858805,244.089958,7.873870,626,27,90.0,104.0,6.666667,4.027778
O=C(O)C(c1ccccc1)c1ccccc1,12.118275,10.687156,1,0,20.579396,2.358294,4.716589,20.579396,1.286212,3.686348,...,9.487593,48.009261,212.083730,7.574419,423,22,78.0,89.0,4.944444,3.666667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
CCCCOc1ccc(C(=O)O)cc1,10.077834,8.859579,1,0,17.187439,2.258502,4.517004,17.187439,1.227674,3.514325,...,9.016634,44.411164,194.094294,6.931939,358,16,62.0,67.0,5.333333,3.361111
O=C(O)c1ccc(-c2ccccc2)cc1,11.451608,9.568298,1,0,19.577709,2.322303,4.644606,19.577709,1.305181,3.627231,...,9.402035,46.702792,198.068080,7.922723,388,20,74.0,84.0,4.694444,3.388889
O=C(O)C1CCC(F)(F)CC1,8.274351,7.693441,1,0,12.527341,2.311476,4.622953,12.527341,1.138849,3.302522,...,9.182249,41.326257,164.064886,7.812614,154,13,54.0,59.0,5.284722,2.361111
O=C(O)c1ccncc1Cl,7.318357,7.069195,1,0,11.945822,2.267184,4.534368,11.945822,1.194582,3.197666,...,8.912069,39.310842,156.993056,11.213790,114,12,46.0,51.0,4.333333,2.305556


In [None]:
# Write the acid descriptor table to disk, keeping both the SMILES index and the header row
acids_csv_path = "./../1_Dataset_Generation/Data_For_Individual_Substrates/amide_mordred_descr_acids.csv"
df_acids.to_csv(acids_csv_path, index=True, header=True)

Read in the SMILES of the amines and generate Mordred descriptors for them

In [7]:
# Load the amine substrates: the first CSV column is used as the index,
# and that index is what gets turned into the list of SMILES strings.
amines_table = pd.read_csv("amide_smiles_substrates_amines.csv", index_col=0, header=0)
smiles_amines = amines_table.index.to_list()
print(f"{len(smiles_amines)} amine SMILES read in.")

70 amine SMILES read in.


In [8]:
# calculate the mordred descriptors
# Build the calculator with all (2D) descriptors once, up front: its construction
# does not depend on the compound, so repeating it per molecule is wasted work.
calc = Calculator(descriptors, ignore_3D=True)
mordred_list = []
for compound in smiles_amines:
    mol = Chem.MolFromSmiles(compound)
    # RDKit signals an unparsable SMILES by returning None instead of raising;
    # stop here and name the offending string rather than failing inside Mordred.
    if mol is None:
        raise ValueError(f"Could not parse amine SMILES: {compound!r}")
    descript = calc(mol)
    mordred_list.append(descript)
print("Finished calculating descriptors!")

Finished calculating descriptors!


In [9]:
# Assemble the descriptor table: one row per amine SMILES, one column per descriptor.
# Column labels are the string names of the descriptors registered in a calculator
# configured the same way as the one used for the calculation.
descriptor_names = [str(d) for d in Calculator(descriptors, ignore_3D=True).descriptors]
df_amines = pd.DataFrame(mordred_list, index=smiles_amines, columns=descriptor_names)
df_amines

Unnamed: 0,ABC,ABCGG,nAcid,nBase,SpAbs_A,SpMax_A,SpDiam_A,SpAD_A,SpMAD_A,LogEE_A,...,SRW10,TSRW10,MW,AMW,WPath,WPol,Zagreb1,Zagreb2,mZagreb1,mZagreb2
Cc1ccc(N)nc1,5.875634,5.525875,0,0,9.924777,2.170086,4.340173,9.924777,1.240597,2.979730,...,8.463159,35.730685,108.068748,6.754297,62,7,36.0,38.0,3.222222,1.833333
NCc1ccc(F)cc1F,7.249407,6.976306,0,1,11.945822,2.267184,4.534368,11.945822,1.194582,3.197666,...,8.912069,39.310842,143.054656,8.414980,116,12,46.0,51.0,4.333333,2.361111
NCc1ccc(Cl)cc1,6.473351,6.127583,0,1,11.189957,2.193993,4.387987,11.189957,1.243329,3.089765,...,8.590258,37.289972,141.034527,8.296149,90,9,40.0,43.0,3.472222,2.166667
COc1ccc(CN)cc1,7.071068,6.693944,0,1,12.857279,2.214320,4.428639,12.857279,1.285728,3.188884,...,8.703009,38.792641,137.084064,6.527813,125,11,44.0,48.0,3.722222,2.500000
COc1ccc(N)cn1,6.473351,6.127583,0,0,11.189957,2.193993,4.387987,11.189957,1.243329,3.089765,...,8.590258,37.289972,124.063663,7.297863,90,9,40.0,43.0,3.472222,2.166667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
CC(C)NC(C)C,4.680200,4.942478,0,1,6.828427,2.000000,4.000000,6.828427,0.975490,2.765108,...,7.655864,32.211905,101.120449,4.596384,48,4,26.0,24.0,4.472222,1.666667
Cc1ccc(S(N)(=O)=O)cc1,8.302711,7.763360,0,0,12.527341,2.311476,4.622953,12.527341,1.138849,3.302522,...,9.182249,41.326257,171.035400,8.551770,152,13,54.0,59.0,5.284722,2.333333
Nc1ccc(Cl)cn1,5.875634,5.525875,0,0,9.924777,2.170086,4.340173,9.924777,1.240597,2.979730,...,8.463159,35.730685,128.014126,9.847240,62,7,36.0,38.0,3.222222,1.833333
NCC1CC1,3.535534,3.206125,0,1,6.428639,2.214320,3.889451,6.428639,1.285728,2.639903,...,8.010028,48.538567,71.073499,5.076679,17,2,22.0,24.0,1.861111,1.250000


In [10]:
# Remove descriptor columns that hold missing / non-numerical data.
# A column is flagged when pandas stored it with the generic object dtype, or when
# at least one of its cells is a str; the notebook treats both as signs of a failed
# or missing descriptor value.
removed_columns = [
    name
    for name in df_amines.columns
    if df_amines[name].dtype == object
    or df_amines[name].apply(lambda value: isinstance(value, str)).any()
]

# Report which columns are about to be discarded
print(f"Removed {len(removed_columns)} columns containing string values (indicating errors and missing values):")
print(f"{removed_columns}")

# Discard them (the frame is modified in place) and show the result
df_amines.drop(columns=removed_columns, inplace=True)
df_amines

Removed 387 columns containing string values (indicating errors and missing values):
['AATS5dv', 'AATS6dv', 'AATS7dv', 'AATS8dv', 'AATS5d', 'AATS6d', 'AATS7d', 'AATS8d', 'AATS5s', 'AATS6s', 'AATS7s', 'AATS8s', 'AATS5Z', 'AATS6Z', 'AATS7Z', 'AATS8Z', 'AATS5m', 'AATS6m', 'AATS7m', 'AATS8m', 'AATS5v', 'AATS6v', 'AATS7v', 'AATS8v', 'AATS5se', 'AATS6se', 'AATS7se', 'AATS8se', 'AATS5pe', 'AATS6pe', 'AATS7pe', 'AATS8pe', 'AATS5are', 'AATS6are', 'AATS7are', 'AATS8are', 'AATS5p', 'AATS6p', 'AATS7p', 'AATS8p', 'AATS5i', 'AATS6i', 'AATS7i', 'AATS8i', 'AATSC5c', 'AATSC6c', 'AATSC7c', 'AATSC8c', 'AATSC5dv', 'AATSC6dv', 'AATSC7dv', 'AATSC8dv', 'AATSC5d', 'AATSC6d', 'AATSC7d', 'AATSC8d', 'AATSC5s', 'AATSC6s', 'AATSC7s', 'AATSC8s', 'AATSC5Z', 'AATSC6Z', 'AATSC7Z', 'AATSC8Z', 'AATSC5m', 'AATSC6m', 'AATSC7m', 'AATSC8m', 'AATSC5v', 'AATSC6v', 'AATSC7v', 'AATSC8v', 'AATSC5se', 'AATSC6se', 'AATSC7se', 'AATSC8se', 'AATSC5pe', 'AATSC6pe', 'AATSC7pe', 'AATSC8pe', 'AATSC5are', 'AATSC6are', 'AATSC7are', 'AATSC8

Unnamed: 0,ABC,ABCGG,nAcid,nBase,SpAbs_A,SpMax_A,SpDiam_A,SpAD_A,SpMAD_A,LogEE_A,...,SRW10,TSRW10,MW,AMW,WPath,WPol,Zagreb1,Zagreb2,mZagreb1,mZagreb2
Cc1ccc(N)nc1,5.875634,5.525875,0,0,9.924777,2.170086,4.340173,9.924777,1.240597,2.979730,...,8.463159,35.730685,108.068748,6.754297,62,7,36.0,38.0,3.222222,1.833333
NCc1ccc(F)cc1F,7.249407,6.976306,0,1,11.945822,2.267184,4.534368,11.945822,1.194582,3.197666,...,8.912069,39.310842,143.054656,8.414980,116,12,46.0,51.0,4.333333,2.361111
NCc1ccc(Cl)cc1,6.473351,6.127583,0,1,11.189957,2.193993,4.387987,11.189957,1.243329,3.089765,...,8.590258,37.289972,141.034527,8.296149,90,9,40.0,43.0,3.472222,2.166667
COc1ccc(CN)cc1,7.071068,6.693944,0,1,12.857279,2.214320,4.428639,12.857279,1.285728,3.188884,...,8.703009,38.792641,137.084064,6.527813,125,11,44.0,48.0,3.722222,2.500000
COc1ccc(N)cn1,6.473351,6.127583,0,0,11.189957,2.193993,4.387987,11.189957,1.243329,3.089765,...,8.590258,37.289972,124.063663,7.297863,90,9,40.0,43.0,3.472222,2.166667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
CC(C)NC(C)C,4.680200,4.942478,0,1,6.828427,2.000000,4.000000,6.828427,0.975490,2.765108,...,7.655864,32.211905,101.120449,4.596384,48,4,26.0,24.0,4.472222,1.666667
Cc1ccc(S(N)(=O)=O)cc1,8.302711,7.763360,0,0,12.527341,2.311476,4.622953,12.527341,1.138849,3.302522,...,9.182249,41.326257,171.035400,8.551770,152,13,54.0,59.0,5.284722,2.333333
Nc1ccc(Cl)cn1,5.875634,5.525875,0,0,9.924777,2.170086,4.340173,9.924777,1.240597,2.979730,...,8.463159,35.730685,128.014126,9.847240,62,7,36.0,38.0,3.222222,1.833333
NCC1CC1,3.535534,3.206125,0,1,6.428639,2.214320,3.889451,6.428639,1.285728,2.639903,...,8.010028,48.538567,71.073499,5.076679,17,2,22.0,24.0,1.861111,1.250000


In [None]:
# Write the amine descriptor table to disk, keeping both the SMILES index and the header row
amines_csv_path = "./../1_Dataset_Generation/Data_For_Individual_Substrates/amide_mordred_descr_amines.csv"
df_amines.to_csv(amines_csv_path, index=True, header=True)

Read in the SMILES of the amide products and generate Mordred descriptors for them

In [12]:
# Load the amide products: the first CSV column is used as the index,
# and that index is what gets turned into the list of SMILES strings.
products_table = pd.read_csv("amide_smiles_products.csv", index_col=0, header=0)
smiles_amide = products_table.index.to_list()
print(f"{len(smiles_amide)} amide product SMILES read in.")

632 amide product SMILES read in.


In [13]:
# calculate the mordred descriptors
# Build the calculator with all (2D) descriptors once, up front: its construction
# does not depend on the compound, so repeating it per molecule is wasted work
# (this loop runs over the largest of the three SMILES lists).
calc = Calculator(descriptors, ignore_3D=True)
mordred_list = []
for compound in smiles_amide:
    mol = Chem.MolFromSmiles(compound)
    # RDKit signals an unparsable SMILES by returning None instead of raising;
    # stop here and name the offending string rather than failing inside Mordred.
    if mol is None:
        raise ValueError(f"Could not parse amide product SMILES: {compound!r}")
    descript = calc(mol)
    mordred_list.append(descript)
print("Finished calculating descriptors!")

Finished calculating descriptors!


In [14]:
# Assemble the descriptor table: one row per amide product SMILES, one column per descriptor.
# Column labels are the string names of the descriptors registered in a calculator
# configured the same way as the one used for the calculation.
descriptor_names = [str(d) for d in Calculator(descriptors, ignore_3D=True).descriptors]
df_amide = pd.DataFrame(mordred_list, index=smiles_amide, columns=descriptor_names)
df_amide

Unnamed: 0,ABC,ABCGG,nAcid,nBase,SpAbs_A,SpMax_A,SpDiam_A,SpAD_A,SpMAD_A,LogEE_A,...,SRW10,TSRW10,MW,AMW,WPath,WPol,Zagreb1,Zagreb2,mZagreb1,mZagreb2
Cc1ccc(NC(=O)c2cc3ccccc3s2)nc1,14.987142,12.089554,0,0,25.057822,2.409377,4.664467,25.057822,1.318833,3.887008,...,9.751676,66.251875,268.067034,8.647324,757,26,100.0,116.0,5.416667,4.138889
O=C(NCc1ccc(F)cc1F)c1ccco1,12.975211,11.464397,0,0,21.368579,2.302776,4.592263,21.368579,1.256975,3.748631,...,9.467073,62.336153,237.060135,9.117697,573,21,84.0,95.0,5.805556,3.805556
O=C(NCc1ccc(Cl)cc1)c1cccc(-c2ccccc2)c1,17.815569,13.586014,0,0,30.488107,2.344173,4.688345,30.488107,1.325570,4.056885,...,9.837935,56.907601,321.092042,8.233129,1356,32,116.0,132.0,6.416667,5.138889
COc1ccc(CNC(=O)C(C)c2ccc(-c3ccccc3)c(F)c2)cc1,20.672506,15.715567,0,0,35.333952,2.387284,4.774568,35.333952,1.308665,4.210188,...,10.063009,61.821053,363.163457,7.411499,2174,41,136.0,157.0,8.638889,6.111111
COc1ccc(NC(=O)c2cccc(-c3ccccc3)c2)cn1,17.706179,13.604439,0,0,30.595467,2.352819,4.705637,30.595467,1.330238,4.057059,...,9.864279,56.958114,304.121178,7.797979,1328,33,116.0,133.0,6.416667,5.222222
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
COc1ccc(NC(=O)C2CCN(C(=O)OCc3ccccc3)CC2)cn1,20.643996,15.100847,0,0,35.531837,2.329484,4.658967,35.531837,1.315994,4.206702,...,9.946499,61.563140,369.168856,7.383377,2298,38,134.0,152.0,8.027778,6.138889
CON(C)C(=O)c1c(C)cc(C)cc1C,10.991790,10.649785,0,0,17.552658,2.388499,4.776997,17.552658,1.170177,3.598520,...,9.538996,46.851095,207.125929,6.472685,352,23,72.0,83.0,7.416667,3.444444
Cc1ccc(S(=O)(=O)NC(=O)c2ccc3nc(C)ccc3c2)cc1,18.888293,14.757306,0,0,30.038647,2.413739,4.827478,30.038647,1.251610,4.110196,...,10.156306,58.806143,340.088163,8.502204,1424,38,128.0,149.0,8.590278,5.097222
Cc1ccc(Cl)c(NC(=O)c2ccc3nccnc3c2)c1,16.470305,12.954033,0,0,27.472773,2.383768,4.767535,27.472773,1.308227,3.978931,...,9.933386,54.914940,297.066890,9.002027,970,32,110.0,128.0,6.527778,4.583333


In [15]:
# Remove descriptor columns that hold missing / non-numerical data.
# A column is flagged when pandas stored it with the generic object dtype, or when
# at least one of its cells is a str; the notebook treats both as signs of a failed
# or missing descriptor value.
removed_columns = [
    name
    for name in df_amide.columns
    if df_amide[name].dtype == object
    or df_amide[name].apply(lambda value: isinstance(value, str)).any()
]

# Report which columns are about to be discarded
print(f"Removed {len(removed_columns)} columns containing string values (indicating errors and missing values):")
print(f"{removed_columns}")

# Discard them (the frame is modified in place) and show the result
df_amide.drop(columns=removed_columns, inplace=True)
df_amide

Removed 186 columns containing string values (indicating errors and missing values):
['SpAbs_Dt', 'SpMax_Dt', 'SpDiam_Dt', 'SpAD_Dt', 'SpMAD_Dt', 'LogEE_Dt', 'SM1_Dt', 'VE1_Dt', 'VE2_Dt', 'VE3_Dt', 'VR1_Dt', 'VR2_Dt', 'VR3_Dt', 'DetourIndex', 'MAXsLi', 'MAXssBe', 'MAXssssBe', 'MAXssBH', 'MAXsssB', 'MAXssssB', 'MAXsCH3', 'MAXdCH2', 'MAXssCH2', 'MAXtCH', 'MAXdsCH', 'MAXaaCH', 'MAXsssCH', 'MAXddC', 'MAXtsC', 'MAXaasC', 'MAXaaaC', 'MAXssssC', 'MAXsNH3', 'MAXsNH2', 'MAXssNH2', 'MAXdNH', 'MAXssNH', 'MAXaaNH', 'MAXtN', 'MAXsssNH', 'MAXdsN', 'MAXaaN', 'MAXsssN', 'MAXddsN', 'MAXaasN', 'MAXssssN', 'MAXsOH', 'MAXssO', 'MAXaaO', 'MAXsF', 'MAXsSiH3', 'MAXssSiH2', 'MAXsssSiH', 'MAXssssSi', 'MAXsPH2', 'MAXssPH', 'MAXsssP', 'MAXdsssP', 'MAXsssssP', 'MAXsSH', 'MAXdS', 'MAXssS', 'MAXaaS', 'MAXdssS', 'MAXddssS', 'MAXsCl', 'MAXsGeH3', 'MAXssGeH2', 'MAXsssGeH', 'MAXssssGe', 'MAXsAsH2', 'MAXssAsH', 'MAXsssAs', 'MAXsssdAs', 'MAXsssssAs', 'MAXsSeH', 'MAXdSe', 'MAXssSe', 'MAXaaSe', 'MAXdssSe', 'MAXddssSe', 'MA

Unnamed: 0,ABC,ABCGG,nAcid,nBase,SpAbs_A,SpMax_A,SpDiam_A,SpAD_A,SpMAD_A,LogEE_A,...,SRW10,TSRW10,MW,AMW,WPath,WPol,Zagreb1,Zagreb2,mZagreb1,mZagreb2
Cc1ccc(NC(=O)c2cc3ccccc3s2)nc1,14.987142,12.089554,0,0,25.057822,2.409377,4.664467,25.057822,1.318833,3.887008,...,9.751676,66.251875,268.067034,8.647324,757,26,100.0,116.0,5.416667,4.138889
O=C(NCc1ccc(F)cc1F)c1ccco1,12.975211,11.464397,0,0,21.368579,2.302776,4.592263,21.368579,1.256975,3.748631,...,9.467073,62.336153,237.060135,9.117697,573,21,84.0,95.0,5.805556,3.805556
O=C(NCc1ccc(Cl)cc1)c1cccc(-c2ccccc2)c1,17.815569,13.586014,0,0,30.488107,2.344173,4.688345,30.488107,1.325570,4.056885,...,9.837935,56.907601,321.092042,8.233129,1356,32,116.0,132.0,6.416667,5.138889
COc1ccc(CNC(=O)C(C)c2ccc(-c3ccccc3)c(F)c2)cc1,20.672506,15.715567,0,0,35.333952,2.387284,4.774568,35.333952,1.308665,4.210188,...,10.063009,61.821053,363.163457,7.411499,2174,41,136.0,157.0,8.638889,6.111111
COc1ccc(NC(=O)c2cccc(-c3ccccc3)c2)cn1,17.706179,13.604439,0,0,30.595467,2.352819,4.705637,30.595467,1.330238,4.057059,...,9.864279,56.958114,304.121178,7.797979,1328,33,116.0,133.0,6.416667,5.222222
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
COc1ccc(NC(=O)C2CCN(C(=O)OCc3ccccc3)CC2)cn1,20.643996,15.100847,0,0,35.531837,2.329484,4.658967,35.531837,1.315994,4.206702,...,9.946499,61.563140,369.168856,7.383377,2298,38,134.0,152.0,8.027778,6.138889
CON(C)C(=O)c1c(C)cc(C)cc1C,10.991790,10.649785,0,0,17.552658,2.388499,4.776997,17.552658,1.170177,3.598520,...,9.538996,46.851095,207.125929,6.472685,352,23,72.0,83.0,7.416667,3.444444
Cc1ccc(S(=O)(=O)NC(=O)c2ccc3nc(C)ccc3c2)cc1,18.888293,14.757306,0,0,30.038647,2.413739,4.827478,30.038647,1.251610,4.110196,...,10.156306,58.806143,340.088163,8.502204,1424,38,128.0,149.0,8.590278,5.097222
Cc1ccc(Cl)c(NC(=O)c2ccc3nccnc3c2)c1,16.470305,12.954033,0,0,27.472773,2.383768,4.767535,27.472773,1.308227,3.978931,...,9.933386,54.914940,297.066890,9.002027,970,32,110.0,128.0,6.527778,4.583333


In [None]:
# Write the amide product descriptor table to disk, keeping both the SMILES index and the header row
products_csv_path = "./../1_Dataset_Generation/Data_For_Individual_Substrates/amide_mordred_descr_prods.csv"
df_amide.to_csv(products_csv_path, index=True, header=True)