In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
import lazypredict
from lazypredict.Supervised import LazyClassifier
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

  from pandas import MultiIndex, Int64Index


In [2]:
# Set path to this notebook
HERE = Path(_dh[-1])
HDAC1 = Path(HERE).resolve().parents[1]/'input'
output = HERE/'OUTPUT'

In [3]:
df1 = pd.read_pickle(HDAC1/"HDAC1_1024B.csv")
df1 = df1.sample(frac=1).reset_index(drop=True)
df1.head(5)

Unnamed: 0,molecule_chembl_id,fp_MACCS,fp_Morgan3,fp_MorganF,fp_MAP4,pChEMBL_HDAC1
0,CHEMBL474693,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, ...","[3192076, 3087414, 1578989, 9391761, 7347340, ...",6.01
1,CHEMBL3918218,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, ...","[1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, ...","[9593141, 2349897, 1098538, 7467572, 19368319,...",5.42
2,CHEMBL3689854,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ...","[12893645, 6538203, 7998790, 18475925, 9830328...",6.48
3,CHEMBL2022826,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...","[1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, ...","[145521, 10511, 2475943, 9391761, 9038104, 554...",7.15
4,CHEMBL393117,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, ...","[3300235, 2685452, 7299922, 599760, 28861, 115...",7.04


In [4]:
# Add column for activity
df1["activity"] = np.zeros(len(df1))

# Mark every molecule as active with an pIC50 of >= 6.6, 0 otherwise
df1.loc[df1[df1.pChEMBL_HDAC1 >= 6.6].index, "activity"] = 1.0

# NBVAL_CHECK_OUTPUT
print("Number of active compounds:", int(df1.activity.sum()))
print("Number of inactive compounds:", len(df1) - int(df1.activity.sum()))

Number of active compounds: 2361
Number of inactive compounds: 2131


In [5]:
df1.head(5)

Unnamed: 0,molecule_chembl_id,fp_MACCS,fp_Morgan3,fp_MorganF,fp_MAP4,pChEMBL_HDAC1,activity
0,CHEMBL474693,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, ...","[3192076, 3087414, 1578989, 9391761, 7347340, ...",6.01,0.0
1,CHEMBL3918218,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, ...","[1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, ...","[9593141, 2349897, 1098538, 7467572, 19368319,...",5.42,0.0
2,CHEMBL3689854,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ...","[12893645, 6538203, 7998790, 18475925, 9830328...",6.48,0.0
3,CHEMBL2022826,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...","[1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, ...","[145521, 10511, 2475943, 9391761, 9038104, 554...",7.15,1.0
4,CHEMBL393117,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, ...","[3300235, 2685452, 7299922, 599760, 28861, 115...",7.04,1.0


#### By using MACCS fingerprints that are 166 bits 

In [6]:
X1 = np.array(list((df1['fp_MACCS']))).astype(float)
#X.shape
Y1 = df1["activity"].values
#y.shape

In [7]:
# Split the data into training and test set
# Perform data splitting using 80/20 ratio
X_train1, X_test1, Y_train1, Y_test1 = train_test_split(X1, Y1, test_size=0.2, random_state=42)

# Print the shape of training and testing data
print("Shape of training data:", X_train1.shape)
print("Shape of test data:", X_test1.shape)
# NBVAL_CHECK_OUTPUT

Shape of training data: (3593, 167)
Shape of test data: (899, 167)


In [8]:
# Defines and builds the LazyClassifier
clf1 =  LazyClassifier(verbose=0,ignore_warnings=True, custom_metric=None)
model1,predictions1 = clf1.fit(X_train1, X_test1, Y_train1, Y_test1)

 90%|█████████████████████████████████████████████████████████████████████████▌        | 26/29 [00:20<00:02,  1.01it/s]



100%|██████████████████████████████████████████████████████████████████████████████████| 29/29 [00:22<00:00,  1.31it/s]


In [9]:
predictions1.sort_values(by="F1 Score", ascending=False).head(10)

Unnamed: 0_level_0,Accuracy,Balanced Accuracy,ROC AUC,F1 Score,Time Taken
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
LGBMClassifier,0.83,0.83,0.83,0.83,0.39
XGBClassifier,0.83,0.82,0.82,0.83,0.88
RandomForestClassifier,0.82,0.81,0.81,0.82,0.67
ExtraTreesClassifier,0.81,0.81,0.81,0.81,0.77
NuSVC,0.81,0.81,0.81,0.81,3.05
KNeighborsClassifier,0.81,0.8,0.8,0.81,1.05
BaggingClassifier,0.8,0.8,0.8,0.8,0.42
SVC,0.79,0.79,0.79,0.79,2.74
ExtraTreeClassifier,0.78,0.78,0.78,0.78,0.06
DecisionTreeClassifier,0.76,0.76,0.76,0.76,0.1


### USING 1024 BITS FINGERPRINTS

In [10]:
#By using Morgan fingerprints with radius of 3 and 1024 bits
X2 = np.array(list((df1['fp_Morgan3']))).astype(float)
#X.shape
Y2 = df1["activity"].values
#y.shape

In [11]:
# Split the data into training and test set
# Perform data splitting using 80/20 ratio
X_train2, X_test2, Y_train2, Y_test2 = train_test_split(X2, Y2, test_size=0.2, random_state=42)

# Print the shape of training and testing data
print("Shape of training data:", X_train2.shape)
print("Shape of test data:", X_test2.shape)
# NBVAL_CHECK_OUTPUT

Shape of training data: (3593, 1024)
Shape of test data: (899, 1024)


In [12]:
clf2 = LazyClassifier(verbose=0,ignore_warnings=True, custom_metric=None)
models2, predictions2 = clf2.fit(X_train2, X_test2, Y_train2, Y_test2)

 90%|█████████████████████████████████████████████████████████████████████████▌        | 26/29 [01:26<00:18,  6.21s/it]



100%|██████████████████████████████████████████████████████████████████████████████████| 29/29 [01:31<00:00,  3.17s/it]


In [13]:
predictions2.sort_values(by="F1 Score", ascending=False).head(10)

Unnamed: 0_level_0,Accuracy,Balanced Accuracy,ROC AUC,F1 Score,Time Taken
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ExtraTreesClassifier,0.84,0.83,0.83,0.84,2.53
RandomForestClassifier,0.83,0.83,0.83,0.83,1.86
LGBMClassifier,0.83,0.82,0.82,0.83,1.8
XGBClassifier,0.83,0.82,0.82,0.82,3.81
BaggingClassifier,0.82,0.82,0.82,0.82,5.0
NuSVC,0.81,0.81,0.81,0.81,19.29
SVC,0.81,0.8,0.8,0.8,17.4
KNeighborsClassifier,0.78,0.78,0.78,0.78,6.77
QuadraticDiscriminantAnalysis,0.78,0.78,0.78,0.78,1.34
RidgeClassifierCV,0.77,0.77,0.77,0.77,1.03


In [14]:
#By using Morgan fingerprints with radius of 3 and 1024 bits and feautures
X3 = np.array(list((df1['fp_MorganF']))).astype(float)
#X.shape
Y3 = df1["activity"].values
#y.shape

In [15]:
# Split the data into training and test set
# Perform data splitting using 80/20 ratio
X_train3, X_test3, Y_train3, Y_test3 = train_test_split(X3, Y3, test_size=0.2, random_state=42)

# Print the shape of training and testing data
print("Shape of training data:", X_train3.shape)
print("Shape of test data:", X_test3.shape)
# NBVAL_CHECK_OUTPUT

Shape of training data: (3593, 1024)
Shape of test data: (899, 1024)


In [16]:
clf3 = LazyClassifier(verbose=0,ignore_warnings=True, custom_metric=None)
models3, predictions3 = clf3.fit(X_train3, X_test3, Y_train3, Y_test3)

 90%|█████████████████████████████████████████████████████████████████████████▌        | 26/29 [01:25<00:18,  6.03s/it]



100%|██████████████████████████████████████████████████████████████████████████████████| 29/29 [01:31<00:00,  3.17s/it]


In [17]:
predictions3.sort_values(by="F1 Score", ascending=False).head(10)

Unnamed: 0_level_0,Accuracy,Balanced Accuracy,ROC AUC,F1 Score,Time Taken
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
RandomForestClassifier,0.83,0.82,0.82,0.83,1.87
LGBMClassifier,0.83,0.82,0.82,0.83,1.6
ExtraTreesClassifier,0.82,0.82,0.82,0.82,2.56
NuSVC,0.82,0.82,0.82,0.82,18.87
SVC,0.82,0.82,0.82,0.82,16.84
XGBClassifier,0.81,0.81,0.81,0.81,4.43
BaggingClassifier,0.81,0.81,0.81,0.81,4.47
KNeighborsClassifier,0.79,0.79,0.79,0.79,6.71
QuadraticDiscriminantAnalysis,0.78,0.77,0.77,0.78,1.43
DecisionTreeClassifier,0.78,0.77,0.77,0.78,0.72


In [18]:
#By using MAP4 fingerprints with 1024 bits
X4 = np.array(list((df1['fp_MAP4']))).astype(float)
#X.shape
Y4 = df1["activity"].values
#y.shape

In [19]:
# Split the data into training and test set
# Perform data splitting using 80/20 ratio
X_train4, X_test4, Y_train4, Y_test4 = train_test_split(X4, Y4, test_size=0.2, random_state=42)

# Print the shape of training and testing data
print("Shape of training data:", X_train4.shape)
print("Shape of test data:", X_test4.shape)
# NBVAL_CHECK_OUTPUT

Shape of training data: (3593, 1024)
Shape of test data: (899, 1024)


In [20]:
# Defines and builds the lazyclassifier
clf4 = LazyClassifier(verbose=0,ignore_warnings=True, custom_metric=None)
models4, predictions4 = clf4.fit(X_train4, X_test4, Y_train4, Y_test4)

 90%|█████████████████████████████████████████████████████████████████████████▌        | 26/29 [02:04<00:20,  6.75s/it]



100%|██████████████████████████████████████████████████████████████████████████████████| 29/29 [02:31<00:00,  5.23s/it]


In [21]:
predictions4.sort_values(by="F1 Score", ascending=False).head(10)

Unnamed: 0_level_0,Accuracy,Balanced Accuracy,ROC AUC,F1 Score,Time Taken
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
XGBClassifier,0.83,0.83,0.83,0.83,12.08
RandomForestClassifier,0.82,0.82,0.82,0.82,7.5
LGBMClassifier,0.82,0.82,0.82,0.82,15.59
ExtraTreesClassifier,0.82,0.81,0.81,0.82,2.59
BaggingClassifier,0.78,0.78,0.78,0.78,21.34
NuSVC,0.78,0.77,0.77,0.78,19.56
SVC,0.78,0.77,0.77,0.77,17.88
QuadraticDiscriminantAnalysis,0.77,0.77,0.77,0.77,1.31
KNeighborsClassifier,0.77,0.76,0.76,0.76,7.01
AdaBoostClassifier,0.74,0.74,0.74,0.74,14.07


## USE 2048 BITS FINGERPRINTS

In [22]:
df2 = pd.read_pickle(HDAC1/"HDAC1_2048B.csv")
df2 = df2.sample(frac=1).reset_index(drop=True)
df2

Unnamed: 0,molecule_chembl_id,fp_Morgan3,fp_MorganF,fp_MAP4,pChEMBL_HDAC1
0,CHEMBL4177129,"[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, ...","[5976924, 7882477, 6095547, 453610, 30496230, ...",5.54
1,CHEMBL511984,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...","[1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ...","[952475, 206868, 2417431, 3934995, 28861, 6934...",7.89
2,CHEMBL1083441,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, ...","[1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[10517078, 5398951, 8033062, 7467572, 34588650...",7.01
3,CHEMBL3925939,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, ...","[145521, 3650390, 3771845, 2469425, 163308, 85...",8.24
4,CHEMBL4250891,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, ...","[1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, ...","[382204, 3821889, 137380, 2385390, 7332227, 82...",7.20
...,...,...,...,...,...
4487,CHEMBL226817,"[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, ...","[1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[7416012, 1279692, 1475424, 1916654, 13312514,...",6.28
4488,CHEMBL3689797,"[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ...","[3865478, 3827191, 6095547, 7378093, 2498914, ...",6.23
4489,CHEMBL246589,"[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, ...","[1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[3094461, 783386, 1975369, 5548928, 9038104, 1...",7.32
4490,CHEMBL251336,"[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, ...","[8732066, 2349897, 8033062, 5974416, 18234500,...",7.25


In [23]:
# Add column for activity
df2["activity"] = np.zeros(len(df2))

# Mark every molecule as active with an pIC50 of >= 6.6, 0 otherwise
df2.loc[df2[df2.pChEMBL_HDAC1 >= 6.6].index, "activity"] = 1.0

# NBVAL_CHECK_OUTPUT
print("Number of active compounds:", int(df2.activity.sum()))
print("Number of inactive compounds:", len(df2) - int(df2.activity.sum()))

Number of active compounds: 2361
Number of inactive compounds: 2131


In [24]:
#2048 bits Morgan fingerprints with radius of 3
X5 = np.array(list((df2['fp_Morgan3']))).astype(float)
#X.shape
Y5 = df2["activity"].values
#y.shape

In [25]:
# Split the data into training and test set
# Perform data splitting using 80/20 ratio
X_train5, X_test5, Y_train5, Y_test5 = train_test_split(X5, Y5, test_size=0.2, random_state=42)

# Print the shape of training and testing data
print("Shape of training data:", X_train5.shape)
print("Shape of test data:", X_test5.shape)
# NBVAL_CHECK_OUTPUT

Shape of training data: (3593, 2048)
Shape of test data: (899, 2048)


In [26]:
clf5 = LazyClassifier(verbose=0,ignore_warnings=True, custom_metric=None)
models5, predictions5 = clf5.fit(X_train5, X_test5, Y_train5, Y_test5)

 90%|█████████████████████████████████████████████████████████████████████████▌        | 26/29 [03:16<00:41, 13.76s/it]



100%|██████████████████████████████████████████████████████████████████████████████████| 29/29 [03:28<00:00,  7.19s/it]


In [27]:
predictions5.sort_values(by="F1 Score", ascending=False).head(10)

Unnamed: 0_level_0,Accuracy,Balanced Accuracy,ROC AUC,F1 Score,Time Taken
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
RandomForestClassifier,0.85,0.85,0.85,0.85,3.71
ExtraTreesClassifier,0.84,0.84,0.84,0.84,4.51
LGBMClassifier,0.83,0.83,0.83,0.83,2.95
XGBClassifier,0.82,0.82,0.82,0.82,8.68
SVC,0.82,0.81,0.81,0.82,37.01
BaggingClassifier,0.81,0.81,0.81,0.81,9.89
NuSVC,0.81,0.81,0.81,0.81,43.45
DecisionTreeClassifier,0.78,0.78,0.78,0.78,1.65
ExtraTreeClassifier,0.77,0.77,0.77,0.77,0.52
KNeighborsClassifier,0.77,0.77,0.77,0.77,13.45


In [28]:
#2048 bits Morgan fingerprints with feautures
X6 = np.array(list((df2['fp_MorganF']))).astype(float)
#X.shape
Y6 = df2["activity"].values
#y.shape

In [29]:
# Split the data into training and test set
# Perform data splitting using 80/20 ratio
X_train6, X_test6, Y_train6, Y_test6 = train_test_split(X6, Y6, test_size=0.2, random_state=42)

# Print the shape of training and testing data
print("Shape of training data:", X_train6.shape)
print("Shape of test data:", X_test6.shape)
# NBVAL_CHECK_OUTPUT

Shape of training data: (3593, 2048)
Shape of test data: (899, 2048)


In [30]:
clf6 = LazyClassifier(verbose=0,ignore_warnings=True, custom_metric=None)
models6, predictions6 = clf6.fit(X_train6, X_test6, Y_train6, Y_test6)

 90%|█████████████████████████████████████████████████████████████████████████▌        | 26/29 [03:12<00:40, 13.52s/it]



100%|██████████████████████████████████████████████████████████████████████████████████| 29/29 [03:23<00:00,  7.03s/it]


In [31]:
predictions6.sort_values(by="F1 Score", ascending=False).head(10)

Unnamed: 0_level_0,Accuracy,Balanced Accuracy,ROC AUC,F1 Score,Time Taken
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ExtraTreesClassifier,0.85,0.85,0.85,0.85,4.57
RandomForestClassifier,0.85,0.84,0.84,0.84,3.2
BaggingClassifier,0.84,0.84,0.84,0.84,8.85
LGBMClassifier,0.83,0.83,0.83,0.83,2.24
XGBClassifier,0.83,0.82,0.82,0.83,9.45
NuSVC,0.82,0.82,0.82,0.82,38.25
SVC,0.82,0.81,0.81,0.81,36.16
DecisionTreeClassifier,0.8,0.79,0.79,0.8,1.51
ExtraTreeClassifier,0.79,0.79,0.79,0.79,0.54
KNeighborsClassifier,0.77,0.77,0.77,0.77,13.04


In [32]:
#2048 bits MAP4 fingerprints
X7 = np.array(list((df2['fp_MAP4']))).astype(float)
#X.shape
Y7 = df2["activity"].values
#y.shape

In [33]:
# Split the data into training and test set
# Perform data splitting using 80/20 ratio
X_train7, X_test7, Y_train7, Y_test7 = train_test_split(X7, Y7, test_size=0.2, random_state=42)

# Print the shape of training and testing data
print("Shape of training data:", X_train7.shape)
print("Shape of test data:", X_test7.shape)
# NBVAL_CHECK_OUTPUT

Shape of training data: (3593, 2048)
Shape of test data: (899, 2048)


In [34]:
# Defines and builds the lazyclassifier
clf7 = LazyClassifier(verbose=0,ignore_warnings=True, custom_metric=None)
models7, predictions7 = clf7.fit(X_train7, X_test7, Y_train7, Y_test7)

 90%|█████████████████████████████████████████████████████████████████████████▌        | 26/29 [04:26<00:42, 14.22s/it]



100%|██████████████████████████████████████████████████████████████████████████████████| 29/29 [05:24<00:00, 11.20s/it]


In [35]:
predictions7.sort_values(by="F1 Score", ascending=False).head(10)

Unnamed: 0_level_0,Accuracy,Balanced Accuracy,ROC AUC,F1 Score,Time Taken
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
XGBClassifier,0.86,0.85,0.85,0.86,25.58
RandomForestClassifier,0.84,0.84,0.84,0.84,9.73
LGBMClassifier,0.84,0.84,0.84,0.84,32.96
ExtraTreesClassifier,0.83,0.83,0.83,0.83,3.96
BaggingClassifier,0.82,0.82,0.82,0.82,48.83
NuSVC,0.82,0.81,0.81,0.82,38.58
SVC,0.81,0.8,0.8,0.8,37.07
KNeighborsClassifier,0.8,0.79,0.79,0.8,13.99
AdaBoostClassifier,0.75,0.75,0.75,0.75,30.98
Perceptron,0.74,0.74,0.74,0.74,0.83


In [36]:
with pd.ExcelWriter(output/"LazyPredictResults.xlsx") as writer:
   
    # use to_excel function and specify the sheet_name and index 
    # to store the dataframe in specified sheet
    predictions1.to_excel(writer, sheet_name="MACCS", )
    predictions2.to_excel(writer, sheet_name="Morgan 1024 Bits", )
    predictions3.to_excel(writer, sheet_name="MorganF 1024 Bits", )
    predictions4.to_excel(writer, sheet_name="MAP4 1024 Bits", )
    predictions5.to_excel(writer, sheet_name="Morgan 2048 Bits", )
    predictions6.to_excel(writer, sheet_name="MorganF 2048 Bits", )
    predictions7.to_excel(writer, sheet_name="MAP4 2048 Bits", )
    