# 1. Import library
Note: The imports below are combined from various sources and are much more comprehensive than the original code provided. Even the imports deserve some care.

Also note: For this "Ab_Virus_02_Model_Selection_v01.ipynb" Jupyter Notebook, you can switch back to your normal pip3 or conda environment.

### 1.1 Import OS and Path

In [1]:
import os
from pathlib import Path

### 1.2 Import data structures

In [2]:
import numpy as np
from numpy import arange, logspace
import pandas as pd
import multiprocessing
import logging
import csv
import json

### 1.3 Import visualisation tools

In [3]:
import seaborn as sb
sb.set()
from matplotlib import pyplot
import matplotlib.pyplot as plt
%matplotlib inline

### 1.4 Import Scikit Learn - data analytics

In [4]:
# The code for featurization was borrowed from deepchem. Please refer https://deepchem.io for more information
from scipy import stats
from scipy.stats import randint, uniform
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV, RandomizedSearchCV, KFold, cross_val_score, StratifiedKFold, RepeatedStratifiedKFold
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, roc_auc_score, average_precision_score, mean_squared_error, r2_score, precision_score,recall_score, f1_score
from sklearn.model_selection import cross_val_score

### 1.5 Import Scikit Learn - classifiers

In [5]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression, Lasso
from sklearn import svm
from sklearn.svm import LinearSVC
from sklearn.neural_network import MLPClassifier

### 1.5 Import XGBoost

In [6]:
import xgboost as xgb
from xgboost.sklearn import XGBClassifier
from xgboost import plot_importance
from xgboost import plot_importance

### 1.6 Import other classifiers

In [7]:
import lightgbm as lgb
from lightgbm import LGBMClassifier
import catboost as cb
from catboost import CatBoostClassifier

### 1.7 Import Pytorch Tabnet

In [8]:
import torch
from pytorch_tabnet.tab_model import TabNetClassifier
from pytorch_tabular import TabularModel
from pytorch_tabular.models import CategoryEmbeddingModelConfig
from pytorch_tabular.config import DataConfig, OptimizerConfig, TrainerConfig, ExperimentConfig

# 2. Data structures loading and preparation

### 2.1 Load NumPy files from directory

In [9]:
# Directory the notebook is executed from; all five .npy inputs are read from here
CURRENT_PATH = os.getcwd()
print(CURRENT_PATH)


def _load_npy(file_name):
    """Load ``file_name`` from ``CURRENT_PATH`` with ``np.load`` and print a confirmation.

    Any exception raised by ``np.load`` (for example when the file is missing)
    propagates unchanged, so the confirmation is only printed after a successful load.
    """
    array = np.load(os.path.join(CURRENT_PATH, file_name))
    print(f"Loaded file '{file_name}'")
    return array


# Training features (two featurisations) and their class labels
mean_final_elementwise_sum = _load_npy("mean_final_elementwise_sum.npy")
mean_final_concatenate = _load_npy("mean_final_concatenate.npy")
IC50_class = _load_npy("IC50_class.npy")

# Features of the additional samples that the models below predict on
mean_final_additional_elementwise_sum = _load_npy("mean_final_additional_elementwise_sum.npy")
mean_final_additional_concatenate = _load_npy("mean_final_additional_concatenate.npy")

print(mean_final_elementwise_sum.shape)
print(mean_final_concatenate.shape)
print(IC50_class.shape)
print(mean_final_additional_elementwise_sum.shape)
print(mean_final_additional_concatenate.shape)

# Fail here, with a readable message, rather than deep inside a classifier's fit/predict
assert len(mean_final_elementwise_sum) == len(mean_final_concatenate) == len(IC50_class), \
    "training feature matrices and IC50_class must have the same number of rows"
assert len(mean_final_additional_elementwise_sum) == len(mean_final_additional_concatenate), \
    "the two additional feature matrices must have the same number of rows"
assert mean_final_elementwise_sum.shape[1:] == mean_final_additional_elementwise_sum.shape[1:], \
    "element-wise-sum features differ in width between training and additional data"
assert mean_final_concatenate.shape[1:] == mean_final_additional_concatenate.shape[1:], \
    "concatenated features differ in width between training and additional data"

/Users/Joshua/Dropbox/NTU_studies/2021_22_Year_2/URECA/Reading/PotentialAB
Loaded file 'mean_final_elementwise_sum.npy'
Loaded file 'mean_final_concatenate.npy'
Loaded file 'IC50_class.npy'
Loaded file 'mean_final_additional_elementwise_sum.npy'
Loaded file 'mean_final_additional_concatenate.npy'
(1933, 37)
(1933, 74)
(1933,)
(414, 37)
(414, 74)


### 2.2 CSV file preparation

In [10]:
# CSV file (resolved against the working directory) that each model section below
# re-reads, extends with one prediction column, and writes back.
csv_file_name = "VirusNet_additional_predict.csv"
print(f"{csv_file_name}")

VirusNet_additional_predict.csv


# 3. Model 1: Random Forest Classifier

### 3.1 Apply Random Forest Classifier trained on the full training set, X input: mean_final_elementwise_sum

In [11]:
# Element-wise-sum features: training rows to fit on, additional rows to predict
X_train_elementwise_sum, X_test_elementwise_sum = (
    mean_final_elementwise_sum,
    mean_final_additional_elementwise_sum,
)
# Class labels for the training rows
y_train = IC50_class

In [12]:
# Random Forest Classifier
# random_state is pinned so that a fresh "Restart & Run All" grows the same forest
# and reports the same predictions; unseeded, every run gives different counts.
rf = RandomForestClassifier(random_state=42) # change the classifier here

# Model fitting and training (uses every training row; nothing is held out here)
rf.fit(X_train_elementwise_sum, y_train)

# Predict a class for every row of the additional feature matrix
y_pred_elementwise_sum = rf.predict(X_test_elementwise_sum)
# Kept as a plain Python list because the CSV-writing cell relies on list semantics
predictions_elementwise_sum = [round(value) for value in y_pred_elementwise_sum]

# sum() counts positives only if classes are encoded 0/1 - presumably so; confirm.
# The total comes from the data instead of a hard-coded 414.
print(f"{sum(predictions_elementwise_sum)} out of {len(predictions_elementwise_sum)} samples are predicted to be neutralising")

7 out of 414 samples are predicted to be neutralising


In [13]:
df = pd.read_csv(csv_file_name)

# Put -1 placeholders at positions 112 and 264 of the column (i.e. before predictions
# 112 and 263), so the column is two entries longer than the prediction list.
# NOTE(review): presumably these CSV rows have no feature vector - confirm upstream.
padded_predictions = list(predictions_elementwise_sum)
padded_predictions.insert(112, -1)
padded_predictions.insert(264, -1)

# Add (or replace) this model's column and overwrite the CSV in place
df = df.assign(rf_elementwise_sum=padded_predictions)
df.to_csv(csv_file_name, index=False)

### 3.2 Apply Random Forest Classifier trained on the full training set, X input: mean_final_concatenate

In [14]:
# Concatenated features: training rows to fit on, additional rows to predict
X_train_concatenate, X_test_concatenate = (
    mean_final_concatenate,
    mean_final_additional_concatenate,
)
# Class labels for the training rows
y_train = IC50_class

In [15]:
# Random Forest Classifier
# random_state is pinned so repeated runs grow the same forest and report the same counts.
rf = RandomForestClassifier(random_state=42) # change the classifier here

# Model fitting and training (uses every training row; nothing is held out here)
rf.fit(X_train_concatenate, y_train)

# Predict a class for every row of the additional feature matrix
y_pred_concatenate = rf.predict(X_test_concatenate)
# Kept as a plain Python list because the CSV-writing cell relies on list semantics
predictions_concatenate = [round(value) for value in y_pred_concatenate]

# The total comes from the data instead of a hard-coded 414
print(f"{sum(predictions_concatenate)} out of {len(predictions_concatenate)} samples are predicted to be neutralising")

3 out of 414 samples are predicted to be neutralising


In [16]:
df = pd.read_csv(csv_file_name)

# Put -1 placeholders at positions 112 and 264 of the column (before predictions 112 and 263).
# NOTE(review): presumably these CSV rows have no feature vector - confirm upstream.
padded_predictions = list(predictions_concatenate)
padded_predictions.insert(112, -1)
padded_predictions.insert(264, -1)

# Add (or replace) this model's column and overwrite the CSV in place
df = df.assign(rf_concatenate=padded_predictions)
df.to_csv(csv_file_name, index=False)

# 4. Model 2: Decision Tree Classifier

### 4.1 Apply Decision Tree Classifier trained on the full training set, X input: mean_final_elementwise_sum

In [17]:
# Element-wise-sum features: training rows to fit on, additional rows to predict
X_train_elementwise_sum, X_test_elementwise_sum = (
    mean_final_elementwise_sum,
    mean_final_additional_elementwise_sum,
)
# Class labels for the training rows
y_train = IC50_class

In [18]:
# Decision Tree Classifier
# random_state is pinned so repeated runs build the same tree and report the same counts.
dectree = DecisionTreeClassifier(random_state=42) # change the classifier here

# Model fitting and training (uses every training row; nothing is held out here)
dectree.fit(X_train_elementwise_sum, y_train)

# Predict a class for every row of the additional feature matrix
y_pred_elementwise_sum = dectree.predict(X_test_elementwise_sum)
# Kept as a plain Python list because the CSV-writing cell relies on list semantics
predictions_elementwise_sum = [round(value) for value in y_pred_elementwise_sum]

# The total comes from the data instead of a hard-coded 414
print(f"{sum(predictions_elementwise_sum)} out of {len(predictions_elementwise_sum)} samples are predicted to be neutralising")

23 out of 414 samples are predicted to be neutralising


In [19]:
df = pd.read_csv(csv_file_name)

# Put -1 placeholders at positions 112 and 264 of the column (before predictions 112 and 263).
# NOTE(review): presumably these CSV rows have no feature vector - confirm upstream.
padded_predictions = list(predictions_elementwise_sum)
padded_predictions.insert(112, -1)
padded_predictions.insert(264, -1)

# Add (or replace) this model's column and overwrite the CSV in place
df = df.assign(dectree_elementwise_sum=padded_predictions)
df.to_csv(csv_file_name, index=False)

### 4.2 Apply Decision Tree Classifier trained on the full training set, X input: mean_final_concatenate

In [20]:
# Concatenated features: training rows to fit on, additional rows to predict
X_train_concatenate, X_test_concatenate = (
    mean_final_concatenate,
    mean_final_additional_concatenate,
)
# Class labels for the training rows
y_train = IC50_class

In [21]:
# Decision Tree Classifier
# random_state is pinned so repeated runs build the same tree and report the same counts.
dectree = DecisionTreeClassifier(random_state=42) # change the classifier here

# Model fitting and training (uses every training row; nothing is held out here)
dectree.fit(X_train_concatenate, y_train)

# Predict a class for every row of the additional feature matrix
y_pred_concatenate = dectree.predict(X_test_concatenate)
# Kept as a plain Python list because the CSV-writing cell relies on list semantics
predictions_concatenate = [round(value) for value in y_pred_concatenate]

# The total comes from the data instead of a hard-coded 414
print(f"{sum(predictions_concatenate)} out of {len(predictions_concatenate)} samples are predicted to be neutralising")

21 out of 414 samples are predicted to be neutralising


In [22]:
df = pd.read_csv(csv_file_name)

# Put -1 placeholders at positions 112 and 264 of the column (before predictions 112 and 263).
# NOTE(review): presumably these CSV rows have no feature vector - confirm upstream.
padded_predictions = list(predictions_concatenate)
padded_predictions.insert(112, -1)
padded_predictions.insert(264, -1)

# Add (or replace) this model's column and overwrite the CSV in place
df = df.assign(dectree_concatenate=padded_predictions)
df.to_csv(csv_file_name, index=False)

# 5. Model 3: Logistic Regression

### 5.1 Apply Logistic Regression trained on the full training set, X input: mean_final_elementwise_sum

In [23]:
# Element-wise-sum features: training rows to fit on, additional rows to predict
X_train_elementwise_sum, X_test_elementwise_sum = (
    mean_final_elementwise_sum,
    mean_final_additional_elementwise_sum,
)
# Class labels for the training rows
y_train = IC50_class

In [24]:
# Logistic Regression (library defaults)
LR = LogisticRegression() # change the classifier here

# Model fitting and training (uses every training row; nothing is held out here)
LR.fit(X_train_elementwise_sum, y_train)

# Predict a class for every row of the additional feature matrix
y_pred_elementwise_sum = LR.predict(X_test_elementwise_sum)
# Kept as a plain Python list because the CSV-writing cell relies on list semantics
predictions_elementwise_sum = [round(value) for value in y_pred_elementwise_sum]

# The total comes from the data instead of a hard-coded 414
print(f"{sum(predictions_elementwise_sum)} out of {len(predictions_elementwise_sum)} samples are predicted to be neutralising")

68 out of 414 samples are predicted to be neutralising


In [25]:
df = pd.read_csv(csv_file_name)

# Put -1 placeholders at positions 112 and 264 of the column (before predictions 112 and 263).
# NOTE(review): presumably these CSV rows have no feature vector - confirm upstream.
padded_predictions = list(predictions_elementwise_sum)
padded_predictions.insert(112, -1)
padded_predictions.insert(264, -1)

# Add (or replace) this model's column and overwrite the CSV in place
df = df.assign(lr_elementwise_sum=padded_predictions)
df.to_csv(csv_file_name, index=False)

### 5.2 Apply Logistic Regression trained on the full training set, X input: mean_final_concatenate

In [26]:
# Concatenated features: training rows to fit on, additional rows to predict
X_train_concatenate, X_test_concatenate = (
    mean_final_concatenate,
    mean_final_additional_concatenate,
)
# Class labels for the training rows
y_train = IC50_class

In [27]:
# Logistic Regression (library defaults)
LR = LogisticRegression() # change the classifier here

# Model fitting and training (uses every training row; nothing is held out here)
LR.fit(X_train_concatenate, y_train)

# Predict a class for every row of the additional feature matrix
y_pred_concatenate = LR.predict(X_test_concatenate)
# Kept as a plain Python list because the CSV-writing cell relies on list semantics
predictions_concatenate = [round(value) for value in y_pred_concatenate]

# The total comes from the data instead of a hard-coded 414
print(f"{sum(predictions_concatenate)} out of {len(predictions_concatenate)} samples are predicted to be neutralising")

79 out of 414 samples are predicted to be neutralising


In [28]:
df = pd.read_csv(csv_file_name)

# Put -1 placeholders at positions 112 and 264 of the column (before predictions 112 and 263).
# NOTE(review): presumably these CSV rows have no feature vector - confirm upstream.
padded_predictions = list(predictions_concatenate)
padded_predictions.insert(112, -1)
padded_predictions.insert(264, -1)

# Add (or replace) this model's column and overwrite the CSV in place
df = df.assign(lr_concatenate=padded_predictions)
df.to_csv(csv_file_name, index=False)

# 6. Model 4: Support Vector Machine

### 6.1 Apply Support Vector Machine trained on the full training set, X input: mean_final_elementwise_sum

In [29]:
# Element-wise-sum features: training rows to fit on, additional rows to predict
X_train_elementwise_sum, X_test_elementwise_sum = (
    mean_final_elementwise_sum,
    mean_final_additional_elementwise_sum,
)
# Class labels for the training rows
y_train = IC50_class

In [30]:
# Support Vector Machine (linear kernel via LinearSVC)
# random_state is pinned so repeated runs of the solver report the same counts.
SVM = LinearSVC(random_state=42) # change the classifier here

# Model fitting and training (uses every training row; nothing is held out here)
SVM.fit(X_train_elementwise_sum, y_train)

# Predict a class for every row of the additional feature matrix
y_pred_elementwise_sum = SVM.predict(X_test_elementwise_sum)
# Kept as a plain Python list because the CSV-writing cell relies on list semantics
predictions_elementwise_sum = [round(value) for value in y_pred_elementwise_sum]

# The total comes from the data instead of a hard-coded 414
print(f"{sum(predictions_elementwise_sum)} out of {len(predictions_elementwise_sum)} samples are predicted to be neutralising")

51 out of 414 samples are predicted to be neutralising




In [31]:
df = pd.read_csv(csv_file_name)

# Put -1 placeholders at positions 112 and 264 of the column (before predictions 112 and 263).
# NOTE(review): presumably these CSV rows have no feature vector - confirm upstream.
padded_predictions = list(predictions_elementwise_sum)
padded_predictions.insert(112, -1)
padded_predictions.insert(264, -1)

# Add (or replace) this model's column and overwrite the CSV in place
df = df.assign(svm_elementwise_sum=padded_predictions)
df.to_csv(csv_file_name, index=False)

### 6.2 Apply Support Vector Machine trained on the full training set, X input: mean_final_concatenate

In [32]:
# Concatenated features: training rows to fit on, additional rows to predict
X_train_concatenate, X_test_concatenate = (
    mean_final_concatenate,
    mean_final_additional_concatenate,
)
# Class labels for the training rows
y_train = IC50_class

In [33]:
# Support Vector Machine (linear kernel via LinearSVC)
# random_state is pinned so repeated runs of the solver report the same counts.
SVM = LinearSVC(random_state=42) # change the classifier here

# Model fitting and training (uses every training row; nothing is held out here)
SVM.fit(X_train_concatenate, y_train)

# Predict a class for every row of the additional feature matrix
y_pred_concatenate = SVM.predict(X_test_concatenate)
# Kept as a plain Python list because the CSV-writing cell relies on list semantics
predictions_concatenate = [round(value) for value in y_pred_concatenate]

# The total comes from the data instead of a hard-coded 414
print(f"{sum(predictions_concatenate)} out of {len(predictions_concatenate)} samples are predicted to be neutralising")

20 out of 414 samples are predicted to be neutralising




In [34]:
df = pd.read_csv(csv_file_name)

# Put -1 placeholders at positions 112 and 264 of the column (before predictions 112 and 263).
# NOTE(review): presumably these CSV rows have no feature vector - confirm upstream.
padded_predictions = list(predictions_concatenate)
padded_predictions.insert(112, -1)
padded_predictions.insert(264, -1)

# Add (or replace) this model's column and overwrite the CSV in place
df = df.assign(svm_concatenate=padded_predictions)
df.to_csv(csv_file_name, index=False)

# 7. Model 5: MLP Classifier

### 7.1 Apply MLP Classifier trained on the full training set, X input: mean_final_elementwise_sum

In [35]:
# Element-wise-sum features: training rows to fit on, additional rows to predict
X_train_elementwise_sum, X_test_elementwise_sum = (
    mean_final_elementwise_sum,
    mean_final_additional_elementwise_sum,
)
# Class labels for the training rows
y_train = IC50_class

In [36]:
# MLP Classifier
# random_state is pinned so weight initialisation and shuffling repeat across runs;
# unseeded, the predicted counts change every time the cell is executed.
MLP = MLPClassifier(random_state=42) # change the classifier here

# Model fitting and training (uses every training row; nothing is held out here)
MLP.fit(X_train_elementwise_sum, y_train)

# Predict a class for every row of the additional feature matrix
y_pred_elementwise_sum = MLP.predict(X_test_elementwise_sum)
# Kept as a plain Python list because the CSV-writing cell relies on list semantics
predictions_elementwise_sum = [round(value) for value in y_pred_elementwise_sum]

# The total comes from the data instead of a hard-coded 414
print(f"{sum(predictions_elementwise_sum)} out of {len(predictions_elementwise_sum)} samples are predicted to be neutralising")

122 out of 414 samples are predicted to be neutralising




In [37]:
df = pd.read_csv(csv_file_name)

# Put -1 placeholders at positions 112 and 264 of the column (before predictions 112 and 263).
# NOTE(review): presumably these CSV rows have no feature vector - confirm upstream.
padded_predictions = list(predictions_elementwise_sum)
padded_predictions.insert(112, -1)
padded_predictions.insert(264, -1)

# Add (or replace) this model's column and overwrite the CSV in place
df = df.assign(mlp_elementwise_sum=padded_predictions)
df.to_csv(csv_file_name, index=False)

### 7.2 Apply MLP Classifier trained on the full training set, X input: mean_final_concatenate

In [38]:
# Concatenated features: training rows to fit on, additional rows to predict
X_train_concatenate, X_test_concatenate = (
    mean_final_concatenate,
    mean_final_additional_concatenate,
)
# Class labels for the training rows
y_train = IC50_class

In [39]:
# MLP Classifier
# random_state is pinned so weight initialisation and shuffling repeat across runs.
MLP = MLPClassifier(random_state=42) # change the classifier here

# Model fitting and training (uses every training row; nothing is held out here)
MLP.fit(X_train_concatenate, y_train)

# Predict a class for every row of the additional feature matrix
y_pred_concatenate = MLP.predict(X_test_concatenate)
# Kept as a plain Python list because the CSV-writing cell relies on list semantics
predictions_concatenate = [round(value) for value in y_pred_concatenate]

# The total comes from the data instead of a hard-coded 414
print(f"{sum(predictions_concatenate)} out of {len(predictions_concatenate)} samples are predicted to be neutralising")

3 out of 414 samples are predicted to be neutralising


In [40]:
df = pd.read_csv(csv_file_name)

# Put -1 placeholders at positions 112 and 264 of the column (before predictions 112 and 263).
# NOTE(review): presumably these CSV rows have no feature vector - confirm upstream.
padded_predictions = list(predictions_concatenate)
padded_predictions.insert(112, -1)
padded_predictions.insert(264, -1)

# Add (or replace) this model's column and overwrite the CSV in place
df = df.assign(mlp_concatenate=padded_predictions)
df.to_csv(csv_file_name, index=False)

# 8. Model 6: XGBoost Classifier

### 8.1 Apply XGBoost Classifier trained on the full training set, X input: mean_final_elementwise_sum

In [41]:
# Element-wise-sum features: training rows to fit on, additional rows to predict
X_train_elementwise_sum, X_test_elementwise_sum = (
    mean_final_elementwise_sum,
    mean_final_additional_elementwise_sum,
)
# Class labels for the training rows
y_train = IC50_class

In [42]:
# XGBoost Classifier (library defaults)
XGB = XGBClassifier() # change the classifier here

# Model fitting and training (uses every training row; nothing is held out here)
XGB.fit(X_train_elementwise_sum, y_train)

# Predict a class for every row of the additional feature matrix
y_pred_elementwise_sum = XGB.predict(X_test_elementwise_sum)
# Kept as a plain Python list because the CSV-writing cell relies on list semantics
predictions_elementwise_sum = [round(value) for value in y_pred_elementwise_sum]

# The total comes from the data instead of a hard-coded 414
print(f"{sum(predictions_elementwise_sum)} out of {len(predictions_elementwise_sum)} samples are predicted to be neutralising")



16 out of 414 samples are predicted to be neutralising


In [43]:
df = pd.read_csv(csv_file_name)

# Put -1 placeholders at positions 112 and 264 of the column (before predictions 112 and 263).
# NOTE(review): presumably these CSV rows have no feature vector - confirm upstream.
padded_predictions = list(predictions_elementwise_sum)
padded_predictions.insert(112, -1)
padded_predictions.insert(264, -1)

# Add (or replace) this model's column and overwrite the CSV in place
df = df.assign(xgb_elementwise_sum=padded_predictions)
df.to_csv(csv_file_name, index=False)

### 8.2 Apply XGBoost Classifier trained on the full training set, X input: mean_final_concatenate

In [44]:
# Concatenated features: training rows to fit on, additional rows to predict
X_train_concatenate, X_test_concatenate = (
    mean_final_concatenate,
    mean_final_additional_concatenate,
)
# Class labels for the training rows
y_train = IC50_class

In [45]:
# XGBoost Classifier (library defaults)
XGB = XGBClassifier() # change the classifier here

# Model fitting and training (uses every training row; nothing is held out here)
XGB.fit(X_train_concatenate, y_train)

# Predict a class for every row of the additional feature matrix
y_pred_concatenate = XGB.predict(X_test_concatenate)
# Kept as a plain Python list because the CSV-writing cell relies on list semantics
predictions_concatenate = [round(value) for value in y_pred_concatenate]

# The total comes from the data instead of a hard-coded 414
print(f"{sum(predictions_concatenate)} out of {len(predictions_concatenate)} samples are predicted to be neutralising")

8 out of 414 samples are predicted to be neutralising


In [46]:
df = pd.read_csv(csv_file_name)

# Put -1 placeholders at positions 112 and 264 of the column (before predictions 112 and 263).
# NOTE(review): presumably these CSV rows have no feature vector - confirm upstream.
padded_predictions = list(predictions_concatenate)
padded_predictions.insert(112, -1)
padded_predictions.insert(264, -1)

# Add (or replace) this model's column and overwrite the CSV in place
df = df.assign(xgb_concatenate=padded_predictions)
df.to_csv(csv_file_name, index=False)

# 9. Model 7: LightGBM Classifier

### 9.1 Apply LightGBM Classifier trained on the full training set, X input: mean_final_elementwise_sum

In [47]:
# Element-wise-sum features: training rows to fit on, additional rows to predict
X_train_elementwise_sum, X_test_elementwise_sum = (
    mean_final_elementwise_sum,
    mean_final_additional_elementwise_sum,
)
# Class labels for the training rows
y_train = IC50_class

In [48]:
# LightGBM Classifier (library defaults)
lgbm = LGBMClassifier() # change the classifier here

# Model fitting and training (uses every training row; nothing is held out here)
lgbm.fit(X_train_elementwise_sum, y_train)

# Predict a class for every row of the additional feature matrix
y_pred_elementwise_sum = lgbm.predict(X_test_elementwise_sum)
# Kept as a plain Python list because the CSV-writing cell relies on list semantics
predictions_elementwise_sum = [round(value) for value in y_pred_elementwise_sum]

# The total comes from the data instead of a hard-coded 414
print(f"{sum(predictions_elementwise_sum)} out of {len(predictions_elementwise_sum)} samples are predicted to be neutralising")

9 out of 414 samples are predicted to be neutralising


In [49]:
df = pd.read_csv(csv_file_name)

# Put -1 placeholders at positions 112 and 264 of the column (before predictions 112 and 263).
# NOTE(review): presumably these CSV rows have no feature vector - confirm upstream.
padded_predictions = list(predictions_elementwise_sum)
padded_predictions.insert(112, -1)
padded_predictions.insert(264, -1)

# Add (or replace) this model's column and overwrite the CSV in place
df = df.assign(lgbm_elementwise_sum=padded_predictions)
df.to_csv(csv_file_name, index=False)

### 9.2 Apply LightGBM Classifier trained on the full training set, X input: mean_final_concatenate

In [50]:
# Concatenated features: training rows to fit on, additional rows to predict
X_train_concatenate, X_test_concatenate = (
    mean_final_concatenate,
    mean_final_additional_concatenate,
)
# Class labels for the training rows
y_train = IC50_class

In [51]:
# LightGBM Classifier (library defaults)
lgbm = LGBMClassifier() # change the classifier here

# Model fitting and training (uses every training row; nothing is held out here)
lgbm.fit(X_train_concatenate, y_train)

# Predict a class for every row of the additional feature matrix
y_pred_concatenate = lgbm.predict(X_test_concatenate)
# Kept as a plain Python list because the CSV-writing cell relies on list semantics
predictions_concatenate = [round(value) for value in y_pred_concatenate]

# The total comes from the data instead of a hard-coded 414
print(f"{sum(predictions_concatenate)} out of {len(predictions_concatenate)} samples are predicted to be neutralising")

8 out of 414 samples are predicted to be neutralising


In [52]:
df = pd.read_csv(csv_file_name)

# Put -1 placeholders at positions 112 and 264 of the column (before predictions 112 and 263).
# NOTE(review): presumably these CSV rows have no feature vector - confirm upstream.
padded_predictions = list(predictions_concatenate)
padded_predictions.insert(112, -1)
padded_predictions.insert(264, -1)

# Add (or replace) this model's column and overwrite the CSV in place
df = df.assign(lgbm_concatenate=padded_predictions)
df.to_csv(csv_file_name, index=False)

# 10. Model 8: CatBoost Classifier

### 10.1 Apply CatBoost Classifier trained on the full training set, X input: mean_final_elementwise_sum

In [53]:
# Element-wise-sum features: training rows to fit on, additional rows to predict
X_train_elementwise_sum, X_test_elementwise_sum = (
    mean_final_elementwise_sum,
    mean_final_additional_elementwise_sum,
)
# Class labels for the training rows
y_train = IC50_class

In [54]:
# CatBoost Classifier (library defaults; prints one log line per boosting iteration)
cat = CatBoostClassifier() # change the classifier here

# Model fitting and training (uses every training row; nothing is held out here)
cat.fit(X_train_elementwise_sum, y_train)

# Predict a class for every row of the additional feature matrix
y_pred_elementwise_sum = cat.predict(X_test_elementwise_sum)
# Kept as a plain Python list because the CSV-writing cell relies on list semantics
predictions_elementwise_sum = [round(value) for value in y_pred_elementwise_sum]

# The total comes from the data instead of a hard-coded 414
print(f"{sum(predictions_elementwise_sum)} out of {len(predictions_elementwise_sum)} samples are predicted to be neutralising")

Learning rate set to 0.013651
0:	learn: 0.6783226	total: 54.3ms	remaining: 54.3s
1:	learn: 0.6619250	total: 57.1ms	remaining: 28.5s
2:	learn: 0.6421035	total: 61.1ms	remaining: 20.3s
3:	learn: 0.6256055	total: 64.6ms	remaining: 16.1s
4:	learn: 0.6102344	total: 66.5ms	remaining: 13.2s
5:	learn: 0.5956888	total: 68.4ms	remaining: 11.3s
6:	learn: 0.5821312	total: 70.7ms	remaining: 10s
7:	learn: 0.5700984	total: 73.2ms	remaining: 9.07s
8:	learn: 0.5561662	total: 75.3ms	remaining: 8.29s
9:	learn: 0.5436816	total: 77.2ms	remaining: 7.65s
10:	learn: 0.5320264	total: 79.1ms	remaining: 7.12s
11:	learn: 0.5213771	total: 80.9ms	remaining: 6.66s
12:	learn: 0.5093984	total: 82.8ms	remaining: 6.29s
13:	learn: 0.4995777	total: 86.4ms	remaining: 6.08s
14:	learn: 0.4888291	total: 89.1ms	remaining: 5.85s
15:	learn: 0.4796548	total: 91.8ms	remaining: 5.64s
16:	learn: 0.4691540	total: 94.1ms	remaining: 5.44s
17:	learn: 0.4604280	total: 96.1ms	remaining: 5.24s
18:	learn: 0.4521552	total: 98ms	remaining: 5.

In [55]:
df = pd.read_csv(csv_file_name)

# Put -1 placeholders at positions 112 and 264 of the column (before predictions 112 and 263).
# NOTE(review): presumably these CSV rows have no feature vector - confirm upstream.
padded_predictions = list(predictions_elementwise_sum)
padded_predictions.insert(112, -1)
padded_predictions.insert(264, -1)

# Add (or replace) this model's column and overwrite the CSV in place
df = df.assign(cat_elementwise_sum=padded_predictions)
df.to_csv(csv_file_name, index=False)

### 10.2 Apply CatBoost Classifier trained on the full training set, X input: mean_final_concatenate

In [56]:
# Concatenated features: training rows to fit on, additional rows to predict
X_train_concatenate, X_test_concatenate = (
    mean_final_concatenate,
    mean_final_additional_concatenate,
)
# Class labels for the training rows
y_train = IC50_class

In [57]:
# CatBoost Classifier (library defaults; prints one log line per boosting iteration)
cat = CatBoostClassifier() # change the classifier here

# Model fitting and training (uses every training row; nothing is held out here)
cat.fit(X_train_concatenate, y_train)

# Predict a class for every row of the additional feature matrix
y_pred_concatenate = cat.predict(X_test_concatenate)
# Kept as a plain Python list because the CSV-writing cell relies on list semantics
predictions_concatenate = [round(value) for value in y_pred_concatenate]

# The total comes from the data instead of a hard-coded 414
print(f"{sum(predictions_concatenate)} out of {len(predictions_concatenate)} samples are predicted to be neutralising")

Learning rate set to 0.013651
0:	learn: 0.6773585	total: 2.83ms	remaining: 2.83s
1:	learn: 0.6580459	total: 5.69ms	remaining: 2.84s
2:	learn: 0.6430080	total: 9.48ms	remaining: 3.15s
3:	learn: 0.6289088	total: 12.3ms	remaining: 3.05s
4:	learn: 0.6146373	total: 14.8ms	remaining: 2.95s
5:	learn: 0.6005854	total: 17.9ms	remaining: 2.96s
6:	learn: 0.5850254	total: 21ms	remaining: 2.98s
7:	learn: 0.5726483	total: 24.1ms	remaining: 2.99s
8:	learn: 0.5607903	total: 26.7ms	remaining: 2.94s
9:	learn: 0.5479939	total: 29ms	remaining: 2.87s
10:	learn: 0.5324343	total: 32.1ms	remaining: 2.88s
11:	learn: 0.5202255	total: 34.7ms	remaining: 2.85s
12:	learn: 0.5081936	total: 36.7ms	remaining: 2.79s
13:	learn: 0.4949401	total: 39.6ms	remaining: 2.79s
14:	learn: 0.4838134	total: 41.6ms	remaining: 2.73s
15:	learn: 0.4733487	total: 43.6ms	remaining: 2.68s
16:	learn: 0.4635664	total: 45.5ms	remaining: 2.63s
17:	learn: 0.4543145	total: 48ms	remaining: 2.62s
18:	learn: 0.4457778	total: 50.7ms	remaining: 2.62

In [58]:
df = pd.read_csv(csv_file_name)

# Put -1 placeholders at positions 112 and 264 of the column (before predictions 112 and 263).
# NOTE(review): presumably these CSV rows have no feature vector - confirm upstream.
padded_predictions = list(predictions_concatenate)
padded_predictions.insert(112, -1)
padded_predictions.insert(264, -1)

# Add (or replace) this model's column and overwrite the CSV in place
df = df.assign(cat_concatenate=padded_predictions)
df.to_csv(csv_file_name, index=False)

# 11. Model 9: TabNet Classifier

### 11.1 Apply TabNet Classifier trained on the full training set, X input: mean_final_elementwise_sum

In [59]:
# Element-wise-sum features: training rows to fit on, additional rows to predict
X_train_elementwise_sum, X_test_elementwise_sum = (
    mean_final_elementwise_sum,
    mean_final_additional_elementwise_sum,
)
# Class labels for the training rows
y_train = IC50_class

In [60]:
# TabNet Classifier (library defaults; prints one log line per epoch)
tn = TabNetClassifier() # change the classifier here

# Model fitting and training (uses every training row; no eval set is passed,
# which is why the log reports that no early stopping is performed)
tn.fit(X_train_elementwise_sum, y_train)

# Predict a class for every row of the additional feature matrix
y_pred_elementwise_sum = tn.predict(X_test_elementwise_sum)
# Kept as a plain Python list because the CSV-writing cell relies on list semantics
predictions_elementwise_sum = [round(value) for value in y_pred_elementwise_sum]

# The total comes from the data instead of a hard-coded 414
print(f"{sum(predictions_elementwise_sum)} out of {len(predictions_elementwise_sum)} samples are predicted to be neutralising")

Device used : cpu
No early stopping will be performed, last training weights will be used.
epoch 0  | loss: 1.24061 |  0:00:00s
epoch 1  | loss: 0.81585 |  0:00:00s
epoch 2  | loss: 0.59772 |  0:00:00s
epoch 3  | loss: 0.52862 |  0:00:00s
epoch 4  | loss: 0.48116 |  0:00:00s
epoch 5  | loss: 0.44787 |  0:00:01s
epoch 6  | loss: 0.40511 |  0:00:01s
epoch 7  | loss: 0.3693  |  0:00:01s
epoch 8  | loss: 0.32362 |  0:00:01s
epoch 9  | loss: 0.3058  |  0:00:01s
epoch 10 | loss: 0.30237 |  0:00:02s
epoch 11 | loss: 0.2635  |  0:00:02s
epoch 12 | loss: 0.23986 |  0:00:02s
epoch 13 | loss: 0.23337 |  0:00:02s
epoch 14 | loss: 0.23339 |  0:00:02s
epoch 15 | loss: 0.22693 |  0:00:02s
epoch 16 | loss: 0.20697 |  0:00:03s
epoch 17 | loss: 0.22009 |  0:00:03s
epoch 18 | loss: 0.21497 |  0:00:03s
epoch 19 | loss: 0.20564 |  0:00:03s
epoch 20 | loss: 0.19516 |  0:00:03s
epoch 21 | loss: 0.201   |  0:00:03s
epoch 22 | loss: 0.19174 |  0:00:04s
epoch 23 | loss: 0.20013 |  0:00:04s
epoch 24 | loss: 0.19

In [61]:
df = pd.read_csv(csv_file_name)

# Put -1 placeholders at positions 112 and 264 of the column (before predictions 112 and 263).
# NOTE(review): presumably these CSV rows have no feature vector - confirm upstream.
padded_predictions = list(predictions_elementwise_sum)
padded_predictions.insert(112, -1)
padded_predictions.insert(264, -1)

# Add (or replace) this model's column and overwrite the CSV in place
df = df.assign(tn_elementwise_sum=padded_predictions)
df.to_csv(csv_file_name, index=False)

### 11.2 Apply TabNet Classifier trained on the full training set, X input: mean_final_concatenate

In [62]:
# Concatenated features: training rows to fit on, additional rows to predict
X_train_concatenate, X_test_concatenate = (
    mean_final_concatenate,
    mean_final_additional_concatenate,
)
# Class labels for the training rows
y_train = IC50_class

In [63]:
# TabNet Classifier (library defaults; prints one log line per epoch)
tn = TabNetClassifier() # change the classifier here

# Model fitting and training (uses every training row; no eval set is passed,
# which is why the log reports that no early stopping is performed)
tn.fit(X_train_concatenate, y_train)

# Predict a class for every row of the additional feature matrix
y_pred_concatenate = tn.predict(X_test_concatenate)
# Kept as a plain Python list because the CSV-writing cell relies on list semantics
predictions_concatenate = [round(value) for value in y_pred_concatenate]

# The total comes from the data instead of a hard-coded 414
print(f"{sum(predictions_concatenate)} out of {len(predictions_concatenate)} samples are predicted to be neutralising")

Device used : cpu
No early stopping will be performed, last training weights will be used.
epoch 0  | loss: 0.65673 |  0:00:00s
epoch 1  | loss: 0.53243 |  0:00:00s
epoch 2  | loss: 0.46529 |  0:00:00s
epoch 3  | loss: 0.41177 |  0:00:00s
epoch 4  | loss: 0.34119 |  0:00:00s
epoch 5  | loss: 0.31205 |  0:00:01s
epoch 6  | loss: 0.28405 |  0:00:01s
epoch 7  | loss: 0.25664 |  0:00:01s
epoch 8  | loss: 0.22645 |  0:00:01s
epoch 9  | loss: 0.2275  |  0:00:01s
epoch 10 | loss: 0.23744 |  0:00:02s
epoch 11 | loss: 0.20697 |  0:00:02s
epoch 12 | loss: 0.21313 |  0:00:02s
epoch 13 | loss: 0.20727 |  0:00:02s
epoch 14 | loss: 0.18288 |  0:00:02s
epoch 15 | loss: 0.17519 |  0:00:03s
epoch 16 | loss: 0.17252 |  0:00:03s
epoch 17 | loss: 0.16345 |  0:00:03s
epoch 18 | loss: 0.16107 |  0:00:03s
epoch 19 | loss: 0.16147 |  0:00:04s
epoch 20 | loss: 0.15309 |  0:00:04s
epoch 21 | loss: 0.14919 |  0:00:04s
epoch 22 | loss: 0.16091 |  0:00:04s
epoch 23 | loss: 0.16394 |  0:00:05s
epoch 24 | loss: 0.16

In [64]:
df = pd.read_csv(csv_file_name)

# Put -1 placeholders at positions 112 and 264 of the column (before predictions 112 and 263).
# NOTE(review): presumably these CSV rows have no feature vector - confirm upstream.
padded_predictions = list(predictions_concatenate)
padded_predictions.insert(112, -1)
padded_predictions.insert(264, -1)

# Add (or replace) this model's column and overwrite the CSV in place
df = df.assign(tn_concatenate=padded_predictions)
df.to_csv(csv_file_name, index=False)