# ML Conventional Methods for Bioactivity Prediction

## BACE - Classification | Scaffold Splitting | MACCS Features

##### Import libraries

- Helper function: load, split dataset, generate fingerprint

- Load model from scikit-learn, torch

- Load hyperopt module for hyperparameter tuning

In [1]:
import os
os.environ['OMP_NUM_THREADS'] = '1'
os.environ['OPENBLAS_NUM_THREADS'] = '1'
os.environ['MKL_NUM_THREADS'] = '1'
os.environ['VECLIB_MAXIMUM_THREADS'] = '1'
os.environ['NUMEXPR_NUM_THREADS'] = '1'
import numpy as np
import pandas as pd
from helper.load_dataset import load_bace_classification
from helper.preprocess import split_train_valid_test
from helper.features_ml import smi_maccs
from helper.cal_metrics import classification_metrics
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import f1_score
from xgboost import XGBClassifier
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from hyperopt import hp, tpe, fmin, Trials, space_eval
from hyperopt.pyll import scope
from tabulate import tabulate

In [2]:
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

<torch._C.Generator at 0x2276a3ea370>

### Training pipeline

##### Scaffold Splitting - BACE Classification Task

- Load, split dataset

- Generate fingerprint, defined target

In [3]:
# Load dataset
bace_class = load_bace_classification()

# Split dataset
train, valid, test = split_train_valid_test(bace_class, type='scaffold')
merge = pd.concat((train, valid))

# Generate fingerprint
train_smis = train['SMILES']
valid_smis = valid['SMILES']
test_smis = test['SMILES']
merge_smis = merge['SMILES']
X_train = [smi_maccs(smi) for smi in train_smis]
X_valid = [smi_maccs(smi) for smi in valid_smis]
X_test = [smi_maccs(smi) for smi in test_smis]
X_merge = [smi_maccs(smi) for smi in merge_smis]


# Target defined
y_train = train['Class']
y_valid = valid['Class']
y_test = test['Class']
y_merge = merge['Class']

##### Hyperparameter tuning and model training

- Pipeline: Hyperparameter tuning using valid set -> Train best params on train + valid -> Test on test set

- Models to try:

    - Support Vector Machine
    - Random Forest
    - XGBoost
    - Deep Neural Network

#### SVM

In [4]:
# Hyperparameters tuning with Hyperopt
trials = Trials()

svm_search_space = {
    'C': hp.loguniform('C', np.log(0.1), np.log(10)),
    # 'epsilon': hp.uniform('epsilon', 0.01, 0.2),
    'kernel': hp.choice('kernel', ['linear', 'rbf', 'poly'])
}

def svm_objective(params):
    model = SVC(
        C=params['C'],
        kernel=params['kernel'],
        random_state=SEED
    )
    model.fit(X_train, y_train)
    y_valid_hat = model.predict(X_valid)
    f1 = f1_score(y_valid, y_valid_hat)
    return -f1

best_svm_params = fmin(
    fn=svm_objective,
    space=svm_search_space,
    algo=tpe.suggest,
    max_evals=100,
    trials=trials
)

best_svm_params = space_eval(svm_search_space, best_svm_params)

model = SVC(**best_svm_params, probability=True, random_state=SEED)
model.fit(X_merge, y_merge)
y_train_pred = model.predict(X_merge)
y_train_score = model.predict_proba(X_merge)[:, 1]
y_test_pred = model.predict(X_test)
y_test_score = model.predict_proba(X_test)[:, 1]

# Calculate metrics

train_metrics = classification_metrics(y_merge, y_train_pred, y_train_score)
test_metrics = classification_metrics(y_test, y_test_pred, y_test_score)

# Print

result_header = ['Metrics', 'Train', 'Test']
result_body = [
    ["Accuracy", f'{train_metrics['accuracy']:.4f}', f'{test_metrics['accuracy']:.4f}'],
    ["Recall"],
    ["Overall recall", f'{train_metrics['recall']:.4f}', f'{test_metrics['recall']:.4f}'],
    ["Class 0 recall", f'{train_metrics['0_recall']:.4f}', f'{test_metrics['0_recall']:.4f}'],
    ["Class 1 recall", f'{train_metrics['1_recall']:.4f}', f'{test_metrics['1_recall']:.4f}'],
    ["Precision", '', ''],
    ["Overall precision", f'{train_metrics['precision']:.4f}', f'{test_metrics['precision']:.4f}'],
    ["Class 0 precision", f'{train_metrics['0_precision']:.4f}', f'{test_metrics['0_precision']:.4f}'],
    ["Class 1 precision", f'{train_metrics['1_precision']:.4f}', f'{test_metrics['1_precision']:.4f}'],
    ["AUC-ROC", f'{train_metrics['auc-roc']:.4f}', f'{test_metrics['auc-roc']:.4f}'],
    ["AUC-PRC", f'{train_metrics['auc-prc']:.4f}', f'{test_metrics['auc-prc']:.4f}'],
]

print('\nSVM Results:\n')
print(f'Best params: {best_svm_params}\n')
print(tabulate(result_body, headers=result_header, tablefmt='grid'))

with open('results/bace_class_scaffold_maccs_svm.txt', 'w') as file:
    file.write('BACE classification | Scaffold Splitting | MACCS Features | SVM Model\n\n')
    file.write('Results:\n\n')
    file.write(f'Best params: {best_svm_params}\n\n')
    file.write(tabulate(result_body, headers=result_header, tablefmt='grid'))

100%|██████████| 100/100 [00:04<00:00, 23.59trial/s, best loss: -0.7837837837837838]

SVM Results:

Best params: {'C': 0.6448662503199115, 'kernel': 'poly'}

+-------------------+---------+--------+
| Metrics           | Train   | Test   |
| Accuracy          | 0.8685  | 0.6776 |
+-------------------+---------+--------+
| Recall            |         |        |
+-------------------+---------+--------+
| Overall recall    | 0.8685  | 0.6776 |
+-------------------+---------+--------+
| Class 0 recall    | 0.8735  | 0.9718 |
+-------------------+---------+--------+
| Class 1 recall    | 0.8623  | 0.4198 |
+-------------------+---------+--------+
| Precision         |         |        |
+-------------------+---------+--------+
| Overall precision | 0.8688  | 0.7811 |
+-------------------+---------+--------+
| Class 0 precision | 0.8865  | 0.5948 |
+-------------------+---------+--------+
| Class 1 precision | 0.8470  | 0.9444 |
+-------------------+---------+--------+
| AUC-ROC           | 

#### Random Forest

In [5]:
# Hyperparameters tuning with Hyperopt
n_feats = len(X_train[0])
trials = Trials()

rf_search_space = {
    'n_estimators': scope.int(hp.quniform('n_estimators', 5, 100, 5)),
    'max_depth': scope.int(hp.quniform('max_depth', 2, 20, 1)),
    'max_features': scope.int(hp.quniform('max_features', 5, int(n_feats/2), 5)),
    'min_samples_leaf': scope.int(hp.quniform('min_samples_leaf', 2, 20, 1)),
    'min_samples_split': scope.int(hp.quniform('min_samples_split', 2, 20, 1))
}

def rf_objective(params):
    model = RandomForestClassifier(
        n_estimators=params['n_estimators'], 
        max_depth=params['max_depth'], 
        max_features=params['max_features'], 
        min_samples_leaf=params['min_samples_leaf'], 
        min_samples_split=params['min_samples_split'],
        random_state=SEED,
        n_jobs=1
    )
    model.fit(X_train, y_train)
    y_valid_hat = model.predict(X_valid)
    f1 = f1_score(y_valid, y_valid_hat)
    return -f1

best_rf_params = fmin(
    fn=rf_objective,
    space=rf_search_space,
    algo=tpe.suggest,
    max_evals=100,
    trials=trials
)

best_rf_params = space_eval(rf_search_space, best_rf_params)

model = RandomForestClassifier(**best_rf_params, random_state=SEED)
model.fit(X_merge, y_merge)
y_train_pred = model.predict(X_merge)
y_train_score = model.predict_proba(X_merge)[:, 1]
y_test_pred = model.predict(X_test)
y_test_score = model.predict_proba(X_test)[:, 1]

# Calculate metrics

train_metrics = classification_metrics(y_merge, y_train_pred, y_train_score)
test_metrics = classification_metrics(y_test, y_test_pred, y_test_score)

# Print

result_header = ['Metrics', 'Train', 'Test']
result_body = [
    ["Accuracy", f'{train_metrics['accuracy']:.4f}', f'{test_metrics['accuracy']:.4f}'],
    ["Recall"],
    ["Overall recall", f'{train_metrics['recall']:.4f}', f'{test_metrics['recall']:.4f}'],
    ["Class 0 recall", f'{train_metrics['0_recall']:.4f}', f'{test_metrics['0_recall']:.4f}'],
    ["Class 1 recall", f'{train_metrics['1_recall']:.4f}', f'{test_metrics['1_recall']:.4f}'],
    ["Precision", '', ''],
    ["Overall precision", f'{train_metrics['precision']:.4f}', f'{test_metrics['precision']:.4f}'],
    ["Class 0 precision", f'{train_metrics['0_precision']:.4f}', f'{test_metrics['0_precision']:.4f}'],
    ["Class 1 precision", f'{train_metrics['1_precision']:.4f}', f'{test_metrics['1_precision']:.4f}'],
    ["AUC-ROC", f'{train_metrics['auc-roc']:.4f}', f'{test_metrics['auc-roc']:.4f}'],
    ["AUC-PRC", f'{train_metrics['auc-prc']:.4f}', f'{test_metrics['auc-prc']:.4f}'],
]

print('\nRF Results:\n')
print(f'Best params: {best_rf_params}\n')
print(tabulate(result_body, headers=result_header, tablefmt='grid'))

with open('results/bace_class_scaffold_maccs_rf.txt', 'w') as file:
    file.write('BACE classification | Scaffold Splitting | MACCS Features | RF Model\n\n')
    file.write('Results:\n\n')
    file.write(f'Best params: {best_rf_params}\n\n')
    file.write(tabulate(result_body, headers=result_header, tablefmt='grid'))

100%|██████████| 100/100 [00:10<00:00,  9.67trial/s, best loss: -0.7657657657657657]

RF Results:

Best params: {'max_depth': 20, 'max_features': 55, 'min_samples_leaf': 2, 'min_samples_split': 8, 'n_estimators': 40}

+-------------------+---------+--------+
| Metrics           | Train   | Test   |
| Accuracy          | 0.9118  | 0.6711 |
+-------------------+---------+--------+
| Recall            |         |        |
+-------------------+---------+--------+
| Overall recall    | 0.9118  | 0.6711 |
+-------------------+---------+--------+
| Class 0 recall    | 0.9121  | 0.9577 |
+-------------------+---------+--------+
| Class 1 recall    | 0.9115  | 0.4198 |
+-------------------+---------+--------+
| Precision         |         |        |
+-------------------+---------+--------+
| Overall precision | 0.9121  | 0.7659 |
+-------------------+---------+--------+
| Class 0 precision | 0.9269  | 0.5913 |
+-------------------+---------+--------+
| Class 1 precision | 0.8939  | 0.9189 |
+--

#### XGBoost

In [6]:
# Hyperparameters tuning with Hyperopt
trials = Trials()

xgb_search_space = {
    'n_estimators': scope.int(hp.quniform('n_estimators', 5, 300, 5)),
    'max_depth': scope.int(hp.quniform('max_depth', 1, 10, 1)),
    "min_child_weight": scope.int(hp.quniform("min_child_weight", 1, 10, 1)),
    "subsample": hp.uniform("subsample", 0.4, 1.0),
    "colsample_bytree": hp.uniform("colsample_bytree", 0.4, 1.0),
    "reg_lambda": hp.loguniform("reg_lambda", np.log(0.00001), np.log(100)),
    "reg_alpha": hp.loguniform("reg_alpha", np.log(0.001), np.log(1000)),
    "learning_rate": hp.loguniform("learning_rate", np.log(0.0001), np.log(1.)),
    "booster": hp.choice('booster', ['gbtree', 'gblinear']),
    'tree_method': 'hist',
    'gamma': hp.uniform('gamma', 0., 5.)
}

def xgb_objective(params):
    model = XGBClassifier(
        n_estimators=params['n_estimators'], 
        max_depth=params['max_depth'], 
        min_child_weight=params['min_child_weight'], 
        subsample=params['subsample'], 
        colsample_bytree=params['colsample_bytree'],
        reg_lambda=params['reg_lambda'],
        reg_alpha=params['reg_alpha'],
        learning_rate=params['learning_rate'],
        booster=params['booster'],
        tree_method=params['tree_method'],
        gamma=params['gamma'],
        random_state=SEED,
        verbosity=0
    )
    model.fit(X_train, y_train)
    y_valid_hat = model.predict(X_valid)
    f1 = f1_score(y_valid, y_valid_hat)
    return -f1

best_xgb_params = fmin(
    fn=xgb_objective,
    space=xgb_search_space,
    algo=tpe.suggest,
    max_evals=100,
    trials=trials
)

best_xgb_params = space_eval(xgb_search_space, best_xgb_params)

model = XGBClassifier(**best_xgb_params, random_state=SEED)
model.fit(X_merge, y_merge)
y_train_pred = model.predict(X_merge)
y_train_score = model.predict_proba(X_merge)[:, 1]
y_test_pred = model.predict(X_test)
y_test_score = model.predict_proba(X_test)[:, 1]

# Calculate metrics

train_metrics = classification_metrics(y_merge, y_train_pred, y_train_score)
test_metrics = classification_metrics(y_test, y_test_pred, y_test_score)

# Print

result_header = ['Metrics', 'Train', 'Test']
result_body = [
    ["Accuracy", f'{train_metrics['accuracy']:.4f}', f'{test_metrics['accuracy']:.4f}'],
    ["Recall"],
    ["Overall recall", f'{train_metrics['recall']:.4f}', f'{test_metrics['recall']:.4f}'],
    ["Class 0 recall", f'{train_metrics['0_recall']:.4f}', f'{test_metrics['0_recall']:.4f}'],
    ["Class 1 recall", f'{train_metrics['1_recall']:.4f}', f'{test_metrics['1_recall']:.4f}'],
    ["Precision", '', ''],
    ["Overall precision", f'{train_metrics['precision']:.4f}', f'{test_metrics['precision']:.4f}'],
    ["Class 0 precision", f'{train_metrics['0_precision']:.4f}', f'{test_metrics['0_precision']:.4f}'],
    ["Class 1 precision", f'{train_metrics['1_precision']:.4f}', f'{test_metrics['1_precision']:.4f}'],
    ["AUC-ROC", f'{train_metrics['auc-roc']:.4f}', f'{test_metrics['auc-roc']:.4f}'],
    ["AUC-PRC", f'{train_metrics['auc-prc']:.4f}', f'{test_metrics['auc-prc']:.4f}'],
]

print('\nXGBoost Results:\n')
print(f'Best params: {best_xgb_params}\n')
print(tabulate(result_body, headers=result_header, tablefmt='grid'))

with open('results/bace_class_scaffold_maccs_xgb.txt', 'w') as file:
    file.write('BACE classification | Scaffold Splitting | MACCS Features | XGBoost Model\n\n')
    file.write('Results:\n\n')
    file.write(f'Best params: {best_xgb_params}\n\n')
    file.write(tabulate(result_body, headers=result_header, tablefmt='grid'))

100%|██████████| 100/100 [00:15<00:00,  6.42trial/s, best loss: -0.7476635514018691]

XGBoost Results:

Best params: {'booster': 'gbtree', 'colsample_bytree': 0.40230957861188427, 'gamma': 1.7577303773478095, 'learning_rate': 0.5945091351116877, 'max_depth': 9, 'min_child_weight': 2, 'n_estimators': 60, 'reg_alpha': 0.006884678253406707, 'reg_lambda': 0.00012181130295506245, 'subsample': 0.7591560656349016, 'tree_method': 'hist'}

+-------------------+---------+--------+
| Metrics           | Train   | Test   |
| Accuracy          | 0.8949  | 0.6579 |
+-------------------+---------+--------+
| Recall            |         |        |
+-------------------+---------+--------+
| Overall recall    | 0.8949  | 0.6579 |
+-------------------+---------+--------+
| Class 0 recall    | 0.8935  | 0.8732 |
+-------------------+---------+--------+
| Class 1 recall    | 0.8967  | 0.4691 |
+-------------------+---------+--------+
| Precision         |         |        |
+-------------------+---------+-

#### ANN

In [7]:
trials = Trials()

class FCNClassifier(nn.Module):
    def __init__(
            self,
            input_dim,
            hidden_dim_1=128,
            hidden_dim_2=128,
            hidden_dim_3=128,
            hidden_dim_4=128,
            dropout_rate=0.2,
            activation='relu',
            n_layers=4
    ):
        super(FCNClassifier, self).__init__()
        
        self.n_layers = n_layers
        self.dropout_rate = dropout_rate
        self.activation = activation

        self.fc1 = nn.Linear(input_dim, hidden_dim_1, bias=True)
        self.fc2 = nn.Linear(hidden_dim_1, hidden_dim_2, bias=True)
        self.fc3 = nn.Linear(hidden_dim_2, hidden_dim_3, bias=True)
        self.fc4 = nn.Linear(hidden_dim_3, hidden_dim_4, bias=True)
        
        output_dims = [hidden_dim_1, hidden_dim_2, hidden_dim_3, hidden_dim_4]
        last_hidden_dim = output_dims[n_layers-1]
        self.out = nn.Linear(last_hidden_dim, 1)

        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):

        x = self.fc1(x)                     # Layer 1
        x = self._activation(x)
        x = self.dropout(x)
    
        if self.n_layers >= 2:              # Layer 2 (if applicable)
            x = self.fc2(x)
            x = self._activation(x)
            x = self.dropout(x)
        
        if self.n_layers >= 3:              # Layer 3 (if applicable)
            x = self.fc3(x)
            x = self._activation(x)
            x = self.dropout(x)

        if self.n_layers >= 4:              # Layer 4 (if applicable)
            x = self.fc4(x)
            x = self._activation(x)
            x = self.dropout(x)

        x = self.out(x)
        x = torch.sigmoid(x)

        return x
    
    def _activation(self, x):
        if self.activation == 'relu':
            return F.relu(x)
        elif self.activation == 'gelu':
            return F.gelu(x)
        elif self.activation == 'elu':
            return F.elu(x)
        elif self.activation == 'selu':
            return F.selu(x)
        else:
            return F.relu(x)
    
def train_ann(
        model,
        X_train,
        y_train,
        X_valid,
        y_valid,
        learning_rate = 0.001,
        batch_size=128,
        epochs=100,
        patience=10,
        device='cpu'
):
    model = model.to(device)

    train_dataset = TensorDataset(
        torch.as_tensor(np.asarray(X_train), dtype=torch.float32),
        torch.as_tensor(np.asarray(y_train), dtype=torch.float32).unsqueeze(1)
    )
    train_loader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True
    )
    
    optimizer = optim.AdamW(model.parameters(), lr=learning_rate)
    criterion = nn.BCELoss()
    
    early_stopping = True if X_valid is not None and y_valid is not None else False
    best_f1 = 0
    no_improvement_count = 0
    best_state = None
    best_num_epochs = 0

    for epoch in range(epochs):
        # Train
        model.train()
        for batch_x, batch_y in train_loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            optimizer.zero_grad()
            outputs = model(batch_x)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()

        # Valid
        if early_stopping:
            model.eval()
            with torch.no_grad():
                X_valid_tensor = torch.FloatTensor(X_valid).to(device)
                y_valid_out = model(X_valid_tensor).cpu().numpy().flatten()
                y_valid_pred = (y_valid_out > 0.5).astype(int)
                f1 = f1_score(y_valid, y_valid_pred)

            if f1 > best_f1:
                best_f1 = f1
                no_improvement_count = 0
                best_state = model.state_dict().copy()
                best_num_epochs = epoch + 1
            else:
                no_improvement_count += 1
                if no_improvement_count >= patience:
                    break
    
    if early_stopping and best_state is not None:
        model.load_state_dict(best_state)
        return model, best_num_epochs
        
    return model, epochs

def predict_test(model, X, device='cpu'):
    model.eval()
    with torch.no_grad():
        X_tensor = torch.FloatTensor(X).to(device)
        predictions = model(X_tensor).cpu().numpy().flatten()

    return predictions

def ann_objective(
        params,
        X_train,
        y_train,
        X_valid,
        y_valid,
        input_dim,
        device='cpu'
):
    model = FCNClassifier(
        input_dim=input_dim,
        hidden_dim_1=params['hidden_dim_1'],
        hidden_dim_2=params['hidden_dim_2'],
        hidden_dim_3=params['hidden_dim_3'],
        hidden_dim_4=params['hidden_dim_4'],
        dropout_rate=params['dropout_rate'],
        activation=params['activation'],
        n_layers=params['n_layers']
    )

    model, best_num_epoch = train_ann(
        model,
        X_train,
        y_train,
        X_valid,
        y_valid,
        learning_rate=params['learning_rate'],
        epochs=params['epochs'],
        patience=params['patience'],
        device=device,
        batch_size=params['batch_size']
    )

    model.eval()
    with torch.no_grad():
        X_valid_tensor = torch.FloatTensor(X_valid).to(device)
        y_valid_out = model(X_valid_tensor).cpu().numpy().flatten()
        y_valid_pred = (y_valid_out > 0.5).astype(int)
        f1 = f1_score(y_valid, y_valid_pred)

    return {
        'loss': -f1,
        'status': 'ok',
        'best_num_epoch': best_num_epoch
    }

ann_search_space = {
    'n_layers': scope.int(hp.quniform('n_layers', 1, 4, 1)),
    'hidden_dim_1': scope.int(hp.quniform('hidden_dim_1', 32, 192, 32)),
    'hidden_dim_2': scope.int(hp.quniform('hidden_dim_2', 32, 192, 32)),
    'hidden_dim_3': scope.int(hp.quniform('hidden_dim_3', 32, 192, 32)),
    'hidden_dim_4': scope.int(hp.quniform('hidden_dim_4', 32, 192, 32)),
    'dropout_rate': hp.uniform('dropout_rate', 0.0, 0.5),
    'learning_rate': hp.loguniform('learning_rate', np.log(0.0001), np.log(0.01)),
    'batch_size': scope.int(hp.quniform('batch_size', 32, 128, 16)),
    'epochs': 100,
    'patience': 10,
    'activation': hp.choice('activation', ['relu', 'selu', 'elu', 'gelu'])
}

input_dim = len(X_train[0])

def objective_fn(params):
    return ann_objective(
        params=params,
        X_train=X_train,
        y_train=y_train,
        X_valid=X_valid,
        y_valid=y_valid,
        input_dim=input_dim
    )

best_ann_params = fmin(
    fn=objective_fn,
    space=ann_search_space,
    algo=tpe.suggest,
    max_evals=100,
    trials=trials
)

best_ann_params = space_eval(ann_search_space, best_ann_params)

model = FCNClassifier(
    input_dim=input_dim,
    hidden_dim_1=best_ann_params['hidden_dim_1'],
    hidden_dim_2=best_ann_params['hidden_dim_2'],
    hidden_dim_3=best_ann_params['hidden_dim_3'],
    hidden_dim_4=best_ann_params['hidden_dim_4'],
    dropout_rate=best_ann_params['dropout_rate'],
    activation=best_ann_params['activation'],
    n_layers=best_ann_params['n_layers']
)

best_trial = trials.best_trial
best_num_epochs = best_trial['result']['best_num_epoch']

model, _ = train_ann(
    model,
    X_merge,
    y_merge,
    X_valid=None,
    y_valid=None,
    learning_rate=best_ann_params['learning_rate'],
    batch_size=best_ann_params['batch_size'],
    epochs=best_num_epochs,
)

y_train_score = predict_test(model, X_merge)
y_train_pred = (y_train_score > 0.5).astype(int)

y_test_score = predict_test(model, X_test)
y_test_pred = (y_test_score > 0.5).astype(int)

train_metrics = classification_metrics(y_merge, y_train_pred, y_train_score)
test_metrics = classification_metrics(y_test, y_test_pred, y_test_score)

# Print

result_header = ['Metrics', 'Train', 'Test']
result_body = [
    ["Accuracy", f'{train_metrics['accuracy']:.4f}', f'{test_metrics['accuracy']:.4f}'],
    ["Recall"],
    ["Overall recall", f'{train_metrics['recall']:.4f}', f'{test_metrics['recall']:.4f}'],
    ["Class 0 recall", f'{train_metrics['0_recall']:.4f}', f'{test_metrics['0_recall']:.4f}'],
    ["Class 1 recall", f'{train_metrics['1_recall']:.4f}', f'{test_metrics['1_recall']:.4f}'],
    ["Precision", '', ''],
    ["Overall precision", f'{train_metrics['precision']:.4f}', f'{test_metrics['precision']:.4f}'],
    ["Class 0 precision", f'{train_metrics['0_precision']:.4f}', f'{test_metrics['0_precision']:.4f}'],
    ["Class 1 precision", f'{train_metrics['1_precision']:.4f}', f'{test_metrics['1_precision']:.4f}'],
    ["AUC-ROC", f'{train_metrics['auc-roc']:.4f}', f'{test_metrics['auc-roc']:.4f}'],
    ["AUC-PRC", f'{train_metrics['auc-prc']:.4f}', f'{test_metrics['auc-prc']:.4f}'],
]

print('\nANN Results:\n')
print(f'Best params: {best_ann_params}\n')
print(tabulate(result_body, headers=result_header, tablefmt='grid'))

with open('results/bace_class_scaffold_maccs_ann.txt', 'w') as file:
    file.write('BACE classification | Scaffold Splitting | MACCS Features | ANN Model\n\n')
    file.write('Results:\n\n')
    file.write(f'Best params: {best_ann_params}\n\n')
    file.write(tabulate(result_body, headers=result_header, tablefmt='grid'))

100%|██████████| 100/100 [01:16<00:00,  1.30trial/s, best loss: -0.8200836820083682]

ANN Results:

Best params: {'activation': 'relu', 'batch_size': 112, 'dropout_rate': 0.2169016734007006, 'epochs': 100, 'hidden_dim_1': 160, 'hidden_dim_2': 96, 'hidden_dim_3': 192, 'hidden_dim_4': 160, 'learning_rate': 0.0026800644950214776, 'n_layers': 2, 'patience': 10}

+-------------------+---------+--------+
| Metrics           | Train   | Test   |
| Accuracy          | 0.8281  | 0.6711 |
+-------------------+---------+--------+
| Recall            |         |        |
+-------------------+---------+--------+
| Overall recall    | 0.8281  | 0.6711 |
+-------------------+---------+--------+
| Class 0 recall    | 0.7510  | 0.7887 |
+-------------------+---------+--------+
| Class 1 recall    | 0.9230  | 0.5679 |
+-------------------+---------+--------+
| Precision         |         |        |
+-------------------+---------+--------+
| Overall precision | 0.8458  | 0.6893 |
+-------------------+---