In [1]:

import numpy as np
import pandas as pd

from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn import svm, datasets, metrics
from sklearn.metrics import auc, roc_auc_score, roc_curve, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import minmax_scale
from sklearn.preprocessing import StandardScaler

from imblearn.combine import *
from imblearn.over_sampling import *

In [2]:
# Load the dataset from the CSV file in the working directory.
df = pd.read_csv('total_df.csv')

# Target vector: the 'toxicity' column.
y = df['toxicity']

# Feature matrix: every column from positional index 11 onward.
# NOTE(review): presumably the first 11 columns are identifiers/metadata and
# the label -- confirm against the CSV schema, since the offset is hard-coded.
x = df.iloc[:, 11:]

In [3]:
from sklearn.model_selection import StratifiedKFold

# Stratified 10-fold splitter: rows are shuffled before splitting, with a
# fixed random_state so the fold assignment is the same on every run.
cv = StratifiedKFold(
    n_splits=10,
    shuffle=True,
    random_state=8,
)

In [4]:
# Attention model

from keras.layers import Dense, Dropout, MultiHeadAttention
from keras import backend as K
from keras.layers import Softmax
from tensorflow import keras
import tensorflow
import tensorflow as tf
from tensorflow.keras.optimizers import Adam

def create_attention_model(input_dim, kernel_initializer=None, learning_rate=0.001):
    """Build and compile a binary classifier with a feature-wise attention gate.

    Architecture: a linear Dense layer produces one logit per input feature,
    a softmax over the feature axis turns the logits into weights, and the
    weights are multiplied element-wise with the raw inputs. The re-weighted
    features then pass through Dense(512, relu) -> BatchNormalization ->
    Dropout(0.25) -> Dense(1, sigmoid).

    Parameters
    ----------
    input_dim : int
        Number of input features (width of the feature matrix).
    kernel_initializer : optional
        Initializer for the 512-unit hidden layer. When None (the default),
        a fresh ``tf.keras.initializers.HeNormal()`` is used, so the function
        does not depend on a module-level ``initializer`` variable existing.
    learning_rate : float, optional
        Learning rate passed to the Adam optimizer. Defaults to 0.001.

    Returns
    -------
    tf.keras.Model
        Model compiled with Adam, binary cross-entropy loss and the
        "accuracy" metric.
    """
    if kernel_initializer is None:
        kernel_initializer = tf.keras.initializers.HeNormal()

    inputs = tf.keras.layers.Input(shape=(input_dim,))

    # Attention gate: one logit per feature, normalized across features.
    attn_logits = tf.keras.layers.Dense(input_dim, activation=None)(inputs)
    attn_score = tf.keras.layers.Softmax(axis=-1)(attn_logits)

    # Use a Keras layer for the element-wise product instead of a raw
    # tf.math op: raw TensorFlow functions cannot be applied to symbolic
    # Keras tensors under Keras 3, while Multiply works in every version.
    weighted_inputs = tf.keras.layers.Multiply()([inputs, attn_score])

    hidden = tf.keras.layers.Dense(
        512,
        activation="relu",
        kernel_initializer=kernel_initializer,
    )(weighted_inputs)
    hidden = tf.keras.layers.BatchNormalization()(hidden)
    hidden = tf.keras.layers.Dropout(rate=0.25)(hidden)
    outputs = tf.keras.layers.Dense(1, activation="sigmoid")(hidden)

    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    model.compile(
        # `learning_rate` is the supported argument name; the old `lr` alias
        # is deprecated and rejected by current Keras releases.
        optimizer=Adam(learning_rate=learning_rate, beta_1=0.9, beta_2=0.999),
        loss="binary_crossentropy",
        metrics=["accuracy"],
    )
    return model

# Training callbacks: keep the best model on disk and stop early when the
# monitored loss stops improving.
# NOTE(review): both callbacks monitor "val_loss", and the visible training
# code neither passes `callbacks` to fit() nor supplies validation data --
# confirm whether they are meant to be used.
callbacks = [
    tf.keras.callbacks.ModelCheckpoint(
        "random_DILIattention.h5",
        monitor="val_loss",
        save_best_only=True,
    ),
    tf.keras.callbacks.EarlyStopping(
        monitor="val_loss",
        patience=30,
        verbose=1,
    ),
]

# Number of input features (columns of the feature matrix).
input_dim = x.shape[1]

# He-normal weight initializer for layer kernels.
initializer = tf.keras.initializers.HeNormal()

# L2 weight regularizer with factor 0.001.
# NOTE(review): not referenced by any layer in the visible cells.
from tensorflow.keras import regularizers
regularizer = regularizers.l2(0.001)

# Training hyperparameters.
epochs = 10
batch_size = 32

In [6]:
from sklearn.metrics import f1_score
from sklearn.metrics import f1_score, roc_auc_score, confusion_matrix, precision_recall_curve
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import accuracy_score

# Stratified k-fold evaluation of the attention model: train a fresh model on
# each training split, score the held-out split, and report the mean of each
# classification metric over all folds.

# Per-fold metric values, keyed by metric name.
fold_metrics = {
    "accuracy": [],
    "precision": [],
    "recall": [],
    "specificity": [],
    "f1": [],
}

for train_idx, test_idx in cv.split(x, y):
    x_train, y_train = x.iloc[train_idx], y.iloc[train_idx]
    x_test, y_test = x.iloc[test_idx], y.iloc[test_idx]

    # A new model is built on every fold; clear the Keras global state left
    # behind by the previous one so memory does not grow across folds.
    tf.keras.backend.clear_session()

    # Train with the notebook-level `epochs` / `batch_size` settings rather
    # than repeating their values as literals here.
    nn = create_attention_model(input_dim=x.shape[1])
    nn.fit(x_train, y_train, epochs=epochs, batch_size=batch_size, verbose=0)

    # The model ends in a single sigmoid unit, so predict() yields one column
    # of probabilities; flatten it to 1-D before thresholding at 0.5.
    y_scores = nn.predict(x_test, verbose=0).ravel()
    y_pred = (y_scores > 0.5).astype(int)

    # Pin labels to [0, 1] so the matrix is always 2x2 and the four-way
    # unpacking cannot fail when a fold's labels/predictions hold one class.
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred, labels=[0, 1]).ravel()

    fold_metrics["accuracy"].append(accuracy_score(y_test, y_pred))
    fold_metrics["precision"].append(precision_score(y_test, y_pred))
    fold_metrics["recall"].append(recall_score(y_test, y_pred))  # same as sensitivity
    fold_metrics["specificity"].append(tn / (tn + fp))
    fold_metrics["f1"].append(f1_score(y_test, y_pred))

# Sum each metric over the folds (same left-to-right accumulation as a
# running total), then divide by the number of folds.
total_accuracy = sum(fold_metrics["accuracy"])
total_precision = sum(fold_metrics["precision"])
total_recall = sum(fold_metrics["recall"])
total_specificity = sum(fold_metrics["specificity"])
total_f1_score = sum(fold_metrics["f1"])

num_folds = cv.get_n_splits()
avg_accuracy = total_accuracy / num_folds
avg_precision = total_precision / num_folds
avg_recall = total_recall / num_folds
avg_specificity = total_specificity / num_folds
avg_f1_score = total_f1_score / num_folds

print("Average Accuracy:", avg_accuracy)
print("Average Precision:", avg_precision)
print("Average Recall/Sensitivity:", avg_recall)
print("Average Specificity:", avg_specificity)
print("Average F1 Score:", avg_f1_score)








































Average Accuracy: 0.7486486486486486
Average Precision: 0.7695032708843977
Average Recall/Sensitivity: 0.7414912280701754
Average Specificity: 0.7561672908863921
Average F1 Score: 0.7443325674289769
