# In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from imblearn.over_sampling import SMOTE
from sklearn.metrics import (classification_report, confusion_matrix, roc_auc_score,
                             accuracy_score, precision_score, recall_score, f1_score, matthews_corrcoef)
from sklearn.model_selection import train_test_split, KFold, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.initializers import HeNormal, GlorotUniform
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.models import Sequential

# ---------------------------------------------------------------------------
# Cross-validated training and evaluation of a dense binary classifier.
#
# Every data-dependent preprocessing step (feature scaling, SMOTE
# oversampling) is fitted inside each fold on the training portion only.
# Fitting them on the full data set before splitting would let held-out rows
# shape the scaler statistics and would put SMOTE-generated points
# (interpolated from rows that end up in training) into the test folds, which
# inflates every reported metric.
# ---------------------------------------------------------------------------

RANDOM_STATE = 42
N_SPLITS = 5
EPOCHS = 50
BATCH_SIZE = 32
VALIDATION_FRACTION = 0.2


def _build_dnn(n_features: int) -> Sequential:
    """Return a freshly initialised, compiled dense network.

    The network stacks four ReLU hidden layers (256, 128, 64, 32 units), each
    followed by dropout, and ends in a single sigmoid unit for binary
    classification. It is compiled with Adam, binary cross-entropy loss and
    accuracy as the tracked metric.

    Args:
        n_features: Number of input columns the first layer should expect.

    Returns:
        The compiled ``Sequential`` model, ready for ``fit``.
    """
    hidden_layout = [(256, 0.2), (128, 0.1), (64, 0.1), (32, 0.1)]

    layers = []
    for position, (units, drop_rate) in enumerate(hidden_layout):
        # Only the first layer declares the input shape.
        shape_kwargs = {'input_shape': (n_features,)} if position == 0 else {}
        # Each layer gets its own initializer instance instead of sharing one.
        layers.append(Dense(units, activation='relu', kernel_initializer=HeNormal(), **shape_kwargs))
        layers.append(Dropout(drop_rate))
    # Single output unit for binary classification
    layers.append(Dense(1, activation='sigmoid', kernel_initializer=GlorotUniform()))

    network = Sequential(layers)
    network.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return network


# Load data from CSV file
file_path = r"D:\Salman_Sir_Paper\2nd versions\mt\shap.csv"
data = pd.read_csv(file_path)

# The last column is taken as the target variable; all other columns are features
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

# Folds are cut from the raw data, so stratify to keep the class ratio (and at
# least some samples of each class) in every held-out fold.
kf = StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=RANDOM_STATE)

# Initialize metrics storage
fold_metrics = []

for fold, (train_index, test_index) in enumerate(kf.split(X, y), start=1):
    X_test, y_test = X[test_index], y[test_index]

    # Carve the validation set out of the training fold *before* oversampling.
    # Keras' validation_split would take the last rows of the array, which
    # after SMOTE are the appended synthetic samples.
    X_fit, X_val, y_fit, y_val = train_test_split(
        X[train_index],
        y[train_index],
        test_size=VALIDATION_FRACTION,
        stratify=y[train_index],
        random_state=RANDOM_STATE,
    )

    # Standardize using statistics from the training portion only
    scaler = StandardScaler()
    X_fit = scaler.fit_transform(X_fit)
    X_val = scaler.transform(X_val)
    X_test = scaler.transform(X_test)

    # Handle class imbalance with SMOTE on the training portion only, so the
    # validation and test sets contain nothing but real samples
    smote = SMOTE(random_state=RANDOM_STATE)
    X_train, y_train = smote.fit_resample(X_fit, y_fit)

    # Build and train a fresh model for this fold
    model = _build_dnn(X_train.shape[1])
    history = model.fit(
        X_train,
        y_train,
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        validation_data=(X_val, y_val),
        verbose=1,
    )

    # Evaluate the model on the untouched held-out fold
    loss, accuracy = model.evaluate(X_test, y_test, verbose=0)

    # Generate predictions: sigmoid scores, then hard labels at the 0.5 threshold
    y_pred = model.predict(X_test)
    y_pred_classes = (y_pred > 0.5).astype("int32")

    # Metrics calculation (AUC uses the raw scores, the rest use hard labels)
    precision = precision_score(y_test, y_pred_classes, average='weighted')
    recall = recall_score(y_test, y_pred_classes, average='weighted')
    f1 = f1_score(y_test, y_pred_classes, average='weighted')
    mcc = matthews_corrcoef(y_test, y_pred_classes)
    auc = roc_auc_score(y_test, y_pred)

    print(f"Fold {fold} Metrics:")
    print(f"Loss: {loss:.4f}, Accuracy: {accuracy:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1:.4f}, MCC: {mcc:.4f}, AUC: {auc:.4f}\n")

    fold_metrics.append({'Fold': fold, 'Loss': loss, 'Accuracy': accuracy, 'Precision': precision,
                         'Recall': recall, 'F1 Score': f1, 'MCC': mcc, 'AUC': auc})

# Gather the per-fold result dictionaries into one table and print it
metrics_df = pd.DataFrame(fold_metrics)
print(metrics_df)

# Confusion matrix heatmap built from whatever y_test / y_pred_classes
# currently hold, i.e. the values left behind by the last fold
conf_matrix = confusion_matrix(y_test, y_pred_classes)
heatmap_ax = sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues')
heatmap_ax.set_title('Confusion Matrix')
heatmap_ax.set_xlabel('Predicted')
heatmap_ax.set_ylabel('Actual')
plt.show()

# Training curves from the `history` object of the last fitted model,
# drawn on a 2x5 grid of axes that is flattened for simple indexing
fig, grid = plt.subplots(2, 5, figsize=(20, 10))
axes = grid.ravel()

# One row per real panel:
# (training key, validation key, training label, validation label, title, y-axis label)
curve_panels = [
    ('accuracy', 'val_accuracy', 'Training Accuracy', 'Validation Accuracy', 'Model Accuracy', 'Accuracy'),
    ('loss', 'val_loss', 'Training Loss', 'Validation Loss', 'Model Loss', 'Loss'),
]
for panel, (train_key, val_key, train_label, val_label, title, y_label) in zip(axes, curve_panels):
    panel.plot(history.history[train_key], label=train_label)
    panel.plot(history.history[val_key], label=val_label)
    panel.set_title(title)
    panel.set_xlabel('Epochs')
    panel.set_ylabel(y_label)
    panel.legend()

# The other eight cells only show a numbered placeholder with the axes hidden
for i, panel in enumerate(axes[2:], start=2):
    panel.text(0.5, 0.5, f'Plot {i+1}', fontsize=15, ha='center')
    panel.set_axis_off()

plt.tight_layout()
plt.show()
