In [97]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

In [98]:
# Load the dataset
df = pd.read_csv('final_mental_health_dataset.csv')

In [99]:
# Features (X)
features = ['gender', 'bmi', 'phq_score', 'gad_score', 'epworth_score', 
            'depression_severity', 'anxiety_severity', 'suicidal', 'depressiveness', 
            'anxiousness', 'anxiety_diagnosis', 'sleepiness']
X = df[features].copy()

In [100]:
# Standardize features
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [101]:
# Targets (y) - The 17 binary conditions
conditions = [
    'Major Depressive Disorder (MDD)', 'Dysthymia', 'Seasonal Affective Disorder (SAD)', 
    'Generalized Anxiety Disorder (GAD)', 'Panic Disorder', 'PTSD', 
    'Insomnia', 'Hypersomnia', 'Sleep Apnea', 
    'Suicidal Ideation', 'High-Risk Crisis', 
    'Chronic Stress', 'Adjustment Disorder', 'Burnout'
]
y = df[conditions].copy()

In [102]:
# Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [103]:
# Convert data to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32)

In [104]:
# Prepare DataLoader
train_data = TensorDataset(X_train_tensor, y_train_tensor)
test_data = TensorDataset(X_test_tensor, y_test_tensor)
train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
test_loader = DataLoader(test_data, batch_size=64, shuffle=False)

In [105]:
class NeuralNetwork(nn.Module):
    def __init__(self, input_size, output_size):
        super(NeuralNetwork, self).__init__()
        self.layer1 = nn.Linear(input_size, 128)
        self.bn1 = nn.BatchNorm1d(128)
        self.layer2 = nn.Linear(128, 64)
        self.bn2 = nn.BatchNorm1d(64)
        self.output = nn.Linear(64, output_size)
        self.dropout = nn.Dropout(0.3)
    
    def forward(self, x):
        x = torch.relu(self.bn1(self.layer1(x)))
        x = self.dropout(x)
        x = torch.relu(self.bn2(self.layer2(x)))
        x = self.dropout(x)
        x = self.output(x)  # Remove sigmoid activation
        return x


In [106]:
# Initialize the model, loss function, and optimizer
model = NeuralNetwork(X_train.shape[1], y_train.shape[1])  # Output size = number of conditions
criterion = nn.BCEWithLogitsLoss()  # Use BCEWithLogitsLoss for multi-label classification
optimizer = optim.Adam(model.parameters(), lr=0.0005, weight_decay=0.01)  # L2 Regularization

In [107]:
# Learning rate scheduler (optional improvement)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=5, factor=0.5)

In [108]:
# Train the Neural Network with Early Stopping
epochs = 100  # Increased number of epochs
patience = 5  # Early stopping patience
best_loss = float('inf')
patience_counter = 0

for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        output = model(X_batch)
        loss = criterion(output, y_batch)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    
    # Validation loss
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            output = model(X_batch)
            loss = criterion(output, y_batch)
            val_loss += loss.item()
    val_loss /= len(test_loader)
    
    # Early stopping and scheduler
    if val_loss < best_loss:
        best_loss = val_loss
        patience_counter = 0
    else:
        patience_counter += 1
    
    scheduler.step(val_loss)  # Step the scheduler

In [109]:
# Evaluate the models
model.eval()
with torch.no_grad():
    y_pred = []
    for X_batch, _ in test_loader:
        output = model(X_batch)  # Output for all conditions
        y_pred.append(output)
    
    y_pred = torch.cat(y_pred).numpy()
    y_pred_binary = (y_pred > 0.30).astype(int)  # Convert probabilities to binary predictions

In [110]:
# Store results
results = []

In [111]:
# Train and evaluate models for each condition
for i, condition in enumerate(conditions):
    print(f"Training models for {condition}...")

    # Extract target for the current condition
    y_train_cond = y_train_tensor[:, i]  # Extract the i-th condition
    y_test_cond = y_test_tensor[:, i]  # Extract the i-th condition
    
    # Logistic Regression
    lr_model = LogisticRegression(max_iter=30, penalty='l2', random_state=42, C=1)
    lr_model.fit(X_train, y_train_cond)
    lr_pred = lr_model.predict(X_test)
    lr_proba = lr_model.predict_proba(X_test)[:, 1]

    # Evaluate Logistic Regression
    lr_accuracy = accuracy_score(y_test_cond, lr_pred)
    lr_precision = precision_score(y_test_cond, lr_pred, zero_division=0)
    lr_recall = recall_score(y_test_cond, lr_pred, zero_division=0)
    lr_f1 = f1_score(y_test_cond, lr_pred, zero_division=0)
    lr_roc_auc = roc_auc_score(y_test_cond, lr_proba)

    # Neural Network
    nn_accuracy = accuracy_score(y_test_cond, y_pred_binary[:, i])  # Evaluate for current condition
    nn_precision = precision_score(y_test_cond, y_pred_binary[:, i], zero_division=0)
    nn_recall = recall_score(y_test_cond, y_pred_binary[:, i], zero_division=0)
    nn_f1 = f1_score(y_test_cond, y_pred_binary[:, i], zero_division=0)
    nn_roc_auc = roc_auc_score(y_test_cond, y_pred[:, i])

    # Store results
    results.append({
        'Condition': condition,
        'Logistic Regression Accuracy': lr_accuracy,
        'Neural Network Accuracy': nn_accuracy,

        'Logistic Regression Precision': lr_precision,
        'Neural Network Precision': nn_precision,

        'Logistic Regression Recall': lr_recall,
        'Neural Network Recall': nn_recall,

        'Logistic Regression F1': lr_f1,
        'Neural Network F1': nn_f1,

        'Logistic Regression ROC-AUC': lr_roc_auc,
        'Neural Network ROC-AUC': nn_roc_auc
    })

Training models for Major Depressive Disorder (MDD)...
Training models for Dysthymia...
Training models for Seasonal Affective Disorder (SAD)...
Training models for Generalized Anxiety Disorder (GAD)...
Training models for Panic Disorder...
Training models for PTSD...
Training models for Insomnia...
Training models for Hypersomnia...
Training models for Sleep Apnea...
Training models for Suicidal Ideation...
Training models for High-Risk Crisis...
Training models for Chronic Stress...
Training models for Adjustment Disorder...
Training models for Burnout...


In [112]:
# Convert results to DataFrame
results_df = pd.DataFrame(results)
results_df.head()

Unnamed: 0,Condition,Logistic Regression Accuracy,Neural Network Accuracy,Logistic Regression Precision,Neural Network Precision,Logistic Regression Recall,Neural Network Recall,Logistic Regression F1,Neural Network F1,Logistic Regression ROC-AUC,Neural Network ROC-AUC
0,Major Depressive Disorder (MDD),0.987234,0.978723,0.916667,0.9,0.846154,0.692308,0.88,0.782609,0.998614,0.991337
1,Dysthymia,0.987234,0.987234,0.882353,0.933333,0.9375,0.875,0.909091,0.903226,0.995148,0.995719
2,Seasonal Affective Disorder (SAD),0.948936,0.948936,0.588235,1.0,0.666667,0.2,0.625,0.333333,0.970909,0.975455
3,Generalized Anxiety Disorder (GAD),0.995745,0.991489,1.0,1.0,0.970588,0.941176,0.985075,0.969697,0.999854,0.999854
4,Panic Disorder,0.991489,0.982979,1.0,1.0,0.857143,0.714286,0.923077,0.833333,1.0,0.999354


In [113]:
import torch
import torch.nn.functional as F
import joblib
import pandas as pd
from lime.lime_tabular import LimeTabularExplainer

In [114]:
# Set model to evaluation mode to prevent BatchNorm issues
model.eval()

NeuralNetwork(
  (layer1): Linear(in_features=12, out_features=128, bias=True)
  (bn1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer2): Linear(in_features=128, out_features=64, bias=True)
  (bn2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (output): Linear(in_features=64, out_features=14, bias=True)
  (dropout): Dropout(p=0.3, inplace=False)
)

In [115]:
# Generate LIME explanations for a sample prediction
explainer = LimeTabularExplainer(
    training_data=X_train,
    feature_names=features,
    class_names=conditions,
    mode='classification',
    discretize_continuous=True,
    random_state=42
)

In [116]:
# Choose a test instance to explain
sample_idx = 0
instance = X_test[sample_idx]
true_labels = y_test.iloc[sample_idx]

In [117]:
# Convert instance to tensor and predict
instance_tensor = torch.tensor(instance, dtype=torch.float32).unsqueeze(0)
with torch.no_grad():
    nn_output = model(instance_tensor).numpy()[0]
nn_prediction = (nn_output > 0.3).astype(int)


In [118]:
# LIME prediction function
def predict_fn(x):
    x_tensor = torch.tensor(x, dtype=torch.float32)
    with torch.no_grad():
        outputs = model(x_tensor).numpy()
        outputs = F.softmax(torch.tensor(outputs), dim=1).numpy()  # Apply softmax for probability outputs
    return outputs


In [119]:
# # Generate explanations for each predicted condition
# for i, condition in enumerate(conditions):
#     # if nn_prediction[i] == 1:
#     exp = explainer.explain_instance(
#         instance,
#         predict_fn,
#         num_features=len(features),
#         top_labels=len(conditions),
#         labels=(i,)
#     )
#     exp.save_to_file(f'lime_explanation_{condition}.html')


In [120]:
# Save the trained model
torch.save(model.state_dict(), 'mental_health_model.pth')

# Save the scaler
joblib.dump(scaler, 'scaler.pkl')

['scaler.pkl']