In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import confusion_matrix, classification_report
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Conv1D, GlobalMaxPooling1D, LSTM

# === 1. Data Preprocessing ===

# Load the dataset
file_path = "Mental Health Dataset.csv"
df = pd.read_csv(file_path)

# Drop irrelevant columns (adjust as needed)
df = df.drop(columns=['Timestamp', 'Country', 'Occupation'])

# Fill missing values using the mode for each column
df = df.fillna(df.mode().iloc[0])

# Identify all categorical columns
categorical_cols = df.select_dtypes(include=['object']).columns.tolist()
print("Categorical columns:", categorical_cols)

# Encode all categorical columns using LabelEncoder
label_encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Separate features and target (target is "Mood_Swings")
X = df.drop(columns=['Mood_Swings'])
y = df['Mood_Swings']

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# For CNN and LSTM models, reshape data as a sequence (each feature as a "time step")
num_features = X_train_scaled.shape[1]
X_train_seq = X_train_scaled.reshape(-1, num_features, 1)
X_test_seq = X_test_scaled.reshape(-1, num_features, 1)

# Determine the number of classes for output layer
num_classes = len(np.unique(y))

# === 2. Define Deep Learning Models ===

# Model 1: Simple MLP
def build_mlp_model_simple(input_dim):
    model = Sequential([
        Dense(64, activation='relu', input_dim=input_dim),
        Dropout(0.3),
        Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# Model 2: Deep MLP with Batch Normalization
def build_mlp_model_deep(input_dim):
    model = Sequential([
        Dense(128, activation='relu', input_dim=input_dim),
        BatchNormalization(),
        Dropout(0.4),
        Dense(64, activation='relu'),
        BatchNormalization(),
        Dropout(0.3),
        Dense(32, activation='relu'),
        Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# Model 3: CNN Model (1D convolution)
def build_cnn_model(input_shape):
    model = Sequential([
        Conv1D(64, kernel_size=3, activation='relu', input_shape=input_shape),
        GlobalMaxPooling1D(),
        Dense(64, activation='relu'),
        Dropout(0.3),
        Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# Model 4: LSTM Model (treat features as a sequence)
def build_lstm_model(input_shape):
    model = Sequential([
        LSTM(64, input_shape=input_shape),
        Dropout(0.3),
        Dense(64, activation='relu'),
        Dropout(0.2),
        Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# === 3. Train and Evaluate Models ===

models = {
    "MLP_Simple": build_mlp_model_simple(X_train_scaled.shape[1]),
    "MLP_Deep": build_mlp_model_deep(X_train_scaled.shape[1]),
    "CNN": build_cnn_model((num_features, 1)),
    "LSTM": build_lstm_model((num_features, 1))
}

results = {}

for model_name, model in models.items():
    print(f"\n=== Training {model_name} Model ===")
    
    if model_name in ["CNN", "LSTM"]:
        history = model.fit(X_train_seq, y_train, epochs=10, batch_size=32,
                            validation_split=0.1, verbose=1)
        loss, acc = model.evaluate(X_test_seq, y_test, verbose=0)
        y_pred_probs = model.predict(X_test_seq)
    else:
        history = model.fit(X_train_scaled, y_train, epochs=10, batch_size=32,
                            validation_split=0.1, verbose=1)
        loss, acc = model.evaluate(X_test_scaled, y_test, verbose=0)
        y_pred_probs = model.predict(X_test_scaled)
    
    # Convert predicted probabilities to class labels
    y_pred = np.argmax(y_pred_probs, axis=1)
    
    # Generate confusion matrix and classification report
    cm = confusion_matrix(y_test, y_pred)
    report = classification_report(y_test, y_pred)
    
    print(f"{model_name} - Test Loss: {loss:.4f}, Test Accuracy: {acc:.4f}")
    print("Confusion Matrix:")
    print(cm)
    print("Classification Report:")
    print(report)
    
    results[model_name] = {
        "loss": loss,
        "accuracy": acc,
        "confusion_matrix": cm,
        "classification_report": report
    }

# === 4. Model Performance Comparison ===

print("\n=== Overall Model Performance ===")
for model_name, metrics in results.items():
    print(f"{model_name}: Accuracy = {metrics['accuracy']:.4f}")


Categorical columns: ['Gender', 'self_employed', 'family_history', 'treatment', 'Days_Indoors', 'Growing_Stress', 'Changes_Habits', 'Mental_Health_History', 'Mood_Swings', 'Coping_Struggles', 'Work_Interest', 'Social_Weakness', 'mental_health_interview', 'care_options']

=== Training MLP_Simple Model ===
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
MLP_Simple - Test Loss: 0.7561, Test Accuracy: 0.6702
Confusion Matrix:
[[12554  2804  2754]
 [ 2965 12200  4955]
 [ 2942  2866 14433]]
Classification Report:
              precision    recall  f1-score   support

           0       0.68      0.69      0.69     18112
           1       0.68      0.61      0.64     20120
           2       0.65      0.71      0.68     20241

    accuracy                           0.67     58473
   macro avg       0.67      0.67      0.67     58473
weighted avg       0.67      0.67      0.67     58473


=== Training MLP_Deep Model ===
Epoch 1/10

In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import confusion_matrix, classification_report
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Conv1D, GlobalMaxPooling1D, LSTM

# ===== 1. Data Preprocessing =====

# Load the dataset
file_path = "Mental Health Dataset.csv"
df = pd.read_csv(file_path)

# Drop irrelevant columns (adjust as needed)
df = df.drop(columns=['Timestamp', 'Country', 'Occupation'])

# Fill missing values with the mode of each column
df = df.fillna(df.mode().iloc[0])

# Identify categorical columns (all columns are object type here)
categorical_cols = df.select_dtypes(include=['object']).columns.tolist()
print("Categorical columns:", categorical_cols)

# Encode all categorical columns using LabelEncoder
label_encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Separate features and target (target: 'Mood_Swings')
X = df.drop(columns=['Mood_Swings'])
y = df['Mood_Swings']

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features (for models that use flat input)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# For sequence-based models, reshape to 3D: (samples, timesteps, channels)
# Here we treat each feature as a time step (channels=1)
num_features = X_train_scaled.shape[1]
X_train_seq = X_train_scaled.reshape(-1, num_features, 1)
X_test_seq = X_test_scaled.reshape(-1, num_features, 1)

# Determine number of classes from the target
num_classes = len(np.unique(y))

# ===== 2. Define Advanced Deep Learning Models =====

# Model 1: Simple MLP
def build_mlp_model_simple(input_dim):
    model = Sequential([
        Dense(64, activation='relu', input_dim=input_dim),
        Dropout(0.3),
        Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# Model 2: Deep MLP with Batch Normalization
def build_mlp_model_deep(input_dim):
    model = Sequential([
        Dense(128, activation='relu', input_dim=input_dim),
        BatchNormalization(),
        Dropout(0.4),
        Dense(64, activation='relu'),
        BatchNormalization(),
        Dropout(0.3),
        Dense(32, activation='relu'),
        Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# Model 3: CNN Model (1D convolution)
def build_cnn_model(input_shape):
    model = Sequential([
        Conv1D(64, kernel_size=3, activation='relu', input_shape=input_shape),
        GlobalMaxPooling1D(),
        Dense(64, activation='relu'),
        Dropout(0.3),
        Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# Model 4: LSTM Model (treat features as a sequence)
def build_lstm_model(input_shape):
    model = Sequential([
        LSTM(64, input_shape=input_shape),
        Dropout(0.3),
        Dense(64, activation='relu'),
        Dropout(0.2),
        Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# --- BiLSTM Model ---
def build_bilstm_model(input_shape):
    model = tf.keras.Sequential([
        tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64, return_sequences=True), input_shape=input_shape),
        tf.keras.layers.GlobalMaxPooling1D(),
        tf.keras.layers.Dropout(0.3),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# --- CNN-LSTM Model ---
def build_cnn_lstm_model(input_shape):
    model = tf.keras.Sequential([
        tf.keras.layers.Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=input_shape),
        tf.keras.layers.LSTM(64),
        tf.keras.layers.Dropout(0.3),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# --- Transformer Model ---
def build_transformer_model(input_shape):
    inputs = tf.keras.Input(shape=input_shape)
    # Multi-head attention
    attn_output = tf.keras.layers.MultiHeadAttention(num_heads=2, key_dim=32)(inputs, inputs)
    attn_output = tf.keras.layers.Add()([inputs, attn_output])  # Residual connection
    attn_output = tf.keras.layers.LayerNormalization(epsilon=1e-6)(attn_output)
    # Feed-forward network
    ff = tf.keras.layers.Dense(64, activation='relu')(attn_output)
    ff = tf.keras.layers.Dense(64)(ff)
    ff = tf.keras.layers.Add()([attn_output, ff])
    ff = tf.keras.layers.LayerNormalization(epsilon=1e-6)(ff)
    pooled = tf.keras.layers.GlobalAveragePooling1D()(ff)
    outputs = tf.keras.layers.Dense(num_classes, activation='softmax')(pooled)
    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# --- TCN Model ---
def build_tcn_model(input_shape):
    from tcn import TCN  # Ensure keras-tcn is installed: pip install keras-tcn
    model = tf.keras.Sequential([
        TCN(64, input_shape=input_shape),
        tf.keras.layers.Dropout(0.3),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# --- Supervised Autoencoder Model ---
# This model jointly learns to reconstruct the input and to classify it.
def build_supervised_autoencoder(input_dim):
    inputs = tf.keras.Input(shape=(input_dim,))
    encoded = tf.keras.layers.Dense(64, activation='relu', name="enc1")(inputs)
    encoded = tf.keras.layers.Dense(32, activation='relu', name="enc2")(encoded)
    bottleneck = tf.keras.layers.Dense(16, activation='relu', name='bottleneck')(encoded)
    # Decoder branch for reconstruction
    decoded = tf.keras.layers.Dense(32, activation='relu', name="dec1")(bottleneck)
    decoded = tf.keras.layers.Dense(64, activation='relu', name="dec2")(decoded)
    reconstruction = tf.keras.layers.Dense(input_dim, activation='linear', name="reconstruction")(decoded)
    # Classification branch from bottleneck features
    classifier_output = tf.keras.layers.Dense(num_classes, activation='softmax', name='classifier')(bottleneck)
    
    model = tf.keras.Model(inputs=inputs, outputs=[reconstruction, classifier_output])
    model.compile(optimizer='adam', 
                  loss={'reconstruction': 'mse', 'classifier': 'sparse_categorical_crossentropy'},
                  loss_weights={'reconstruction': 0.5, 'classifier': 1.0},
                  metrics={'classifier': 'accuracy'})
    return model

# ===== 3. Train and Evaluate Each Model =====

# Create a dictionary of models with appropriate input shapes.
# For sequence-based models, we use the 3D data (X_train_seq).
# For the autoencoder, we use the flat standardized data.
models = {
    "MLP_Simple": build_mlp_model_simple(X_train_scaled.shape[1]),
    "MLP_Deep": build_mlp_model_deep(X_train_scaled.shape[1]),
    "CNN": build_cnn_model((num_features, 1)),
    "LSTM": build_lstm_model((num_features, 1)),
    "BiLSTM": build_bilstm_model((num_features, 1)),
    "CNN_LSTM": build_cnn_lstm_model((num_features, 1)),
    "Transformer": build_transformer_model((num_features, 1)),
    "TCN": build_tcn_model((num_features, 1)),
    "Autoencoder": build_supervised_autoencoder(X_train_scaled.shape[1])
}

results = {}
epochs = 20
batch_size = 32

for model_name, model in models.items():
    print(f"\n=== Training {model_name} Model ===")
    if model_name == "Autoencoder":
        # Train the autoencoder model on flat input.
        history = model.fit(X_train_scaled, {"reconstruction": X_train_scaled, "classifier": y_train},
                            epochs=epochs, batch_size=batch_size, validation_split=0.1, verbose=1)
        # Evaluate on the test set.
        eval_results = model.evaluate(X_test_scaled, {"reconstruction": X_test_scaled, "classifier": y_test}, verbose=0)
        # The classifier branch is the second output.
        y_pred_probs = model.predict(X_test_scaled)[1]
    else:
        # Train sequence-based models on reshaped data.
        history = model.fit(X_train_seq, y_train, epochs=epochs, batch_size=batch_size,
                            validation_split=0.1, verbose=1)
        loss, acc = model.evaluate(X_test_seq, y_test, verbose=0)
        y_pred_probs = model.predict(X_test_seq)
    
    y_pred = np.argmax(y_pred_probs, axis=1)
    cm = confusion_matrix(y_test, y_pred)
    report = classification_report(y_test, y_pred)
    
    # For autoencoder, extract classifier accuracy from evaluation metrics if needed.
    if model_name == "Autoencoder":
        # The evaluation returned [total_loss, rec_loss, classifier_loss, classifier_accuracy]
        clf_acc = eval_results[-1]
        print(f"{model_name} - Test Classifier Accuracy: {clf_acc:.4f}")
    else:
        print(f"{model_name} - Test Accuracy: {acc:.4f}")
    
    print("Confusion Matrix:")
    print(cm)
    print("Classification Report:")
    print(report)
    
    results[model_name] = {
        "accuracy": clf_acc if model_name == "Autoencoder" else acc,
        "confusion_matrix": cm,
        "classification_report": report
    }

# ===== 4. Compare Overall Model Performance =====
print("\n=== Overall Model Performance ===")
for model_name, metrics in results.items():
    print(f"{model_name}: Accuracy = {metrics['accuracy']:.4f}")


Categorical columns: ['Gender', 'self_employed', 'family_history', 'treatment', 'Days_Indoors', 'Growing_Stress', 'Changes_Habits', 'Mental_Health_History', 'Mood_Swings', 'Coping_Struggles', 'Work_Interest', 'Social_Weakness', 'mental_health_interview', 'care_options']

=== Training MLP_Simple Model ===
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
MLP_Simple - Test Accuracy: 0.7151
Confusion Matrix:
[[14050  2882  1180]
 [ 2722 14558  2840]
 [ 3168  3865 13208]]
Classification Report:
              precision    recall  f1-score   support

           0       0.70      0.78      0.74     18112
           1       0.68      0.72      0.70     20120
           2       0.77      0.65      0.71     20241

    accuracy                           0.72     58473
   macro avg       0.72      0.72      0.72     584

Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
BiLSTM - Test Accuracy: 0.9468
Confusion Matrix:
[[16478   648   986]
 [   31 19819   270]
 [  106  1071 19064]]
Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.91      0.95     18112
           1       0.92      0.99      0.95     20120
           2       0.94      0.94      0.94     20241

    accuracy                           0.95     58473
   macro avg       0.95      0.95      0.95     58473
weighted avg       0.95      0.95      0.95     58473


=== Training CNN_LSTM Model ===
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
CNN_LSTM - Test Accuracy: 0.9466
Confusion Matrix:
[[16484   520  1108]
 [  111 18709  1300]
 [    1    81 20159]]
Classification Repor

Epoch 1/20


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
TCN - Test Accuracy: 0.9478
Confusion Matrix:
[[16963   281   868]
 [  395 19693    32]
 [  207  1267 18767]]
Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.94      0.95     18112
           1       0.93      0.98      0.95     20120
           2       0.95      0.93      0.94     20241

    accuracy                           0.95     58473
   macro avg       0.95      0.95      0.95     58473
weighted avg       0.95      0.95      0.95     58473


=== Training Autoencoder Model ===
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20

In [1]:
print(1)

1


In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import confusion_matrix, classification_report
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Conv1D, GlobalMaxPooling1D, LSTM

# === 1. Data Preprocessing ===

# Load the dataset
file_path = "Mental Health Dataset.csv"
df = pd.read_csv(file_path)

# Drop irrelevant columns (adjust as needed)
df = df.drop(columns=['Timestamp', 'Country', 'Occupation'])

# Fill missing values using the mode for each column
df = df.fillna(df.mode().iloc[0])

# Identify all categorical columns
categorical_cols = df.select_dtypes(include=['object']).columns.tolist()
print("Categorical columns:", categorical_cols)

# Encode all categorical columns using LabelEncoder
label_encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Separate features and target (target is "Mood_Swings")
X = df.drop(columns=['Mood_Swings'])
y = df['Mood_Swings']

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# For CNN and LSTM models, reshape data as a sequence (each feature as a "time step")
num_features = X_train_scaled.shape[1]
X_train_seq = X_train_scaled.reshape(-1, num_features, 1)
X_test_seq = X_test_scaled.reshape(-1, num_features, 1)

# Determine the number of classes for output layer
num_classes = len(np.unique(y))

# === 2. Define Deep Learning Models ===


# Model 4: LSTM Model (treat features as a sequence)
def build_lstm_model(input_shape):
    model = Sequential([
        LSTM(64, input_shape=input_shape),
        Dropout(0.3),
        Dense(64, activation='relu'),
        Dropout(0.2),
        Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# === 3. Train and Evaluate Models ===

models = {
    "LSTM": build_lstm_model((num_features, 1))
}

results = {}

for model_name, model in models.items():
    print(f"\n=== Training {model_name} Model ===")
    
    if model_name in ["CNN", "LSTM"]:
        history = model.fit(X_train_seq, y_train, epochs=10, batch_size=32,
                            validation_split=0.1, verbose=1)
        loss, acc = model.evaluate(X_test_seq, y_test, verbose=0)
        y_pred_probs = model.predict(X_test_seq)
    else:
        history = model.fit(X_train_scaled, y_train, epochs=10, batch_size=32,
                            validation_split=0.1, verbose=1)
        loss, acc = model.evaluate(X_test_scaled, y_test, verbose=0)
        y_pred_probs = model.predict(X_test_scaled)
    
    # Convert predicted probabilities to class labels
    y_pred = np.argmax(y_pred_probs, axis=1)
    
    # Generate confusion matrix and classification report
    cm = confusion_matrix(y_test, y_pred)
    report = classification_report(y_test, y_pred)
    
    print(f"{model_name} - Test Loss: {loss:.4f}, Test Accuracy: {acc:.4f}")
    print("Confusion Matrix:")
    print(cm)
    print("Classification Report:")
    print(report)
    
    results[model_name] = {
        "loss": loss,
        "accuracy": acc,
        "confusion_matrix": cm,
        "classification_report": report
    }

# === 4. Model Performance Comparison ===

print("\n=== Overall Model Performance ===")
for model_name, metrics in results.items():
    print(f"{model_name}: Accuracy = {metrics['accuracy']:.4f}")


Categorical columns: ['Gender', 'self_employed', 'family_history', 'treatment', 'Days_Indoors', 'Growing_Stress', 'Changes_Habits', 'Mental_Health_History', 'Mood_Swings', 'Coping_Struggles', 'Work_Interest', 'Social_Weakness', 'mental_health_interview', 'care_options']

=== Training LSTM Model ===
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
LSTM - Test Loss: 0.0787, Test Accuracy: 0.9481
Confusion Matrix:
[[17153   553   406]
 [  130 19796   194]
 [  687  1062 18492]]
Classification Report:
              precision    recall  f1-score   support

           0       0.95      0.95      0.95     18112
           1       0.92      0.98      0.95     20120
           2       0.97      0.91      0.94     20241

    accuracy                           0.95     58473
   macro avg       0.95      0.95      0.95     58473
weighted avg       0.95      0.95      0.95     58473


=== Overall Model Performance ===
LSTM: Accuracy = 0.9

In [3]:
import pickle

lstm_model = models.get("LSTM")
# Save the LSTM model to an H5 file
lstm_model.save('lstm_model.h5')
print("LSTM model saved as lstm_model.h5")

# Save the scaler object using pickle
with open('scaler.pkl', 'wb') as file:
    pickle.dump(scaler, file)
print("Scaler saved as scaler.pkl")

LSTM model saved as lstm_model.h5
Scaler saved as scaler.pkl


  saving_api.save_model(


In [4]:
from tensorflow.keras.models import load_model
import pickle

# Load the saved LSTM model
lstm_model = load_model('lstm_model.h5')
print("LSTM model loaded successfully.")

# Load the scaler object
with open('scaler.pkl', 'rb') as file:
    scaler = pickle.load(file)
print("Scaler loaded successfully.")
# Load the Label object
with open('label_encoders.pkl', 'wb') as f:
    pickle.dump(label_encoders, f)
    
print("Label loaded successfully.")

LSTM model loaded successfully.
Scaler loaded successfully.


In [25]:
with open('label_encoders.pkl', 'wb') as f:
    pickle.dump(label_encoders, f)
    
print("Label loaded successfully.")

Label loaded successfully.


In [24]:
import pickle
import numpy as np
import pandas as pd
from tensorflow.keras.models import load_model

# === 1. Load Saved Artifacts ===
lstm_model = load_model('lstm_model.h5')
print("LSTM model loaded.")

with open('scaler.pkl', 'rb') as f:
    scaler = pickle.load(f)
print("Scaler loaded.")

with open('label_encoders.pkl', 'rb') as f:
    label_encoders = pickle.load(f)
print("Label encoders loaded.")

# === 2. Define Questions and Options Based on the CSV Columns ===
# These options are inferred from the sample dataset.
questions_options = {
    "Gender": ["Female", "Male"],
    "self_employed": ["No", "Yes"],
    "family_history": ["No", "Yes"],
    "treatment": ["No", "Yes"],
    "Days_Indoors": ["Go out Every day", "1-14 days", "15-30 days", "31-60 days", "More than 2 months"],
    "Growing_Stress": ["Maybe", "No", "Yes"],
    "Changes_Habits": ["Maybe", "No", "Yes"],
    "Mental_Health_History": ["Maybe", "No", "Yes"],
    "Coping_Struggles": ["No", "Yes"],
    "Work_Interest": ["Maybe", "No", "Yes"],
    "Social_Weakness": ["Maybe", "No", "Yes"],
    "mental_health_interview": ["Maybe", "No", "Yes"],
    "care_options": ["No", "Not sure", "Yes"]
}

# === 3. Collect User Input via the Chatbot Interface ===
user_data = {}
print("\nPlease answer the following questions by entering the option number:")

for feature, options in questions_options.items():
    print(f"\n{feature}:")
    for idx, option in enumerate(options, start=1):
        print(f"  {idx}. {option}")
    while True:
        choice = input("Enter the option number: ")
        try:
            choice_index = int(choice) - 1
            if 0 <= choice_index < len(options):
                user_data[feature] = options[choice_index]
                break
            else:
                print("Invalid choice. Please try again.")
        except ValueError:
            print("Invalid input. Please enter a number.")

# Display the collected inputs
df_input = pd.DataFrame([user_data])
print("\nUser input received:")
print(df_input)

# === 4. Encode the User Input Using the Saved Label Encoders ===
# Each feature should be encoded using the same encoder as used during training.
for feature in questions_options.keys():
    if feature in label_encoders:
        le = label_encoders[feature]
        try:
            df_input[feature] = le.transform(df_input[feature])
        except Exception as e:
            print(f"Error encoding {feature}: {e}")
            exit(1)
    else:
        # If any feature is numeric, ensure proper conversion
        df_input[feature] = pd.to_numeric(df_input[feature], errors='coerce')

# Arrange features in the same order as used in training
features_order = list(questions_options.keys())
X_input = df_input[features_order].values

# === 5. Preprocess the Input and Predict Using the LSTM Model ===
# Scale the input using the saved scaler
X_input_scaled = scaler.transform(X_input)

# Reshape input to 3D as expected by the LSTM: (samples, timesteps, channels)
num_features = X_input_scaled.shape[1]
X_input_seq = X_input_scaled.reshape(-1, num_features, 1)

# Get prediction probabilities and predicted class
prediction_probabilities = lstm_model.predict(X_input_seq)
predicted_class = np.argmax(prediction_probabilities, axis=1)

print("\nPredicted Stress (Mood Swing) Class:", predicted_class[0])


LSTM model loaded.
Scaler loaded.
Label encoders loaded.

Please answer the following questions by entering the option number:

Gender:
  1. Female
  2. Male
Enter the option number: 1

self_employed:
  1. No
  2. Yes
Enter the option number: 2

family_history:
  1. No
  2. Yes
Enter the option number: 1

treatment:
  1. No
  2. Yes
Enter the option number: 1

Days_Indoors:
  1. Go out Every day
  2. 1-14 days
  3. 15-30 days
  4. 31-60 days
  5. More than 2 months
Enter the option number: 5

Growing_Stress:
  1. Maybe
  2. No
  3. Yes
Enter the option number: 1

Changes_Habits:
  1. Maybe
  2. No
  3. Yes
Enter the option number: 2

Mental_Health_History:
  1. Maybe
  2. No
  3. Yes
Enter the option number: 3

Coping_Struggles:
  1. No
  2. Yes
Enter the option number: 2

Work_Interest:
  1. Maybe
  2. No
  3. Yes
Enter the option number: 1

Social_Weakness:
  1. Maybe
  2. No
  3. Yes
Enter the option number: 1

mental_health_interview:
  1. Maybe
  2. No
  3. Yes
Enter the option nu




Predicted Stress (Mood Swing) Class: 1


In [8]:
# === 1. Data Preprocessing ===

# Load the dataset
file_path = "Mental Health Dataset.csv"
df = pd.read_csv(file_path)

# Drop irrelevant columns (adjust as needed)
df = df.drop(columns=['Timestamp', 'Country', 'Occupation'])

# Fill missing values using the mode for each column
df = df.fillna(df.mode().iloc[0])

# Identify all categorical columns
categorical_cols = df.select_dtypes(include=['object']).columns.tolist()
print("Categorical columns:", categorical_cols)

# Encode all categorical columns using LabelEncoder
label_encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le


Categorical columns: ['Gender', 'self_employed', 'family_history', 'treatment', 'Days_Indoors', 'Growing_Stress', 'Changes_Habits', 'Mental_Health_History', 'Mood_Swings', 'Coping_Struggles', 'Work_Interest', 'Social_Weakness', 'mental_health_interview', 'care_options']


In [14]:
a = label_encoders['Days_Indoors']

In [23]:

for a in label_encoders:
    print(a)
    print(label_encoders[a].__dict__)

Gender
{'classes_': array(['Female', 'Male'], dtype=object)}
self_employed
{'classes_': array(['No', 'Yes'], dtype=object)}
family_history
{'classes_': array(['No', 'Yes'], dtype=object)}
treatment
{'classes_': array(['No', 'Yes'], dtype=object)}
Days_Indoors
{'classes_': array(['1-14 days', '15-30 days', '31-60 days', 'Go out Every day',
       'More than 2 months'], dtype=object)}
Growing_Stress
{'classes_': array(['Maybe', 'No', 'Yes'], dtype=object)}
Changes_Habits
{'classes_': array(['Maybe', 'No', 'Yes'], dtype=object)}
Mental_Health_History
{'classes_': array(['Maybe', 'No', 'Yes'], dtype=object)}
Mood_Swings
{'classes_': array(['High', 'Low', 'Medium'], dtype=object)}
Coping_Struggles
{'classes_': array(['No', 'Yes'], dtype=object)}
Work_Interest
{'classes_': array(['Maybe', 'No', 'Yes'], dtype=object)}
Social_Weakness
{'classes_': array(['Maybe', 'No', 'Yes'], dtype=object)}
mental_health_interview
{'classes_': array(['Maybe', 'No', 'Yes'], dtype=object)}
care_options
{'classe

In [20]:
questions_options = {
    "Gender": ["Female", "Male"],
    "self_employed": ["No", "Yes"],
    "family_history": ["No", "Yes"],
    "treatment": ["No", "Yes"],
    "Days_Indoors": ["Go out Every day", "1-14 days", "15-30 days", "31-60 days", "More than 2 months"],
    "Growing_Stress": ["Maybe", "No", "Yes"],
    "Changes_Habits": ["Maybe", "No", "Yes"],
    "Mental_Health_History": ["Maybe", "No", "Yes"],
    "Coping_Struggles": ["No", "Yes"],
    "Work_Interest": ["Maybe", "No", "Yes"],
    "Social_Weakness": ["Maybe", "No", "Yes"],
    "mental_health_interview": ["Maybe", "No", "Yes"],
    "care_options": ["No", "Not sure", "Yes"]
}

{'Gender': LabelEncoder(),
 'self_employed': LabelEncoder(),
 'family_history': LabelEncoder(),
 'treatment': LabelEncoder(),
 'Days_Indoors': LabelEncoder(),
 'Growing_Stress': LabelEncoder(),
 'Changes_Habits': LabelEncoder(),
 'Mental_Health_History': LabelEncoder(),
 'Mood_Swings': LabelEncoder(),
 'Coping_Struggles': LabelEncoder(),
 'Work_Interest': LabelEncoder(),
 'Social_Weakness': LabelEncoder(),
 'mental_health_interview': LabelEncoder(),
 'care_options': LabelEncoder()}

In [26]:
!pip list

Package                       Version
----------------------------- ---------
absl-py                       2.0.0
aiobotocore                   2.5.0
aiofiles                      22.1.0
aiogrpc                       1.8
aiohttp                       3.9.3
aiohttp-retry                 2.8.3
aioitertools                  0.7.1
aiosignal                     1.2.0
aiosqlite                     0.18.0
alabaster                     0.7.12
anaconda-catalogs             0.2.0
anaconda-client               1.12.0
anaconda-navigator            2.4.2
anaconda-project              0.11.1
annotated-types               0.6.0
anyio                         3.5.0
appdirs                       1.4.4
applaunchservices             0.3.0
appnope                       0.1.2
appscript                     1.1.2
APScheduler                   3.10.4
argon2-cffi                   21.3.0
argon2-cffi-bindings          21.2.0
arrow                         1.2.3
astroid                   