In [38]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [39]:
df = pd.read_csv("./Mental Health Dataset.csv")
df.head()

Unnamed: 0,Timestamp,Gender,Country,Occupation,self_employed,family_history,treatment,Days_Indoors,Growing_Stress,Changes_Habits,Mental_Health_History,Mood_Swings,Coping_Struggles,Work_Interest,Social_Weakness,mental_health_interview,care_options
0,2014-08-27 11:29:31,Female,United States,Corporate,,No,Yes,1-14 days,Yes,No,Yes,Medium,No,No,Yes,No,Not sure
1,2014-08-27 11:31:50,Female,United States,Corporate,,Yes,Yes,1-14 days,Yes,No,Yes,Medium,No,No,Yes,No,No
2,2014-08-27 11:32:39,Female,United States,Corporate,,Yes,Yes,1-14 days,Yes,No,Yes,Medium,No,No,Yes,No,Yes
3,2014-08-27 11:37:59,Female,United States,Corporate,No,Yes,Yes,1-14 days,Yes,No,Yes,Medium,No,No,Yes,Maybe,Yes
4,2014-08-27 11:43:36,Female,United States,Corporate,No,Yes,Yes,1-14 days,Yes,No,Yes,Medium,No,No,Yes,No,Yes


In [40]:
#Capitalizing first letter of all columns
df.columns = df.columns.str.capitalize()
print(df.columns)

Index(['Timestamp', 'Gender', 'Country', 'Occupation', 'Self_employed',
       'Family_history', 'Treatment', 'Days_indoors', 'Growing_stress',
       'Changes_habits', 'Mental_health_history', 'Mood_swings',
       'Coping_struggles', 'Work_interest', 'Social_weakness',
       'Mental_health_interview', 'Care_options'],
      dtype='object')


In [41]:
#Drop Timestamp column
df.drop('Timestamp', axis=1, inplace=True)
df.head()

Unnamed: 0,Gender,Country,Occupation,Self_employed,Family_history,Treatment,Days_indoors,Growing_stress,Changes_habits,Mental_health_history,Mood_swings,Coping_struggles,Work_interest,Social_weakness,Mental_health_interview,Care_options
0,Female,United States,Corporate,,No,Yes,1-14 days,Yes,No,Yes,Medium,No,No,Yes,No,Not sure
1,Female,United States,Corporate,,Yes,Yes,1-14 days,Yes,No,Yes,Medium,No,No,Yes,No,No
2,Female,United States,Corporate,,Yes,Yes,1-14 days,Yes,No,Yes,Medium,No,No,Yes,No,Yes
3,Female,United States,Corporate,No,Yes,Yes,1-14 days,Yes,No,Yes,Medium,No,No,Yes,Maybe,Yes
4,Female,United States,Corporate,No,Yes,Yes,1-14 days,Yes,No,Yes,Medium,No,No,Yes,No,Yes


In [42]:
df.drop('Self_employed', axis=1, inplace=True)
df.head()

Unnamed: 0,Gender,Country,Occupation,Family_history,Treatment,Days_indoors,Growing_stress,Changes_habits,Mental_health_history,Mood_swings,Coping_struggles,Work_interest,Social_weakness,Mental_health_interview,Care_options
0,Female,United States,Corporate,No,Yes,1-14 days,Yes,No,Yes,Medium,No,No,Yes,No,Not sure
1,Female,United States,Corporate,Yes,Yes,1-14 days,Yes,No,Yes,Medium,No,No,Yes,No,No
2,Female,United States,Corporate,Yes,Yes,1-14 days,Yes,No,Yes,Medium,No,No,Yes,No,Yes
3,Female,United States,Corporate,Yes,Yes,1-14 days,Yes,No,Yes,Medium,No,No,Yes,Maybe,Yes
4,Female,United States,Corporate,Yes,Yes,1-14 days,Yes,No,Yes,Medium,No,No,Yes,No,Yes


In [43]:
#Converting Categorical variables to numerical using Label Encoding
LE = LabelEncoder()
for column in df.columns:
    df[column] = LE.fit_transform(df[column])

df.head()

Unnamed: 0,Gender,Country,Occupation,Family_history,Treatment,Days_indoors,Growing_stress,Changes_habits,Mental_health_history,Mood_swings,Coping_struggles,Work_interest,Social_weakness,Mental_health_interview,Care_options
0,0,11,1,0,1,0,2,1,2,2,0,1,2,1,1
1,0,11,1,1,1,0,2,1,2,2,0,1,2,1,0
2,0,11,1,1,1,0,2,1,2,2,0,1,2,1,2
3,0,11,1,1,1,0,2,1,2,2,0,1,2,0,2
4,0,11,1,1,1,0,2,1,2,2,0,1,2,1,2


In [44]:
#Split the dataset into features and target variable
X = df.drop('Treatment', axis=1)
Y = df['Treatment']

In [45]:
#Split the data into training and testing dataset
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=40)

In [46]:
#Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [47]:
#Inialize and train the GradientBoost
model =GradientBoostingClassifier(random_state=42)
model.fit(X_train, Y_train)

In [48]:
#Make predictions on the test dataset
y_pred = model.predict(X_test)

In [49]:
#Evaluate the model
print(accuracy_score(Y_test, y_pred))
print((classification_report(Y_test, y_pred)))
print(confusion_matrix(Y_test, y_pred))

0.8134687332977018
              precision    recall  f1-score   support

           0       0.85      0.50      0.63       596
           1       0.80      0.96      0.88      1274
           2       0.00      0.00      0.00         1

    accuracy                           0.81      1871
   macro avg       0.55      0.49      0.50      1871
weighted avg       0.82      0.81      0.80      1871

[[ 299  297    0]
 [  51 1223    0]
 [   1    0    0]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


##**Other Commonly Used Models**

In [50]:
from sklearn.naive_bayes import GaussianNB
model = GaussianNB()
model.fit(X_train, Y_train)
y_pred = model.predict(X_test)
print(accuracy_score(Y_test, y_pred))

0.7429182255478354


In [51]:
from sklearn.neighbors import KNeighborsClassifier
model = KNeighborsClassifier(n_neighbors=3)
model.fit(X_train, Y_train)
y_pred = model.predict(X_test)
print(accuracy_score(Y_test, y_pred))

0.7006948156066275


In [52]:
from sklearn import svm
model = svm.SVC()
model.fit(X_train, Y_train)
y_pred = model.predict(X_test)
print(accuracy_score(Y_test, y_pred))

0.7792624265098878


In [53]:
from sklearn import tree
model = tree.DecisionTreeClassifier()
model.fit(X_train, Y_train)
y_pred = model.predict(X_test)
print(accuracy_score(Y_test, y_pred))

0.7696419027258151


## Using a well Defined Neural Network for around 150 epochs, we tend to achieve an atmost accuracy of 80%

In [54]:
import tensorflow as tf
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
import numpy as np

# Split the dataset
# 60% train, 20% test, 20% validation
X_train, X_temp, y_train, y_temp = train_test_split(X, Y, test_size=0.4, random_state=42)
X_test, X_val, y_test, y_val = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Build the model
model = tf.keras.Sequential([
    layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    layers.Dropout(0.2),
    layers.Dense(32, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(32, activation='relu'),
    layers.Dense(16, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Define the fit function
def fit_model(model, X_train, y_train, epochs=150, validation_data=(X_val, y_val)):
    model.fit(X_train, y_train, epochs=epochs, validation_data=validation_data)

# Define the predict function
def predict_model(model, X_test):
    return model.predict(X_test)

# Define the accuracy function
def get_accuracy(model, X_test, y_test):
    predictions = predict_model(model, X_test)
    rounded_predictions = tf.round(predictions).numpy().flatten()
    correct_predictions = np.equal(rounded_predictions, y_test)
    accuracy = np.mean(correct_predictions)
    return accuracy

# Fit the model
fit_model(model, X_train, y_train, validation_data=(X_val, y_val))

# Get the accuracy
accuracy = get_accuracy(model, X_test, y_test)
print(f"Accuracy: {accuracy * 100:.2f}%")


Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
Epoch 75/150
Epoch 76/150
Epoch 77/150
Epoch 78