# Mental Health Classification with Neural Network

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import seaborn as sns
import math
import tensorflow as tf
import keras
import keras_tuner as kt

from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report

from tensorflow.keras import callbacks
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Dense, Activation, Dropout, InputLayer, Input, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import Accuracy
from keras import layers, Sequential
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import scale, OneHotEncoder, StandardScaler
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report
from keras import backend as K

In [2]:
# Read the survey export into a DataFrame and preview the first rows.
csv_path = '../datasets/mentalhealth.csv'
df = pd.read_csv(csv_path)
# Optional: uncomment to work on the first 3000 rows only.
# df = df.iloc[0:3000]
df.head()

Unnamed: 0,Timestamp,Gender,Country,Occupation,self_employed,family_history,treatment,Days_Indoors,Growing_Stress,Changes_Habits,Mental_Health_History,Mood_Swings,Coping_Struggles,Work_Interest,Social_Weakness,mental_health_interview,care_options
0,8/27/2014 11:29,Female,United States,Corporate,,No,Yes,1-14 days,Yes,No,Yes,Medium,No,No,Yes,No,Not sure
1,8/27/2014 11:31,Female,United States,Corporate,,Yes,Yes,1-14 days,Yes,No,Yes,Medium,No,No,Yes,No,No
2,8/27/2014 11:32,Female,United States,Corporate,,Yes,Yes,1-14 days,Yes,No,Yes,Medium,No,No,Yes,No,Yes
3,8/27/2014 11:37,Female,United States,Corporate,No,Yes,Yes,1-14 days,Yes,No,Yes,Medium,No,No,Yes,Maybe,Yes
4,8/27/2014 11:43,Female,United States,Corporate,No,Yes,Yes,1-14 days,Yes,No,Yes,Medium,No,No,Yes,No,Yes


In [3]:
# Remove the columns that are not used as model features.
df = df.drop(columns=['Timestamp', 'Gender', 'Country'])

In [4]:
# Preview the frame again now that Timestamp, Gender and Country have been dropped.
df.head()

Unnamed: 0,Occupation,self_employed,family_history,treatment,Days_Indoors,Growing_Stress,Changes_Habits,Mental_Health_History,Mood_Swings,Coping_Struggles,Work_Interest,Social_Weakness,mental_health_interview,care_options
0,Corporate,,No,Yes,1-14 days,Yes,No,Yes,Medium,No,No,Yes,No,Not sure
1,Corporate,,Yes,Yes,1-14 days,Yes,No,Yes,Medium,No,No,Yes,No,No
2,Corporate,,Yes,Yes,1-14 days,Yes,No,Yes,Medium,No,No,Yes,No,Yes
3,Corporate,No,Yes,Yes,1-14 days,Yes,No,Yes,Medium,No,No,Yes,Maybe,Yes
4,Corporate,No,Yes,Yes,1-14 days,Yes,No,Yes,Medium,No,No,Yes,No,Yes


## Inspect the Data

In [5]:
# Show dtypes and non-null counts per column to spot missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 292364 entries, 0 to 292363
Data columns (total 14 columns):
 #   Column                   Non-Null Count   Dtype 
---  ------                   --------------   ----- 
 0   Occupation               292364 non-null  object
 1   self_employed            287162 non-null  object
 2   family_history           292364 non-null  object
 3   treatment                292364 non-null  object
 4   Days_Indoors             292364 non-null  object
 5   Growing_Stress           292364 non-null  object
 6   Changes_Habits           292364 non-null  object
 7   Mental_Health_History    292364 non-null  object
 8   Mood_Swings              292364 non-null  object
 9   Coping_Struggles         292364 non-null  object
 10  Work_Interest            292364 non-null  object
 11  Social_Weakness          292364 non-null  object
 12  mental_health_interview  292364 non-null  object
 13  care_options             292364 non-null  object
dtypes: object(14)
memory

In [6]:
# Impute missing `self_employed` answers with the most frequent value (the mode).
# The result is assigned back to the column instead of calling
# `df['self_employed'].fillna(..., inplace=True)`: that form is chained assignment
# through an inplace method, which warns on pandas 2.x and silently leaves `df`
# unchanged once Copy-on-Write is enabled (the pandas 3 default).
self_employed_mode = df['self_employed'].mode()[0]
df['self_employed'] = df['self_employed'].fillna(self_employed_mode)

In [7]:
# Re-check the non-null counts after imputing `self_employed`.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 292364 entries, 0 to 292363
Data columns (total 14 columns):
 #   Column                   Non-Null Count   Dtype 
---  ------                   --------------   ----- 
 0   Occupation               292364 non-null  object
 1   self_employed            292364 non-null  object
 2   family_history           292364 non-null  object
 3   treatment                292364 non-null  object
 4   Days_Indoors             292364 non-null  object
 5   Growing_Stress           292364 non-null  object
 6   Changes_Habits           292364 non-null  object
 7   Mental_Health_History    292364 non-null  object
 8   Mood_Swings              292364 non-null  object
 9   Coping_Struggles         292364 non-null  object
 10  Work_Interest            292364 non-null  object
 11  Social_Weakness          292364 non-null  object
 12  mental_health_interview  292364 non-null  object
 13  care_options             292364 non-null  object
dtypes: object(14)
memory

In [8]:
# List the remaining column names; they are the keys used by the encoding step.
df.columns

Index(['Occupation', 'self_employed', 'family_history', 'treatment',
       'Days_Indoors', 'Growing_Stress', 'Changes_Habits',
       'Mental_Health_History', 'Mood_Swings', 'Coping_Struggles',
       'Work_Interest', 'Social_Weakness', 'mental_health_interview',
       'care_options'],
      dtype='object')

In [9]:
# Encode the categorical survey answers as integers, column by column.
# Two answer scales are shared by several columns, so they are defined once.
yes_no = {'Yes': 1, 'No': 0}
yes_maybe_no = {'Yes': 3, 'Maybe': 2, 'No': 1}

# Columns answered with Yes / No.
answer_codes = {
    col: yes_no
    for col in ('self_employed', 'family_history', 'treatment', 'Coping_Struggles')
}

# Columns answered with Yes / Maybe / No.
answer_codes.update({
    col: yes_maybe_no
    for col in (
        'Growing_Stress',
        'Changes_Habits',
        'Mental_Health_History',
        'Work_Interest',
        'Social_Weakness',
        'mental_health_interview',
    )
})

# Columns with their own answer scale.
answer_codes['Days_Indoors'] = {
    'Go out Every day': 1,
    '1-14 days': 2,
    '15-30 days': 3,
    '31-60 days': 4,
    'More than 2 months': 5,
}
answer_codes['Mood_Swings'] = {'Low': 1, 'Medium': 2, 'High': 3}
# NOTE(review): 'Ye' looks like a truncated 'Yes' - confirm whether it occurs in the raw data.
answer_codes['care_options'] = {'No': 1, 'Not sure': 2, 'Yes': 3, 'Ye': 3}

# The nested dict maps column name -> {old value: new value}; `Occupation` is not
# listed and is therefore left untouched here.
df.replace(answer_codes, inplace=True)

In [10]:
# Print the distinct values of every column to verify the encoding.
for column, values in df.items():
    print(f"Column Name: {column} - {values.unique()}")

Column Name: Occupation - ['Corporate' 'Student' 'Business' 'Housewife' 'Others']
Column Name: self_employed - [0 1]
Column Name: family_history - [0 1]
Column Name: treatment - [1 0]
Column Name: Days_Indoors - [2 1 5 3 4]
Column Name: Growing_Stress - [3 1 2]
Column Name: Changes_Habits - [1 3 2]
Column Name: Mental_Health_History - [3 1 2]
Column Name: Mood_Swings - [2 1 3]
Column Name: Coping_Struggles - [0 1]
Column Name: Work_Interest - [1 2 3]
Column Name: Social_Weakness - [3 1 2]
Column Name: mental_health_interview - [1 2 3]
Column Name: care_options - [2 1 3]


In [11]:
# Look at the last rows of the encoded frame.
df.tail()

Unnamed: 0,Occupation,self_employed,family_history,treatment,Days_Indoors,Growing_Stress,Changes_Habits,Mental_Health_History,Mood_Swings,Coping_Struggles,Work_Interest,Social_Weakness,mental_health_interview,care_options
292359,Business,1,1,1,3,1,2,1,1,1,1,2,2,2
292360,Business,0,1,1,3,1,2,1,1,1,1,2,1,3
292361,Business,0,1,0,3,1,2,1,1,1,1,2,1,1
292362,Business,0,1,1,3,1,2,1,1,1,1,2,1,3
292363,Business,0,1,1,3,1,2,1,1,1,1,2,1,3


In [12]:
# One-hot encode `Occupation`: the original column is replaced by one
# `Occupation_<value>` indicator column per category.
occupation_columns = ['Occupation']
df = pd.get_dummies(data=df, columns=occupation_columns)
df.tail()

Unnamed: 0,self_employed,family_history,treatment,Days_Indoors,Growing_Stress,Changes_Habits,Mental_Health_History,Mood_Swings,Coping_Struggles,Work_Interest,Social_Weakness,mental_health_interview,care_options,Occupation_Business,Occupation_Corporate,Occupation_Housewife,Occupation_Others,Occupation_Student
292359,1,1,1,3,1,2,1,1,1,1,2,2,2,1,0,0,0,0
292360,0,1,1,3,1,2,1,1,1,1,2,1,3,1,0,0,0,0
292361,0,1,0,3,1,2,1,1,1,1,2,1,1,1,0,0,0,0
292362,0,1,1,3,1,2,1,1,1,1,2,1,3,1,0,0,0,0
292363,0,1,1,3,1,2,1,1,1,1,2,1,3,1,0,0,0,0


In [13]:
# `treatment` is the prediction target; every other column is a feature.
y = df['treatment']
X = df.drop(columns=['treatment'])

## Split the Data

In [14]:
# Draw a 5% training subsample and a 1% test subsample from the full data.
# NOTE(review): the small fractions look like a deliberate choice to keep the SVM and
# grid-search cells tractable - confirm before changing them.
# `random_state` makes the split reproducible across kernel restarts, and `stratify=y`
# keeps the proportion of treatment classes the same in both subsamples.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.01,
    train_size=0.05,
    random_state=42,
    stratify=y,
)

# Standardise the features with statistics learned from the training split only.
# Calling `scale()` on each split separately standardises them with different
# means/variances and lets test-set statistics influence the test features.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)
print("X_test shape:", X_test.shape)
print("y_test shape:", y_test.shape)

X_train shape: (14618, 17)
y_train shape: (14618,)
X_test shape: (2924, 17)
y_test shape: (2924,)


## Create Sklearn Model

In [15]:
def train_model(Model, name: str):
    """Fit a freshly constructed estimator and print its test-set report.

    Uses the notebook-level `X_train`, `y_train`, `X_test` and `y_test`.

    Args:
        Model: Callable invoked with no arguments (e.g. an estimator class) that
            returns an object exposing `fit` and `predict`.
        name: Label printed above the classification report.

    Returns:
        None. The report is printed, and the fitted estimator is discarded.
    """
    estimator = Model()
    estimator.fit(X_train, y_train)
    y_pred = estimator.predict(X_test)

    print("Model:", name)
    report_text = classification_report(y_test, y_pred)
    print(report_text)

In [16]:
# train_model(SVC, "Support Vector Machine")
# train_model(GaussianNB, "Naive Bayes")
# train_model(KNeighborsClassifier, "K-Nearest Neighbors")

In [17]:
# Hyper-parameter grids to search for each classifier.
svm_grid = {
    'C': [0.5, 1, 10, 100, 1000],
    'gamma': ['scale', 1, 0.1, 0.001, 0.0001],
    'kernel': ['rbf'],
}
nb_grid = {}  # GaussianNB is searched with its default parameters only
knn_grid = {'n_neighbors': list(range(3, 22, 2))}  # odd k from 3 to 21

# Each entry pairs an unfitted estimator with its grid.
models = {
    'svm': [SVC(), svm_grid],
    'nb': [GaussianNB(), nb_grid],
    'knn': [KNeighborsClassifier(), knn_grid],
}

# Build one 5-fold, accuracy-scored grid search per classifier.
# Nothing is fitted here; the searches are only constructed.
best_models = {
    key: GridSearchCV(estimator, grid, cv=5, scoring='accuracy', verbose=0)
    for key, (estimator, grid) in models.items()
}

print(best_models)

{'svm': GridSearchCV(cv=5, estimator=SVC(),
             param_grid={'C': [0.5, 1, 10, 100, 1000],
                         'gamma': ['scale', 1, 0.1, 0.001, 0.0001],
                         'kernel': ['rbf']},
             scoring='accuracy'), 'nb': GridSearchCV(cv=5, estimator=GaussianNB(), param_grid={}, scoring='accuracy'), 'knn': GridSearchCV(cv=5, estimator=KNeighborsClassifier(),
             param_grid={'n_neighbors': [3, 5, 7, 9, 11, 13, 15, 17, 19, 21]},
             scoring='accuracy')}


In [18]:
# svm = best_models['svm']
# svm.fit(X_train, y_train)

# nb = best_models['nb']
# nb.fit(X_train, y_train)

# knn = best_models['knn']
# knn.fit(X_train, y_train)

In [None]:
# Report the best hyper-parameters found by each grid search.
# The searches are read from `best_models` instead of the names `svm` / `nb` / `knn`:
# on a fresh kernel those names are not bound at this point (the cell that assigns and
# fits them is commented out), so referencing them raises NameError.
for search_label, search_key in (
    ('SVM:', 'svm'),
    ('Naive Bayes', 'nb'),
    ('K-Nearest Neighbors', 'knn'),
):
    search = best_models[search_key]
    # `best_params_` is only set once the grid search has been fitted.
    if hasattr(search, 'best_params_'):
        print(search_label, search.best_params_)
    else:
        print(search_label, 'grid search has not been fitted')

In [19]:
# Final classifiers with fixed hyper-parameters.
# NOTE(review): C, gamma and n_neighbors are presumably the grid-search winners - confirm.
svm = SVC(kernel='rbf', C=1000, gamma=0.001)
knn = KNeighborsClassifier(n_neighbors=9)
nb = GaussianNB()

In [None]:
# Fit the three classifiers on the training split.
# The last call is the cell's final expression, so its return value is what the
# notebook displays as this cell's output.
svm.fit(X_train, y_train)
nb.fit(X_train, y_train)
knn.fit(X_train, y_train)

In [None]:
def predictions_report(model, name):
    """Print a classification report for an already-fitted estimator.

    Uses the notebook-level `X_test` and `y_test`.

    Args:
        model: Object exposing `predict`; it is not fitted here.
        name: Label printed above the report.

    Returns:
        None. The report is printed.
    """
    y_pred = model.predict(X_test)
    print("Name:", name)
    report_text = classification_report(y_test, y_pred)
    print(report_text)

In [None]:
# Print the test-set report of every fitted classifier, in the same order as before.
for fitted_model, report_name in ((svm, "SVM"), (nb, "NB"), (knn, "KNN")):
    predictions_report(fitted_model, report_name)