In [1]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import tensorflow as tf
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import precision_recall_fscore_support
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns



In [2]:
#  Import and read the charity_data.csv.
import pandas as pd
mental_df = pd.read_csv("student_mental_health.csv")
mental_df.head()


Unnamed: 0,Timestamp,Choose your gender,Age,What is your course?,Your current year of Study,What is your CGPA?,Marital status,Do you have Depression?,Do you have Anxiety?,Do you have Panic attack?,Did you seek any specialist for a treatment?
0,8/7/2020 12:02,Female,18.0,Engineering,year 1,3.00 - 3.49,No,Yes,No,Yes,No
1,8/7/2020 12:04,Male,21.0,Islamic education,year 2,3.00 - 3.49,No,No,Yes,No,No
2,8/7/2020 12:05,Male,19.0,BIT,Year 1,3.00 - 3.49,No,Yes,Yes,Yes,No
3,8/7/2020 12:06,Female,22.0,Laws,year 3,3.00 - 3.49,Yes,Yes,No,No,No
4,8/7/2020 12:13,Male,23.0,Mathemathics,year 4,3.00 - 3.49,No,No,No,No,No


In [3]:
# Rename the columns
mental_df.columns = ['Timestamp','Gender','Age','Course','Year_of_Study','CGPA','Marital_Status','Depression','Anxiety','Panic_Atack','Treatment']

In [4]:
# Display data with new header names
mental_df.head(2)

Unnamed: 0,Timestamp,Gender,Age,Course,Year_of_Study,CGPA,Marital_Status,Depression,Anxiety,Panic_Atack,Treatment
0,8/7/2020 12:02,Female,18.0,Engineering,year 1,3.00 - 3.49,No,Yes,No,Yes,No
1,8/7/2020 12:04,Male,21.0,Islamic education,year 2,3.00 - 3.49,No,No,Yes,No,No


In [5]:
# Convert categorical data to numeric with `pd.get_dummies`
A_df = pd.get_dummies(mental_df)
A_df.head(2)

Unnamed: 0,Age,Timestamp_13/07/2020 10:07:32,Timestamp_13/07/2020 10:10:30,Timestamp_13/07/2020 10:11:26,Timestamp_13/07/2020 10:12:18,Timestamp_13/07/2020 10:12:26,Timestamp_13/07/2020 10:12:28,Timestamp_13/07/2020 10:14:46,Timestamp_13/07/2020 10:33:47,Timestamp_13/07/2020 10:34:08,...,Marital_Status_No,Marital_Status_Yes,Depression_No,Depression_Yes,Anxiety_No,Anxiety_Yes,Panic_Atack_No,Panic_Atack_Yes,Treatment_No,Treatment_Yes
0,18.0,0,0,0,0,0,0,0,0,0,...,1,0,0,1,1,0,0,1,1,0
1,21.0,0,0,0,0,0,0,0,0,0,...,1,0,1,0,0,1,1,0,1,0


In [6]:
# Determine the number of ubique variables
mental_df.nunique()

Timestamp         92
Gender             2
Age                7
Course            49
Year_of_Study      7
CGPA               6
Marital_Status     2
Depression         2
Anxiety            2
Panic_Atack        2
Treatment          2
dtype: int64

Data Processing

In [7]:
mental_df['Age'].fillna(mental_df['Age'].median(),inplace=True)

In [8]:
mental_df['Year_of_Study'] = mental_df['Year_of_Study'].astype(str).str.extract('(\d+)').astype(int)

In [9]:
mental_df['Timestamp'] = pd.to_datetime(mental_df['Timestamp'])

In [10]:
def calculate_cgpa(x):
    if isinstance(x,str):
        first,second=x.split('-')
        return (float(first.strip())+float(second.strip()))/2
    else:
        return x
mental_df['CGPA']=mental_df['CGPA'].apply(calculate_cgpa)

In [11]:
le=LabelEncoder()
cat_cols=['Gender','Marital_Status','Depression','Anxiety','Panic_Atack','Treatment']

for col in cat_cols:
    mental_df[col]=le.fit_transform(mental_df[col])
mental_df

Unnamed: 0,Timestamp,Gender,Age,Course,Year_of_Study,CGPA,Marital_Status,Depression,Anxiety,Panic_Atack,Treatment
0,2020-08-07 12:02:00,0,18.0,Engineering,1,3.245,0,1,0,1,0
1,2020-08-07 12:04:00,1,21.0,Islamic education,2,3.245,0,0,1,0,0
2,2020-08-07 12:05:00,1,19.0,BIT,1,3.245,0,1,1,1,0
3,2020-08-07 12:06:00,0,22.0,Laws,3,3.245,1,1,0,0,0
4,2020-08-07 12:13:00,1,23.0,Mathemathics,4,3.245,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
96,2020-07-13 19:56:49,0,21.0,BCS,1,3.750,0,0,1,0,0
97,2020-07-13 21:21:42,1,18.0,Engineering,2,3.245,0,1,1,0,0
98,2020-07-13 21:22:56,0,19.0,Nursing,3,3.750,1,1,0,1,0
99,2020-07-13 21:23:57,0,23.0,Pendidikan Islam,4,3.750,0,0,0,0,0


In [12]:
# Drop Timestamp and Course- this will have no impact on the model
mental_clean_df = mental_df.drop(columns={"Timestamp", "Course"})
mental_clean_df.head(2)

Unnamed: 0,Gender,Age,Year_of_Study,CGPA,Marital_Status,Depression,Anxiety,Panic_Atack,Treatment
0,0,18.0,1,3.245,0,1,0,1,0
1,1,21.0,2,3.245,0,0,1,0,0


In [13]:
# Split our preprocessed data into our features and target arrays
x = mental_clean_df.drop(columns=['Depression']).values
y = mental_clean_df['Depression'].values


In [14]:
len(x[0])

8

In [15]:
x[0]

array([ 0.   , 18.   ,  1.   ,  3.245,  0.   ,  0.   ,  1.   ,  0.   ])

In [16]:
y

array([1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0,
       0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0,
       0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
       0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1,
       0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0])

In [17]:
# split into training and testing data
x_train, x_test, y_train, y_test = train_test_split(x, y)



In [18]:
len(x_train[0])

8

In [19]:
# Create a StandardScaler instances
scaler = StandardScaler()

# Fit the StandardScaler
x_scaler = scaler.fit(x_train)

# Scale the data
x_train_scaled = x_scaler.transform(x_train)
x_test_scaled = x_scaler.transform(x_test)
print(x_train_scaled[0])

[-0.54139029 -0.98376941  2.15982195  0.64115466 -0.43643578 -0.70710678
 -0.66506217 -0.23735633]


In [20]:
# Create a keras sequentila model
nn_model = tf.keras.models.Sequential()


In [21]:
# Add first dense layer including the input layer (hidden layer)
nn_model.add(tf.keras.layers.Dense(units=5, activation="relu", input_dim =8))
nn_model.add(tf.keras.layers.Dense(units=5, activation="relu"))


In [22]:
# output layer
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))


In [23]:
# Check the structure of the model
nn_model.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 5)                 45        
                                                                 
 dense_1 (Dense)             (None, 5)                 30        
                                                                 
 dense_2 (Dense)             (None, 1)                 6         
                                                                 
Total params: 81 (324.00 Byte)
Trainable params: 81 (324.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [24]:
# Compile the model
nn_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])


In [25]:
# Train the model
fit_model = nn_model.fit(x_train_scaled, y_train, epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [26]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn_model.evaluate(x_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

1/1 - 0s - loss: 0.4601 - accuracy: 0.8462 - 366ms/epoch - 366ms/step
Loss: 0.460055410861969, Accuracy: 0.8461538553237915
