## Depression Level Prediction

In [24]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn import metrics
import numpy as np

In [2]:
train_data = pd.read_csv('../datasets/train.csv')
test_data = pd.read_csv('../datasets/test.csv')

X_train = train_data['text']
y_train = train_data['labels']
X_test = test_data['text']
y_test = test_data['labels']

vectorizer = CountVectorizer()
X_train_vectorized = vectorizer.fit_transform(X_train)
X_test_vectorized = vectorizer.transform(X_test)

In [17]:
# sklearn decision tree classifier: default settings
clf1 = DecisionTreeClassifier()
clf1.fit(X_train_vectorized, y_train)

y_pred = clf1.predict(X_test_vectorized)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')
print(classification_report(y_test, y_pred))

Accuracy: 0.51
              precision    recall  f1-score   support

           0       0.20      0.26      0.22       228
           1       0.70      0.57      0.63      2169
           2       0.31      0.43      0.36       848

    accuracy                           0.51      3245
   macro avg       0.40      0.42      0.40      3245
weighted avg       0.56      0.51      0.53      3245



In [18]:
# sklearn decision tree classifier:
clf2 = DecisionTreeClassifier(criterion="gini", max_depth=5)
clf2 = clf2.fit(X_train_vectorized,y_train)

y_pred = clf2.predict(X_test_vectorized)
print(f"Accuracy: {metrics.accuracy_score(y_test, y_pred):.2f}")
print(classification_report(y_test, y_pred))

Accuracy: 0.64
              precision    recall  f1-score   support

           0       0.26      0.11      0.15       228
           1       0.69      0.86      0.76      2169
           2       0.40      0.21      0.28       848

    accuracy                           0.64      3245
   macro avg       0.45      0.39      0.40      3245
weighted avg       0.58      0.64      0.59      3245



In [55]:
from sklearn.neural_network import MLPClassifier
from sklearn import preprocessing

# increased max_iter (default=200)
clf3 = MLPClassifier(solver='lbfgs', alpha=1e-5,
                    hidden_layer_sizes=(5, 2), random_state=1, max_iter=5000)
clf3.fit(X_train_vectorized,y_train)

y_pred = clf2.predict(X_test_vectorized)
print(f"Accuracy: {metrics.accuracy_score(y_test, y_pred):.2f}")
print(classification_report(y_test, y_pred))

Accuracy: 0.64
              precision    recall  f1-score   support

           0       0.26      0.11      0.15       228
           1       0.69      0.86      0.76      2169
           2       0.40      0.21      0.28       848

    accuracy                           0.64      3245
   macro avg       0.45      0.39      0.40      3245
weighted avg       0.58      0.64      0.59      3245



In [60]:
input_text = '''
I'm hopeless.
'''

In [61]:
classes = {'2': 'Not depressed', '1': 'Moderately depressed', '0': 'Severely depressed'}

input_vector = vectorizer.transform([input_text])

my_testing =  clf1.predict(input_vector)
print(f'DT classifier 1 {classes[str(my_testing[0])]}')

my_testing =  clf2.predict(input_vector)
print(f'DT classifier 2 {classes[str(my_testing[0])]}')

my_testing =  clf3.predict(input_vector)
print(f'NN classifier 3 {classes[str(my_testing[0])]}')

DT classifier 1 Not depressed
DT classifier 2 Not depressed
NN classifier 3 Moderately depressed
