# Depression, Anxiety, & Stress estimation using AI and Data Science
*A project made for HealthCoder-2023*

**Team Members:**
   - Shraddha Pattnaik (B420047)
   - Soumyajeet Muni (B420051)
   - Swastik Babu (B420055)

## Import Necessaries Libraries

In [6]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from scipy.stats import mode

import warnings
warnings.filterwarnings('ignore')

df = pd.read_csv(r'C:\Users\visha\Downloads\DASS_data_21.02.19\DASS_data_21.02.19\data.csv',sep='\t')
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 39775 entries, 0 to 39774
Data columns (total 1 columns):
 #   Column                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 

## Data Cleaning
In the dataset, VCL6, VCL9, VCL12 are not real words and are used for validity check.
1. Create a new column in the DataFrame called 'wrongansw' and initializes it to zero for all rows.
2. Update the 'wrongansw' column based on a condition:
    It uses the where method of the DataFrame to apply a condition where df[''] equals 0. 
    If the condition is true, then the 'wrongansw' column remains unchanged.
    If the condition is false, then the 'wrongansw' column is incremented by 1.
3. Use answers that have checked at least 2 of the words
4. Drop 'wrongansw' and 'VCL' columns
5. Drop 'major' columns

In [7]:
df['wrongansw'] = 0
df['wrongansw'] = df['wrongansw'].where(df['VCL6']== 0, df['wrongansw'] + 1)
df['wrongansw'] = df['wrongansw'].where(df['VCL9']== 0, df['wrongansw'] + 1)
df['wrongansw'] = df['wrongansw'].where(df['VCL12']== 0, df['wrongansw'] + 1)

df.wrongansw.value_counts()

KeyError: 'VCL6'

In [None]:
df = df[df['wrongansw'].isin([2, 3])]
df = df.drop(columns='wrongansw')
df.head(3)
df.shape

In [None]:
# removing the validity check columns
vcls = []
for i in range(1, 17):
    vcls.append('VCL' + str(i))
    
df = df.drop(columns=vcls)
df.shape

In [None]:
categorical = df.select_dtypes('object').columns

print('Categorical Columns: ', df[categorical].columns)
print(df[categorical].nunique())

# remove 'major' column
df = df.drop(['major'], axis=1)

## Labelling the questions that are for specific mental disorder

In [None]:
DASS_keys = {
    'Depression': [3, 5, 10, 13, 16, 17, 21, 24, 26, 31, 34, 37, 38, 42],
    'Anxiety': [2, 4, 7, 9, 15, 19, 20, 23, 25, 28, 30, 36, 40, 41],
    'Stress': [1, 6, 8, 11, 12, 14, 18, 22, 27, 29, 32, 33, 35, 39]
    }

## Data Collection
The scores for anxiety, depression and stress were calculated by adding the values associated with the answers to each question of the particular class. Once the final scores had been calculated, they were labelled according to severity, i.e. Extremely Severe, Severe, Moderate, Mild and Normal.

In [None]:
depr = []
for i in DASS_keys["Depression"]:
    depr.append('Q' + str(i) + 'A')
    
anx = []
for i in DASS_keys["Anxiety"]:
    anx.append('Q' + str(i) + 'A')

stre = []
for i in DASS_keys["Stress"]:
    stre.append('Q' + str(i) + 'A')

# making a new dataframe for each disorder 

df_depr=df.filter(depr) 
df_anx=df.filter(anx)
df_stre=df.filter(stre)

In [None]:
disorders = [depr, anx, stre]

def scores(df): # adds new columns to store total scores for each        
    df["ScoresDepr"] = df[depr].sum(axis=1) 
    df["ScoresAnx"] = df[anx].sum(axis=1)
    df["ScoresStre"] = df[stre].sum(axis=1)
    return df

for i in disorders:
        df[i] -= 1 # to get the scores
        
df = scores(df)
        
df.head()

In [None]:
# CATEGORY: DEPRESSION
CategoryDepr=[]

for i in df['ScoresDepr']:
    if i in range(0,10):
        CategoryDepr.append('Normal')
    elif i in range(10,14):
        CategoryDepr.append('Minimal')
    elif i in range(14,21):
        CategoryDepr.append('Moderate')
    elif i in range(21,28):
        CategoryDepr.append('Severe')
    else:
        CategoryDepr.append('Extremely Severe')
        
df['CategoryDepr']= CategoryDepr

In [None]:
# CATEGORY: ANXIETY
CategoryAnx=[]

for i in df['ScoresAnx']:
    if i in range(0,8):
        CategoryAnx.append('Normal')
    elif i in range(8,10):
        CategoryAnx.append('Minimal')
    elif i in range(10,15):
        CategoryAnx.append('Moderate')
    elif i in range(15,20):
        CategoryAnx.append('Severe')
    else:
        CategoryAnx.append('Extremely Severe')
        
df['CategoryAnx']= CategoryAnx

In [None]:
# CATEGORY: STRESS
CategoryStre=[]

for i in df['ScoresStre']:
    if i in range(0,15):
        CategoryStre.append('Normal')
    elif i in range(15,19):
        CategoryStre.append('Minimal')
    elif i in range(19,26):
        CategoryStre.append('Moderate')
    elif i in range(26,34):
        CategoryStre.append('Severe')
    else:
        CategoryStre.append('Extremely Severe')
        
df['CategoryStre']= CategoryStre

In [None]:
# checking for duplicates
df.isnull().sum()
df.duplicated().sum()

In [None]:
df

## Data Classification Models for Depression

### Data Pre-processing

In [None]:
Y = df['CategoryDepr']
X = df.drop(columns=['Q2A','Q4A','Q7A','Q9A','Q15A','Q19A','Q20A','Q23A','Q25A','Q28A','Q30A','Q36A','Q40A','Q41A','introelapse','testelapse', 'surveyelapse','engnat','CategoryAnx', 'CategoryDepr', 'CategoryStre','country', 'ScoresAnx' ,'ScoresStre','screensize','uniquenetworklocation','Q1A' ,'Q1I', 'Q1E', 'Q2I', 'Q2E', 'Q3A', 'Q3I', 'Q3E','Q4I','Q4E', 'Q5E', 'Q5A' ,'Q5I', 'Q6E', 'Q6A' ,'Q6I', 'Q7E' ,'Q7I', 'Q8E','Q8A' ,'Q8I', 'Q9E', 'Q9I', 'Q10E', 'Q10A', 'Q10I', 'Q11E', 'Q11A' ,'Q11I', 'Q12E', 'Q12A' ,'Q12I', 'Q13E', 'Q13A' ,'Q13I', 'Q14E', 'Q14A' ,'Q14I', 'Q15E', 'Q15I', 'Q16E', 'Q16A' ,'Q16I', 'Q17E', 'Q17A' ,'Q17I', 'Q18E', 'Q18A' ,'Q18I', 'Q19I', 'Q19E',  'Q20I', 'Q20E', 'Q21A', 'Q21I', 'Q21E', 'Q22A', 'Q22I','Q22E', 'Q23I','Q23E','Q24A','Q24I','Q24E', 'Q25I', 'Q25E', 'Q26A', 'Q26I', 'Q26E', 'Q27A', 'Q27I', 'Q27E', 'Q28I', 'Q28E', 'Q29A', 'Q29I', 'Q29E',  'Q30I', 'Q30E', 'Q31A', 'Q31I', 'Q31E', 'Q32A', 'Q32I', 'Q32E', 'Q33A', 'Q33I', 'Q33E', 'Q34A', 'Q34I', 'Q34E', 'Q35A', 'Q35I', 'Q35E', 'Q36I', 'Q36E', 'Q37A', 'Q37I', 'Q37E', 'Q38A', 'Q38I', 'Q38E', 'Q39A', 'Q39I', 'Q39E', 'Q40I', 'Q40E', 'Q41I', 'Q41E', 'Q42A', 'Q42I', 'Q42E'])
X.head()

In [None]:
scaler = StandardScaler()
scaler.fit(X)
X = scaler.transform(X)

In [None]:
# Split the data into training and testing sets
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.25, random_state=85)

### Classification Models

In [None]:
# KNN Model for Depression Classification

# Create a KNN classifier with k (number of neighbors)
knn = KNeighborsClassifier(n_neighbors=95)

# Fit the model to the training data
knn.fit(X_train, Y_train)

# Predict the classes of the testing set
Y_pred = knn.predict(X_test)

# Calculate the accuracy score of the model
accuracy_knn = accuracy_score(Y_test, Y_pred)
conf_matrix = confusion_matrix(Y_test, Y_pred)
classif_report = classification_report(Y_test, Y_pred)

print('Accuracy of KNN Model - Depression: {:.5f}'.format(accuracy_knn))
print('Confusion Matrix of KNN Model - Depression: \n', conf_matrix)
print('Confusion Matrix of KNN Model - Depression: \n', classif_report)

In [None]:
# SVC Model for Depression Classification
svm_model = SVC(kernel='linear')

svm_model.fit(X_train, Y_train)

preds = svm_model.predict(X_test)

# Calculate the accuracy score of the model
accuracy_svc = accuracy_score(Y_test, Y_pred)
conf_matrix = confusion_matrix(Y_test, Y_pred)
classif_report = classification_report(Y_test, Y_pred)

print('Accuracy of SVM Model - Depression: {:.5f}'.format(accuracy_svc))
print('Confusion Matrix of SVM Model - Depression: \n', conf_matrix)
print('Confusion Matrix of SVM Model - Depression: \n', classif_report)

In [None]:
# Naive Bayes Model for Depression Classification

clfNB = GaussianNB()

# Fit the model to the training data
clfNB.fit(X_train, Y_train)

# Predict the classes of the testing set
Y_pred = clfNB.predict(X_test)

# Calculate the accuracy score of the model
accuracy_nb = accuracy_score(Y_test, Y_pred)
conf_matrix = confusion_matrix(Y_test, Y_pred)
classif_report = classification_report(Y_test, Y_pred)

print('Accuracy of NB Model - Depression: {:.5f}'.format(accuracy_nb))
print('Confusion Matrix of NB Model - Depression: \n', conf_matrix)
print('Classification Report of NB Model - Depression: \n', classif_report)

In [None]:
# Random Forest Classifier Model for Depression Classification

clfRFC = RandomForestClassifier(n_estimators=110, random_state=110)

# Fit the model to the training data
clfRFC.fit(X_train, Y_train)

# Predict the classes of the testing set
Y_pred = clfRFC.predict(X_test)

# Calculate the accuracy score of the model
accuracy_rfc = accuracy_score(Y_test, Y_pred)
conf_matrix = confusion_matrix(Y_test, Y_pred)
classif_report = classification_report(Y_test, Y_pred)


print('Accuracy of RFC Model - Depression: {:.5f}'.format(accuracy_rfc))
print('Confusion Matrix of RFC Model - Depression: \n', conf_matrix)
print('Classification report of RFC Model - Depression: \n', classif_report)

In [None]:
# XG Boost Classifier For Depression Classifier
xgb_model = XGBClassifier()
le = LabelEncoder()

xgb_model.fit(X_train, le.fit_transform(Y_train))

Y_pred = xgb_model.predict(X_test)
Y_pred = le.inverse_transform(Y_pred)

accuracy_xgb = accuracy_score(Y_test, Y_pred)
conf_matrix = confusion_matrix(Y_test, Y_pred)
classif_report = classification_report(Y_test, Y_pred)

print('Accuracy of XGB Model - Depression: {:.5f}'.format(accuracy_xgb))
print('Confusion Matrix of XGB Model - Depression: \n', conf_matrix)
print('Classification report of XGB Model - Depression: \n', classif_report)

In [None]:
# Combining Naive Bayes, XG Boost and Random Forest

# Training the models on whole data
final_xgb_model = XGBClassifier()
final_nb_model = GaussianNB()
final_rfc_model = RandomForestClassifier(n_estimators=110, random_state=110)


final_xgb_model.fit(X_train, le.fit_transform(Y_train))
final_nb_model.fit(X_train, Y_train)
final_rfc_model.fit(X_train, Y_train)

# Making prediction by take mode of predictions made by all the classifiers
xgb_preds = final_xgb_model.predict(X_test)
xgb_preds = le.inverse_transform(xgb_preds)

nb_preds = final_nb_model.predict(X_test)
rfc_preds = final_rfc_model.predict(X_test)

final_preds = [mode([i,j,k])[0][0] for i,j, k in zip(xgb_preds, nb_preds, rfc_preds)]

accuracy_combined = accuracy_score(Y_test, final_preds)
conf_matrix = confusion_matrix(Y_test, final_preds)
classif_report = classification_report(Y_test, final_preds)

print('Accuracy of Combined Model - Depression: {:.5f}'.format(accuracy_combined))
print('Confusion Matrix of Combined Model - Depression: \n', conf_matrix)
print('Classification report of Combined Model - Depression: \n', classif_report)

In [None]:
accuracy_depression = [accuracy_knn, accuracy_svc, accuracy_nb, accuracy_rfc, accuracy_xgb, accuracy_combined]

depression_analysis = pd.DataFrame({
                        'Models' : ['KNN', 'SVC', 'Naive Bayes', 'Random Forest', 'XG Boost', 'Combined Model'],
                        'Accuracy_Depression': [x * 100 for x in accuracy_depression]
                    })

## Data Classification Models for Anxiety

### Data Pre-processing

In [None]:
Y = df['CategoryAnx']
X = df.drop(columns=['Q2A','Q4A','Q7A','Q9A','Q15A','Q19A','Q20A','Q23A','Q25A','Q28A','Q30A','Q36A','Q40A','Q41A','introelapse','testelapse', 'surveyelapse','engnat','CategoryAnx','CategoryDepr', 'CategoryStre','country', 'ScoresDepr' ,'ScoresStre','screensize','uniquenetworklocation','Q1A' ,'Q1I', 'Q1E', 'Q2I', 'Q2E', 'Q3A', 'Q3I', 'Q3E','Q4I','Q4E', 'Q5E', 'Q5A' ,'Q5I', 'Q6E', 'Q6A' ,'Q6I', 'Q7E' ,'Q7I', 'Q8E','Q8A' ,'Q8I', 'Q9E', 'Q9I', 'Q10E', 'Q10A', 'Q10I', 'Q11E', 'Q11A' ,'Q11I', 'Q12E', 'Q12A' ,'Q12I', 'Q13E', 'Q13A' ,'Q13I', 'Q14E', 'Q14A' ,'Q14I', 'Q15E', 'Q15I', 'Q16E', 'Q16A' ,'Q16I', 'Q17E', 'Q17A' ,'Q17I', 'Q18E', 'Q18A' ,'Q18I', 'Q19I', 'Q19E',  'Q20I', 'Q20E', 'Q21A', 'Q21I', 'Q21E', 'Q22A', 'Q22I','Q22E', 'Q23I','Q23E','Q24A','Q24I','Q24E', 'Q25I', 'Q25E', 'Q26A', 'Q26I', 'Q26E', 'Q27A', 'Q27I', 'Q27E', 'Q28I', 'Q28E', 'Q29A', 'Q29I', 'Q29E',  'Q30I', 'Q30E', 'Q31A', 'Q31I', 'Q31E', 'Q32A', 'Q32I', 'Q32E', 'Q33A', 'Q33I', 'Q33E', 'Q34A', 'Q34I', 'Q34E', 'Q35A', 'Q35I', 'Q35E', 'Q36I', 'Q36E', 'Q37A', 'Q37I', 'Q37E', 'Q38A', 'Q38I', 'Q38E', 'Q39A', 'Q39I', 'Q39E', 'Q40I', 'Q40E', 'Q41I', 'Q41E', 'Q42A', 'Q42I', 'Q42E'])
X.head()

In [None]:
scaler = StandardScaler()
scaler.fit(X)
X = scaler.transform(X)

In [None]:
# Split the data into training and testing sets
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.25, random_state=85)

### Classification Models

In [None]:
# KNN Classification
# Create a KNN classifier with k (number of neighbors)
knn = KNeighborsClassifier(n_neighbors=95)

# Fit the model to the training data
knn.fit(X_train, Y_train)

# Predict the classes of the testing set
Y_pred = knn.predict(X_test)

# Calculate the accuracy score of the model
accuracy_knn = accuracy_score(Y_test, Y_pred)
conf_matrix = confusion_matrix(Y_test, Y_pred)
classif_report = classification_report(Y_test, Y_pred)

print('Accuracy of KNN Model - Anxiety: {:.5f}'.format(accuracy_knn))
print('Confusion Matrix of KNN Model - Anxiety: \n', conf_matrix)
print('Confusion Matrix of KNN Model - Anxiety: \n', classif_report)

In [None]:
# SVC Model for Anxiety Classification
svm_model = SVC(kernel='linear')

svm_model.fit(X_train, Y_train)

preds = svm_model.predict(X_test)

# Calculate the accuracy score of the model
accuracy_svc = accuracy_score(Y_test, Y_pred)
conf_matrix = confusion_matrix(Y_test, Y_pred)
classif_report = classification_report(Y_test, Y_pred)

print('Accuracy of SVM Model - Anxiety: {:.5f}'.format(accuracy_svc))
print('Confusion Matrix of SVM Model - Anxiety: \n', conf_matrix)
print('Confusion Matrix of SVM Model - Anxiety: \n', classif_report)

In [None]:
# Naive Bayes Model for Anxiety Classification
clfNB = GaussianNB()

# Fit the model to the training data
clfNB.fit(X_train, Y_train)

# Predict the classes of the testing set
Y_pred = clfNB.predict(X_test)

# Calculate the accuracy score of the model
accuracy_nb = accuracy_score(Y_test, Y_pred)
conf_matrix = confusion_matrix(Y_test, Y_pred)
classif_report = classification_report(Y_test, Y_pred)

print('Accuracy of NB Model - Anxiety: {:.5f}'.format(accuracy_nb))
print('Confusion Matrix of NB Model - Anxiety: \n', conf_matrix)
print('Classification Report of NB Model - Anxiety: \n', classif_report)

In [None]:
# Random Forest Classifier Model for Anxiety Classification
clfRFC = RandomForestClassifier(n_estimators=95, random_state=85)

# Fit the model to the training data
clfRFC.fit(X_train, Y_train)

# Predict the classes of the testing set
Y_pred = clfRFC.predict(X_test)

# Calculate the accuracy score of the model
accuracy_rfc = accuracy_score(Y_test, Y_pred)
conf_matrix = confusion_matrix(Y_test, Y_pred)
classif_report = classification_report(Y_test, Y_pred)


print('Accuracy of RFC Model - Anxiety: {:.5f}'.format(accuracy_rfc))
print('Confusion Matrix of RFC Model - Anxiety: \n', conf_matrix)
print('Classification report of KNN Model - Anxiety: \n', classif_report)

In [None]:
# XG Boost Classifier For Anxiety Classifier
xgb_model = XGBClassifier()
le = LabelEncoder()

xgb_model.fit(X_train, le.fit_transform(Y_train))

Y_pred = xgb_model.predict(X_test)
Y_pred = le.inverse_transform(Y_pred)

accuracy_xgb = accuracy_score(Y_test, Y_pred)
conf_matrix = confusion_matrix(Y_test, Y_pred)
classif_report = classification_report(Y_test, Y_pred)

print('Accuracy of XGB Model - Anxiety: {:.5f}'.format(accuracy_xgb))
print('Confusion Matrix of XGB Model - Anxiety: \n', conf_matrix)
print('Classification report of XGB Model - Anxiety: \n', classif_report)

In [None]:
# Combining Naive Bayes, XG Boost and Random Forest

# Training the models on whole data
final_xgb_model = XGBClassifier()
final_nb_model = GaussianNB()
final_rfc_model = RandomForestClassifier(n_estimators=110, random_state=110)


final_xgb_model.fit(X_train, le.fit_transform(Y_train))
final_nb_model.fit(X_train, Y_train)
final_rfc_model.fit(X_train, Y_train)

# Making prediction by take mode of predictions made by all the classifiers
xgb_preds = final_xgb_model.predict(X_test)
xgb_preds = le.inverse_transform(xgb_preds)

nb_preds = final_nb_model.predict(X_test)
rfc_preds = final_rfc_model.predict(X_test)

final_preds = [mode([i,j,k])[0][0] for i,j, k in zip(xgb_preds, nb_preds, rfc_preds)]

accuracy_combined = accuracy_score(Y_test, final_preds)
conf_matrix = confusion_matrix(Y_test, final_preds)
classif_report = classification_report(Y_test, final_preds)

print('Accuracy of Combined Model - Anxiety: {:.5f}'.format(accuracy_combined))
print('Confusion Matrix of Combined Model - Anxiety: \n', conf_matrix)
print('Classification report of Combined Model - Anxiety: \n', classif_report)

In [None]:
accuracy_anxiety = [accuracy_knn, accuracy_svc, accuracy_nb, accuracy_rfc, accuracy_xgb, accuracy_combined]

anxiety_analysis = pd.DataFrame({
                        'Models' : ['KNN', 'SVC', 'Naive Bayes', 'Random Forest', 'XG Boost', 'Combined Model'],
                        'Accuracy_Anxiety': [x * 100 for x in accuracy_anxiety]
                    })

## Data Classification Models for Stress

### Data Preprocessing

In [None]:
Y = df['CategoryStre']
X = df.drop(columns=['Q2A','Q4A','Q7A','Q9A','Q15A','Q19A','Q20A','Q23A','Q25A','Q28A','Q30A','Q36A','Q40A','Q41A','introelapse','testelapse', 'surveyelapse','engnat','CategoryStre','CategoryAnx', 'CategoryDepr', 'country', 'ScoresDepr' ,'ScoresAnx','screensize','uniquenetworklocation','Q1A' ,'Q1I', 'Q1E', 'Q2I', 'Q2E', 'Q3A', 'Q3I', 'Q3E','Q4I','Q4E', 'Q5E', 'Q5A' ,'Q5I', 'Q6E', 'Q6A' ,'Q6I', 'Q7E' ,'Q7I', 'Q8E','Q8A' ,'Q8I', 'Q9E', 'Q9I', 'Q10E', 'Q10A', 'Q10I', 'Q11E', 'Q11A' ,'Q11I', 'Q12E', 'Q12A' ,'Q12I', 'Q13E', 'Q13A' ,'Q13I', 'Q14E', 'Q14A' ,'Q14I', 'Q15E', 'Q15I', 'Q16E', 'Q16A' ,'Q16I', 'Q17E', 'Q17A' ,'Q17I', 'Q18E', 'Q18A' ,'Q18I', 'Q19I', 'Q19E',  'Q20I', 'Q20E', 'Q21A', 'Q21I', 'Q21E', 'Q22A', 'Q22I','Q22E', 'Q23I','Q23E','Q24A','Q24I','Q24E', 'Q25I', 'Q25E', 'Q26A', 'Q26I', 'Q26E', 'Q27A', 'Q27I', 'Q27E', 'Q28I', 'Q28E', 'Q29A', 'Q29I', 'Q29E',  'Q30I', 'Q30E', 'Q31A', 'Q31I', 'Q31E', 'Q32A', 'Q32I', 'Q32E', 'Q33A', 'Q33I', 'Q33E', 'Q34A', 'Q34I', 'Q34E', 'Q35A', 'Q35I', 'Q35E', 'Q36I', 'Q36E', 'Q37A', 'Q37I', 'Q37E', 'Q38A', 'Q38I', 'Q38E', 'Q39A', 'Q39I', 'Q39E', 'Q40I', 'Q40E', 'Q41I', 'Q41E', 'Q42A', 'Q42I', 'Q42E'])
X.head()

In [None]:
scaler = StandardScaler()
scaler.fit(X)
X = scaler.transform(X)

In [None]:
# Split the data into training and testing sets
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.25, random_state=85)

### Classification Models

In [None]:
# KNN Model for Stress Classification

# Create a KNN classifier with k (number of neighbors)
knn = KNeighborsClassifier(n_neighbors=95)

# Fit the model to the training data
knn.fit(X_train, Y_train)

# Predict the classes of the testing set
Y_pred = knn.predict(X_test)

# Calculate the accuracy score of the model
accuracy_knn = accuracy_score(Y_test, Y_pred)
conf_matrix = confusion_matrix(Y_test, Y_pred)
classif_report = classification_report(Y_test, Y_pred)

print('Accuracy of KNN Model - Stress: {:.5f}'.format(accuracy_knn))
print('Confusion Matrix of KNN Model - Stress: \n', conf_matrix)
print('Confusion Matrix of KNN Model - Stress: \n', classif_report)

In [None]:
# SVC Model for Stress Classification
svm_model = SVC(kernel='linear')

svm_model.fit(X_train, Y_train)

preds = svm_model.predict(X_test)

# Calculate the accuracy score of the model
accuracy_svc = accuracy_score(Y_test, Y_pred)
conf_matrix = confusion_matrix(Y_test, Y_pred)
classif_report = classification_report(Y_test, Y_pred)

print('Accuracy of SVM Model - Anxiety: {:.5f}'.format(accuracy_svc))
print('Confusion Matrix of SVM Model - Anxiety: \n', conf_matrix)
print('Confusion Matrix of SVM Model - Anxiety: \n', classif_report)

In [None]:
# Naive Bayes Model for Stress Classification

clfNB = GaussianNB()

# Fit the model to the training data
clfNB.fit(X_train, Y_train)

# Predict the classes of the testing set
Y_pred = clfNB.predict(X_test)

# Calculate the accuracy score of the model
accuracy_nb = accuracy_score(Y_test, Y_pred)
conf_matrix = confusion_matrix(Y_test, Y_pred)
classif_report = classification_report(Y_test, Y_pred)

print('Accuracy of NB Model - Stress: {:.5f}'.format(accuracy_nb))
print('Confusion Matrix of NB Model - Stress: \n', conf_matrix)
print('Classification Report of KNN Model - Stress: \n', classif_report)

In [None]:
# Random Forest Classifier Model for Stress Classification

clfRFC = RandomForestClassifier(n_estimators=95, random_state=85)

# Fit the model to the training data
clfRFC.fit(X_train, Y_train)

# Predict the classes of the testing set
Y_pred = clfRFC.predict(X_test)

# Calculate the accuracy score of the model
accuracy_rfc = accuracy_score(Y_test, Y_pred)
conf_matrix = confusion_matrix(Y_test, Y_pred)
classif_report = classification_report(Y_test, Y_pred)


print('Accuracy of RFC Model - Stress: {:.5f}'.format(accuracy_rfc))
print('Confusion Matrix of RFC Model - Stress: \n', conf_matrix)
print('Classification report of KNN Model - Stress: \n', classif_report)

In [None]:
# XG Boost Classifier For Anxiety Classifier
xgb_model = XGBClassifier()
le = LabelEncoder()

xgb_model.fit(X_train, le.fit_transform(Y_train))

Y_pred = xgb_model.predict(X_test)
Y_pred = le.inverse_transform(Y_pred)

accuracy_xgb = accuracy_score(Y_test, Y_pred)
conf_matrix = confusion_matrix(Y_test, Y_pred)
classif_report = classification_report(Y_test, Y_pred)

print('Accuracy of XGB Model - Stress: {:.5f}'.format(accuracy_xgb))
print('Confusion Matrix of XGB Model - Stress: \n', conf_matrix)
print('Classification report of XGB Model - Stress: \n', classif_report)

In [None]:
# Combining Naive Bayes, XG Boost and Random Forest

# Training the models on whole data
final_xgb_model = XGBClassifier()
final_nb_model = GaussianNB()
final_rfc_model = RandomForestClassifier(n_estimators=110, random_state=110)


final_xgb_model.fit(X_train, le.fit_transform(Y_train))
final_nb_model.fit(X_train, Y_train)
final_rfc_model.fit(X_train, Y_train)

# Making prediction by take mode of predictions made by all the classifiers
xgb_preds = final_xgb_model.predict(X_test)
xgb_preds = le.inverse_transform(xgb_preds)

nb_preds = final_nb_model.predict(X_test)
rfc_preds = final_rfc_model.predict(X_test)

final_preds = [mode([i,j,k])[0][0] for i,j, k in zip(xgb_preds, nb_preds, rfc_preds)]

accuracy_combined = accuracy_score(Y_test, final_preds)
conf_matrix = confusion_matrix(Y_test, final_preds)
classif_report = classification_report(Y_test, final_preds)

print('Accuracy of Combined Model - Stress: {:.5f}'.format(accuracy_combined))
print('Confusion Matrix of Combined Model - Stress: \n', conf_matrix)
print('Classification report of Combined Model - Stress: \n', classif_report)

In [None]:
accuracy_stress = [accuracy_knn, accuracy_svc, accuracy_nb, accuracy_rfc, accuracy_xgb, accuracy_combined]

stress_analysis = pd.DataFrame({
                        'Models' : ['KNN', 'SVC', 'Naive Bayes', 'Random Forest', 'XG Boost', 'Combined Model'],
                        'Accuracy_Stress': [x * 100 for x in accuracy_stress]
                    })

## Analysis of the Models Used

In [None]:
analysis = pd.concat([stress_analysis.set_index('Models'), 
                      anxiety_analysis.set_index('Models'), 
                      depression_analysis.set_index('Models')],
                     axis=1)

In [None]:
analysis.head()

In [None]:
ax = analysis.plot(kind='barh')
plt.title("Accuracy of classification using different ML algorithms for DASS42")
plt.xlabel("Accuracy")
plt.ylabel("Models")
plt.show()