In [1]:
# Importing necessary libraries
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Load libraries 
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split # Import train_test_split function
from sklearn.metrics import accuracy_score
import pickle 
import joblib

In [2]:
def reading_csv(filename):
    """Read a CSV file into a pandas DataFrame.

    Parameters
    ----------
    filename : str or path-like
        Location of the CSV file; handed to ``pd.read_csv`` unchanged.

    Returns
    -------
    pandas.DataFrame
        The parsed table, using ``pd.read_csv``'s default options.
    """
    return pd.read_csv(filename)

In [3]:
# Load the three survey item tables (depression, anxiety, stress) in one pass.
df_dep, df_anx, df_str = map(
    reading_csv,
    (
        "../SurveyDataset/Depression.csv",
        "../SurveyDataset/Anxiety.csv",
        "../SurveyDataset/Stress.csv",
    ),
)

In [4]:
# Preview the first rows of the depression item table.
df_dep.head()

Unnamed: 0,Q3(D),Q5(D),Q10(D),Q13(D),Q16(D),Q17(D),Q21(D)
0,0,3,0,1,0,0,1
1,2,2,3,3,0,3,3
2,2,1,2,2,2,2,2
3,0,2,1,2,1,2,2
4,0,1,0,0,1,0,3


In [5]:
def load_model(filename):
    """Deserialize and return the object pickled in ``filename``.

    Parameters
    ----------
    filename : str or path-like
        Path to a pickle file; it is opened in binary read mode.

    Returns
    -------
    object
        Whatever object ``pickle.load`` reconstructs from the file.

    Notes
    -----
    Unpickling can execute arbitrary code, so only pass files that come
    from a trusted source.
    """
    with open(filename, 'rb') as pickled:
        return pickle.load(pickled)

In [6]:
# Depression
# Load the pickled depression model and predict one label per row of df_dep.
# NOTE(review): df_dep is passed whole, so its "Unnamed: 0" column is part of
# the model input — confirm this matches how the model was trained.
model = load_model("../models/dep_model_svm.pkl")
y_pred_dep = model.predict(df_dep)

In [7]:
# Anxiety
# Load the pickled anxiety model and predict on the anxiety item table.
# `model` is rebound here, so the previously loaded model is no longer referenced.
model = load_model("../models/anx_model_svm.pkl")
y_pred_anx = model.predict(df_anx)

In [8]:
# Stress
# Load the pickled stress model and predict on the stress item table.
# After this cell `model` refers to the stress model.
model = load_model("../models/str_model_svm.pkl")
y_pred_str = model.predict(df_str)

In [9]:
# Inspect the predicted depression class codes.
y_pred_dep

array([1, 4, 3, 2, 1, 1, 4, 4, 4, 1, 2, 3, 3, 2, 1, 0, 0, 1, 0, 0, 2, 1,
       0, 2, 3, 4, 0, 0, 3, 4, 2, 2, 4, 2, 0, 0, 0, 1, 2, 3, 0, 3, 0, 1,
       1, 0, 2, 1, 0, 0, 4, 1, 2, 4, 0, 2, 0, 0, 0, 3, 0, 3, 0, 1, 1, 1,
       2, 4, 0, 1, 0, 2, 0, 0, 4, 2, 3, 0, 2, 1, 0, 0, 4, 1, 2, 4, 0, 1,
       2, 0, 0, 0, 4, 0, 3, 2, 1, 3, 2, 0, 0, 3, 4, 0, 2, 2, 4, 4, 0, 2,
       0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 4, 2, 2, 1, 0, 0, 0, 0, 3, 2, 1, 2,
       3, 0, 4, 2, 0, 2, 2, 2, 1, 4, 2, 4, 4, 0, 2, 1, 0, 4, 0, 2, 0, 1,
       4, 2, 2, 3, 0, 0, 2, 0, 1, 2, 0, 1, 0, 2, 0, 4, 0, 2, 0, 4, 0, 4,
       1, 4, 0, 4, 2, 3, 4, 0, 2, 0, 2, 2, 4, 0, 0, 4, 0, 2, 3, 0],
      dtype=int64)

In [10]:
# Load the cleaned survey table that the predicted labels are attached to below.
df=reading_csv("../SurveyDataset/CleanData.csv")

In [11]:
# Attach each prediction array as a new column of df.
# The arrays are assigned by position, so this assumes the rows of CleanData.csv
# are in the same order as the rows of the three item tables — TODO confirm.
df['Depression'] = y_pred_dep
df['Anxiety'] = y_pred_anx
df['Stress'] = y_pred_str

In [12]:
# Preview the table with the numeric prediction codes attached.
df.head()

Unnamed: 0,Which age group do you belong to?,Sex,What is your current family type?,How many hours do you sleep per day?,Who do you sleep with usually in the bedroom?,How is your quality of sleep?,Does your health limit you in doing daily physical activities? [Light activities],Does your health limit you in doing daily physical activities? [Moderate activities],Does your health limit you in doing daily physical activities? [Heavy activities],How often do you consume alcohol in a week?,...,Distrust,Hopelessness,Unavailability,Practical barriers,Stigma,age class,Gender,Depression,Anxiety,Stress
0,18 - 25,Male,Joint,4-6,With parents,Very good,No problem,No problem,No problem,I don’t drink alcohol,...,0,1,0,0,0,0,1,1,0,1
1,18 - 25,Male,Nuclear,7-9,Alone,Good,Very much,Moderately,Very less,I don’t drink alcohol,...,1,1,0,1,0,0,1,4,4,4
2,18 - 25,Female,Nuclear,7-9,With Siblings,Good,Very much,Very much,Very much,I don’t drink alcohol,...,0,1,0,1,1,0,0,3,4,2
3,18 - 25,Female,Nuclear,<4,Alone,Average,No problem,No problem,No problem,I don’t drink alcohol,...,1,1,1,1,1,0,0,2,3,3
4,18 - 25,Male,Joint,7-9,With parents,Very good,Very less,Moderately,Very less,I don’t drink alcohol,...,1,1,1,1,1,0,1,1,2,1


In [13]:
# Severity label for each integer class code, listed in code order 0..4.
scale_mapper = dict(enumerate(
    ['Normal', 'Mild', 'Moderate', 'Severe', 'Extremely Severe']
))
# Prediction columns whose integer codes are swapped for their labels.
col = ['Depression', 'Anxiety', 'Stress']
for i in col:
    # replace() leaves values that are not keys of the mapper untouched.
    df[i] = df[i].replace(scale_mapper)

In [14]:
# Preview the table after the codes were replaced by severity labels.
df.head()

Unnamed: 0,Which age group do you belong to?,Sex,What is your current family type?,How many hours do you sleep per day?,Who do you sleep with usually in the bedroom?,How is your quality of sleep?,Does your health limit you in doing daily physical activities? [Light activities],Does your health limit you in doing daily physical activities? [Moderate activities],Does your health limit you in doing daily physical activities? [Heavy activities],How often do you consume alcohol in a week?,...,Distrust,Hopelessness,Unavailability,Practical barriers,Stigma,age class,Gender,Depression,Anxiety,Stress
0,18 - 25,Male,Joint,4-6,With parents,Very good,No problem,No problem,No problem,I don’t drink alcohol,...,0,1,0,0,0,0,1,Mild,Normal,Mild
1,18 - 25,Male,Nuclear,7-9,Alone,Good,Very much,Moderately,Very less,I don’t drink alcohol,...,1,1,0,1,0,0,1,Extremely Severe,Extremely Severe,Extremely Severe
2,18 - 25,Female,Nuclear,7-9,With Siblings,Good,Very much,Very much,Very much,I don’t drink alcohol,...,0,1,0,1,1,0,0,Severe,Extremely Severe,Moderate
3,18 - 25,Female,Nuclear,<4,Alone,Average,No problem,No problem,No problem,I don’t drink alcohol,...,1,1,1,1,1,0,0,Moderate,Severe,Severe
4,18 - 25,Male,Joint,7-9,With parents,Very good,Very less,Moderately,Very less,I don’t drink alcohol,...,1,1,1,1,1,0,1,Mild,Moderate,Mild


In [15]:
# Save the labelled dataframe.
# NOTE(review): this writes to the same path that df was read from above, so the
# input file is overwritten in place. index=False keeps pandas from adding another
# index column, but any existing "Unnamed: 0" column in df is still written out.
df.to_csv('../SurveyDataset/CleanData.csv',index=False)