In [None]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

# Notebook-wide settings: a seed constant and an uncapped column display.
SEED = 42
pd.options.display.max_columns = None

In [None]:
# Mount Google Drive at /content/drive (Colab-only helper); the dataset is
# read from beneath this mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# The export is tab-separated, hence the explicit separator.
DATA_PATH = '/content/drive/MyDrive/data.csv'
df = pd.read_csv(DATA_PATH, sep='\t')

In [None]:
# Columns that are dropped before modelling:
#   Q1E..Q42E, Q1I..Q42I, VCL1..VCL16 and a fixed list of metadata columns.
question_ids = range(1, 43)
removedFeatures = (
    [f'Q{i}E' for i in question_ids]
    + [f'Q{i}I' for i in question_ids]
    + [f'VCL{i}' for i in range(1, 17)]
    + [
        'source', 'introelapse', 'testelapse', 'surveyelapse', 'engnat', 'hand', 'orientation',
        'voted', 'country', 'screensize', 'uniquenetworklocation', 'religion', 'major', 'urban',
    ]
)
depression = df.drop(columns=removedFeatures)
depression.head()


In [None]:
# Only these Q<n>A items are kept; every other Q1A..Q42A column is dropped.
kept_question_ids = {1, 4, 8, 11, 16, 22, 29, 34, 38, 40, 42}
removed_Question_featured = [
    f'Q{i}A' for i in range(1, 43) if i not in kept_question_ids
]
depression = depression.drop(columns=removed_Question_featured)

In [None]:
# Preview the frame after the question columns were dropped.
depression.head()

In [None]:
# Columns are renamed by position: kept question items, the ten TIPI items,
# then the six remaining demographic columns.
question_names = [f'Q{i}' for i in (1, 4, 8, 11, 16, 22, 29, 34, 38, 40, 42)]
tipi_names = [f'TIPI{i}' for i in range(1, 11)]
demographic_names = ['Education', 'Gender', 'Age', 'Race', 'Married', 'Familysize']
depression.columns = question_names + tipi_names + demographic_names
depression.head()

In [None]:
# (rows, columns) of the frame after renaming.
depression.shape

In [None]:
# plt.figure(figsize=(10, 6))
# sns.heatmap(depression.isnull())

In [None]:
# Missing-value count for every column.
depression.isna().sum()


In [None]:
# Code 0 is folded into 1; codes 1-4 are kept as they are. Any value missing
# from the mapping becomes NaN (Series.map behaviour).
education_codes = {0: 1, 1: 1, 2: 2, 3: 3, 4: 4}
depression['Education'] = depression['Education'].map(education_codes)

def changeEducationTitle(title) -> str:
    """Translate a numeric education code into its display label.

    Codes 0 and 1 both read 'Less than high school'; 2, 3 and 4 map to
    'High school', 'University degree' and 'Graduate degree'. Any other
    value is handed back unchanged.
    """
    labelled_codes = (
        (0, 'Less than high school'),
        (1, 'Less than high school'),
        (2, 'High school'),
        (3, 'University degree'),
        (4, 'Graduate degree'),
    )
    for code, label in labelled_codes:
        if title == code:
            return label
    return title


# Text labels for each row, used only to colour the bars below.
education_string = depression['Education'].apply(changeEducationTitle)


# Count of respondents per education code, coloured by its label.
plt.figure(figsize=(10, 4))
sns.countplot(x=depression['Education'], hue=education_string)


In [None]:
# Code 0 is folded into 2; codes 1-3 are kept as they are. Any value missing
# from the mapping becomes NaN (Series.map behaviour).
gender_codes = {0: 2, 1: 1, 2: 2, 3: 3}
depression['Gender'] = depression['Gender'].map(gender_codes)

def changeGenderValue(value):
    """Return the display label for a gender code.

    1 is 'Male'. 2 is 'Female', and so is 0: a 0 means the respondent gave
    no answer, and it is assumed to be female because most records are.
    Every other value is labelled 'Other'.
    """
    if value == 1:
        return 'Male'
    return 'Female' if value in (2, 0) else 'Other'

# Text labels for each row, used only to colour the bars below.
gender = depression['Gender'].apply(changeGenderValue)

# Count of respondents per gender code, coloured by its label.
plt.figure(figsize=(8, 4))
sns.countplot(x = depression['Gender'], hue=gender)

In [None]:
# Scale the stored race codes down by a factor of ten.
# NOTE: not idempotent -- running this cell a second time divides again.
depression['Race'] = depression['Race'] / 10
depression['Race'].head()

In [None]:
def changeRaceValues(value)->str:
    """Translate a race code (1-7) into its display label.

    1 'Asian', 2 'Arab', 3 'Black', 4 'Indigenous Australian',
    5 'Native American', 6 'White', 7 'Other'. A value outside 1-7 is
    returned unchanged.
    """
    race_names = (
        'Asian',
        'Arab',
        'Black',
        'Indigenous Australian',
        'Native American',
        'White',
        'Other',
    )
    for code, name in enumerate(race_names, start=1):
        if value == code:
            return name
    return value

# Text labels for each row, used only to colour the bars below.
race = depression['Race'].apply(changeRaceValues)

# Frequency of each race code.
display(depression['Race'].value_counts())

# Count of respondents per race code, coloured by its label.
plt.figure(figsize=(8,4))
sns.countplot(x=depression['Race'], hue=race)

In [None]:
# Rows reporting a family size above 13 are removed before plotting.
oversized_family = depression['Familysize'] > 13
indexes = depression.loc[oversized_family].index

print(f'Depression size before: {depression.shape[0]}')
depression = depression.drop(index=indexes)
print(f'Depression size after: {depression.shape[0]}')

plt.figure(figsize=(18, 5))
sns.countplot(x=depression['Familysize'])


In [None]:
# Rows with an age above 65 are removed.
age_indexes = depression.loc[depression['Age'] > 65].index

display(age_indexes) # figure out how many


print(f'Depression size before: {depression.shape[0]}')
depression = depression.drop(index=age_indexes)
print(f'Depression size after: {depression.shape[0]}')

In [None]:
def makeAgeGroup(value):
    """Return the age-band label for an age.

    Bands are contiguous and closed on their upper bound, so every orderable
    number lands in exactly one band (fractional ages such as 16.5 no longer
    fall through to None):

        <= 10  'Under 10'
        <= 16  'Primary Children'
        <= 21  'Secondary Children'
        <= 35  'Adults'
        <= 48  'Elder Adults'
        >  48  'Older People'

    A value that fails every comparison (e.g. NaN) yields None.
    """
    upper_bounds = (
        (10, 'Under 10'),
        (16, 'Primary Children'),
        (21, 'Secondary Children'),
        (35, 'Adults'),
        (48, 'Elder Adults'),
    )
    for upper, label in upper_bounds:
        if value <= upper:
            return label
    # Explicit comparison (rather than a bare fallback) keeps NaN out of the
    # last band.
    if value > 48:
        return 'Older People'
    return None

# Band label for each row, used only to colour the bars below.
age = depression['Age'].apply(makeAgeGroup)

# Count of respondents per raw age value, coloured by age band.
plt.figure(figsize=(16, 4))
sns.countplot(x=depression['Age'], hue=age)


In [None]:
def makeAgeGroupFeature(value):
    """Return the ordinal age-band code (1-6) for an age.

    Bands are contiguous and closed on their upper bound, so every orderable
    number lands in exactly one band (fractional ages such as 21.5 no longer
    fall through to None):

        <= 10 -> 1,  <= 16 -> 2,  <= 21 -> 3,
        <= 35 -> 4,  <= 48 -> 5,  >  48 -> 6

    A value that fails every comparison (e.g. NaN) yields None.
    """
    upper_bounds = ((10, 1), (16, 2), (21, 3), (35, 4), (48, 5))
    for upper, code in upper_bounds:
        if value <= upper:
            return code
    # Explicit comparison (rather than a bare fallback) keeps NaN out of the
    # last band.
    if value > 48:
        return 6
    return None

# Replace the raw age with its ordinal band code.
depression['Age_group'] = depression['Age'].apply(makeAgeGroupFeature)

depression = depression.drop(columns='Age')
depression.head()



In [None]:
# Row-wise total over every feature column currently in the frame.
# Columns derived from this total are excluded first, so re-running the cell
# cannot fold an earlier 'Total_count' (or the string 'Target') back into it.
# NOTE(review): the total includes the TIPI and demographic columns as well as
# the questionnaire items -- confirm that is the intended score.
score_inputs = depression.drop(columns=['Total_count', 'Target'], errors='ignore')
depression['Total_count'] = score_inputs.sum(axis=1)
depression.head()

In [None]:
# depression[depression['Total_count']>89]['Total_count'].describe()

In [None]:
def buildTarget(value):
    """Map a total score to its severity label.

    Upper bounds are inclusive: up to 75 'Normal', up to 80 'Mild', up to 88
    'Moderate', up to 96 'Severe', and anything above 96 'Extremely Severe'.
    A value that fails every comparison (e.g. NaN) yields None.
    """
    severity_bands = (
        (75, 'Normal'),
        (80, 'Mild'),
        (88, 'Moderate'),
        (96, 'Severe'),
    )
    for upper, label in severity_bands:
        if value <= upper:
            return label
    return 'Extremely Severe' if value > 96 else None


# Label every row with the severity class derived from its total score.
depression['Target'] = depression['Total_count'].apply(buildTarget)

depression.head()


In [None]:
# Number of rows per severity class.
plt.figure(figsize=(16, 4))
sns.countplot(x=depression['Target'])

In [None]:
# Pull the labels and the raw totals out of the frame so that only the
# predictor columns remain in `depression`.
target = depression['Target']
processed_data = depression['Total_count']
depression = depression.drop(columns=['Target', 'Total_count'])

In [None]:
# Preview the predictor-only frame.
depression.head()

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows; the fixed random_state keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    depression,
    target,
    test_size=0.2,
    random_state=77,
)

for split_name, split_x, split_y in (('train', x_train, y_train), ('test', x_test, y_test)):
    print(f'x_{split_name}: {split_x.shape}, y_{split_name}: {split_y.shape}')

In [None]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler

# The scaler is fitted on the training split only and then applied to both
# splits, so nothing from the test split leaks into the fit.
scaler = MinMaxScaler().fit(x_train)

x_train_scaled = scaler.transform(x_train)
x_test_scaled = scaler.transform(x_test)

In [None]:
from sklearn.svm import SVC

# Support vector classifier with default parameters, trained on the scaled
# training split.
svc_clf = SVC()
svc_clf.fit(x_train_scaled, y_train)

# fit() returns the estimator itself, so `model` is the same object as `svc_clf`.
model = svc_clf

In [None]:
# from sklearn.model_selection import cross_val_score

# cross_score = cross_val_score(svc_clf, x_train_scaled, y_train, cv=5)
# print(f'Mean Score {np.mean(cross_score)}')

In [None]:
from sklearn.metrics import classification_report

# Predict on the held-out split and print per-class precision/recall/F1.
# (The print call was indented by mistake, which made the cell fail with an
# IndentationError before anything ran.)
y_pred_svc = svc_clf.predict(x_test_scaled)
print(classification_report(y_test, y_pred_svc))

In [None]:
# Mean accuracy of the fitted classifier on the held-out split.
svc_clf.score(x_test_scaled, y_test)

In [None]:
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report

In [None]:
def pretty_confusion_matrix(y_test, y_pred, labels=('Extremely Severe', 'Severe', 'Moderate', 'Mild', 'Normal')):
    """Return the confusion matrix as a labelled DataFrame.

    Parameters
    ----------
    y_test : array-like
        True class labels.
    y_pred : array-like
        Predicted class labels.
    labels : sequence of str
        Class labels, in the order the rows and columns should appear. They
        must be the actual label values found in `y_test` / `y_pred`: the same
        sequence is passed to `confusion_matrix`, so the row and column
        captions are guaranteed to describe the counts beneath them. (Without
        it, scikit-learn orders classes alphabetically and the captions do not
        match the data.)

    Returns
    -------
    pandas.DataFrame
        Rows are true classes; columns are 'Predicted <class>'.
    """
    labels = list(labels)
    cm = confusion_matrix(y_test, y_pred, labels=labels)
    pred_labels = ['Predicted ' + label for label in labels]
    return pd.DataFrame(cm, columns=pred_labels, index=labels)

# Use the real class names produced by buildTarget, ordered from most to
# least severe.
results_plot = pretty_confusion_matrix(
    y_test,
    y_pred_svc,
    ['Extremely Severe', 'Severe', 'Moderate', 'Mild', 'Normal'],
)
results_plot

In [None]:
# Predictor columns, in the order the model was trained on.
depression.head()

In [None]:
# Raw total scores that were set aside before training.
processed_data.head()

In [None]:
import pickle

# Persist the fitted classifier. The context manager closes (and flushes) the
# file handle even if pickling fails.
with open("model.pkl", "wb") as model_file:
    pickle.dump(model, model_file)

In [None]:
# Gradio is installed into the running kernel's environment; no version is
# pinned here, so the installed release may differ between runs.
%pip install gradio
import gradio as gr

In [None]:
def make_prediction(
    A,
    Q1, Q4, Q8, Q11, Q16, Q22, Q29, Q34, Q38, Q40, Q42,
    B,
    TIPI1, TIPI2, TIPI3, TIPI4, TIPI5, TIPI6, TIPI7, TIPI8, TIPI9, TIPI10,
    C, education,
    D, gender,
    E, race,
    F, married, familysize,
    H, age_group,
):
    """Predict the severity label for one set of form answers.

    The parameters mirror the Gradio `inputs` list position by position.
    A, B, C, D, E, F and H receive the values of the explanatory Markdown
    components interleaved with the sliders and are ignored.

    Returns
    -------
    str
        The predicted class label. The bare label is returned instead of the
        one-element array `predict` produces, so the output textbox shows
        "Mild" rather than "['Mild']".
    """
    # NOTE(security): pickle.load runs arbitrary code embedded in the file;
    # only load a model.pkl that this notebook wrote itself.
    with open('model.pkl', 'rb') as model_file:
        model_working = pickle.load(model_file)

    # Feature order must match the column order the scaler and model were fitted on.
    answer = np.array([
        Q1, Q4, Q8, Q11, Q16, Q22, Q29, Q34, Q38, Q40, Q42,
        TIPI1, TIPI2, TIPI3, TIPI4, TIPI5, TIPI6, TIPI7, TIPI8, TIPI9, TIPI10,
        education, gender, race, married, familysize, age_group,
    ])
    # `scaler` is the notebook-level scaler fitted on the training split.
    answers_scaled = scaler.transform([answer])
    predict = model_working.predict(answers_scaled)
    return str(predict[0])

In [None]:
# Gradio form wired to make_prediction. The components in `inputs` are passed
# to the function positionally, so their order must match its parameter list;
# each gr.Markdown entry occupies one of the ignored placeholder parameters
# (A, B, C, D, E, F, H).
app=gr.Interface(make_prediction,
                 inputs=[
                     # Placeholder A: answer scale for the eleven 1-4 questionnaire items.
                     gr.Markdown('''Question 1-11 have to be answer as per the no. range outlined below:\n
                                        1 = Did not apply to me at all\n
                                        2 = Applied to me to some degree, or some of the time\n
                                        3 = Applied to me to a considerable degree, or a good part of the time\n
                                        4 = Applied to me very much, or most of the time\n
                                        '''),
                     # Sliders feeding Q1, Q4, Q8, Q11, Q16, Q22, Q29, Q34, Q38, Q40, Q42 (in that order).
                     gr.Slider(minimum=1,maximum=4,step=1,label="1 : I found myself getting upset by quite trivial things."),
                     gr.Slider(minimum=1,maximum=4,step=1,label="2 : I experienced breathing difficulty."),
                     gr.Slider(minimum=1,maximum=4,step=1,label="3 : I found it difficult to relax."),
                     gr.Slider(minimum=1,maximum=4,step=1,label="4 : I found myself getting upset rather easily."),
                     gr.Slider(minimum=1,maximum=4,step=1,label="5 : I felt that I had lost interest in just about everything."),
                     gr.Slider(minimum=1,maximum=4,step=1,label="6 : I found it hard to wind down."),
                     gr.Slider(minimum=1,maximum=4,step=1,label="7 : I found it hard to calm down after something upset me."),
                     gr.Slider(minimum=1,maximum=4,step=1,label="8 : I felt I was pretty worthless."),
                     gr.Slider(minimum=1,maximum=4,step=1,label="9 : I felt that life was meaningless."),
                     gr.Slider(minimum=1,maximum=4,step=1,label="10 : I was worried about situations in which I might panic and make a fool of myself."),
                     gr.Slider(minimum=1,maximum=4,step=1,label="11 : I found it difficult to work up the initiative to do things."),
                     # Placeholder B: answer scale for the ten 1-7 TIPI items.
                     gr.Markdown('''Question 1-10 have to be answer as per the no. range outlined below: \n
                                        1 = Disagree strongly\n
                                        2 = Disagree moderately\n
                                        3 = Disagree a little\n
                                        4 = Neither agree nor disagree\n
                                        5 = Agree a little\n
                                        6 = Agree moderately\n
                                        7 = Agree strongly\n'''),
                     # Sliders feeding TIPI1..TIPI10 (in that order).
                     gr.Slider(minimum=1,maximum=7,step=1,label="1 : Extraverted, enthusiastic."),
                     gr.Slider(minimum=1,maximum=7,step=1,label="2 : Critical, quarrelsome."),
                     gr.Slider(minimum=1,maximum=7,step=1,label="3 : Dependable, self-disciplined"),
                     gr.Slider(minimum=1,maximum=7,step=1,label="4 : Anxious, easily upset."),
                     gr.Slider(minimum=1,maximum=7,step=1,label="5 : Open to new experiences, complex."),
                     gr.Slider(minimum=1,maximum=7,step=1,label="6 : Reserved, quiet."),
                     gr.Slider(minimum=1,maximum=7,step=1,label="7 : Sympathetic, warm."),
                     gr.Slider(minimum=1,maximum=7,step=1,label="8 : Disorganized, careless."),
                     gr.Slider(minimum=1,maximum=7,step=1,label="9 : Calm, emotionally stable."),
                     gr.Slider(minimum=1,maximum=7,step=1,label="10 : Conventional, uncreative."),
                     # Placeholder C, then the education code (same 1-4 codes as changeEducationTitle).
                     gr.Markdown('''Education:\n
                                        1=Less than high school,\n
                                        2=High school,\n
                                        3=University degree,\n
                                        4=Graduate degree\n'''),
                     gr.Slider(minimum=1,maximum=4,step=1,label="Education"),
                     # Placeholder D, then the gender code.
                     gr.Markdown('''Gender:\n
                                        1=Male,\n
                                        2=Female,\n
                                        3=Other\n'''),
                     gr.Slider(minimum=1,maximum=3,step=1,label="Gender"),
                     # Placeholder E, then the race code (the 1-7 scale used after the divide-by-ten step).
                     gr.Markdown('''Race:\n
                                        1=Asian,\n
                                        2=Arab,\n
                                        3=Black,\n
                                        4=Indigenous Australian,\n
                                        5=Native American,\n
                                        6=White,\n
                                        7=Other\n'''),
                     gr.Slider(minimum=1,maximum=7,step=1,label="Race"),
                     # Placeholder F, then marital status and family size (capped at 13, matching the training filter).
                     gr.Markdown('''Married:\n
                                        1=Never married,\n
                                        2=Currently married,\n
                                        3=Previously married\n'''),
                     gr.Slider(minimum=1,maximum=3,step=1,label="Married"),
                     gr.Slider(minimum=1,maximum=13,step=1,label="FamilySize"),
                     # Placeholder H, then the age-band code produced by makeAgeGroupFeature.
                     # NOTE(review): band 1 is described as "less then 10" but the encoder
                     # assigns 1 to ages up to and including 10 -- confirm the wording.
                     gr.Markdown('''Age_group:\n
                                        1= less then 10,\n
                                        2 = 11 to 16,\n
                                        3 = 17 to 21,\n
                                        4 = 22 to 35,\n
                                        5 = 36 to 48,\n
                                        6 = above 48,\n'''),
                     gr.Slider(minimum=1,maximum=6,step=1,label="Age_group"),],
                     outputs=[gr.Textbox(label="Level of Depression")],)

# share=True is passed to launch (in Gradio this requests a public share link).
app.launch(share=True)