In [34]:
# Importing necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn 
import plotly.graph_objects as go
import plotly.offline as pyo
import plotly.express as px
from plotly.subplots import make_subplots
from plotly import tools
import plotly.figure_factory as ff

In [35]:
# Load the three per-subscale question files (depression, anxiety, stress)
df_dep, df_anx, df_str = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../Datasets/Depression.csv",
        "../Datasets/Anxiety.csv",
        "../Datasets/Stress.csv",
    )
)
# Preview the stress questions
df_str.head()

Unnamed: 0,Q1(S),Q6(S),Q8(S),Q11(S),Q12(S),Q14(S),Q18(S),Stress
0,1,1,3,2,3,3,3,3
1,2,0,3,0,2,0,2,1
2,3,3,3,3,3,1,3,4
3,2,2,3,3,2,1,2,3
4,2,1,0,3,2,0,3,2


In [36]:
# Load the cleaned survey table that every later cell reads as `df`
df=pd.read_csv("../Datasets/CleanData.csv")
# Preview the first rows
df.head()

Unnamed: 0,Q1(S),Q2(A),Q3(D),Q4(A),Q5(D),Q6(S),Q7(A),Q8(S),Q9(A),Q10(D),...,Str,Anx,Dep,Depression,Anxiety,Stress,family,family_enc,age class,age_enc
0,1,1,2,3,2,1,3,3,3,2,...,32,38,32,4,4,3,Nuclear,2,18-25,0
1,2,0,1,0,1,0,0,3,0,1,...,18,2,12,1,0,1,Nuclear,2,18-25,0
2,3,1,3,0,3,3,2,3,3,3,...,38,28,42,4,4,4,Nuclear,2,18-25,0
3,2,2,1,1,1,2,0,3,2,2,...,30,20,20,2,4,3,Nuclear,2,18-25,0
4,2,0,0,0,1,1,2,0,1,2,...,22,8,16,2,1,2,Nuclear,2,18-25,0


### Selecting Depression, Anxiety & Stress Scores and TIPI Questions

In [37]:
# Positions 21-30 of df.columns are wrapped in a one-element list, as before
tp = [df.columns[21:31]]
# The three subscale totals followed by every column name taken from that slice
cols = ['Dep', 'Anx', 'Str', *tp[0]]

In [38]:
# Restrict df to the columns collected in `cols`
new_df=df[cols]
# Display (rows, columns) of the selection
new_df.shape

(28465, 13)

In [39]:
# Pairwise correlation matrix of every selected column (DataFrame.corr defaults)
corr=new_df.corr()

In [40]:
# Remove the three subscale-total columns so only the TIPI items remain as columns.
# Dropping by name (not "first three", not in place) makes the cell safe to re-run:
# a second execution finds nothing to drop instead of deleting three TIPI columns.
corr = corr.drop(columns=['Dep', 'Anx', 'Str'], errors='ignore')

In [41]:
# Show the row labels after the first three; these rows are dropped below
corr.index[3:]

Index(['TIPI1', 'TIPI2', 'TIPI3', 'TIPI4', 'TIPI5', 'TIPI6', 'TIPI7', 'TIPI8',
       'TIPI9', 'TIPI10'],
      dtype='object')

In [42]:
# Row labels after the first three, collected for removal
ind=list(corr.index[3:])

In [43]:
# Drop those rows, leaving only the first three rows of the matrix
corr=corr.drop(ind)

In [44]:
# Display the trimmed correlation table
corr

Unnamed: 0,TIPI1,TIPI2,TIPI3,TIPI4,TIPI5,TIPI6,TIPI7,TIPI8,TIPI9,TIPI10
Dep,-0.247007,0.15349,-0.2265,0.408752,-0.224981,0.217322,-0.051227,0.230102,-0.450629,0.121965
Anx,-0.110246,0.152875,-0.137125,0.456509,-0.176735,0.138909,0.019747,0.198922,-0.384779,0.104502
Str,-0.126304,0.243932,-0.139206,0.536431,-0.187788,0.131033,-0.00965,0.191271,-0.489235,0.084765


### Correlation Heatmap

In [45]:
# Two-decimal copy of the correlations, used as the heatmap cell annotations
z_text = np.around(corr.to_numpy(), 2)

In [46]:
# Annotated heatmap: each cell shows its rounded correlation value
fig = ff.create_annotated_heatmap(
    z=corr.values,
    x=corr.columns.tolist(),
    y=['Dep', 'Anx', 'Str'],
    annotation_text=z_text,
    colorscale=[[0, '#bae1d4'], [1, '#274472']],
)
fig.show()

In [47]:
# Same correlations as a plain (un-annotated) heatmap
corr_heatmap = go.Heatmap(
    z=corr.values,
    x=corr.columns.tolist(),
    y=['Dep', 'Anx', 'Str'],
    colorscale=[[0, '#bae1d4'], [1, '#274472']],
    hoverongaps=False,
)
fig = go.Figure(data=corr_heatmap)
fig.show()

In [48]:
# TIPI1	Extraverted, enthusiastic.
# TIPI2	Critical, quarrelsome.
# TIPI3	Dependable, self-disciplined.
# TIPI4	Anxious, easily upset.
# TIPI5	Open to new experiences, complex.
# TIPI6	Reserved, quiet.
# TIPI7	Sympathetic, warm.
# TIPI8	Disorganized, careless.
# TIPI9	Calm, emotionally stable.
# TIPI10	Conventional, uncreative.

## Questions

In [49]:
def scale_chart(labels, x_data, y_data, file_stem="fig1"):
    """Draw, export and show a horizontal stacked-bar chart of response percentages.

    Parameters
    ----------
    labels : sequence of str
        Header text for each response level, written above the top bar.
        Must have at least as many entries as each row of ``x_data``.
    x_data : sequence of sequences of numbers
        One row per question; each row holds the percentage for every
        response level, in the same order as ``labels``.
    y_data : sequence of str
        Question text for each row of ``x_data``.
    file_stem : str, optional
        Base name of the exported images. The default ``"fig1"`` keeps the
        original output names (``fig1.png``, ``fig1.jpeg``, ``fig1.webp``,
        ``fig1.svg``); pass another stem to avoid overwriting earlier charts.

    Side effects
    ------------
    Prints the response-scale legend, writes four image files to the
    working directory and displays the figure.
    """
    colors = ['#274472', '#42669e','#6d93cf','#94bbf7']
    fig = go.Figure()

    n_levels = len(x_data[0]) if len(x_data) else 0

    # One trace per (level, question) pair; adding level by level makes the
    # stacked segments of every bar appear in level order.
    for level in range(n_levels):
        for shares, question in zip(x_data, y_data):
            fig.add_trace(go.Bar(
                x=[shares[level]], y=[question],
                orientation='h',
                marker=dict(
                    # cycle the palette so more than four levels cannot overflow it
                    color=colors[level % len(colors)],
                    line=dict(color='rgb(248, 248, 249)', width=1)
                )
            ))

    fig.update_layout(
        xaxis=dict(
            showgrid=False,
            showline=False,
            showticklabels=False,
            zeroline=False,
            domain=[0.15, 1]
        ),
        yaxis=dict(
            showgrid=False,
            showline=False,
            showticklabels=False,
            zeroline=False,
        ),
        barmode='stack',
        paper_bgcolor='rgb(248, 248, 255)',
        plot_bgcolor='rgb(248, 248, 255)',
        margin=dict(l=120, r=10, t=140, b=80),
        showlegend=False,
    )

    annotations = []
    # The level headers are attached to the last plotted row, identified by
    # position so duplicated question text cannot repeat them.
    last_row = min(len(x_data), len(y_data)) - 1

    for row, (question, shares) in enumerate(zip(y_data, x_data)):
        # question text to the left of the bar
        annotations.append(dict(xref='paper', yref='y',
                                x=0.14, y=question,
                                xanchor='right',
                                text=str(question),
                                font=dict(family='Arial', size=10,
                                          color='rgb(67, 67, 67)'),
                                showarrow=False, align='right'))
        left_edge = 0
        for level, share in enumerate(shares):
            centre = left_edge + share / 2
            # percentage text centred inside its segment
            annotations.append(dict(xref='x', yref='y',
                                    x=centre, y=question,
                                    text=str(share) + '%',
                                    font=dict(family='Arial', size=12,
                                              color='rgb(248, 248, 255)'),
                                    showarrow=False))
            if row == last_row:
                # level header above the chart; uses the `labels` argument
                # rather than a notebook-level global
                annotations.append(dict(xref='x', yref='paper',
                                        x=centre, y=1.1,
                                        text=labels[level],
                                        font=dict(family='Arial', size=12,
                                                  color='rgb(67, 67, 67)'),
                                        showarrow=False))
            left_edge += share

    fig.update_layout(annotations=annotations)
    print("0 = Did not apply to me at all\n1 = Applied to me to some degree, or some of the time\n2 = Applied to me to a considerable degree or a good part of time\n3 = Applied to me very much or most of the time")
    for extension in ("png", "jpeg", "webp", "svg"):
        fig.write_image(file_stem + "." + extension)
    fig.show()

In [50]:
# Header text for the four response levels, passed to scale_chart
top_labels = ['0','1','2','3']

In [51]:
# depression
y_data=['Were you not able to experience positive feelings?',
'Do you find it difficult to work up the initiative to do things?',
'Do you feel that you have nothing to look forward to?',
'Do you feel sad and depressed?',
'Do you find it hard to get enthusiastic about things?',
'Do you feel you weren\'t worth much as a person?',
'Do you feel that life has become meaningless?']
x_data=[]
for i in df_dep.columns[:-1]:
    x_data.append(list(df[i].value_counts()/df.shape[0]*100))
np_array = np.array(x_data)
np_round_to_tenths = np.around(np_array, 1)
x_data = list(np_round_to_tenths)
scale_chart(top_labels,x_data,y_data)

0 = Did not apply to me at all
1 = Applied to me to some degree, or some of the time
2 = Applied to me to a considerable degree or a good part of time
3 = Applied to me very much or most of the time


In [19]:
# anxiety
y_data=['Were you aware of the dryness in your mouth?',
'Do you experience breathing difficulty (eg, excessively rapid breathing, breathlessness in the absence of physical exertion).',
'Do you experience trembling (eg, in your hands).',
'Are you worried about situations in which you might panic and make a fool of yourself?',
'Do you find yourself on the verge of panicking?',
'How aware are you of the action of your heart in the absence of any physical exertion (eg, sense of heart rate increase, heart missing a beat).',
'Do you feel scared without any good reason?']
x_data=[]
for i in df_anx.columns[:-1]:
    x_data.append(list(df[i].value_counts()/df.shape[0]*100))
np_array = np.array(x_data)
np_round_to_tenths = np.around(np_array, 1)
x_data = list(np_round_to_tenths)
scale_chart(top_labels,x_data,y_data)

0 = Did not apply to me at all
1 = Applied to me to some degree, or some of the time
2 = Applied to me to a considerable degree or a good part of time
3 = Applied to me very much or most of the time


In [20]:
#stress
y_data=['Do you find it hard to calm down after something upset you?',
'Do you tend to over-react in situations?',
'Do you find yourself in a state of nervous tension?',
'Do you find yourself getting agitated?',
'Do you find it difficult to relax?',
'Do you find it difficult to tolerate interruptions while doing something?',
'Do you find yourself getting impatient when you are delayed in any way (eg, elevators, traffic lights, being kept waiting).']
x_data=[]
for i in df_str.columns[:-1]:
    x_data.append(list(df[i].value_counts()/df.shape[0]*100))
np_array = np.array(x_data)
np_round_to_tenths = np.around(np_array, 1)
x_data = list(np_round_to_tenths)
scale_chart(top_labels,x_data,y_data)

0 = Did not apply to me at all
1 = Applied to me to some degree, or some of the time
2 = Applied to me to a considerable degree or a good part of time
3 = Applied to me very much or most of the time


## Gender related plots

In [21]:
# Columns from position 31 onward (everything after the question items)
df1=df.iloc[:,31:]
# Preview the first rows
df1.head()

Unnamed: 0,education,urban,gender,age,familysize,Str,Anx,Dep,Depression,Anxiety,Stress,family,family_enc,age class,age_enc
0,3,2,2,19,4,32,38,32,4,4,3,Nuclear,2,18-25,0
1,2,3,2,20,4,18,2,12,1,0,1,Nuclear,2,18-25,0
2,1,1,2,18,3,38,28,42,4,4,4,Nuclear,2,18-25,0
3,2,1,2,18,2,30,20,20,2,4,3,Nuclear,2,18-25,0
4,3,0,2,20,2,22,8,16,2,1,2,Nuclear,2,18-25,0


In [22]:
# Severity name for each 0-4 level code.
# NOTE(review): not referenced by any cell visible here; the charts label levels by number.
scale = {0:'Normal' ,1:'Mild', 2:'Moderate',3:'Severe' ,4:'Extremely Severe'}

In [23]:
colors = ['#c6e1d7', '#98bebe', '#7999ad','#657295','#50486d']
# Rows = Depression level, columns = gender code. crosstab fills absent
# (level, gender) pairs with 0 and keeps both axes sorted, so each bar group
# is guaranteed to line up with its gender.
level_counts = pd.crosstab(df['Depression'], df['gender'])
# Flat level-major list of counts, kept under its original name
tr = list(level_counts.to_numpy().ravel())
# Sorted gender codes for the x axis. The previous `unique().sort()` sorted
# in place and returned None, which left the bars without gender labels.
gender = level_counts.columns.to_numpy()
fig = go.Figure(data=[
    go.Bar(name=str(level), x=gender, y=level_counts.loc[level].to_numpy(), marker_color=shade)
    for level, shade in zip(level_counts.index, colors)
])
# Change the bar mode
fig.update_layout(barmode='group')
fig.show()

In [24]:
labels = [0,1,2,3,4]
# Rows = gender code, columns = Depression level (forced to `labels`, 0 where
# a level never occurs), so each donut receives exactly one value per label.
gender_level_counts = pd.crosstab(df['gender'], df['Depression']).reindex(columns=labels, fill_value=0)
# Flat gender-major counts, kept under the original name
tr = pd.Series(gender_level_counts.to_numpy().ravel(), name='counts')
# NOTE(review): assumes the sorted gender codes mean Male, Female, Other in
# that order - confirm against the dataset codebook.
gender_names = ["Male", "Female", "Other"]
# Create subplots: use 'domain' type for Pie subplot
fig = make_subplots(rows=1, cols=3, specs=[[{'type':'domain'} for _ in gender_names]])
for position, (code, title) in enumerate(zip(gender_level_counts.index, gender_names), start=1):
    per_level = gender_level_counts.loc[code]
    fig.add_trace(go.Pie(labels=labels, values=list(per_level/per_level.sum()*100), name=title, marker_colors=colors),
                  1, position)

# Use `hole` to create a donut-like pie chart
fig.update_traces(hole=.4, hoverinfo="label+percent+name")

fig.update_layout(
    title_text="Proportion of Depression score gender wise",
    # Add annotations in the center of the donut pies.
    annotations=[dict(text='Male', x=0.12, y=0.5, font_size=20, showarrow=False),
                 dict(text='Female', x=0.50, y=0.5, font_size=20, showarrow=False),
                 dict(text='Other', x=0.90, y=0.5, font_size=20, showarrow=False)])
fig.show()

In [25]:
colors = ['#c6e1d7', '#98bebe', '#7999ad','#657295','#50486d']
# Rows = Anxiety level, columns = gender code. crosstab fills absent
# (level, gender) pairs with 0 and keeps both axes sorted, so each bar group
# is guaranteed to line up with its gender.
level_counts = pd.crosstab(df['Anxiety'], df['gender'])
# Flat level-major list of counts, kept under its original name
tr = list(level_counts.to_numpy().ravel())
# Sorted gender codes for the x axis. The previous `unique().sort()` sorted
# in place and returned None, which left the bars without gender labels.
gender = level_counts.columns.to_numpy()
fig = go.Figure(data=[
    go.Bar(name=str(level), x=gender, y=level_counts.loc[level].to_numpy(), marker_color=shade)
    for level, shade in zip(level_counts.index, colors)
])
# Change the bar mode
fig.update_layout(barmode='group')
fig.show()

In [26]:
labels = [0,1,2,3,4]
# Rows = gender code, columns = Anxiety level (forced to `labels`, 0 where
# a level never occurs), so each donut receives exactly one value per label.
gender_level_counts = pd.crosstab(df['gender'], df['Anxiety']).reindex(columns=labels, fill_value=0)
# Flat gender-major counts, kept under the original name
tr = pd.Series(gender_level_counts.to_numpy().ravel(), name='counts')
# NOTE(review): assumes the sorted gender codes mean Male, Female, Other in
# that order - confirm against the dataset codebook.
gender_names = ["Male", "Female", "Other"]
# Create subplots: use 'domain' type for Pie subplot
fig = make_subplots(rows=1, cols=3, specs=[[{'type':'domain'} for _ in gender_names]])
for position, (code, title) in enumerate(zip(gender_level_counts.index, gender_names), start=1):
    per_level = gender_level_counts.loc[code]
    fig.add_trace(go.Pie(labels=labels, values=list(per_level/per_level.sum()*100), name=title, marker_colors=colors),
                  1, position)

# Use `hole` to create a donut-like pie chart
fig.update_traces(hole=.4, hoverinfo="label+percent+name")

fig.update_layout(
    title_text="Proportion of Anxiety score gender wise",
    # Add annotations in the center of the donut pies.
    annotations=[dict(text='Male', x=0.12, y=0.5, font_size=20, showarrow=False),
                 dict(text='Female', x=0.50, y=0.5, font_size=20, showarrow=False),
                 dict(text='Other', x=0.90, y=0.5, font_size=20, showarrow=False)])
fig.show()

In [27]:
colors = ['#c6e1d7', '#98bebe', '#7999ad','#657295','#50486d']
# Rows = Stress level, columns = gender code. crosstab fills absent
# (level, gender) pairs with 0 and keeps both axes sorted, so each bar group
# is guaranteed to line up with its gender.
level_counts = pd.crosstab(df['Stress'], df['gender'])
# Flat level-major list of counts, kept under its original name
tr = list(level_counts.to_numpy().ravel())
# Sorted gender codes for the x axis. The previous `unique().sort()` sorted
# in place and returned None, which left the bars without gender labels.
gender = level_counts.columns.to_numpy()
fig = go.Figure(data=[
    go.Bar(name=str(level), x=gender, y=level_counts.loc[level].to_numpy(), marker_color=shade)
    for level, shade in zip(level_counts.index, colors)
])
# Change the bar mode
fig.update_layout(barmode='group')
fig.show()

In [28]:
labels = [0,1,2,3,4]
# Rows = gender code, columns = Stress level (forced to `labels`, 0 where
# a level never occurs), so each donut receives exactly one value per label.
gender_level_counts = pd.crosstab(df['gender'], df['Stress']).reindex(columns=labels, fill_value=0)
# Flat gender-major counts, kept under the original name
tr = pd.Series(gender_level_counts.to_numpy().ravel(), name='counts')
# NOTE(review): assumes the sorted gender codes mean Male, Female, Other in
# that order - confirm against the dataset codebook.
gender_names = ["Male", "Female", "Other"]
# Create subplots: use 'domain' type for Pie subplot
fig = make_subplots(rows=1, cols=3, specs=[[{'type':'domain'} for _ in gender_names]])
for position, (code, title) in enumerate(zip(gender_level_counts.index, gender_names), start=1):
    per_level = gender_level_counts.loc[code]
    fig.add_trace(go.Pie(labels=labels, values=list(per_level/per_level.sum()*100), name=title, marker_colors=colors),
                  1, position)

# Use `hole` to create a donut-like pie chart
fig.update_traces(hole=.4, hoverinfo="label+percent+name")

fig.update_layout(
    title_text="Proportion of Stress score gender wise",
    # Add annotations in the center of the donut pies.
    annotations=[dict(text='Male', x=0.12, y=0.5, font_size=20, showarrow=False),
                 dict(text='Female', x=0.50, y=0.5, font_size=20, showarrow=False),
                 dict(text='Other', x=0.90, y=0.5, font_size=20, showarrow=False)])
fig.show()

## Age related plots

In [29]:
# Distribution of respondent ages
age_hist = go.Histogram(x=df['age'], marker_color='#50486d')
fig = go.Figure(data=[age_hist])
fig.show()

In [30]:
age_bands = ['18-25', '26-35', '36-45', '46-60', '60+']
# Mean level of each subscale within every age band, in band order
dep = [df.loc[df['age class'] == band, 'Depression'].mean() for band in age_bands]
anx = [df.loc[df['age class'] == band, 'Anxiety'].mean() for band in age_bands]
stre = [df.loc[df['age class'] == band, 'Stress'].mean() for band in age_bands]
cols = {'Depression': dep, 'Anxiety': anx, 'Stress': stre}
# Repeat the first band at the end so the radar outline closes
age = age_bands + [age_bands[0]]

In [31]:
# Append each series' first value to its end so it matches the closed `age` axis
dep, anx, stre = (series + [series[0]] for series in (dep, anx, stre))

In [32]:
# (legend name, values, line colour) for each radar outline
radar_series = [
    ('Depression', dep, '#d39e12'),
    ('Anxiety', anx, '#00837e'),
    ('Stress', stre, '#012048'),
]
radar_traces = [
    go.Scatterpolar(r=values, theta=age, fill='toself', line_color=hue, name=label)
    for label, values, hue in radar_series
]
radar_layout = go.Layout(
    title=go.layout.Title(text='Mean score comparison Age-wise'),
    polar=dict(radialaxis=dict(visible=True)),
    showlegend=True,
)
fig = go.Figure(data=radar_traces, layout=radar_layout)
fig.show()

In [33]:
colors = ['#c6e1d7', '#98bebe', '#7999ad','#657295','#50486d']
# Rows = Depression level, columns = family type. crosstab fills absent
# (level, family) pairs with 0 and keeps both axes sorted, so the bars no
# longer depend on a complete grid or on sort stability to stay aligned.
level_counts = pd.crosstab(df['Depression'], df['family'])
# Flat level-major list of counts, kept under its original name
tr = list(level_counts.to_numpy().ravel())
# Sorted family types for the x axis
family = level_counts.columns.to_numpy()
fig = go.Figure(data=[
    go.Bar(name=str(level), x=family, y=level_counts.loc[level].to_numpy(), marker_color=shade)
    for level, shade in zip(level_counts.index, colors)
])
# Change the bar mode
fig.update_layout(barmode='group')
fig.show()