## EDA: Mental Health of Students

---
---

In [1]:
# import required libraries

import pandas as pd
import numpy as np
from janitor import clean_names
from bokeh.io import output_notebook, show
from bokeh.plotting import figure, output_file
from bokeh.models import ColumnDataSource
from bokeh.models import HoverTool, CategoricalColorMapper
from bokeh.models.widgets import DataTable, TableColumn 
from bokeh.models.widgets import HTMLTemplateFormatter
from bokeh.layouts import row, column
from bokeh.models.widgets import Tabs, Panel
from matplotlib import pyplot as plt
from math import pi
import seaborn as sns
import bokeh
import warnings

# Settings
warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', None, "display.max_rows", None)
output_notebook()

In [2]:
df = pd.read_csv("Student Mental health.csv")
df.head()

Unnamed: 0,Timestamp,Choose your gender,Age,What is your course?,Your current year of Study,What is your CGPA?,Marital status,Do you have Depression?,Do you have Anxiety?,Do you have Panic attack?,Did you seek any specialist for a treatment?
0,8/7/2020 12:02,Female,18.0,Engineering,year 1,3.00 - 3.49,No,Yes,No,Yes,No
1,8/7/2020 12:04,Male,21.0,Islamic education,year 2,3.00 - 3.49,No,No,Yes,No,No
2,8/7/2020 12:05,Male,19.0,BIT,Year 1,3.00 - 3.49,No,Yes,Yes,Yes,No
3,8/7/2020 12:06,Female,22.0,Laws,year 3,3.00 - 3.49,Yes,Yes,No,No,No
4,8/7/2020 12:13,Male,23.0,Mathemathics,year 4,3.00 - 3.49,No,No,No,No,No


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 101 entries, 0 to 100
Data columns (total 11 columns):
 #   Column                                        Non-Null Count  Dtype  
---  ------                                        --------------  -----  
 0   Timestamp                                     101 non-null    object 
 1   Choose your gender                            101 non-null    object 
 2   Age                                           100 non-null    float64
 3   What is your course?                          101 non-null    object 
 4   Your current year of Study                    101 non-null    object 
 5   What is your CGPA?                            101 non-null    object 
 6   Marital status                                101 non-null    object 
 7   Do you have Depression?                       101 non-null    object 
 8   Do you have Anxiety?                          101 non-null    object 
 9   Do you have Panic attack?                     101 non-null    obj

### Data Cleaning

---

Essential Cleaning steps:

1. Change column names
2. Remove Extra spaces from rows
3. Remove null values
4. Change data types 
5. Check letter case



##### Change Column names

In [4]:
# new column names
cols = "DATE, Gender, Age, Course, Study Year, CGPA, Marital Status, Depression, Anxiety, Panic Attack, Treatment".split(", ") 

# rename columns
df.columns = cols

# clean column names
df = clean_names(df)

# output
df.head()

Unnamed: 0,date,gender,age,course,study_year,cgpa,marital_status,depression,anxiety,panic_attack,treatment
0,8/7/2020 12:02,Female,18.0,Engineering,year 1,3.00 - 3.49,No,Yes,No,Yes,No
1,8/7/2020 12:04,Male,21.0,Islamic education,year 2,3.00 - 3.49,No,No,Yes,No,No
2,8/7/2020 12:05,Male,19.0,BIT,Year 1,3.00 - 3.49,No,Yes,Yes,Yes,No
3,8/7/2020 12:06,Female,22.0,Laws,year 3,3.00 - 3.49,Yes,Yes,No,No,No
4,8/7/2020 12:13,Male,23.0,Mathemathics,year 4,3.00 - 3.49,No,No,No,No,No


##### Missing Values (& Their Treatment)

In [5]:
df.isna().sum()

date              0
gender            0
age               1
course            0
study_year        0
cgpa              0
marital_status    0
depression        0
anxiety           0
panic_attack      0
treatment         0
dtype: int64

In [6]:
# Where is the missing value?
df[df.age.isna()== True]

Unnamed: 0,date,gender,age,course,study_year,cgpa,marital_status,depression,anxiety,panic_attack,treatment
43,8/7/2020 15:07,Male,,BIT,year 1,0 - 1.99,No,No,No,No,No


- There's one missing value in 'Age' column. 

- The students is in 1st year. So we can change it to anything between 18-21.

- He has no depression, nor any anxiety, but has poor CGPA.

- Hence, I'm considering him 20 years old.

In [7]:
# Replace null value with 20
df.age = df.age.fillna(20)

# cross check
df.isnull().sum()

date              0
gender            0
age               0
course            0
study_year        0
cgpa              0
marital_status    0
depression        0
anxiety           0
panic_attack      0
treatment         0
dtype: int64

We're clear to go ahead.

##### Change Data Types

- Date and Age column require a change in their data types.

In [8]:
df.date = pd.to_datetime(df.date) # date time
df.age = df.age.astype('int')   # float to integer

In [9]:
df.head() # output

Unnamed: 0,date,gender,age,course,study_year,cgpa,marital_status,depression,anxiety,panic_attack,treatment
0,2020-08-07 12:02:00,Female,18,Engineering,year 1,3.00 - 3.49,No,Yes,No,Yes,No
1,2020-08-07 12:04:00,Male,21,Islamic education,year 2,3.00 - 3.49,No,No,Yes,No,No
2,2020-08-07 12:05:00,Male,19,BIT,Year 1,3.00 - 3.49,No,Yes,Yes,Yes,No
3,2020-08-07 12:06:00,Female,22,Laws,year 3,3.00 - 3.49,Yes,Yes,No,No,No
4,2020-08-07 12:13:00,Male,23,Mathemathics,year 4,3.00 - 3.49,No,No,No,No,No


##### Remove White Spaces and Format the Data to One Case

- In the 'studyyear' column there are capital as well as small letters. We'll convert them to one class.

- If this case arises in other categorical columns we will deal with them.

In [10]:
df.gender.value_counts()

Female    75
Male      26
Name: gender, dtype: int64

- Gender column looks cleaned.

In [11]:
df.study_year.value_counts()

year 1    41
Year 3    19
Year 2    16
year 2    10
year 4     8
year 3     5
Year 1     2
Name: study_year, dtype: int64

Some issues present

In [12]:
df.marital_status.value_counts()

No     85
Yes    16
Name: marital_status, dtype: int64

- No issue with Marital Status column.

In [13]:
df.depression.value_counts()

No     66
Yes    35
Name: depression, dtype: int64

- Depression column looks good

In [14]:
df.anxiety.value_counts()

No     67
Yes    34
Name: anxiety, dtype: int64

- Anxiety column has no issue either.

In [15]:
df.panic_attack.value_counts()

No     68
Yes    33
Name: panic_attack, dtype: int64

In [16]:
df.treatment.value_counts()

No     95
Yes     6
Name: treatment, dtype: int64

- Panic Attack and Treatment column also looks good.

- We have to clean only the 'Study Year' column.

In [18]:
# convert everything to lower case

df.study_year = df.study_year.apply(lambda x: x.lower())

In [19]:
# Final Data for Data Analysis

df.sample(10)

Unnamed: 0,date,gender,age,course,study_year,cgpa,marital_status,depression,anxiety,panic_attack,treatment
65,2020-08-07 17:50:00,Female,23,Econs,year 1,3.50 - 4.00,No,Yes,Yes,No,No
48,2020-08-07 15:14:00,Male,24,BCS,year 2,3.00 - 3.49,No,Yes,No,No,No
64,2020-08-07 17:46:00,Female,24,Engine,year 1,3.50 - 4.00,No,No,No,No,No
45,2020-08-07 15:09:00,Male,18,BCS,year 1,3.50 - 4.00,No,No,Yes,Yes,No
10,2020-08-07 12:39:00,Female,20,Psychology,year 1,3.50 - 4.00,No,No,No,No,No
50,2020-08-07 15:27:00,Female,23,ALA,year 1,2.50 - 2.99,Yes,Yes,No,Yes,Yes
21,2020-08-07 13:13:00,Female,18,KOE,year 2,3.00 - 3.49,No,No,No,No,No
29,2020-08-07 14:05:00,Female,24,BCS,year 4,3.50 - 4.00,No,No,No,No,No
85,2020-07-13 10:33:47,Female,18,psychology,year 1,3.50 - 4.00,No,Yes,Yes,No,Yes
67,2020-08-07 18:11:00,Male,19,MHSC,year 3,3.00 - 3.49,Yes,Yes,No,Yes,No


- Everything looks good so far.

- We don't need the 'Date' Column as it is nothing but the date of survey.

In [20]:
#output 

df.to_csv('cleaned_student_mh.csv', index= None)

In [21]:
df.date.dt.year.unique() # year of survey

array([2020])

In [22]:
df.date.dt.month.unique() # month of survey

array([8, 9, 7])

In [23]:
df.date.dt.day.unique() # days of survey

array([ 7, 13, 18])

In [24]:
# unique dates
df.date.dt.date.unique()

array([datetime.date(2020, 8, 7), datetime.date(2020, 9, 7),
       datetime.date(2020, 7, 13), datetime.date(2020, 7, 18)],
      dtype=object)

In [25]:
df.date.dt.day_name().unique() # day name

array(['Friday', 'Monday', 'Saturday'], dtype=object)

- The survey was conducted in 2020, in between July to September.

- Students gave their opinion on 13th, and 18th of July, 7th August and in 9th of September 2020.

In [26]:
# Drop Date

df = df.drop('date', axis=1)

df.head()

Unnamed: 0,gender,age,course,study_year,cgpa,marital_status,depression,anxiety,panic_attack,treatment
0,Female,18,Engineering,year 1,3.00 - 3.49,No,Yes,No,Yes,No
1,Male,21,Islamic education,year 2,3.00 - 3.49,No,No,Yes,No,No
2,Male,19,BIT,year 1,3.00 - 3.49,No,Yes,Yes,Yes,No
3,Female,22,Laws,year 3,3.00 - 3.49,Yes,Yes,No,No,No
4,Male,23,Mathemathics,year 4,3.00 - 3.49,No,No,No,No,No


### Exploratory Data Analysis:

---

In [27]:
# Gender wise data
gender_count = pd.DataFrame(df.gender.value_counts()).reset_index().rename(columns={'gender':'count', 'index':'gender'})
gender_count['angle'] = gender_count['count'] / gender_count['count'].sum() * 2*pi
gender_count['color'] = ['blue', 'orange']

# Study Year
current_year = pd.DataFrame(df.study_year.value_counts()).reset_index().rename(columns={"study_year":'count', 'index':'study year'})

# CGPA
cgpa_count = pd.DataFrame(df.cgpa.value_counts()).reset_index().rename(columns={'cgpa':'count', 'index':'CGPA'})
cgpa_count = cgpa_count.sort_values(by='CGPA', ascending=True)

# Marital Status
marital_status = pd.DataFrame(df.marital_status.value_counts()).reset_index().rename(columns={'marital_status':'count', 'index':'marital_status'})
marital_status['angle'] = marital_status['count'] / marital_status['count'].sum() * 2* pi
marital_status['color'] = ['#d61c4e', '#3fa796']
# Gender wise marital status
marital_status_gender = pd.crosstab(df['gender'], df['marital_status']).reset_index()

In [28]:
# Plot1: Gender wise number of students
p1 = figure(
    height=450, 
    title='Gender wise Student Participation',
    tools='hover', 
    tooltips='@gender: @count',
    x_range=(-0.5, 1.0)
)

p1.wedge(
    x=0, 
    y=1, 
    radius=0.4,
    start_angle=bokeh.transform.cumsum(
        'angle', 
        include_zero=True
    ),
    end_angle=bokeh.transform.cumsum('angle'),
    line_color='white',
    fill_color='color',
    legend_field='gender',
    source=gender_count
)

p1.axis.axis_label = None
p1.axis.visible = False
p1.grid.grid_line_color = None
p1.title.text_color='black'
p1.title.text_font_style='bold'
p1.title.text_font_size='20px'
p1.title.text_font = 'Ariel'



# Plot2: Grade wise student Count
data = ColumnDataSource(cgpa_count)

hover_tool = HoverTool(
    tooltips = [ 
        ('CGPA', '@CGPA'),
        ('No. of Students', '@count')
    ]
)

category_map = CategoricalColorMapper(
    factors = cgpa_count['CGPA'].tolist(),
    palette= bokeh.palettes.Blues256
)

p2 = figure(
    x_range = cgpa_count['CGPA'].tolist(),
    height = 450,
    title = "Grade wise Number of Students",
    tools = [hover_tool],
    x_axis_label='Grade in CGPA',
    y_axis_label= 'No. of Students'
)

p2.vbar(
    x='CGPA',
    top = 'count',
    width = 0.90,
    source = data,
    # legend_field = 'CGPA',
    line_color = 'white',
    color = {
        'field':'CGPA', 
        'transform': category_map
    }
)

p2.xgrid.grid_line_color = None
p2.y_range.start = 0
p2.y_range.end = cgpa_count['count'].max()
p2.title.text_color='black'
p2.title.text_font_style='bold'
p2.title.text_font_size='20px'
p2.title.text_font = 'Ariel'



# plot3: Study Year wise number of students
colors =  ["#4c0033", "#790252", '#af0171', '#E80F88']
data = ColumnDataSource(current_year)

hover_tool = HoverTool(
    tooltips = [
        ('No. of students', '@count')
    ]
)

category_map = CategoricalColorMapper(
    factors = current_year['study year'].tolist(),
    palette= colors
)

p3 = figure(
    x_range=current_year['study year'].tolist(),
    height = 450,
    title= 'Number of Students and their Study Years',
    tools = [hover_tool],
    x_axis_label = 'Year',
    y_axis_label='No. of students'
)

p3.vbar(
    x='study year', 
    top='count', 
    width=0.90, 
    source=data, 
    legend_field='study year', 
    line_color='white', 
    color = {'field':'study year', 'transform':category_map}
) #color can also be added through: fill_color=bokeh.transform.factor_cmap('study year', palette=colors, factors=current_year['study year'].tolist()))

p3.xgrid.grid_line_color = None
p3.y_range.start = 0
p3.y_range.end = current_year['count'].max()
p3.title.text_color='black'
p3.title.text_font_style='bold'
p3.title.text_font_size='20px'
p3.title.text_font = 'Ariel'


# Number of students based on marital status
p4 = figure(
    height=450, 
    title='Marital Status of the Students',
    tools='hover', 
    tooltips='@marital_status: @count',
    x_range=(-0.5, 1.0)
)

p4.wedge(
    x=0, 
    y=1, 
    radius=0.4,
    start_angle=bokeh.transform.cumsum('angle', include_zero=True,),
    end_angle=bokeh.transform.cumsum('angle'),
    line_color='white',
    fill_color='color',
    legend_field='marital_status',
    source=marital_status
)

p4.axis.axis_label = None
p4.axis.visible = False
p4.grid.grid_line_color = None
p4.title.text_color='black'
p4.title.text_font_style='bold'
p4.title.text_font_size='20px'
p4.title.text_font = 'Ariel'


# Age distribution of students 
hist, edges = np.histogram(df.age, density=True)
p5 = figure(
    height=450, 
    title="Age Distribution of Students",
    x_axis_label = 'Age',
    y_axis_label='Frequency',
)
p5.quad(top=hist, bottom=0, left=edges[:-1], right=edges[1:], line_color='white')

p5.title.text_color='black'
p5.title.text_font_style='bold'
p5.title.text_font_size='20px'
p5.title.text_font = 'Ariel'


# Gender wise Marital Status
data = ColumnDataSource(marital_status_gender)

p6 = figure(
    x_range = marital_status_gender.gender.tolist(),
    y_range = (0,70),
    height = 450,
    title='Gender wise Marital Status of Students',
    tooltips = [ 
        ('Gender', '@gender'),
        ('Unmarried', '@No'),
        ('Married', '@Yes')
    ],
    x_axis_label = 'Gender',
    y_axis_label='No. of Students'
)

p6.vbar(
    x=bokeh.transform.dodge('gender', -0.17, range=p6.x_range),
    top = 'No',
    width = 0.3,
    source = data,
    color = '#005f99',
    legend_label='Not married'
)

p6.vbar(
    x=bokeh.transform.dodge('gender', 0.17, range=p6.x_range),
    top = 'Yes',
    width = 0.3,
    source = data,
    color = '#ff449f',
    legend_label = "Married"
)

p6.x_range.range_padding = 0.1
p6.xgrid.grid_line_color = None
p6.title.text_color = 'black'
p6.title.text_font_style = 'bold'
p6.title.text_font_size='20px'
p6.title.text_font = 'Ariel'

In [29]:
# Result plots

tab2 = Panel(child=p1, title='Gender')
tab3 = Panel(child=p2, title='Grade')
tab4 = Panel(child=p3, title='Study Year')
tab5 = Panel(child=p4, title='Marital Status 1')
tab6 = Panel(child=p6, title='marital Status 2')
tab1 = Panel(child=p5, title="Age Distribution")

tabs_obj = Tabs(tabs=[tab1, tab2, tab3, tab4, tab5, tab6])

show(tabs_obj)

In [30]:
def GroupedBar(df, y_range, title, tooltips, x_axis_label, y_axis_label, x_axis, top1, top2, legend1, legend2, col1, col2):
    data = ColumnDataSource(df)

    plot = figure(
        x_range = df.iloc[:,0].tolist(),
        y_range = y_range,
        height = 450,
        title = title,
        tooltips = tooltips,
        x_axis_label = x_axis_label,
        y_axis_label = y_axis_label
    )

    plot.vbar(
        x = bokeh.transform.dodge(x_axis, -0.17, range=plot.x_range),
        top = top1,
        width = 0.3,
        source = data,
        color = col1,
        legend_label = legend1
    )

    plot.vbar(
        x = bokeh.transform.dodge(x_axis, 0.17, range = plot.x_range),
        top = top2,
        width = 0.3,
        source = data,
        color = col2,
        legend_label = legend2
    )

    plot.x_range.range_padding = 0.1
    plot.xgrid.grid_line_color = None
    plot.title.text_color = 'black'
    plot.title.text_font_style = 'bold'
    plot.title.text_font = 'Ariel'
    plot.title.text_font_size = '20px'

    return plot

### Depression

In [31]:
# Number of depressed students
total_depressed = pd.DataFrame(df.depression.value_counts()).reset_index().rename(columns={'depression':'count', 'index': 'depression'})
total_depressed['angle'] = total_depressed['count']/ total_depressed['count'].sum()*2*pi
total_depressed['color'] = ['#367e18', '#cc3636']

p7 = figure(
    height=450, 
    title='How many Students are Depressed?',
    tools='hover', 
    tooltips='@depression: @count',
    x_range=(-0.5, .9)
)

p7.wedge(
    x=0, 
    y=1, 
    radius=0.4,
    start_angle=bokeh.transform.cumsum(
        'angle', 
        include_zero=True
    ),
    end_angle=bokeh.transform.cumsum('angle'),
    line_color='white',
    fill_color='color',
    legend_field='depression',
    source=total_depressed
)

p7.axis.axis_label = None
p7.axis.visible = False
p7.grid.grid_line_color = None
p7.title.text_color='black'
p7.title.text_font_style='bold'
p7.title.text_font_size='20px'
p7.title.text_font = 'Ariel'

# Which gender students are more depressed?
depression_gender = pd.crosstab(df.gender, df.depression).reset_index()
tooltips = [('Gender', '@gender'), ('No depression', '@No'), ('Depressed', '@Yes')]
p8 = GroupedBar(df=depression_gender, y_range = (0,50), title="Which Students are Depressed?",\
     tooltips = tooltips, x_axis_label='Gender', y_axis_label='No. of Students' ,x_axis='gender', top1='No', \
        top2='Yes', legend1='Not depressed', legend2='Depressed', col1='#367e18', col2='#cc3636')


# Relation of marriage with depression
depression_marriage = pd.crosstab(df.marital_status, df.depression).reset_index()
title = 'How marriage is related to depression?'
tool = [
    ('Marital status', '@marital_status'),
    ('No depression', '@No'),
    ('Depression', '@Yes')
]

p9 = GroupedBar(df=depression_marriage, y_range=(0, 70), title=title, tooltips=tool, x_axis_label='Marital Status',\
    y_axis_label = 'No. of Students', x_axis='marital_status', top1='No', top2='Yes', \
        legend1='Not depressed', legend2='Depressed', col1='#367e18', col2='#cc3636')


# Relation of grades with depression
depression_grade = pd.crosstab(df.cgpa, df.depression).reset_index()

title = 'How grade is related to depression among students'
tool = [ 
    ('Grade','@cgpa'),
    ('No depression', '@No'),
    ('Depression', '@Yes')
]
p10 = GroupedBar(df=depression_grade, y_range=(0,40), title=title, tooltips=tool,\
    x_axis_label='Grade in CGPA', y_axis_label='No. of Students', x_axis='cgpa', top1='No', top2='Yes',\
        legend1='Not depressed', legend2='Depressed', col1='#367e18', col2='#cc3636')

p10.legend.location = 'top_left'


# Are the depressed Students seeking help?
depressed_students = df[df.depression=='Yes']
depression_treatment = pd.crosstab(depressed_students.gender, depressed_students.treatment).reset_index()

title = 'Are the depressed students seeking Help?'
tips = [ 
    ('Gender', '@gender'),
    ('No treatment','@No'),
    ('Treatment', '@Yes')
]

p11 = GroupedBar(df=depression_treatment, y_range=(0,27), title=title, tooltips=tips,\
    x_axis_label='Gender', y_axis_label='No. of Students', x_axis='gender', top1='No', top2='Yes',\
        legend1='No treatment', legend2='Treatment', col1='#d2001a', col2='#006e7f')


In [32]:
# Depression Result plots
tab7 = Panel(child=p7, title='Depression')
tab8 = Panel(child=p8, title='Gender based')
tab9 = Panel(child=p9, title='Marriage based')
tab10 = Panel(child=p10, title='Grade based')
tab11 = Panel(child = p11, title='Treatment')

tabs_obj1 = Tabs(tabs=[tab7, tab8, tab9, tab10, tab11])

show(tabs_obj1)

### Anxiety

In [79]:
anxiety_total = df.anxiety.value_counts().reset_index().rename(columns={'anxiety':'count', 'index':'anxiety'})
anxiety_total['angle'] = anxiety_total['count']/anxiety_total['count'].sum() *2*pi
anxiety_total['color'] = ['#fb7813', '#17706e'][::-1]
p12 = figure(
    height=450, 
    title='How many Students are Suffering from Anxiety?',
    tools='hover', 
    tooltips='@anxiety: @count',
    x_range=(-0.5, .9)
)

p12.wedge(
    x=0, 
    y=1, 
    radius=0.4,
    start_angle=bokeh.transform.cumsum(
        'angle', 
        include_zero=True
    ),
    end_angle=bokeh.transform.cumsum('angle'),
    line_color='white',
    fill_color='color',
    legend_field='anxiety',
    source=anxiety_total
)

p12.axis.axis_label = None
p12.axis.visible = False
p12.grid.grid_line_color = None
p12.title.text_color='black'
p12.title.text_font_style='bold'
p12.title.text_font_size='20px'
p12.title.text_font = 'Ariel'

# Gender wise
anxiety_gender = pd.crosstab(df.gender, df.anxiety).reset_index()
title = 'Relation of Gender with Anxiety'
tips = [ 
    ('Gender', '@gender'),
    ('No anxiety', '@No'),
    ('Anxiety', '@Yes')
]
p13 = GroupedBar(df=anxiety_gender, y_range=(0,65), title=title, tooltips=tips,\
    x_axis_label='Gender', y_axis_label='No. of Students', x_axis='gender', top1='No', top2='Yes',\
        legend1='No anxiety', legend2='Anxiety', col1='#52006a', col2='#cd113b')

# Marital status
anxiety_marriage = pd.crosstab(df.marital_status, df.anxiety).reset_index()
title = 'Marriage Vs Anxiety'
tips = [ 
    ('Marital Status', '@marital_status'),
    ('No anxiety', '@No'),
    ('Anxiety', '@Yes')
]
p14 = GroupedBar(df=anxiety_marriage, y_range=(0,60), title=title, tooltips=tips, \
        x_axis_label='Marital Status', y_axis_label='No. of Students', x_axis='marital_status',\
            top1='No', top2='Yes', legend1='No anxiety', legend2='Anxiety', col1='#52006a', col2='#cd113b')

anxiety_grade = pd.crosstab(df.cgpa, df.anxiety).reset_index()
title='Grade Vs. Anxiety'
tips = [ 
    ('Grade', '@cgpa'),
    ('No anxiety', '@No'),
    ('Anxiety', '@Yes')
]
p15 = GroupedBar(df=anxiety_grade, y_range=(0,30), title=title, tooltips=tips, x_axis_label='Grade',\
    y_axis_label='No. of Students', x_axis='cgpa', top1='No', top2='Yes', legend1='No anxiety', \
        legend2='Anxiety', col1='#52006a', col2='#cd113b')
p15.legend.location='top_left'

# Are Anxiety suffering students seeking help of treatment
anxiety_students = df[df.anxiety=='Yes']
treatment1 = pd.crosstab(anxiety_students.gender, anxiety_students.treatment).reset_index()
treatment2 = pd.crosstab(anxiety_students.marital_status, anxiety_students.treatment).reset_index()
title = 'Are the students taking medical treatment? (Gender)'
tips = [ 
    ('Gender','@gender'),
    ('No treatment', '@No'),
    ('Treatment', '@Yes')
]
p16 = GroupedBar(df=treatment1, y_range=(0,25), title=title, tooltips=tips,x_axis_label='Gender',\
    y_axis_label='No. of Students', x_axis='gender', top1='No', top2 ='Yes', legend1='No treatment',\
        legend2='Treatment', col1='#c40018', col2='#2b580c')

title = 'Are the students taking medical treatment? (Marriage)'
tips = [ 
    ('marital_status', '@marital_status'),
    ('No treatment', '@No'),
    ('Treatment', '@Yes')
]
p17 = GroupedBar(df=treatment2, y_range=(0,30), title=title, tooltips=tips, x_axis_label='Marital Status',\
    y_axis_label='No. of Students', x_axis='marital_status', top1='No', top2='Yes', legend1='No treatment',\
        legend2='Treatment', col1='#c40018', col2='#2b580c')


# Does depression causes anxiety?
anxiety_depression =pd.crosstab(df.depression, df.anxiety).reset_index()
title = "Relationship of Depression with Anxiety"
tips = [ 
    ('Depression', '@depression'),
    ('No Anxiety', '@No'),
    ('Anxiety', '@Yes')
]
p18 = GroupedBar(df=anxiety_depression, y_range=(0,52), title=title, tooltips=tips, x_axis_label='Depression',\
    y_axis_label='No. of Students', x_axis='depression', top1='No', top2='Yes', legend1='No anxiety',\
        legend2='Anxiety', col1='#003355',col2='#ff5200')

In [80]:
tab12 = Panel(child=p12, title='Anxiety')
tab13 = Panel(child=p13, title='Gender')
tab14 = Panel(child=p14, title='Marriage')
tab15 = Panel(child=p15, title='Grade')
tab16 = Panel(child=p18, title='Depression')
tab17 = Panel(child=p16, title='Treatment1')
tab18 = Panel(child=p17, title='Treatment2')

tabs_obj2 = Tabs(tabs=[tab12, tab13, tab14, tab15, tab16, tab17, tab18])

show(tabs_obj2)

### Panic Attacks

In [111]:
# How many students getting panic attack?
panic_attack_total = df.panic_attack.value_counts().reset_index().rename(columns={'panic_attack':'count', 'index':'panic_attack'})
panic_attack_total['angle'] = panic_attack_total['count'] / panic_attack_total['count'].sum() *2*pi
panic_attack_total['color'] = ['#0e2f56','#810b97']

p19 = figure(
    height=450,
    title='No. of students suffering panic attack',
    tools = 'hover',
    tooltips='@panic_attack: @count',
    x_range = (-0.5,.9)
)

p19.wedge(
    x=0,
    y=1,
    radius=0.4,
    start_angle=bokeh.transform.cumsum('angle', include_zero=True),
    end_angle = bokeh.transform.cumsum('angle'),
    line_color = 'white',
    fill_color = 'color',
    legend_field='panic_attack',
    source = panic_attack_total
)

p19.axis.axis_label = None
p19.axis.visible = False
p19.grid.grid_line_color = None
p19.title.text_color='black'
p19.title.text_font_style='bold'
p19.title.text_font_size='20px'
p19.title.text_font = 'Ariel'


# Gender
panic_attack_gender = pd.crosstab(df.gender,df.panic_attack).reset_index()
title = 'Gender Vs Panic Attack'
tips = [ 
    ('Gender','@gender'),
    ('No panic attack', '@No'),
    ('Panic attack', '@Yes')
]
p20 = GroupedBar(df=panic_attack_gender, y_range=(0,52), title=title, tooltips=tips,\
    x_axis_label='Gender', y_axis_label='No. of students', x_axis='gender', top1='No', top2='Yes',\
        legend1='No panic attack', legend2='Panic attack', col1='#005f99', col2='#cf0000')


# marital status
panic_attack_marriage = pd.crosstab(df.marital_status,df.panic_attack).reset_index()
title='Marital Status Vs Panic Attack'
tips = [ 
    ('Marital Status', '@marital_status'),
    ('No panic attack', '@No'),
    ('Panic attack', '@Yes')
]
p21 = GroupedBar(df=panic_attack_marriage, y_range=(0,65), title=title, tooltips=tips,\
    x_axis_label='Marital Status', y_axis_label='No. of students', x_axis='marital_status',\
    top1='No', top2='Yes',legend1='No panic attack', legend2='Panic attack', col1='#005f99', col2='#cf0000')



# Grade
panic_attack_grade = pd.crosstab(df.cgpa, df.panic_attack).reset_index()
title='Grade Vs Panic Attack'
tips = [ 
    ('Grade','@cgpa'),
    ('No panic attack','@No'),
    ('Panic attack','@Yes')
]
p22 = GroupedBar(df=panic_attack_grade, y_range=(0,36), title=title, tooltips=tips,\
    x_axis_label='Grade in CGPA', y_axis_label='No. of students', x_axis='cgpa',\
    top1='No', top2='Yes',legend1='No panic attack', legend2='Panic attack', col1='#005f99', col2='#cf0000')
p22.legend.location='top_left'


# Depression
panic_attack_depression = pd.crosstab(df.depression, df.panic_attack).reset_index()
title='Does depression causes panic attack?'
tips = [ 
    ('Depression','@depression'),
    ('No panic attack','@No'),
    ('Panic attack','@Yes')
]
p23 = GroupedBar(df=panic_attack_depression, y_range=(0,52), title=title, tooltips=tips,\
    x_axis_label='Depression', y_axis_label='No. of students', x_axis='depression',\
    top1='No', top2='Yes',legend1='No panic attack', legend2='Panic attack', col1='#005f99', col2='#cf0000')



# Does anxiety causes panic attack?
panic_attack_anxiety = pd.crosstab(df.anxiety, df.panic_attack).reset_index()
title='Does anxiety causes panic attack?'
tips = [ 
    ('Anxiety','@anxiety'),
    ('No panic attack','@No'),
    ('Panic attack','@Yes')
]
p24 = GroupedBar(df=panic_attack_anxiety, y_range=(0,50), title=title, tooltips=tips,\
    x_axis_label='Anxiety', y_axis_label='No. of students', x_axis='anxiety',\
    top1='No', top2='Yes',legend1='No panic attack', legend2='Panic attack', col1='#005f99', col2='#cf0000')


# How many students are taking treatment
panic_attack_students = df[df.panic_attack=='Yes']
panic_attack_treatment = panic_attack_students.treatment.value_counts().reset_index().rename(columns={'treatment':'count','index':'treatment'})
panic_attack_treatment['angle'] = panic_attack_total['count'] / panic_attack_total['count'].sum() *2*pi
panic_attack_treatment['color'] = ['#ac0d0d', '#146356']
p25 = figure(
    height=450,
    title='How many students are taking medical help?',
    tools = 'hover',
    tooltips='@treatment: @count',
    x_range = (-0.5,.9)
)

p25.wedge(
    x=0,
    y=1,
    radius=0.4,
    start_angle=bokeh.transform.cumsum('angle', include_zero=True),
    end_angle = bokeh.transform.cumsum('angle'),
    line_color = 'white',
    fill_color = 'color',
    legend_field='treatment',
    source = panic_attack_treatment
)
p25.axis.axis_label = None
p25.axis.visible = False
p25.grid.grid_line_color = None
p25.title.text_color='black'
p25.title.text_font_style='bold'
p25.title.text_font_size='20px'
p25.title.text_font = 'Ariel'

In [113]:
tab19 = Panel(child=p19, title='Panic Attack')
tab20 = Panel(child=p20, title='Gender wise')
tab21 = Panel(child=p21, title='Marital Status')
tab22 = Panel(child=p22, title='Grade wise')
tab23 = Panel(child=p23, title='Depression')
tab24 = Panel(child=p24, title='Anxiety')
tab25 = Panel(child=p25, title='Treatment')

tab_obj3 = Tabs(tabs=[tab19, tab20, tab21, tab22, tab23, tab24, tab25])

show(tab_obj3)

In [49]:
df.groupby(['depression', 'anxiety', 'panic_attack']).panic_attack.count()

depression  anxiety  panic_attack
No          No       No              37
                     Yes             13
            Yes      No              13
                     Yes              3
Yes         No       No              10
                     Yes              7
            Yes      No               8
                     Yes             10
Name: panic_attack, dtype: int64

In [None]:
# How many students are getting treatment
mixed = df.groupby(['marital_status', 'depression', 'anxiety']).panic_attack.count().reset_index()
data = ColumnDataSource(data=mixed)

def get_html_formatter(my_col):
    template = """
        <div style="background:<%=
            (function colorfromint(){
                if(result_col =='Yes'){
                    return('#ff1e00')
                }
                else if (result_col=='No'){
                    return('#1363df')
                }
            }()) %>;
            color: black">
        <%= value %>
        </div>
    """.replace('result_col', my_col)

    return HTMLTemplateFormatter(template=template)

columns = [ 
    TableColumn(field='marital_status', title="Marital Status", formatter=get_html_formatter('marital_status')),
    TableColumn(field='depression', title='Depression', formatter=get_html_formatter('depression')),
    TableColumn(field='anxiety', title='Anxiety', formatter=get_html_formatter('anxiety')),
    TableColumn(field='panic_attack', title='Panic Attack')
]

data_table = DataTable(source=data, columns=columns, width=800)

output_file(filename='data_table.html')
show(data_table)

![](datatable.png)