## Data Preparation

In [174]:
import numpy as np
import pandas as pd
import plotly.express as px

In [175]:
# Load the raw survey responses; the path is relative to the notebook's working directory
df = pd.read_csv('survey.csv')

In [176]:
# Preview the first rows to check that the file parsed as expected
df.head()

Unnamed: 0,Timestamp,Age,Gender,Country,state,self_employed,family_history,treatment,work_interfere,no_employees,...,leave,mental_health_consequence,phys_health_consequence,coworkers,supervisor,mental_health_interview,phys_health_interview,mental_vs_physical,obs_consequence,comments
0,2014-08-27 11:29:31,37,Female,United States,IL,,No,Yes,Often,6-25,...,Somewhat easy,No,No,Some of them,Yes,No,Maybe,Yes,No,
1,2014-08-27 11:29:37,44,M,United States,IN,,No,No,Rarely,More than 1000,...,Don't know,Maybe,No,No,No,No,No,Don't know,No,
2,2014-08-27 11:29:44,32,Male,Canada,,,No,No,Rarely,6-25,...,Somewhat difficult,No,No,Yes,Yes,Yes,Yes,No,No,
3,2014-08-27 11:29:46,31,Male,United Kingdom,,,Yes,Yes,Often,26-100,...,Somewhat difficult,Yes,Yes,Some of them,No,Maybe,Maybe,No,Yes,
4,2014-08-27 11:30:22,31,Male,United States,TX,,No,No,Never,100-500,...,Don't know,No,No,Some of them,Yes,Yes,Yes,Don't know,No,


In [177]:
# Column dtypes and non-null counts (shows which columns contain missing values)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1259 entries, 0 to 1258
Data columns (total 27 columns):
 #   Column                     Non-Null Count  Dtype 
---  ------                     --------------  ----- 
 0   Timestamp                  1259 non-null   object
 1   Age                        1259 non-null   int64 
 2   Gender                     1259 non-null   object
 3   Country                    1259 non-null   object
 4   state                      744 non-null    object
 5   self_employed              1241 non-null   object
 6   family_history             1259 non-null   object
 7   treatment                  1259 non-null   object
 8   work_interfere             995 non-null    object
 9   no_employees               1259 non-null   object
 10  remote_work                1259 non-null   object
 11  tech_company               1259 non-null   object
 12  benefits                   1259 non-null   object
 13  care_options               1259 non-null   object
 14  wellness

In [178]:
# Convert all column *names* to lowercase (cell values are left untouched),
# so later cells can refer to e.g. 'country' instead of 'Country'
df.columns = df.columns.str.lower()

In [179]:
# Number of respondents per country, most frequent first
df['country'].value_counts()

United States             751
United Kingdom            185
Canada                     72
Germany                    45
Ireland                    27
Netherlands                27
Australia                  21
France                     13
India                      10
New Zealand                 8
Poland                      7
Switzerland                 7
Sweden                      7
Italy                       7
South Africa                6
Belgium                     6
Brazil                      6
Israel                      5
Singapore                   4
Bulgaria                    4
Austria                     3
Finland                     3
Mexico                      3
Russia                      3
Denmark                     2
Greece                      2
Colombia                    2
Croatia                     2
Portugal                    2
Moldova                     1
Georgia                     1
Bahamas, The                1
China                       1
Thailand  

In [180]:
# Average number of respondents per country (mean of the per-country counts)
# NOTE(review): this ~26 looks like the basis for the 26-row group-size cutoff used in df_modifier - confirm
df['country'].value_counts().mean()

26.229166666666668

In [181]:
# Distinct values of 'state', including NaN for rows where it is missing
df['state'].unique()

array(['IL', 'IN', nan, 'TX', 'TN', 'MI', 'OH', 'CA', 'CT', 'MD', 'NY',
       'NC', 'MA', 'IA', 'PA', 'WA', 'WI', 'UT', 'NM', 'OR', 'FL', 'MN',
       'MO', 'AZ', 'CO', 'GA', 'DC', 'NE', 'WV', 'OK', 'KS', 'VA', 'NH',
       'KY', 'AL', 'NV', 'NJ', 'SC', 'VT', 'SD', 'ID', 'MS', 'RI', 'WY',
       'LA', 'ME'], dtype=object)

In [182]:
# Drop the two columns that none of the plots below use
df = df.drop(columns=['timestamp', 'comments'])

In [183]:
# Summary statistics for every column (include='all' adds the non-numeric ones)
# NOTE(review): the output shows age with min -1726 and mean ~7.9e7, so 'age' holds
# invalid entries and needs cleaning before it is used in any analysis
df.describe(include='all')

Unnamed: 0,age,gender,country,state,self_employed,family_history,treatment,work_interfere,no_employees,remote_work,...,anonymity,leave,mental_health_consequence,phys_health_consequence,coworkers,supervisor,mental_health_interview,phys_health_interview,mental_vs_physical,obs_consequence
count,1259.0,1259,1259,744,1241,1259,1259,995,1259,1259,...,1259,1259,1259,1259,1259,1259,1259,1259,1259,1259
unique,,49,48,45,2,2,2,4,6,2,...,3,5,3,3,3,3,3,3,3,2
top,,Male,United States,CA,No,No,Yes,Sometimes,6-25,No,...,Don't know,Don't know,No,No,Some of them,Yes,No,Maybe,Don't know,No
freq,,615,751,138,1095,767,637,465,290,883,...,819,563,490,925,774,516,1008,557,576,1075
mean,79428150.0,,,,,,,,,,...,,,,,,,,,,
std,2818299000.0,,,,,,,,,,...,,,,,,,,,,
min,-1726.0,,,,,,,,,,...,,,,,,,,,,
25%,27.0,,,,,,,,,,...,,,,,,,,,,
50%,31.0,,,,,,,,,,...,,,,,,,,,,
75%,36.0,,,,,,,,,,...,,,,,,,,,,


In [184]:
# Helper that keeps only well-populated groups of a column pair (no plotting here)
def df_modifier(x_coord, y_coord, min_count=26, data=None):
  """Return the (x_coord, y_coord) columns restricted to well-populated groups.

  Rows where ``x_coord`` is null are removed first; then every group of
  ``x_coord`` that does not have strictly more than ``min_count`` rows is
  dropped. Row order and index of the surviving rows are preserved.

  Parameters
  ----------
  x_coord : str
      Name of the grouping column (e.g. 'state' or 'country').
  y_coord : str
      Name of the answer column to keep next to ``x_coord``.
  min_count : int, default 26
      A group is kept only if it has more than this many rows. The default
      presumably comes from the mean of the per-country counts (~26.2)
      computed earlier in the notebook - confirm before changing.
  data : pandas.DataFrame, optional
      Frame to filter. Defaults to the notebook-level ``df``.

  Returns
  -------
  pandas.DataFrame
      Two-column frame containing only rows of the retained groups.
  """
  source = df if data is None else data
  # Select both columns in one .loc call instead of chained indexing
  non_null = source.loc[source[x_coord].notnull(), [x_coord, y_coord]]
  return non_null.groupby(x_coord).filter(lambda group: len(group) > min_count)

def bar_graph_plotter(x_coord, y_coord, y_label, graph_title):
  """Show a bar chart of how the answers in ``y_coord`` split within each ``x_coord`` group.

  The data comes from ``df_modifier(x_coord, y_coord)``. For every group of
  ``x_coord`` the share of each ``y_coord`` answer is computed as a percentage
  (normalized counts times 100, rounded to 2 decimals), so the plotted values
  are percentages, not raw counts.

  Parameters
  ----------
  x_coord : str
      Grouping column shown on the x-axis.
  y_coord : str
      Answer column; one bar colour per distinct answer.
  y_label : str
      Name given to the percentage column, which is also the y-axis label.
  graph_title : str
      Title of the figure.

  Returns
  -------
  None
      The figure is displayed via ``fig.show()``.
  """
  answers_by_group = df_modifier(x_coord, y_coord).groupby(x_coord)[y_coord]
  # Share of each answer inside its group, expressed in percent
  share_pct = answers_by_group.value_counts(normalize=True).mul(100).round(2)
  # Back to a flat frame: one row per (group, answer) with the share in `y_label`
  chart_data = share_pct.sort_index().to_frame(name=y_label).reset_index()

  chart = px.bar(chart_data, x=x_coord, y=y_label, title=graph_title, color=y_coord)
  chart.show()

# Employee Specific

In [185]:
# Determining Employees seeking treatment of their mental illness in US
# (y-axis is labelled 'percentage' because bar_graph_plotter plots normalized shares, not raw counts)
bar_graph_plotter('state', 'treatment', 'percentage', 'Employees seeking treatment of their mental illness in US')

In [186]:
# Determining Employees seeking treatment of their mental illness in World
# (y-axis is labelled 'percentage' because bar_graph_plotter plots normalized shares, not raw counts)
bar_graph_plotter('country', 'treatment', 'percentage', 'Employees seeking treatment of their mental illness in World')

In [187]:
# Determining Employees knowing options for mental care their employer provides in US
# (y-axis is labelled 'percentage' because bar_graph_plotter plots normalized shares, not raw counts)
bar_graph_plotter('state', 'care_options', 'percentage', 'Employees knowing options for mental care their employer provides in US')

In [188]:
# Determining Employees knowing options for mental care their employer provides in World
# (y-axis is labelled 'percentage' because bar_graph_plotter plots normalized shares, not raw counts)
bar_graph_plotter('country', 'care_options', 'percentage', 'Employees knowing options for mental care their employer provides in World')

In [189]:
# Determining Employees comfortable discussing their mental health with their coworkers in US
# (y-axis is labelled 'percentage' because bar_graph_plotter plots normalized shares, not raw counts)
bar_graph_plotter('state', 'coworkers', 'percentage', 'Employees comfortable discussing their mental health with their coworkers in US')

In [190]:
# Determining Employees comfortable discussing their mental health with their coworkers in World
# (y-axis is labelled 'percentage' because bar_graph_plotter plots normalized shares, not raw counts)
bar_graph_plotter('country', 'coworkers', 'percentage', 'Employees comfortable discussing their mental health with their coworkers in World')

In [191]:
# Determining Employees comfortable discussing their mental health with their direct supervisor in US
# (y-axis is labelled 'percentage' because bar_graph_plotter plots normalized shares, not raw counts)
bar_graph_plotter('state', 'supervisor', 'percentage', 'Employees comfortable discussing their mental health with their direct supervisor in US')

In [192]:
# Determining Employees comfortable discussing their mental health with their direct supervisor in World
# Plots the 'supervisor' column so the data matches the title (the 'coworkers' chart is a separate cell)
# (y-axis is labelled 'percentage' because bar_graph_plotter plots normalized shares, not raw counts)
bar_graph_plotter('country', 'supervisor', 'percentage', 'Employees comfortable discussing their mental health with their direct supervisor in World')

In [193]:
# Determining whether employees feel their employer treats mental health as seriously as physical health, per US state
# (y-axis is labelled 'percentage' because bar_graph_plotter plots normalized shares, not raw counts)
bar_graph_plotter('state', 'mental_vs_physical', 'percentage', 'Employers who put mental health as important as physical health as per employee in US')

In [194]:
# Determining whether employees feel their employer treats mental health as seriously as physical health, per country
# (y-axis is labelled 'percentage' because bar_graph_plotter plots normalized shares, not raw counts)
bar_graph_plotter('country', 'mental_vs_physical', 'percentage', 'Employers who put mental health as important as physical health as per employee in World')

# Employer Specific

In [195]:
# Determining answers to the 'benefits' question per US state
# (y-axis is labelled 'percentage' because bar_graph_plotter plots normalized shares, not raw counts)
bar_graph_plotter('state', 'benefits', 'percentage', 'Employers who provides mental health benefits in US')

In [196]:
# Determining answers to the 'benefits' question per country
# (y-axis is labelled 'percentage' because bar_graph_plotter plots normalized shares, not raw counts)
bar_graph_plotter('country', 'benefits', 'percentage', 'Employers who provides mental health benefits in World')

In [197]:
# Determining answers to the 'wellness_program' question per US state
# (y-axis is labelled 'percentage' because bar_graph_plotter plots normalized shares, not raw counts)
bar_graph_plotter('state', 'wellness_program', 'percentage', 'Employers who discuss mental health of their employee as per wellness program in US')

In [198]:
# Determining answers to the 'wellness_program' question per country
# (y-axis is labelled 'percentage' because bar_graph_plotter plots normalized shares, not raw counts)
bar_graph_plotter('country', 'wellness_program', 'percentage', 'Employers who discuss mental health of their employee as per wellness program in World')

In [199]:
# Determining answers to the 'seek_help' question per US state
# (y-axis is labelled 'percentage' because bar_graph_plotter plots normalized shares, not raw counts)
bar_graph_plotter('state', 'seek_help', 'percentage', 'Employers who provides resources to seek help about mental health issues in US')

In [200]:
# Determining answers to the 'seek_help' question per country
# (y-axis is labelled 'percentage' because bar_graph_plotter plots normalized shares, not raw counts)
bar_graph_plotter('country', 'seek_help', 'percentage', 'Employers who provides resources to seek help about mental health issues in World')

In [203]:
# Determining answers to the 'leave' question per US state
# (y-axis is labelled 'percentage' because bar_graph_plotter plots normalized shares, not raw counts)
bar_graph_plotter('state', 'leave', 'percentage', 'Ease at which the medical leave can be taken by the employee due to mental state in US')

In [204]:
# Determining answers to the 'leave' question per country
# (y-axis is labelled 'percentage' because bar_graph_plotter plots normalized shares, not raw counts)
bar_graph_plotter('country', 'leave', 'percentage', 'Ease at which the medical leave can be taken by the employee due to mental state in World')