More information on the mental health in tech survey [here](https://www.kaggle.com/osmi/mental-health-in-tech-survey/data).

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

In [2]:
# Uploading file.

df = pd.read_csv('datafiles/mental_health_tech_survey.csv')

In [3]:
# Let's see what we're dealing with.

df.head()

Unnamed: 0,Timestamp,Age,Gender,Country,state,self_employed,family_history,treatment,work_interfere,no_employees,...,leave,mental_health_consequence,phys_health_consequence,coworkers,supervisor,mental_health_interview,phys_health_interview,mental_vs_physical,obs_consequence,comments
0,2014-08-27 11:29:31,37,Female,United States,IL,,No,Yes,Often,6-25,...,Somewhat easy,No,No,Some of them,Yes,No,Maybe,Yes,No,
1,2014-08-27 11:29:37,44,M,United States,IN,,No,No,Rarely,More than 1000,...,Don't know,Maybe,No,No,No,No,No,Don't know,No,
2,2014-08-27 11:29:44,32,Male,Canada,,,No,No,Rarely,6-25,...,Somewhat difficult,No,No,Yes,Yes,Yes,Yes,No,No,
3,2014-08-27 11:29:46,31,Male,United Kingdom,,,Yes,Yes,Often,26-100,...,Somewhat difficult,Yes,Yes,Some of them,No,Maybe,Maybe,No,Yes,
4,2014-08-27 11:30:22,31,Male,United States,TX,,No,No,Never,100-500,...,Don't know,No,No,Some of them,Yes,Yes,Yes,Don't know,No,


In [4]:
# How is Gender being categorized?

df['Gender'].unique()

array(['Female', 'M', 'Male', 'male', 'female', 'm', 'Male-ish', 'maile',
       'Trans-female', 'Cis Female', 'F', 'something kinda male?',
       'Cis Male', 'Woman', 'f', 'Mal', 'Male (CIS)', 'queer/she/they',
       'non-binary', 'Femake', 'woman', 'Make', 'Nah', 'All', 'Enby',
       'fluid', 'Genderqueer', 'Female ', 'Androgyne', 'Agender',
       'cis-female/femme', 'Guy (-ish) ^_^', 'male leaning androgynous',
       'Male ', 'Man', 'Trans woman', 'msle', 'Neuter', 'Female (trans)',
       'queer', 'Female (cis)', 'Mail', 'cis male', 'A little about you',
       'Malr', 'p', 'femail', 'Cis Man',
       'ostensibly male, unsure what that really means'], dtype=object)

In [5]:
# Looks like people wrote in responses.  Data cleaning time!

df['Gender'] = df['Gender'].str.lower()
df['Gender'] = df['Gender'].replace('m','male')
df['Gender'] = df['Gender'].replace('f','female')

df['Gender'] = df['Gender'].str.strip()

df['Gender'] = df['Gender'].apply(lambda x: str(x).replace('cis ',''))
df['Gender'] = df['Gender'].apply(lambda x: str(x).replace('(cis)',''))

df['Gender'] = df['Gender'].apply(lambda x: str(x).replace('make','male'))
df['Gender'] = df['Gender'].apply(lambda x: str(x).replace('mail','male'))
df['Gender'] = df['Gender'].apply(lambda x: str(x).replace('mal','male'))
df['Gender'] = df['Gender'].apply(lambda x: str(x).replace('malee','male'))
df['Gender'] = df['Gender'].apply(lambda x: str(x).replace('malr','male'))
df['Gender'] = df['Gender'].apply(lambda x: str(x).replace('woman','female'))
df['Gender'] = df['Gender'].apply(lambda x: str(x).replace('maler','male'))
df['Gender'] = df['Gender'].apply(lambda x: str(x).replace('msle','male'))
df['Gender'] = df['Gender'].apply(lambda x: str(x).replace('man','male'))

df['Gender'].value_counts()

male                                              988
female                                            245
female (trans)                                      2
trans female                                        1
malee                                               1
male leaning androgynous                            1
all                                                 1
cis-female/femme                                    1
androgyne                                           1
a little about you                                  1
queer                                               1
trans-female                                        1
p                                                   1
queer/she/they                                      1
non-binary                                          1
fluid                                               1
enby                                                1
genderqueer                                         1
male-ish                    

In [6]:
# Changing those who didn't reply "male" or "female" to "other".

df['Gender'] = df['Gender'].apply(lambda x: 'other' if x != 'male' and x != 'female' else x)

df['Gender'].value_counts()

male      988
female    245
other      26
Name: Gender, dtype: int64

In [7]:
# Changing treatment responses to binary.

df['treatment'] = df.treatment.map({'Yes':1, 'No':0})

In [8]:
# Proportion of people who say they went for mental health treatment by gender.

df.groupby('Gender')['treatment'].mean()

Gender
female    0.689796
male      0.454453
other     0.730769
Name: treatment, dtype: float64

In [9]:
# Proportion of people who say they went for mental health treatment by 
#     whether or not the workplace provides mental health benefits.

df.groupby('benefits')['treatment'].mean()

benefits
Don't know    0.370098
No            0.483957
Yes           0.639413
Name: treatment, dtype: float64

In [10]:
df.groupby('coworkers')['treatment'].mean()

coworkers
No              0.457692
Some of them    0.503876
Yes             0.568889
Name: treatment, dtype: float64

In [11]:
df.groupby('supervisor')['treatment'].mean()

supervisor
No              0.526718
Some of them    0.514286
Yes             0.484496
Name: treatment, dtype: float64

In [12]:
df.groupby('obs_consequence')['treatment'].mean()

obs_consequence
No     0.473488
Yes    0.695652
Name: treatment, dtype: float64

In [13]:
df.groupby('care_options')['treatment'].mean()

care_options
No          0.413174
Not sure    0.391720
Yes         0.691441
Name: treatment, dtype: float64

In [14]:
df.groupby('wellness_program')['treatment'].mean()

wellness_program
Don't know    0.430851
No            0.498812
Yes           0.593886
Name: treatment, dtype: float64

In [15]:
df.groupby('seek_help')['treatment'].mean()

seek_help
Don't know    0.4573
No            0.5000
Yes           0.5920
Name: treatment, dtype: float64

In [19]:
df.groupby('no_employees')['treatment'].mean()

no_employees
1-5               0.561728
100-500           0.539773
26-100            0.519031
500-1000          0.450000
6-25              0.441379
More than 1000    0.517730
Name: treatment, dtype: float64

In [16]:
df.mental_health_interview.value_counts()

No       1008
Maybe     207
Yes        44
Name: mental_health_interview, dtype: int64

In [18]:
df.phys_health_interview.value_counts()

Maybe    557
No       500
Yes      202
Name: phys_health_interview, dtype: int64

#### Key

Timestamp

Age

Gender

Country

state: If you live in the United States, which state or territory do you live in?

self_employed: Are you self-employed?

family_history: Do you have a family history of mental illness?

treatment: Have you sought treatment for a mental health condition?

work_interfere: If you have a mental health condition, do you feel that it interferes with your work?

no_employees: How many employees does your company or organization have?

remote_work: Do you work remotely (outside of an office) at least 50% of the time?

tech_company: Is your employer primarily a tech company/organization?

benefits: Does your employer provide mental health benefits?

care_options: Do you know the options for mental health care your employer provides?

wellness_program: Has your employer ever discussed mental health as part of an employee wellness program?

seek_help: Does your employer provide resources to learn more about mental health issues and how to seek help?

anonymity: Is your anonymity protected if you choose to take advantage of mental health or substance abuse treatment resources?

leave: How easy is it for you to take medical leave for a mental health condition?

mental_health_consequence: Do you think that discussing a mental health issue with your employer would have negative consequences?

phys_health_consequence: Do you think that discussing a physical health issue with your employer would have negative consequences?

coworkers: Would you be willing to discuss a mental health issue with your coworkers?

supervisor: Would you be willing to discuss a mental health issue with your direct supervisor(s)?

mental_health_interview: Would you bring up a mental health issue with a potential employer in an interview?

phys_health_interview: Would you bring up a physical health issue with a potential employer in an interview?

mental_vs_physical: Do you feel that your employer takes mental health as seriously as physical health?

obs_consequence: Have you heard of or observed negative consequences for coworkers with mental health conditions in your workplace?

comments: Any additional notes or comments