In [39]:
# libraries

import numpy as np
import pandas as pd

# Disable pandas' chained-assignment check (SettingWithCopyWarning) for the whole session.
# NOTE(review): this hides the warning rather than fixing its cause; prefer single-step
# .loc / whole-column assignment so the check can stay enabled.
pd.options.mode.chained_assignment = None

# Data prep

In [2]:
# read data
# Load the full data set from CSV; the path is relative to the notebook's working directory.

data_full = pd.read_csv("data_full.csv")

In [38]:
# (rows, columns) of the raw data
data_full.shape

(4398, 106)

In [4]:
# choose subset of variables for analysis
# Columns are selected by their full question text (plus 'year').
# The order of this list matters: the rename step assigns the short names by position.
# NOTE(review): `vars` shadows the Python builtin of the same name for the rest of the session.

vars = ['year',
        'What country do you live in?',
        'Is your employer primarily a tech company/organization?',
        'Is your primary role within your company related to tech/IT?',
        'Do you currently have a mental health disorder?',
        'Have you ever been diagnosed with a mental health disorder?',
        'What is your age?',
        'What is your gender?',
        'What is your race?',
        'Does your employer provide mental health benefits as part of healthcare coverage?',
        'Do you know the options for mental health care available under your employer-provided health coverage?',
        'Has your employer ever formally discussed mental health (for example, as part of a wellness campaign or other official communication)?',
        'Does your employer offer resources to learn more about mental health disorders and options for seeking help?',
        'If a mental health issue prompted you to request a medical leave from work, how easy or difficult would it be to ask for that leave?',
        'Would you feel comfortable discussing a mental health issue with your direct supervisor(s)?',
        'Have you ever discussed your mental health with your employer?',
        'Would you feel comfortable discussing a mental health issue with your coworkers?',
        'Have you ever discussed your mental health with coworkers?',
        'Overall, how much importance does your employer place on mental health?',
        'Do you have medical coverage (private insurance or state-provided) that includes treatment of mental health disorders?',
        'Do you know local or online resources to seek help for a mental health issue?',
        'If you have been diagnosed or treated for a mental health disorder, do you ever reveal this to coworkers or employees?',
        'If you have revealed a mental health disorder to a coworker or employee, how has this impacted you or the relationship?',
        'Do you believe your productivity is ever affected by a mental health issue?',
        'If yes, what percentage of your work time (time performing primary or secondary job functions) is affected by a mental health issue?',
        'If you have a mental health disorder, how often do you feel that it interferes with your work when being treated effectively?', 
        'If you have a mental health disorder, how often do you feel that it interferes with your work when _NOT_ being treated effectively (i.e., when you are experiencing symptoms)?',
        'Have you observed or experienced an unsupportive or badly handled response to a mental health issue in your current or previous workplace?',
        'Have you observed or experienced a supportive or well handled response to a mental health issue in your current or previous workplace?',
        'Overall, how well do you think the tech industry supports employees with mental health issues?']

# all rows, only the selected columns (in the order listed above)
temp = data_full.loc[:, vars]

In [5]:
# rename
# Replace the long question-text column labels with short identifiers.
# Assignment is positional: the i-th name here labels the i-th column of `temp`,
# so this list must keep the same order and length as the column selection.

temp.columns = ['year',
        'country_live',
        'employer_tech',
        'role_tech',
        'current_disorder',
        'ever_diagnosed',
        'age',
        'gender',
        'race',
        'employer_benefits',
        'know_options',
        'employer_discussed',
        'employer_resources',
        'leave_difficulty',
        'comfortable_discuss_supervisor',
        'have_discussed_employer',
        'comfortable_discuss_coworkers',
        'have_discussed_coworkers',
        'importance_employer',
        'coverage_treatment',
        'know_resources',
        'reveal_diagnosis_coworkers',
        'reveal_impacted_coworker',
        'productivity_affected',
        'percentage_time_affected',
        'often_interfere_treated',
        'often_interfere_not_treated',
        'response_unsupportive',
        'response_supportive',
        'industry_support']

In [23]:
# (rows, columns) after selecting the analysis variables
temp.shape

(4398, 30)

In [40]:
# filter out non-tech employees (neither tech employer nor tech role):
# a respondent is kept when the employer is a tech company OR the role is tech-related.
# .copy() makes data2 an independent frame, so the derived columns added to it
# afterwards are written to data2 itself and never to a slice of `temp`.

data2 = temp.loc[(temp.employer_tech == 1) | (temp.role_tech == 1)].copy()

In [41]:
# standardize year: subtract the mean and divide by the standard deviation
# (both computed on the filtered data2 rows) to get a z-score

data2['year_z'] = data2['year'].sub(data2['year'].mean()).div(data2['year'].std())

In [42]:
# recode current disorder, Yes=1, No=0, all others NA
# Series.map returns NaN for every value that is not a key of the mapping (and for
# missing answers), so one whole-column assignment produces the 1 / 0 / NaN coding.
# This replaces the chained `data2[col][mask] = value` writes, which assign into a
# temporary object and are not guaranteed to reach data2.

data2['current_disorder_n'] = data2['current_disorder'].map({"Yes": 1.0, "No": 0.0})

In [43]:
# recode gender; female=1, male=0, non-binary as NA
# Only the exact strings "Female" and "Male" are coded; every other answer
# (including any other spelling) and missing values stay NaN.
# A single Series.map assignment replaces the chained `data2[col][mask] = value`
# writes, which are not guaranteed to reach data2.

data2['gender_n'] = data2['gender'].map({"Female": 1.0, "Male": 0.0})

In [44]:
# recode country; US=1, other=0
# isin() yields True for either spelling of the US and False otherwise; casting to
# float gives the same 1.0 / 0.0 coding as before in one whole-column assignment,
# instead of chained `data2.location_US[mask] = value` writes that are not
# guaranteed to reach data2.
# NOTE(review): a missing country_live is not in the list and is therefore coded 0
# (non-US), exactly as in the previous implementation — confirm this is intended.

data2['location_US'] = (
    data2['country_live']
    .isin(["United States of America", "United States"])
    .astype(float)
)

In [123]:
# recode employer_benefits, Yes=1, No=0
# Any other answer and missing values become NaN (Series.map returns NaN for values
# that are not keys of the mapping). One whole-column assignment replaces the
# chained `data2[col][mask] = value` writes, which are not guaranteed to reach data2.

data2['employer_benefits_n'] = data2['employer_benefits'].map({"Yes": 1.0, "No": 0.0})

In [124]:
# recode employer_discussed, Yes=1, No=0; any other answer and missing values become NaN.
# One whole-column Series.map assignment replaces the chained
# `data2[col][mask] = value` writes, which are not guaranteed to reach data2.
data2['employer_discussed_n'] = data2['employer_discussed'].map({"Yes": 1.0, "No": 0.0})

In [125]:
# recode employer_resources, Yes=1, No=0; any other answer and missing values become NaN.
# One whole-column Series.map assignment replaces the chained
# `data2[col][mask] = value` writes, which are not guaranteed to reach data2.
data2['employer_resources_n'] = data2['employer_resources'].map({"Yes": 1.0, "No": 0.0})

# Logistic regression: Disorder ~ Time * Gender

In [68]:
# Model frame for Disorder ~ Time * Gender: the outcome and its two predictors,
# taken from data2 before incomplete rows are removed.

c1 = data2.loc[:, ['current_disorder_n', 'year_z', 'gender_n']]
c1.shape

(3592, 3)

In [69]:
# Complete cases only: drop every row that has a missing value in any column.
# The result is reassigned rather than dropped with inplace=True, so the frame
# derived from data2 is never mutated in place; re-running this cell is harmless.
c1 = c1.dropna(axis=0, how='any')
c1.shape

(1828, 3)

In [None]:
import statsmodels.formula.api as smf

In [92]:
# current disorder ~ time, gender
# `year_z*gender_n` expands to both main effects plus their interaction.

m1 = smf.mnlogit(
    formula='current_disorder_n ~ year_z*gender_n',
    data=c1,
).fit()

Optimization terminated successfully.
         Current function value: 0.674063
         Iterations 4


In [93]:
# coefficient table and fit statistics for model m1
m1.summary()

0,1,2,3
Dep. Variable:,current_disorder_n,No. Observations:,1828.0
Model:,MNLogit,Df Residuals:,1824.0
Method:,MLE,Df Model:,3.0
Date:,"Thu, 04 Nov 2021",Pseudo R-squ.:,0.02291
Time:,16:55:51,Log-Likelihood:,-1232.2
converged:,True,LL-Null:,-1261.1
Covariance Type:,nonrobust,LLR p-value:,1.755e-12

current_disorder_n=1,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-0.1277,0.066,-1.928,0.054,-0.258,0.002
year_z,0.1602,0.081,1.97,0.049,0.001,0.32
gender_n,0.8504,0.133,6.376,0.0,0.589,1.112
year_z:gender_n,-0.1739,0.156,-1.113,0.266,-0.48,0.132


In [106]:
# exponentiate the fitted coefficients (log-odds scale) to obtain odds ratios
pd.DataFrame(np.exp(m1.params))

Unnamed: 0,0
Intercept,0.880108
year_z,1.173779
gender_n,2.340662
year_z:gender_n,0.84038


In [107]:
# exponentiate the confidence-interval bounds so they are on the same
# (odds-ratio) scale as the exponentiated coefficients
pd.DataFrame(np.exp(m1.conf_int()))

Unnamed: 0_level_0,Unnamed: 1_level_0,lower,upper
current_disorder_n,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,Intercept,0.772964,1.002102
1,year_z,1.000786,1.376675
1,gender_n,1.802206,3.039995
1,year_z:gender_n,0.61873,1.141431


### Interpretation

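- Small positive effect of time (survey year) on the odds of having a current disorder (OR ≈ 1.17 per SD of year, p = 0.049); because the model includes the interaction, this is the effect for gender_n = 0, i.e. male respondents
- Much larger effect of gender (female = higher odds, OR ≈ 2.34)
- No evidence of a time * gender interaction (p = 0.266)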

# Logistic regression: Disorder ~ age + gender + country

In [109]:
# Model frame for Disorder ~ age + gender + country: the outcome and its three
# predictors, taken from data2 before incomplete rows are removed.

c2 = data2.loc[:, ['current_disorder_n', 'age', 'gender_n', 'location_US']]
c2.shape

(3592, 4)

In [110]:
# Complete cases only: drop every row that has a missing value in any column.
# The result is reassigned rather than dropped with inplace=True, so the frame
# derived from data2 is never mutated in place; re-running this cell is harmless.
c2 = c2.dropna(axis=0, how='any')
c2.shape

(1827, 4)

In [112]:
# current disorder ~ age, gender, country
# Main effects only (no interaction terms).

m2 = smf.mnlogit(
    formula='current_disorder_n ~ age + gender_n + location_US',
    data=c2,
).fit()

Optimization terminated successfully.
         Current function value: 0.650828
         Iterations 5


In [113]:
# coefficient table and fit statistics for model m2
m2.summary()

0,1,2,3
Dep. Variable:,current_disorder_n,No. Observations:,1827.0
Model:,MNLogit,Df Residuals:,1823.0
Method:,MLE,Df Model:,3.0
Date:,"Thu, 04 Nov 2021",Pseudo R-squ.:,0.05652
Time:,17:07:39,Log-Likelihood:,-1189.1
converged:,True,LL-Null:,-1260.3
Covariance Type:,nonrobust,LLR p-value:,1.109e-30

current_disorder_n=1,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-0.4437,0.224,-1.979,0.048,-0.883,-0.004
age,-0.0066,0.006,-1.077,0.282,-0.019,0.005
gender_n,0.6583,0.11,5.962,0.0,0.442,0.875
location_US,0.9715,0.105,9.29,0.0,0.767,1.176


In [114]:
# exponentiate the fitted coefficients (log-odds scale) to obtain odds ratios
pd.DataFrame(np.exp(m2.params))

Unnamed: 0,0
Intercept,0.641652
age,0.993425
gender_n,1.931594
location_US,2.64178


In [116]:
# exponentiate the confidence-interval bounds so they are on the same
# (odds-ratio) scale as the exponentiated coefficients
pd.DataFrame(np.exp(m2.conf_int()))

Unnamed: 0_level_0,Unnamed: 1_level_0,lower,upper
current_disorder_n,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,Intercept,0.413482,0.995734
1,age,0.981564,1.005429
1,gender_n,1.555669,2.39836
1,location_US,2.15224,3.242669


### Interpretation

- No evidence of age affecting the odds of having a current disorder (OR ≈ 0.99 per year of age, p = 0.282)
- As before, there is an effect of gender (female = higher odds, OR ≈ 1.93)
- Country of residence has an even bigger effect than gender (US = higher odds than non-US location, OR ≈ 2.64)

# Linear regression: Perceived industry support ~ work benefits + employer formally discussed MH + employer provided MH resources

In [126]:
# Model frame for the linear regression: perceived industry support plus the three
# recoded employer indicators, taken from data2 before incomplete rows are removed.
c3 = data2.loc[:, ['industry_support', 'employer_benefits_n', 'employer_discussed_n', 'employer_resources_n']]
c3.shape

(3592, 4)

In [127]:
# Complete cases only: drop every row that has a missing value in any column.
# The result is reassigned rather than dropped with inplace=True, so the frame
# derived from data2 is never mutated in place; re-running this cell is harmless.
c3 = c3.dropna(axis=0, how='any')
c3.shape

(736, 4)

In [129]:
# Ordinary least squares: industry support ~ employer benefits + employer formally
# discussed mental health + employer-provided resources (main effects only).

m3 = smf.ols(
    formula='industry_support ~ employer_benefits_n + employer_discussed_n + employer_resources_n',
    data=c3,
).fit()

In [130]:
# coefficient table, fit statistics and residual diagnostics for model m3
m3.summary()

0,1,2,3
Dep. Variable:,industry_support,R-squared:,0.057
Model:,OLS,Adj. R-squared:,0.053
Method:,Least Squares,F-statistic:,14.82
Date:,"Thu, 04 Nov 2021",Prob (F-statistic):,2.25e-09
Time:,17:33:43,Log-Likelihood:,-975.73
No. Observations:,736,AIC:,1959.0
Df Residuals:,732,BIC:,1978.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,2.2660,0.069,32.690,0.000,2.130,2.402
employer_benefits_n,0.2314,0.089,2.599,0.010,0.057,0.406
employer_discussed_n,0.3361,0.087,3.874,0.000,0.166,0.506
employer_resources_n,0.0128,0.091,0.141,0.888,-0.166,0.192

0,1,2,3
Omnibus:,6.26,Durbin-Watson:,1.977
Prob(Omnibus):,0.044,Jarque-Bera (JB):,4.978
Skew:,0.102,Prob(JB):,0.083
Kurtosis:,2.652,Cond. No.,5.14


### Interpretation

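- Employees who have mental health benefits at work and whose employers have formally discussed mental health also rate the industry's support for employees with mental health issues somewhat higher (about +0.23 and +0.34 points respectively), although the model explains little of the variance (R² ≈ 0.06); employer-provided resources show no additional association (p = 0.888)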