# Import Required Libraries and Dependencies and Read In the Initial Dataset

In [2]:
# Import necessary libraries and components

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
import patsy as pt
from scipy import stats
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from scipy.stats import chi2_contingency

In [None]:
# Load the initial dataset from its hosted CSV into a DataFrame,
# then preview the first rows to check that it parsed as expected.

DATA_URL = "https://img1.wsimg.com/blobby/go/95a603d1-9621-42dd-ac5a-bc61ff2699b4/downloads/46e78d8b-3941-4da9-a204-93f612493201/data.csv?ver=1758495217438"

tech_df = pd.read_csv(DATA_URL)
tech_df.head()

In [None]:
# Summarize every column rather than only the numeric ones. On a frame that
# mixes numeric and string columns, a bare describe() reports the numeric
# columns alone; include="all" adds count / unique / top / freq for the
# string-valued variables (e.g. mental_health, tech_company) as well.
tech_df.describe(include="all")

### Description of Initial Dataset
The dataset contains 1,242 rows, each with 11 variables:
- tech_company
- benefits
- workplace_resources
- mh_employer_discussion
- mh_coworker_discussion
- medical_coverage
- mental_health
- mh_share
- age
- gender
- country

In [None]:
# Print the frequency of each distinct value for every variable of interest.
# dropna=False keeps missing entries as their own NaN row, so any gaps in a
# column show up here instead of being silently left out of the counts.

columns_to_count = [
    'tech_company', 'benefits', 'workplace_resources',
    'mh_employer_discussion', 'mh_coworker_discussion', 'medical_coverage',
    'mental_health', 'mh_share', 'age', 'gender', 'country',
]

for column in columns_to_count:
    print(f"Value counts for column '{column}':")
    print(tech_df[column].value_counts(dropna=False))
    print("\n")


# Exploratory Data Analysis

In [None]:
# Step 2: Exploratory Data Analysis (EDA)
# --------------------------------------------------

# Count of rows in each mental_health category across all respondents,
# with the "Yes" bar drawn before the "No" bar.
ax = sns.countplot(x="mental_health", data=tech_df, order=["Yes", "No"])
ax.set_title("Reported Mental Health Issues - All Respondents")
plt.show()

In [None]:
# Same mental_health bar chart, restricted to the rows whose tech_company
# value is "Yes".

ax = sns.countplot(
    x="mental_health",
    data=tech_df.loc[tech_df["tech_company"].eq("Yes")],
    order=["Yes", "No"],
)
ax.set_title("Reported Mental Health Issues - Tech Respondents")
plt.show()


In [None]:
# Stacked bar: share of each mental_health answer within every benefits group.
# normalize="index" rescales each row of the cross-tabulation to sum to 1,
# so the bars show proportions rather than raw counts.
ct = pd.crosstab(
    index=tech_df["benefits"],
    columns=tech_df["mental_health"],
    normalize="index",
)
ax = ct.plot.bar(stacked=True, figsize=(6, 4), colormap="viridis")
ax.set_title("Mental Health by Benefits")
ax.set_ylabel("Proportion")
plt.show()

In [None]:
# Stacked bar: share of each mental_health answer within every
# workplace_resources group. normalize="index" rescales each row of the
# cross-tabulation to sum to 1, so the bars show proportions.
ct2 = pd.crosstab(
    index=tech_df["workplace_resources"],
    columns=tech_df["mental_health"],
    normalize="index",
)
ax = ct2.plot.bar(stacked=True, figsize=(6, 4), colormap="plasma")
ax.set_title("Mental Health by Workplace Resources")
ax.set_ylabel("Proportion")
plt.show()

In [None]:
# Boxplot: distribution of age for each mental_health answer ("Yes" first).
# NOTE(review): newer seaborn releases reportedly warn when `palette` is
# passed without `hue` -- confirm against the installed seaborn version.
ax = sns.boxplot(
    x="mental_health",
    y="age",
    data=tech_df,
    order=["Yes", "No"],
    palette="Set2",
)
ax.set_title("Age Distribution by Mental Health")
plt.show()