# The Silent Struggle: Mental Health Data Analysis
Analysis of the 2014-2015 Global Mental Health Survey.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Ensure plots display inline
%matplotlib inline

# Apply matplotlib's 'ggplot' style sheet so every figure created below shares it
plt.style.use('ggplot')

## Load Data

In [None]:
# Read the survey responses from the CSV file (path is relative to the
# notebook's working directory).
df = pd.read_csv(filepath_or_buffer="Mental Health Dataset.csv")
# Last expression of the cell: the notebook renders the first five rows.
df.head(5)

## 1. Snapshot: Growing Stress

In [None]:
# Share of all survey rows whose Growing_Stress answer is exactly 'Yes'.
# Rows with any other answer, or a missing one, still count in the denominator.
is_stressed = df['Growing_Stress'].eq('Yes')
stress_count = int(is_stressed.sum())
total_count = df.shape[0]
percentage = stress_count / total_count * 100

print(f"Percentage of people reporting Growing Stress: {percentage:.1f}%")

## 2. The Weight of Work: Occupation vs Stress

In [None]:
# Percentage of respondents in each occupation who answered 'Yes' to
# Growing_Stress, sorted ascending for the horizontal bar chart.
total_data = df['Occupation'].value_counts()
occ_data = (
    df.loc[df['Growing_Stress'] == 'Yes', 'Occupation']
    .value_counts()
    # An occupation with no 'Yes' answers is absent from these counts; without
    # this reindex the index-aligned division below would give NaN, not 0%.
    .reindex(total_data.index, fill_value=0)
)
stress_ratio = (occ_data / total_data * 100).sort_values()

# Plot: Series.plot draws on the current figure created just above.
plt.figure(figsize=(10, 6))
stress_ratio.plot(kind='barh', color='skyblue')
plt.title('Percentage of Growing Stress by Occupation')
plt.xlabel('Percentage')
plt.ylabel('Occupation')
plt.show()

## 3. The Age of Anxiety: Age vs Stress

In [None]:
# Compare the Age distribution of respondents who answered 'Yes' vs 'No' to
# Growing_Stress. Rows missing either column are dropped first; any answer
# other than 'Yes'/'No' is left out of both groups.
age_df = df.dropna(subset=['Age', 'Growing_Stress'])
age_yes = age_df[age_df['Growing_Stress'] == 'Yes']['Age']
age_no = age_df[age_df['Growing_Stress'] == 'No']['Age']

# Plot Boxplot
plt.figure(figsize=(10, 6))
plt.boxplot([age_yes, age_no])
# Boxes sit at x positions 1..N by default. The labels are set through xticks
# instead of boxplot(labels=...) because that keyword was renamed to
# tick_labels in Matplotlib 3.9 and the old spelling is deprecated; xticks
# works on both older and newer releases.
plt.xticks([1, 2], ['Growing Stress: Yes', 'Growing Stress: No'])
plt.title('Age Distribution by Stress Levels')
plt.ylabel('Age')
plt.grid(True)
plt.show()

## 4. The Geography of Pain: Global Mental Health History

In [None]:
# Percentage of respondents per country who answered 'Yes' to
# Mental_Health_History, drawn as a lollipop chart.

# Keep only countries with more than 20 responses.
counts = df['Country'].value_counts()
sig_countries = counts[counts > 20].index
df_sig = df[df['Country'].isin(sig_countries)]

# Calculate percentage of Mental Health History = Yes
total_counts = df_sig['Country'].value_counts()
history_counts = (
    df_sig.loc[df_sig['Mental_Health_History'] == 'Yes', 'Country']
    .value_counts()
    # A country with no 'Yes' rows is absent from these counts; reindexing
    # with 0 keeps it at 0% instead of NaN after the aligned division.
    .reindex(total_counts.index, fill_value=0)
)
history_ratio = (history_counts / total_counts * 100).sort_values()

# Plot Scatter (Lollipop-like): a dot per country plus a stem from x=0.
plt.figure(figsize=(10, 8))
plt.scatter(history_ratio.values, history_ratio.index, color='green', s=100)
plt.hlines(y=history_ratio.index, xmin=0, xmax=history_ratio.values, alpha=0.5)
# Add text annotations for each point. On a categorical axis the labels are
# placed at 0..n-1 in the order they were first plotted, so the enumerate
# position is the y coordinate. The loop variable is deliberately not called
# `percentage`, which would overwrite the notebook-level value computed in the
# Growing Stress snapshot cell.
for y_pos, pct in enumerate(history_ratio.values):
    plt.text(pct + 1.5, y_pos, f'{pct:.1f}%', va='center', fontsize=9, color='#81B29A')
plt.title('Percentage with Mental Health History by Country')
plt.xlabel('Percentage')
plt.grid(True, linestyle="--", alpha=0.5)
plt.show()

## 5. The Habit Loop: Days Indoors vs Mood Swings

In [None]:
# Heatmap of Days_Indoors (rows) against Mood_Swings (columns). Each column is
# normalised to 100%, so a cell is the share of respondents with that mood
# level who reported that Days_Indoors answer.

# Define Categories order
mood_order = ['Low', 'Medium', 'High']
indoors_order = ['1-14 days', '15-30 days', '31-60 days', 'More than 2 months', 'Go out Every day']

# Prepare Data: values not listed in the category orders become missing.
df['Mood_Swings'] = pd.Categorical(df['Mood_Swings'], categories=mood_order, ordered=True)
df['Days_Indoors'] = pd.Categorical(df['Days_Indoors'], categories=indoors_order, ordered=True)

heatmap_data = (
    pd.crosstab(df['Days_Indoors'], df['Mood_Swings'], normalize='columns') * 100
    # crosstab drops categories that never occur, so force the full grid:
    # the tick labels and the annotation loop below assume one row per
    # indoors_order entry and one column per mood_order entry.
).reindex(index=indoors_order, columns=mood_order, fill_value=0)

cell_values = heatmap_data.to_numpy()
# imshow scales colours to the data range, so the light/dark text switch is
# taken at the midpoint of that range rather than at a fixed 50.
contrast_cutoff = (cell_values.min() + cell_values.max()) / 2

# Plot Heatmap
plt.figure(figsize=(8, 6))
plt.imshow(cell_values, cmap='Blues', aspect='auto')
plt.colorbar(label='Percentage')
# The 'ggplot' style sheet turns axes grids on; the lines would cut through
# the heatmap cells, so switch them off for this figure.
plt.grid(False)
plt.xticks(range(len(mood_order)), mood_order)
plt.yticks(range(len(indoors_order)), indoors_order)
plt.title('Heatmap: Days Indoors vs Mood Swings')
plt.xlabel('Mood Swings')
plt.ylabel('Days Indoors')

# Add text annotations: the value of each cell, centred on the cell.
for i, row_values in enumerate(cell_values):
    for j, val in enumerate(row_values):
        plt.text(j, i, f'{val:.0f}%', ha='center', va='center',
                 color='white' if val > contrast_cutoff else 'black')

plt.show()

## 6. The Coping Mechanism: Coping Struggles

In [None]:
# Pie chart of how often each Coping_Struggles answer occurs.
coping_counts = df['Coping_Struggles'].value_counts()

fig, ax = plt.subplots(figsize=(6, 6))
ax.pie(
    coping_counts,
    labels=coping_counts.index,
    autopct='%1.1f%%',
    startangle=90,
    colors=['salmon', 'lightgray'],
)
ax.set_title('Coping Struggles Distribution')
plt.show()

## 7. Hidden Battles: Gender vs Seeking Treatment

In [None]:
# Collapse Gender into three buckets: 'Male' and 'Female' are kept as-is and
# every other value (including a missing one) becomes 'Non-Binary/Other'.
gender = df['Gender']
df['Gender_Group'] = gender.where(gender.isin(['Male', 'Female']), 'Non-Binary/Other')

# Row-normalised table: within each gender group, the percentage of each
# `treatment` answer.
cross_tab = pd.crosstab(df['Gender_Group'], df['treatment'], normalize='index').mul(100)

ax = cross_tab.plot.barh(stacked=True, figsize=(10, 5), color=['lightgray', 'salmon'])
ax.set_title('Proportion Seeking Treatment by Gender')
ax.set_xlabel('Percentage')
ax.set_ylabel('Gender')
# Anchor the legend just outside the right edge of the axes.
ax.legend(title='Treatment Sought', loc='center left', bbox_to_anchor=(1, 0.5))
plt.show()

## 8. Symptom Cluster: Habits, Work, and Social

In [None]:
# Grouped bar chart: for each of the three symptom columns, the percentage of
# respondents giving each answer.
cols = ['Changes_Habits', 'Work_Interest', 'Social_Weakness']
data_dict = {name: df[name].value_counts(normalize=True) * 100 for name in cols}

# One row per symptom column, one column per answer; an answer that never
# occurs for a given symptom is shown as 0.
symptom_df = pd.DataFrame(data_dict).fillna(0).transpose()

ax = symptom_df.plot.bar(figsize=(10, 6))
ax.set_title('Impact on Habits, Work Interest, and Social Weakness')
ax.set_xlabel('Category')
ax.set_ylabel('Percentage')
ax.legend(title='Response')
plt.xticks(rotation=0)
plt.show()

## 9. Systemic Factors

In [None]:
# Three side-by-side pie charts, one per (column, panel title) pair below,
# each showing the answer shares for that column.
factors = [
    ('care_options', 'Care Options'),
    ('family_history', 'Family History'),
    ('mental_health_interview', 'Open to Interview'),
]

fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(15, 5))

for ax, (col, title) in zip(axes, factors):
    counts = df[col].value_counts(normalize=True)
    ax.pie(counts, labels=counts.index, autopct='%1.0f%%', startangle=90)
    ax.set_title(title)

fig.tight_layout()
plt.show()