# mental health trends - exploratory analysis

quick exploration of search patterns and what they reveal

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (14, 6)

%load_ext autoreload
%autoreload 2

## load data

In [None]:
# read the processed trends table; the first csv column becomes the index
# and pandas is asked to parse it as dates
df = pd.read_csv(
    '../data/processed/clean_trends.csv',
    parse_dates=True,
    index_col=0,
)

# sanity check: table size plus the first and last index labels
first_label, last_label = df.index[0], df.index[-1]
print(f"shape: {df.shape}")
print(f"date range: {first_label} to {last_label}")
df.head()

## basic trends

In [None]:
# search terms tracked throughout the notebook (columns of df)
terms = ['depression', 'anxiety', 'therapy', 'burnout']

# one line per term on a shared axis
fig, ax = plt.subplots()
for term in terms:
    series = df[term]
    ax.plot(series.index, series, label=term, linewidth=2)

# dashed vertical marker at 2020-03-11, labelled 'covid declared'
covid_marker = pd.Timestamp('2020-03-11')
ax.axvline(covid_marker, color='red', linestyle='--', alpha=0.5, label='covid declared')

ax.set(
    title='mental health search trends',
    xlabel='date',
    ylabel='search interest',
)
ax.legend()
fig.tight_layout()
plt.show()

## covid impact analysis

In [None]:
# compare average interest pre-covid vs during covid
#
# the two periods must not share rows. label slices like
# df.loc[:'2020-03-01'] and df.loc['2020-03-01':] both include the boundary
# date, so an observation dated 2020-03-01 would be averaged into both
# periods. boolean masks on the index give a clean split, with the boundary
# date assigned to the "during" period.
# NOTE(review): the trend plot marks covid at 2020-03-11 while this split
# uses 2020-03-01 - confirm which cutoff is intended.
# assumes df has a timezone-naive DatetimeIndex - TODO confirm
split_date = pd.Timestamp('2020-03-01')
pre_covid = df.loc[df.index < split_date]
during_covid = df.loc[df.index >= split_date]

# per-term mean interest in each period
comparison = pd.DataFrame({
    'pre_covid': pre_covid[terms].mean(),
    'during_covid': during_covid[terms].mean(),
})

# relative change of the during-covid mean vs the pre-covid mean, in percent
comparison['change_%'] = (
    (comparison['during_covid'] - comparison['pre_covid'])
    / comparison['pre_covid'] * 100
)

print(comparison)

# visualize the two period means side by side (change_% is left out of the bars)
comparison[['pre_covid', 'during_covid']].plot(kind='bar', rot=45)
plt.title('average search interest: pre vs during covid')
plt.ylabel('interest score')
plt.tight_layout()
plt.show()

## correlation analysis

In [None]:
# pairwise correlation between the tracked terms (DataFrame.corr defaults)
corr_matrix = df.loc[:, terms].corr()

# diverging colormap centred on 0, with the coefficient written in each cell
heatmap_options = {
    'annot': True,
    'cmap': 'coolwarm',
    'center': 0,
    'square': True,
    'linewidths': 1,
    'cbar_kws': {"shrink": 0.8},
}

plt.figure(figsize=(8, 6))
sns.heatmap(corr_matrix, **heatmap_options)
plt.title('correlation between mental health terms')
plt.tight_layout()
plt.show()

## seasonal patterns

In [None]:
# average interest per calendar month (1-12), pooled over every year in df
df_monthly = df.assign(month=df.index.month)
monthly_avg = df_monthly.groupby('month')[terms].mean()

# 2x2 grid flattened to 1-d so panels can be picked by position
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
axes = axes.flatten()

for idx, term in enumerate(terms):
    panel = axes[idx]
    panel.plot(monthly_avg.index, monthly_avg[term], marker='o', linewidth=2)
    panel.set(
        title=f'{term} - seasonal pattern',
        xlabel='month',
        ylabel='average interest',
    )
    panel.set_xticks(range(1, 13))  # one tick per calendar month
    panel.grid(alpha=0.3)

fig.tight_layout()
plt.show()

## trend analysis

In [None]:
# yearly averages per term, to eyeball the year-over-year movement
#
# group on the calendar year of the index instead of resample('Y'): the 'Y'
# offset alias is deprecated since pandas 2.2 (in favour of 'YE'), while
# grouping by year runs on any pandas version and gives a year-labelled
# index directly. years with no rows are simply absent from the result.
# NOTE(review): the first and last year may be partial, which would skew
# their averages - confirm against the date range printed at load time.
df_yearly = df[terms].groupby(df.index.year).mean()

print("yearly averages:")
print(df_yearly)

df_yearly.plot(kind='line', marker='o', linewidth=2)
plt.title('yearly average trends')
plt.xlabel('year')
plt.ylabel('average interest')
plt.legend(title='term')
plt.grid(alpha=0.3)
plt.tight_layout()
plt.show()

## distribution analysis

In [None]:
# one box per term to compare the spread of the interest scores
fig, ax = plt.subplots(figsize=(10, 6))
df.loc[:, terms].boxplot(ax=ax)
ax.set(
    title='distribution of search interest',
    ylabel='interest score',
)
plt.xticks(rotation=45)
fig.tight_layout()
plt.show()

## rolling averages

In [None]:
# overlay rolling means on the raw series to smooth out noise
# (the "-week" labels presume one row per week - TODO confirm)
term = 'anxiety'
raw_series = df[term]

plt.figure(figsize=(14, 6))
plt.plot(df.index, raw_series, alpha=0.3, label='raw')
for window in (4, 12):
    smoothed = raw_series.rolling(window).mean()
    plt.plot(df.index, smoothed, linewidth=2, label=f'{window}-week avg')

plt.title(f'{term} - rolling averages')
plt.xlabel('date')
plt.ylabel('interest')
plt.legend()
plt.tight_layout()
plt.show()

## key insights

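- searches for all tracked mental health terms spiked sharply around the covid declaration (march 2020); the timing points to covid, though this analysis shows association rather than causation
- burnout has been rising steadily, even before the pandemic
- strong seasonal pattern: searches peak in the winter months
- depression and anxiety are highly correlated (r > 0.8)
- search interest hasn't returned to pre-covid levels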