
# Alternative Analysis: Real-World Data for Cleveland Recruitment

This notebook consumes the cleaned datasets from `data_loading_script.ipynb` to build persuasive visuals for STEM graduates considering Cleveland.


In [None]:

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

# Use seaborn's whitegrid theme for every figure drawn below.
sns.set_theme(style="whitegrid")

# Directory holding the cached CSVs; relative to the kernel's working directory.
DATA_DIR = Path("data_cache")

# Presumably written by data_loading_script.ipynb -- verify that notebook has
# been run first if this read fails.
master_csv = DATA_DIR.joinpath("master_real_world_metrics.csv")
master = pd.read_csv(master_csv)

# Echo the column names so the reader can see which metrics made it into the file.
print(f"Loaded master dataset with columns: {list(master.columns)}")
master.head()



## Cost-adjusted STEM wages vs rent burden
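We approximate rent burden as annualized median gross rent (median gross rent × 12) divided by a STEM wage proxy, expressed as a percentage. The wage proxy is the median annual wage across whichever of the tracked STEM occupations (computer, engineering, and life sciences buckets) are present in the master dataset.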


In [None]:

# Occupation wage columns that feed the STEM wage proxy.
stem_columns = [
    'Software Developers',
    'Electrical Engineers',
    'Biomedical Engineers',
    'Clinical Lab Technologists',
]

# Keep only the occupations that actually exist as columns in `master`;
# stop early if none of them do, since the proxy would be undefined.
available_stem_cols = [c for c in stem_columns if c in master.columns]
if len(available_stem_cols) == 0:
    raise ValueError("No STEM wage columns available in master dataset")

# Row-wise median across the available occupation wage columns.
master['stem_wage_proxy'] = master[available_stem_cols].median(axis=1)
# Rent scaled by 12 -- assumes `median_gross_rent` is a monthly figure (TODO confirm).
master['annual_rent'] = master['median_gross_rent'].mul(12)
# Annual rent as a percentage of the wage proxy.
master['rent_burden_pct'] = master['annual_rent'].div(master['stem_wage_proxy']).mul(100)

fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(
    data=master,
    x='unemployment_rate_2023',
    y='rent_burden_pct',
    hue='area_name',
    palette='tab10',
    s=120,
    ax=ax,
)
# Reference line drawn before the legend is built so its label is included.
ax.axhline(30, linestyle='--', color='red', label='30% affordability threshold')
ax.set(
    xlabel='State Unemployment Rate (2023 avg)',
    ylabel='Rent as % of Median STEM Wage',
    title='Cleveland Combines Low Rent Burden with Tight Labor Markets',
)
# Park the legend outside the plotting area, to the right of the axes.
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.show()



## Wage benchmarking by occupation
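Direct comparison of annual median wages for key STEM tracks, plus Management Analysts, across the areas in the master dataset. Occupations missing from the dataset are skipped.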


In [None]:

# Occupation wage columns to benchmark across areas.
value_vars = [
    'Software Developers',
    'Electrical Engineers',
    'Biomedical Engineers',
    'Clinical Lab Technologists',
    'Management Analysts',
]
# Keep only the occupations that are present as columns in `master`.
value_vars = [c for c in value_vars if c in master.columns]
# Fail loudly instead of melting nothing and drawing an empty chart; this
# mirrors the guard used for the STEM wage proxy in the rent-burden cell.
if not value_vars:
    raise ValueError("No occupation wage columns available in master dataset")

# Reshape wide -> long: one row per (area, occupation) pair with its wage.
long = master.melt(
    id_vars=['area_name'],
    value_vars=value_vars,
    var_name='occupation',
    value_name='median_wage',
)

# Explicit figure/axes so the plot does not depend on pyplot's "current axes".
wage_fig, wage_ax = plt.subplots(figsize=(12, 6))
sns.barplot(data=long, x='occupation', y='median_wage', hue='area_name', ax=wage_ax)
plt.xticks(rotation=30, ha='right')
wage_ax.set_ylabel('Annual median wage (USD)')
wage_ax.set_title('STEM Wage Benchmarks by Metro (OEWS 2023)')
plt.tight_layout()
plt.show()



## Earnings power vs rent
Another lens: purchasing power after housing, computed for each area as the STEM wage proxy minus annualized median gross rent.


In [None]:

# Wage proxy left over after a year of rent. Relies on the `stem_wage_proxy`
# and `annual_rent` columns added to `master` by the rent-burden cell above,
# so that cell must have been run first.
master['net_after_rent'] = master['stem_wage_proxy'] - master['annual_rent']

net_fig, net_ax = plt.subplots(figsize=(10, 6))
sns.barplot(
    data=master.sort_values('net_after_rent', ascending=False),
    x='area_name',
    y='net_after_rent',
    # seaborn 0.13 deprecates `palette` without `hue` (to be removed in 0.14).
    # Mapping hue to the x variable keeps the same per-bar colours, and
    # dodge=False keeps full-width bars on older seaborn releases too.
    hue='area_name',
    dodge=False,
    palette='crest',
    ax=net_ax,
)
# hue duplicates the x axis, so a legend adds nothing; drop it if one was drawn.
if net_ax.get_legend() is not None:
    net_ax.get_legend().remove()
plt.xticks(rotation=40, ha='right')
net_ax.set_ylabel('Annual STEM Wage minus Rent (USD)')
net_ax.set_title('Take-Home Advantage After Housing Costs')
plt.tight_layout()
plt.show()
