In [2]:
import pandas as pd 

In [5]:
# Load the MSA-level dataset, keeping the untouched frame so the trimming step
# below can be re-run (or revisited) without re-reading the file.
msa_raw = pd.read_csv("../data/the_rise_of_healthcare_jobs_disclosed_data_by_msa.csv")

# Drop the first 33 rows and renumber the index from zero.
# NOTE(review): the reason the leading 33 rows are excluded is not recorded
# anywhere in this notebook — confirm and document it.
df = msa_raw.iloc[33:].reset_index(drop=True)

# Preview the trimmed frame. As the last expression of the cell it is actually
# rendered; a bare `df.head()` in the middle of a cell is evaluated and discarded.
df.head()



In [7]:
import altair as alt


In [8]:

# Horizontal bar chart of the ten rows of `df` with the largest
# 'healthcare_share_prime2022' values.
top10 = df.nlargest(10, 'healthcare_share_prime2022').copy()

alt.Chart(top10).mark_bar(cornerRadiusTopLeft=3, cornerRadiusTopRight=3).encode(
    y=alt.Y(
        'metro_title:N',
        sort='-x',    # order bars by descending x value
        title=None,   # no axis title; the labels are the MSA names
        axis=alt.Axis(
            labelFontSize=13,
            labelLimit=350
        )
    ),
    x=alt.X(
        'healthcare_share_prime2022:Q',
        title='Healthcare Employment Share (2022)',
        axis=alt.Axis(
            format='.0%',
            labelFontSize=12,
            titleFontSize=14
        )
    ),
    color=alt.Color(
        'healthcare_share_prime2022:Q',
        scale=alt.Scale(range=['#ecd2c2', '#a05252', '#800000']),
        legend=None
    ),
    # Titled, percent-formatted tooltips so hover values match the axis
    # formatting and the tooltips used by the other charts in this notebook.
    tooltip=[
        alt.Tooltip('metro_title:N', title='MSA'),
        alt.Tooltip('healthcare_share_prime2022:Q', title='Healthcare Share', format='.2%')
    ]
).properties(
    title='Top 10 MSAs by Healthcare Employment Share (2022)',
    width=720,
    height=500
).configure_title(
    fontSize=18,
    font='Lato',
    anchor='start'
).configure_axis(
    labelFont='Lato',
    titleFont='Lato',
    grid=False   # applies to both axes, so no per-axis grid setting is needed
).configure_view(
    strokeWidth=0
)


In [9]:

# Lollipop chart of the ten rows of `df` with the largest
# 'hc_emp_share_prime_change' values.

# Step 1: Prepare data
# `nlargest` returns the rows already ordered from largest to smallest change.
# `assign` adds the constant baseline column without mutating a frame in place.
top10 = df.nlargest(10, 'hc_emp_share_prime_change').assign(zero=0)

# Explicit left-to-right category order, taken from the sorted rows above.
# An explicit list is used instead of sort='-y' because the stem layer below
# overrides `y` with the constant 'zero' field: '-y' would then mean a different
# sort in each layer, and the shared x scale could not be relied on to keep the
# descending-by-value order.
msa_order = top10['metro_title'].tolist()

# Step 2: Base chart
base = alt.Chart(top10).encode(
    x=alt.X(
        'metro_title:N',
        sort=msa_order,
        axis=alt.Axis(
            labelAngle=-30,
            labelFontSize=10,
            labelLimit=250,
            title=None
        )
    ),
    y=alt.Y(
        'hc_emp_share_prime_change:Q',
        title='Increase in Healthcare Employment Share (1980–2022)',
        axis=alt.Axis(format='.1%', labelFontSize=11, titleFontSize=13, grid=False)
    )
)

# Step 3: Lollipop stems — a rule from the zero baseline up to the value
stems = base.mark_rule(stroke='#a05252', strokeWidth=2).encode(
    y='zero:Q',
    y2='hc_emp_share_prime_change:Q'
)

# Step 4: Circle heads
dots = base.mark_circle(size=120, color='#800000').encode(
    tooltip=[
        alt.Tooltip('metro_title:N', title='MSA'),
        alt.Tooltip('hc_emp_share_prime_change:Q', title='Change (%)', format='.2%')
    ]
)

# Step 5: Combine and style
chart = (stems + dots).properties(
    title='Top 10 MSAs with the Largest Increase in Healthcare Employment Share (1980–2022)',
    width=720,
    height=450
).configure_title(
    fontSize=18,
    font='Lato',
    anchor='start'
).configure_axis(
    labelFont='Lato',
    titleFont='Lato',
    grid=False
).configure_view(
    strokeWidth=0
)

chart


In [10]:

# Scatter plot over all rows of `df`: manufacturing share change on x,
# healthcare share change on y, both shown as percentages.

# Axis encodings
manu_x = alt.X(
    'manu_share_prime_change:Q',
    title='Change in Manufacturing Employment Share (1980–2022)',
    axis=alt.Axis(format='.1%', labelFontSize=11, titleFontSize=13),
)
hc_y = alt.Y(
    'hc_emp_share_prime_change:Q',
    title='Change in Healthcare Employment Share (1980–2022)',
    axis=alt.Axis(format='.1%', labelFontSize=11, titleFontSize=13),
)

# Hover details: MSA name plus both plotted values
manu_tooltips = [
    alt.Tooltip('metro_title:N', title='MSA'),
    alt.Tooltip('hc_emp_share_prime_change:Q', title='Healthcare Change (%)', format='.2%'),
    alt.Tooltip('manu_share_prime_change:Q', title='Manufacturing Change (%)', format='.2%'),
]

# Marks, then sizing and notebook-wide look (Lato fonts, gridlines on, no view border)
manu_points = alt.Chart(df).mark_circle(size=80, color='#800000', opacity=0.8)
chart = (
    manu_points
    .encode(x=manu_x, y=hc_y, tooltip=manu_tooltips)
    .properties(
        title='Healthcare Employment Growth vs Manufacturing Employment Decline (1980–2022)',
        width=720,
        height=500,
    )
    .configure_title(font='Lato', fontSize=18, anchor='start')
    .configure_axis(labelFont='Lato', titleFont='Lato', grid=True)
    .configure_view(strokeWidth=0)
)

chart


In [11]:

# Scatter plot over all rows of `df`: change in college share on x,
# healthcare share change on y, both shown as percentages.

# Axis encodings
college_x = alt.X(
    'change_college:Q',
    title='Change in % College Educated (1980–2022)',
    axis=alt.Axis(format='.1%', labelFontSize=11, titleFontSize=13),
)
hc_growth_y = alt.Y(
    'hc_emp_share_prime_change:Q',
    title='Change in Healthcare Employment Share (1980–2022)',
    axis=alt.Axis(format='.1%', labelFontSize=11, titleFontSize=13),
)

# Hover details: MSA name plus both plotted values
college_tooltips = [
    alt.Tooltip('metro_title:N', title='MSA'),
    alt.Tooltip('change_college:Q', title='Change in College (%)', format='.2%'),
    alt.Tooltip('hc_emp_share_prime_change:Q', title='Change in Healthcare (%)', format='.2%'),
]

# Marks, then sizing and notebook-wide look (Lato fonts, gridlines on, no view border)
college_points = alt.Chart(df).mark_circle(size=80, color='#1E88E5', opacity=0.75)
chart = (
    college_points
    .encode(x=college_x, y=hc_growth_y, tooltip=college_tooltips)
    .properties(
        title='Education Growth vs Healthcare Employment Growth (1980–2022)',
        width=720,
        height=500,
    )
    .configure_title(font='Lato', fontSize=18, anchor='start')
    .configure_axis(labelFont='Lato', titleFont='Lato', grid=True)
    .configure_view(strokeWidth=0)
)

chart


In [12]:
# Bubble chart over all rows of `df`: 'change_earnings' on x,
# 'hc_emp_share_prime_change' on y, bubble size from 'ln_msa_pop2022'.
# Axis titles, tooltip labels and fonts follow the other scatter plots in this
# notebook instead of exposing raw column names.
# NOTE(review): the chart title says bubble size is population, but the size
# field is 'ln_msa_pop2022' — presumably log population; confirm and relabel
# the title/legend if so.
alt.Chart(df).mark_circle(opacity=0.7).encode(
    x=alt.X(
        'change_earnings:Q',
        # NOTE(review): the units of 'change_earnings' are not shown in this
        # notebook, so no number format is forced on this axis.
        title='Change in Earnings',
        axis=alt.Axis(labelFontSize=11, titleFontSize=13)
    ),
    y=alt.Y(
        'hc_emp_share_prime_change:Q',
        title='Change in Healthcare Employment Share (1980–2022)',
        axis=alt.Axis(format='.1%', labelFontSize=11, titleFontSize=13)
    ),
    size='ln_msa_pop2022:Q',
    color=alt.value('#004B87'),
    tooltip=[
        alt.Tooltip('metro_title:N', title='MSA'),
        alt.Tooltip('change_earnings:Q', title='Change in Earnings'),
        alt.Tooltip('hc_emp_share_prime_change:Q', title='Change in Healthcare (%)', format='.2%'),
        alt.Tooltip('ln_msa_pop2022:Q')
    ]
).properties(
    title='Earnings Growth vs Healthcare Employment Growth, Bubble Size = Population',
    width=720, height=500
).configure_title(
    font='Lato', fontSize=18, anchor='start'
).configure_axis(
    labelFont='Lato', titleFont='Lato', grid=True
).configure_view(
    strokeWidth=0
)


In [14]:
# Read the cleaned dataset, then show its column index as the cell output.
cleaned_data_path = "../data/Cleaned_data.csv"
cleaned_data = pd.read_csv(cleaned_data_path)

cleaned_data.columns

Index(['metro13', 'metro_title', 'low', 'high', 'AverageDTI',
       'healthcare_share_prime2022', 'change_ln_population',
       'not_lbfr_share_prime_change', 'unemployed_share_prime_change',
       'non_hc_share_prime_change', 'hc_emp_share_prime_change',
       'manu_share_prime_change', 'non_manu_share_prime_change',
       'manufacturing_share_prime1980', 'ln_msa_pop2022', 'ln_msa_pop1980',
       'medicare_share1980', 'medicare_share2022', 'change_medicare_share',
       'ln_aearn1980', 'ln_aearn2022', 'change_earnings', 'college1980',
       'college2022', 'change_college', 'Unnamed: 25',
       'healthcare_share_prime1980', 'change_non_hc_share_lbfr',
       'change_ln_non_hc', 'non_hc_manu_share_prime_change'],
      dtype='object')

In [15]:

# Scatter plot over all rows of `cleaned_data`: 'AverageDTI' on x,
# 'healthcare_share_prime2022' on y, both axes formatted as percentages.

# Axis encodings
dti_x = alt.X(
    'AverageDTI:Q',
    title='Average Debt-to-Income Ratio (Q4 2022)',
    axis=alt.Axis(format='.0%', labelFontSize=11, titleFontSize=13),
)
share_y = alt.Y(
    'healthcare_share_prime2022:Q',
    title='Healthcare Employment Share (2022)',
    axis=alt.Axis(format='.0%', labelFontSize=11, titleFontSize=13),
)

# Hover details: MSA name plus both plotted values
dti_tooltips = [
    alt.Tooltip('metro_title:N', title='MSA'),
    alt.Tooltip('AverageDTI:Q', title='Average DTI', format='.2%'),
    alt.Tooltip('healthcare_share_prime2022:Q', title='Healthcare Share', format='.2%'),
]

# Marks, then sizing and notebook-wide look (Lato fonts, gridlines on, no view border)
dti_points = alt.Chart(cleaned_data).mark_circle(size=80, color='#800000', opacity=0.75)
chart = (
    dti_points
    .encode(x=dti_x, y=share_y, tooltip=dti_tooltips)
    .properties(
        title='Average Debt-to-Income Ratio vs Healthcare Employment Share (2022)',
        width=720,
        height=500,
    )
    .configure_title(font='Lato', fontSize=18, anchor='start')
    .configure_axis(labelFont='Lato', titleFont='Lato', grid=True)
    .configure_view(strokeWidth=0)
)

chart
