
# Occupational Employment Change Tables (2024-2030)

This notebook summarizes occupational employment change between 2024 and 2030 using the outputs from `scripts/occupation_forecasts_from_segment_totals.py` (the processed file spans 2024-2034; only the 2024 and 2030 records are compared here). We explore:

- Segment-level employment change by methodology
- Education-level employment change across all segments
- Variation across methodologies to highlight areas of divergence


In [None]:

import pandas as pd
import numpy as np
from pathlib import Path
from typing import List

# Display settings: cap printed rows at 50, never truncate columns,
# and render floats with thousands separators and two decimals.
pd.set_option('display.max_rows', 50)
pd.set_option('display.max_columns', None)
pd.set_option('display.float_format', '{:,.2f}'.format)

# Input CSV, resolved relative to the kernel's working directory.
DATA_PATH = Path('..', 'data', 'processed', 'mi_occ_segment_totals_2024_2034.csv')


In [None]:

# Read the occupation forecasts (the file covers 2024-2034) and cast the
# methodology labels to plain strings so they compare and group consistently.
forecasts = pd.read_csv(DATA_PATH)
forecasts = forecasts.assign(methodology=forecasts['methodology'].astype(str))

# Keep only the two years being compared; copy so later edits never touch `forecasts`.
comparison_years = [2024, 2030]
year_mask = forecasts['year'].isin(comparison_years)
comparison_df = forecasts.loc[year_mask].copy()

methodology_labels = sorted(comparison_df['methodology'].unique())
print(f"Records considered: {len(comparison_df):,}")
print(f"Methodologies: {methodology_labels}")


In [None]:

def summarize_change(
    df: pd.DataFrame,
    group_cols: List[str],
    base_year: int = 2024,
    target_year: int = 2030,
) -> pd.DataFrame:
    '''Aggregate employment in a base year vs a target year and compute change metrics.

    Parameters
    ----------
    df : pd.DataFrame
        Long-format records with at least the columns `year`, `methodology`,
        `employment`, and every column named in `group_cols`.
    group_cols : list of str
        Columns to group by; `methodology` is always appended to the grouping.
    base_year : int, default 2024
        Year used as the starting point of the comparison.
    target_year : int, default 2030
        Year used as the end point of the comparison.

    Returns
    -------
    pd.DataFrame
        One row per (group_cols, methodology) combination present in either
        year, with columns `employment_<base_year>`, `employment_<target_year>`,
        `abs_change` (target minus base) and `pct_change` (percent of base,
        NaN when the base employment is not positive).

    Raises
    ------
    ValueError
        If `base_year` and `target_year` are the same.
    '''
    if base_year == target_year:
        # Identical years would produce colliding employment column names.
        raise ValueError('base_year and target_year must differ')

    keys = list(group_cols) + ['methodology']
    base_col = f'employment_{base_year}'
    target_col = f'employment_{target_year}'

    def _year_total(year: int, out_col: str) -> pd.DataFrame:
        # Total employment per grouping for a single year.
        return (
            df[df['year'] == year]
            .groupby(keys, as_index=False)['employment']
            .sum()
            .rename(columns={'employment': out_col})
        )

    # Outer join keeps groupings that exist in only one of the two years;
    # the missing side is treated as zero employment.
    summary = _year_total(base_year, base_col).merge(
        _year_total(target_year, target_col), on=keys, how='outer'
    )
    summary[[base_col, target_col]] = summary[[base_col, target_col]].fillna(0)

    summary['abs_change'] = summary[target_col] - summary[base_col]
    # Percent change is undefined without a positive base, so report NaN there.
    summary['pct_change'] = np.where(
        summary[base_col] > 0,
        summary['abs_change'] / summary[base_col] * 100,
        np.nan,
    )

    return summary


def compute_methodology_spread(summary: pd.DataFrame, key_cols: List[str]) -> pd.DataFrame:
    '''Identify methodology spread in absolute change for each grouping.

    Parameters
    ----------
    summary : pd.DataFrame
        Frame with the columns named in `key_cols` plus `methodology` and
        `abs_change` (one row per grouping and methodology).
    key_cols : list of str
        Columns that define a grouping.

    Returns
    -------
    pd.DataFrame
        One row per grouping (ordered by the grouping keys) with the columns
        `key_cols`, `method_min`, `min_abs_change`, `method_max`,
        `max_abs_change` and `spread_abs_change` (max minus min). Ties resolve
        to the first matching row in `summary` order.
    '''
    keys = list(key_cols)

    # Use a fresh positional index so every row label is unique; otherwise a
    # label lookup could match several rows at once.
    frame = summary.reset_index(drop=True)
    grouped_change = frame.groupby(keys)['abs_change']

    # Row labels of the smallest / largest absolute change within each grouping.
    # Both results are ordered by the grouping keys, so they align row-for-row.
    min_rows = frame.loc[grouped_change.idxmin().to_numpy()]
    max_rows = frame.loc[grouped_change.idxmax().to_numpy()]

    # Build the result column-by-column so numeric columns keep a numeric dtype
    # (a per-group apply returning mixed-type Series would not guarantee that).
    spread = min_rows[keys].reset_index(drop=True)
    spread['method_min'] = min_rows['methodology'].to_numpy()
    spread['min_abs_change'] = min_rows['abs_change'].to_numpy()
    spread['method_max'] = max_rows['methodology'].to_numpy()
    spread['max_abs_change'] = max_rows['abs_change'].to_numpy()
    spread['spread_abs_change'] = spread['max_abs_change'] - spread['min_abs_change']
    return spread



## Segment-Level Employment Change

Aggregated employment change by supply segment and methodology.


In [None]:

# Change metrics per segment and methodology, ordered for side-by-side reading.
segment_cols = ['segment_id', 'segment_name']
segment_summary = (
    summarize_change(comparison_df, segment_cols)
    .sort_values(['segment_id', 'methodology'])
)
segment_summary.head()


In [None]:

# Wide view of absolute change: one row per segment name, one column per methodology.
segment_abs_pivot = (
    segment_summary
    .pivot(index='segment_name', columns='methodology', values='abs_change')
    .sort_index()
)
segment_abs_pivot


In [None]:

# For each segment, which methodologies give the lowest and highest absolute
# change, ranked so the most divergent segments come first.
segment_spread = (
    compute_methodology_spread(segment_summary, ['segment_id', 'segment_name'])
    .sort_values('spread_abs_change', ascending=False)
)
segment_spread.head(15)



## Education-Level Employment Change (All Segments)

Summaries aggregated across all segments by education grouping.


In [None]:

# Change metrics per education grouping and methodology, pooled over all segments.
edu_cols = ['ep_edu_grouped']
edu_summary = (
    summarize_change(comparison_df, edu_cols)
    .sort_values(['ep_edu_grouped', 'methodology'])
)
edu_summary


In [None]:

# Wide view of absolute change: one row per education grouping, one column per methodology.
edu_abs_pivot = (
    edu_summary
    .pivot(index='ep_edu_grouped', columns='methodology', values='abs_change')
    .sort_index()
)
edu_abs_pivot


In [None]:

# Divergence between methodologies for each education grouping, largest spread first.
edu_spread = (
    compute_methodology_spread(edu_summary, ['ep_edu_grouped'])
    .sort_values('spread_abs_change', ascending=False)
)
edu_spread
