In [2]:
import pandas as pd

# Load the OD-level employment commute file
df = pd.read_csv("employment_commute_md_2022_JT00.csv",
                 dtype={"home_geo_id": str, "work_geo_id": str,
                        "home_zcta": str, "work_zcta": str})

# Drop rows where ZIPs are missing (optional, keeps data cleaner)
df = df.dropna(subset=["home_zcta", "work_zcta"])

# Aggregate by ZIP-to-ZIP (ZCTA-to-ZCTA) flows
zcta_summary = (
    df.groupby(["home_zcta", "work_zcta"], as_index=False)
      .agg({
          "commuter_count": "sum",
          "net_commute_flow": "sum"   # average net flow across home blocks
      })
)

# Add metadata
zcta_summary["data_year"] = 2022
zcta_summary["source"] = "LEHD LODES8 OD JT00 (ZIP aggregated)"

# Save ZIP-level commuter flows
zcta_summary.to_csv("employment_commute_md_2022_ZCTA1.csv", index=False)

print("✅ Saved: employment_commute_md_2022_ZCTA1.csv")
print(f"Rows: {len(zcta_summary):,}")
print(zcta_summary.head())


✅ Saved: employment_commute_md_2022_ZCTA1.csv
Rows: 82,157
  home_zcta work_zcta  commuter_count  net_commute_flow  data_year  \
0     19964     21012               1                -9       2022   
1     19964     21061               1                -9       2022   
2     19964     21231               1                -9       2022   
3     19964     21601               2               -18       2022   
4     19964     21607               1                -9       2022   

                                 source  
0  LEHD LODES8 OD JT00 (ZIP aggregated)  
1  LEHD LODES8 OD JT00 (ZIP aggregated)  
2  LEHD LODES8 OD JT00 (ZIP aggregated)  
3  LEHD LODES8 OD JT00 (ZIP aggregated)  
4  LEHD LODES8 OD JT00 (ZIP aggregated)  
