## Import Needed Libraries and Filepaths

In [16]:
from texas_gerrymandering_hb4.config import INTERIM_DATA_DIR, CLEAN_ELECTION_RESULTS, CLEAN_VTD_GEO
import pandas as pd
import geopandas as gpd

In [17]:
joined_parquet = INTERIM_DATA_DIR/"joined_vtd_data.parquet"

## Load Data

In [18]:
# Election Results
election_df = pd.read_csv(CLEAN_ELECTION_RESULTS)

In [19]:
# VTD Geospatial Data
vtds_gdf = gpd.read_parquet(CLEAN_VTD_GEO)

## Ensure Keys Exist and Match

In [20]:
# --- Ensure keys exist and match ---
# Prefer existing 'cntyvtd' if present; otherwise synthesize from fips + vtd
if "cntyvtd" not in election_df:
    # make sure you have a 5-digit county fips and a vtd column
    election_df["fips"] = election_df["fips"].astype(str).str.zfill(5)
    election_df["vtd"] = election_df["vtd"].astype(str).str.upper().str.strip()
    election_df["cntyvtd"] = (election_df["fips"] + election_df["vtd"]).str.upper()

if "cntyvtd" not in vtds_gdf:
    # some VTD layers have 'cnty' (county fips) + 'vtd'
    if "cnty" in vtds_gdf and "vtd" in vtds_gdf:
        vtds_gdf["cntyvtd"] = vtds_gdf["cnty"].astype(str).str.zfill(5) + vtds_gdf["vtd"].astype(str).str.upper()
    # or use an existing variant
    elif "cntyvtd" in vtds_gdf:
        pass
    else:
        raise ValueError("Could not find or build 'cntyvtd' in VTD layer.")

In [21]:
# --- Join ---
gdf = vtds_gdf.merge(election_df, on="cntyvtd", how="left")

In [22]:
# 1) share of unmatched VTDs
# replace "total_votes" with whichever column you want to test
unmatched = gdf["total_votes"].isna().mean()
print(f"Share of VTDs without result rows: {unmatched:.3%}")

# 2) sanity: no duplicate CNTYVTDs on the results side
dupes = election_df["cntyvtd"].duplicated().sum()
print("Duplicated cntyvtd in results:", dupes)

Share of VTDs without result rows: 95.387%
Duplicated cntyvtd in results: 0


In [23]:
gdf.to_parquet(joined_parquet, index=False)
print("Wrote:", joined_parquet)

Wrote: /home/aimlexpert/Documents/GitHub/texas-gerrymandering-HB4/data/interim/joined_vtd_data.parquet
