# Pennsylvania

## Full raw data

In [None]:
from utils.data_utils import load_raw_data, visualize_map_with_geometry
from pathlib import Path

# Load the raw dataset; load_raw_data is expected to flatten the JSON
# structure into a DataFrame (project helper, implementation not shown here).
# NOTE(review): the file name says "IA", but this notebook is titled
# Pennsylvania and the map cell passes state="Pennsylvania" -- confirm that
# this is the intended input file.
raw_data_path = Path('data/IA_raw_data.json')
df = load_raw_data(raw_data_path)

In [None]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
import matplotlib.pyplot as plt

# Wrap the raw DataFrame `df` (loaded in an earlier cell) in a GeoDataFrame,
# using its existing 'geometry' column as the active geometry.
gdf = gpd.GeoDataFrame(df, geometry=df['geometry'])

# Step 1: Create grid-like state space
# Each county is placed on an integer grid by rounding its centroid's x/y.
# NOTE(review): if the geometry is in a geographic CRS (degrees), rounding to
# whole units is very coarse and several counties can share one cell --
# confirm the CRS / grid resolution.
gdf['centroid'] = gdf.geometry.centroid
gdf['grid_x'] = gdf['centroid'].apply(lambda p: round(p.x))
gdf['grid_y'] = gdf['centroid'].apply(lambda p: round(p.y))

# Map grid cells to counties.
# A dict can hold only one county per (grid_x, grid_y) key, so a later county
# replaces an earlier one that rounds to the same cell. That last-wins
# behaviour is kept, but every collision is reported so the dropped county is
# not lost silently.
grid_mapping = {}
for cell_x, cell_y, county_name in zip(gdf['grid_x'], gdf['grid_y'], gdf['county']):
    cell = (cell_x, cell_y)
    if cell in grid_mapping:
        print(
            f"Grid cell {cell} already holds {grid_mapping[cell]}; "
            f"overwriting with {county_name}"
        )
    grid_mapping[cell] = county_name

# Step 2: Validate adjacency
def is_adjacent(county, neighbor_county):
    """Check if two counties are adjacent based on the adjacency list.

    Looks up the first row of the module-level ``gdf`` whose 'county' value
    equals ``county`` and tests whether ``neighbor_county`` is contained in
    that row's 'adj' entry.

    Args:
        county: Value to match against the 'county' column.
        neighbor_county: Value searched for in the matched row's 'adj' entry.
            NOTE(review): assumes 'adj' stores the same kind of identifier as
            the 'county' column -- confirm against the data loader.

    Returns:
        True if ``neighbor_county`` is in the adjacency entry of ``county``;
        False otherwise, including when ``county`` is not present in ``gdf``
        (previously this case raised an IndexError).
    """
    matches = gdf.loc[gdf['county'] == county, 'adj']
    if matches.empty:
        # Unknown county: nothing can be adjacent to it.
        return False
    return neighbor_county in matches.iloc[0]

# Build, for every county on the grid, the list of counties occupying the
# 8 surrounding cells.
# Offsets are generated row by row (dy outer, dx inner), skipping the cell
# itself, which yields: (-1,-1) (0,-1) (1,-1) (-1,0) (1,0) (-1,1) (0,1) (1,1).
neighbor_offsets = [
    (dx, dy)
    for dy in (-1, 0, 1)
    for dx in (-1, 0, 1)
    if (dx, dy) != (0, 0)
]

grid_adjacency = {}
for (cell_x, cell_y), county in grid_mapping.items():
    # Look up each surrounding cell; empty cells come back as None.
    occupants = (
        grid_mapping.get((cell_x + dx, cell_y + dy))
        for dx, dy in neighbor_offsets
    )
    # Keep only cells that actually hold a (truthy) county value.
    grid_adjacency[county] = [occupant for occupant in occupants if occupant]

# Cross-check: every grid neighbour should also appear in the county's own
# adjacency list; report each pair where it does not.
for county, grid_neighbors in grid_adjacency.items():
    for neighbor in grid_neighbors:
        if is_adjacent(county, neighbor):
            continue
        print(f"Adjacency mismatch for {county} and {neighbor}")

# Step 3: Visualize the grid
# Draw one marker per occupied grid cell and print the county value centred
# on top of it.
plt.figure(figsize=(10, 8))
for cell, county in grid_mapping.items():
    cell_x, cell_y = cell
    plt.scatter(cell_x, cell_y, label=county)
    plt.text(cell_x, cell_y, county, fontsize=8, ha='center', va='center')

# Axis labelling and title, then render.
plt.xlabel("Grid X")
plt.ylabel("Grid Y")
plt.title("Grid State Space for RL Problem")
plt.show()

In [None]:
# Map each row's position in `df` (0-based, in row order) to that row's
# 'adj' entry.
adjency_mapping = dict(enumerate(df['adj']))

# Display the mapping as the cell output.
adjency_mapping

In [None]:
# Metric specification passed to the map helper. Each entry is a
# (column name, display label) pair; "ratio" holds groups of such pairs.
total_metrics = [("vap", "Voting Age Population")]
biden_vs_trump = [("pre_20_dem_bid", "Biden"), ("pre_20_rep_tru", "Trump")]
metrics = {
    "total": total_metrics,
    "mean": [],
    "ratio": [biden_vs_trump],
}

# Render the map from the raw DataFrame's geometry, grouped by the cd_2020
# district column.
visualize_map_with_geometry(
    df,
    geometry_col="geometry",
    district_id_col="cd_2020",
    state="Pennsylvania",
    metrics=metrics,
)

In [None]:
# Largest number of adjacent nodes for any single node:
# max(list(map(len, graph.values())))  # TODO: requires a `graph` dict to be built first; the known value is 8