In [None]:
import logging

import pandas as pd

import geopandas as gpd

from choropleth_plotter import prepare_geo_data, plot_choropleth_from_config



# Configure logging for more detailed output during data prep.
# force=True makes this cell effective every time it runs: without it,
# logging.basicConfig() is a silent no-op whenever the root logger already has
# handlers (e.g. the cell was run before, or the environment pre-configured
# logging), so a changed level/format would never be applied.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    force=True,
)

In [None]:
# --- 1. Define Sample Data ---
# Sample input for the choropleth: state name -> year of admission to the Union.
# Replace with your own {region name: numeric value} mapping.
# Entries are kept in alphabetical order, one per line, so diffs stay readable.
admission_data = {
    "Alabama": 1819,
    "Alaska": 1959,
    "Arizona": 1912,
    "Arkansas": 1836,
    "California": 1850,
    "Colorado": 1876,
    "Connecticut": 1788,
    "Delaware": 1787,
    "Florida": 1845,
    "Georgia": 1788,
    "Hawaii": 1959,
    "Idaho": 1890,
    "Illinois": 1818,
    "Indiana": 1816,
    "Iowa": 1846,
    "Kansas": 1861,
    "Kentucky": 1792,
    "Louisiana": 1812,
    "Maine": 1820,
    "Maryland": 1788,
    "Massachusetts": 1788,
    "Michigan": 1837,
    "Minnesota": 1858,
    "Mississippi": 1817,
    "Missouri": 1821,
    "Montana": 1889,
    "Nebraska": 1867,
    "Nevada": 1864,
    "New Hampshire": 1788,
    "New Jersey": 1787,
    "New Mexico": 1912,
    "New York": 1788,
    "North Carolina": 1789,
    "North Dakota": 1889,
    "Ohio": 1803,
    "Oklahoma": 1907,
    "Oregon": 1859,
    "Pennsylvania": 1787,
    "Rhode Island": 1790,
    "South Carolina": 1788,
    "South Dakota": 1889,
    "Tennessee": 1796,
    "Texas": 1845,
    "Utah": 1896,
    "Vermont": 1791,
    "Virginia": 1788,
    "Washington": 1889,
    "West Virginia": 1863,
    "Wisconsin": 1848,
    "Wyoming": 1890,
    # "District of Columbia": 1790  # Example: DC might not be in all shapefiles as a 'state'
}

In [None]:
# --- 2. Prepare Data using the new function ---
# Per the original author's note, prepare_geo_data downloads the shapefiles and
# merges admission_data into them; its implementation lives in choropleth_plotter
# and is not visible from this notebook.
value_col = 'admission_year'
map_resolution = '50m' # Or '10m', '110m'

# Defaults, so the plotting cell can rely on these names even if preparation fails.
data_ready = False
states_gdf = lakes_gdf = countries_gdf = neighbors_states_gdf = None

try:
    logging.info(f"Starting data preparation with resolution '{map_resolution}'...")
    prepared_layers = prepare_geo_data(
        data_dict=admission_data,
        value_column_name=value_col,
        resolution=map_resolution,
        # Assumes the keys of admission_data are state names matching this shapefile column.
        # NOTE(review): the Canadian example passes lowercase 'name' - confirm which case
        # the shapefile actually uses.
        state_name_key='NAME',
    )
    states_gdf, lakes_gdf, countries_gdf, neighbors_states_gdf = prepared_layers
    logging.info("Data preparation function finished.")

    # Only flag the data as ready when a non-empty states layer came back.
    has_states = states_gdf is not None and not states_gdf.empty
    if has_states:
        logging.info(f"Successfully prepared states GeoDataFrame with {len(states_gdf)} entries.")
        data_ready = True
    else:
        # data_ready stays False, so the plotting cell will skip itself.
        logging.error("Data preparation failed to produce a valid states GeoDataFrame.")

except ValueError as prep_error:
    # Logged without a traceback; data_ready stays False.
    logging.error(f"Error during data preparation setup: {prep_error}")
except Exception as unexpected_error:
    # Anything else is logged with the full traceback; data_ready stays False.
    logging.error(f"An unexpected error occurred during data preparation: {unexpected_error}", exc_info=True)

In [None]:
# --- 3. Plot using the new function and YAML config ---
# Single source of truth for the config location: used by the call and by the
# error message below, so the two can never drift apart.
us_config_path = 'resources/usa_states.yaml'

if data_ready:
    logging.info("Proceeding to plot US States using plot_choropleth_from_config...")
    try:
        plot_choropleth_from_config(
            config_path=us_config_path,
            level1_gdf=states_gdf, # The prepared states GeoDataFrame
            value_column_name=value_col, # Column holding the values to colour by
            lakes_gdf=lakes_gdf, # The prepared lakes data
            countries_gdf=countries_gdf, # The prepared countries data
            neighbors_level1_gdf=neighbors_states_gdf # The prepared neighbor data
        )
        logging.info("US plotting function executed.")
    except FileNotFoundError as e:
        # Log the exception itself: it names the path that was actually missing.
        # The config file is the likely culprit, but whether the plotting function
        # opens other files cannot be told from this notebook, so do not assert it.
        logging.error(
            f"Plotting failed: file not found - {e}. "
            f"Check that the configuration file '{us_config_path}' exists."
        )
    except KeyError as e:
        logging.error(f"Plotting failed: Missing expected key in configuration or data - {e}", exc_info=True)
    except Exception as e:
        logging.error(f"An unexpected error occurred during US plotting: {e}", exc_info=True)
else:
    logging.error("Skipping US plotting because data preparation failed or resulted in empty/invalid GeoDataFrame.")

In [None]:
# Mark the end of the US example in the cell output (leading newline separates it
# from whatever was printed before).
us_done_message = "\nExample script finished."
print(us_done_message)

In [None]:
# --- 4. Plot Canadian Provinces Confederation Year ---
import logging
import pandas as pd
import geopandas as gpd
# from choropleth_plotter import prepare_geo_data, plot_choropleth_from_config # Already imported

logging.info("\n--- Starting Canadian Province Plot ---")

# Data: Canadian provinces/territories and year they joined Confederation or were created
# Using names likely found in Natural Earth 'name' column
confederation_data = {
    'Ontario': 1867,
    'Quebec': 1867,
    'Nova Scotia': 1867,
    'New Brunswick': 1867,
    'Manitoba': 1870,
    'British Columbia': 1871,
    'Prince Edward Island': 1873,
    'Saskatchewan': 1905,
    'Alberta': 1905,
    'Newfoundland and Labrador': 1949,
    'Yukon': 1898, # Territory
    'Northwest Territories': 1870, # Territory
    'Nunavut': 1999 # Territory
}

# Prepare data for Canada
canada_value_col = 'confederation_year'
canada_map_resolution = '50m' # Match resolution used for US example for consistency
# Shapefile column matched against the dict keys. Defined once so the call to
# prepare_geo_data and the name check below always refer to the same column.
# TODO: verify this matches the shapefile's actual column name.
canada_name_key = 'name'
canada_data_ready = False
provinces_gdf, ca_lakes_gdf, ca_countries_gdf, ca_neighbors_gdf = None, None, None, None # Initialize

try:
    logging.info(f"Starting Canadian data preparation with resolution '{canada_map_resolution}'...")
    # Assuming prepare_geo_data can handle Canadian data based on names matching the shapefile
    provinces_gdf, ca_lakes_gdf, ca_countries_gdf, ca_neighbors_gdf = prepare_geo_data(
        data_dict=confederation_data,
        value_column_name=canada_value_col,
        resolution=canada_map_resolution,
        state_name_key=canada_name_key
    )
    logging.info("Canadian data preparation function finished.")

    if provinces_gdf is None or provinces_gdf.empty:
        logging.error("Canadian data preparation failed to produce a valid provinces GeoDataFrame.")
    else:
        logging.info(f"Successfully prepared provinces GeoDataFrame with {len(provinces_gdf)} entries.")

        # Optional diagnostic: report dict keys that have no counterpart in the GeoDataFrame.
        # It is guarded on the name column so that a missing column only skips the check;
        # previously the resulting KeyError also prevented canada_data_ready from being
        # set and therefore cancelled the plot.
        if canada_name_key not in provinces_gdf.columns:
            logging.warning(
                f"Column '{canada_name_key}' not found in provinces_gdf, skipping the unmatched-name check."
            )
        else:
            # Restrict to Canadian rows when possible, assuming 'iso_a2' holds country codes.
            if 'iso_a2' in provinces_gdf.columns:
                rows_to_check = provinces_gdf[provinces_gdf['iso_a2'] == 'CA']
            else:
                logging.warning("Column 'iso_a2' not found in provinces_gdf, cannot filter for Canada to check names accurately.")
                rows_to_check = provinces_gdf # Check all names found

            matched_names = set(rows_to_check[canada_name_key].unique())
            expected_names = set(confederation_data.keys())
            missing_names = expected_names - matched_names
            if missing_names:
                logging.warning(f"Could not match the following Canadian names from data_dict: {missing_names}")

        # Note: provinces_gdf might contain non-Canadian regions if prepare_geo_data doesn't filter internally.
        # plot_choropleth_from_config should handle filtering based on the config's country_codes.
        canada_data_ready = True

except ValueError as e:
    logging.error(f"Error during Canadian data preparation setup: {e}")
except Exception as e:
    logging.error(f"An unexpected error occurred during Canadian data preparation: {e}", exc_info=True)


# Plot Canada using the Canadian config
# Single source of truth for the config location: used by the call and by the
# error message below, so the two can never drift apart.
canada_config_path = 'resources/canada_provinces.yaml'

if canada_data_ready:
    logging.info("Proceeding to plot Canada using plot_choropleth_from_config...")
    try:
        plot_choropleth_from_config(
            config_path=canada_config_path,
            level1_gdf=provinces_gdf, # Pass the potentially larger GDF; plotting func should filter
            value_column_name=canada_value_col,
            lakes_gdf=ca_lakes_gdf,
            countries_gdf=ca_countries_gdf,
            neighbors_level1_gdf=ca_neighbors_gdf # This might contain US states etc.
        )
        logging.info("Canadian plotting function executed.")
    except FileNotFoundError as e:
        # Log the exception itself: it names the path that was actually missing.
        # The config file is the likely culprit, but whether the plotting function
        # opens other files cannot be told from this notebook, so do not assert it.
        logging.error(
            f"Plotting failed: file not found - {e}. "
            f"Check that the configuration file '{canada_config_path}' exists."
        )
    except KeyError as e:
        logging.error(f"Plotting failed: Missing expected key in Canadian configuration or data - {e}", exc_info=True)
    except Exception as e:
        logging.error(f"An unexpected error occurred during Canadian plotting: {e}", exc_info=True)
else:
    logging.error("Skipping Canadian plotting because data preparation failed or resulted in empty/invalid GeoDataFrame.")

print("\nCanadian plot section finished.")