# ClayPlotter Test Notebook



This notebook demonstrates the usage of the `ChoroplethPlotter` class to generate choropleth maps for USA states, Canadian provinces and territories, and Chinese provinces.

In [None]:
import logging

import pandas as pd

import matplotlib.pyplot as plt

from clayPlotter import ChoroplethPlotter # Import the main class

# Configure logging

In [None]:
# Configure the root logger: timestamp, logger name, level and message,
# with the threshold at DEBUG so the most detailed records are shown.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.DEBUG,  # Set level to DEBUG
)

## 1. USA States - Admission Year

# --- Define Sample Data ---

In [None]:
# Sample data: US state name -> admission year (becomes the 'Admission Year'
# column). Entries are alphabetical, one initial letter per group; dict
# insertion order determines the row order of the DataFrame built from it.
admission_data = {
    "Alabama": 1819, "Alaska": 1959, "Arizona": 1912, "Arkansas": 1836,
    "California": 1850, "Colorado": 1876, "Connecticut": 1788,
    "Delaware": 1787,
    "Florida": 1845,
    "Georgia": 1788,
    "Hawaii": 1959,
    "Idaho": 1890, "Illinois": 1818, "Indiana": 1816, "Iowa": 1846,
    "Kansas": 1861, "Kentucky": 1792,
    "Louisiana": 1812,
    "Maine": 1820, "Maryland": 1788, "Massachusetts": 1788, "Michigan": 1837,
    "Minnesota": 1858, "Mississippi": 1817, "Missouri": 1821, "Montana": 1889,
    "Nebraska": 1867, "Nevada": 1864, "New Hampshire": 1788, "New Jersey": 1787,
    "New Mexico": 1912, "New York": 1788, "North Carolina": 1789, "North Dakota": 1889,
    "Ohio": 1803, "Oklahoma": 1907, "Oregon": 1859,
    "Pennsylvania": 1787,
    "Rhode Island": 1790,
    "South Carolina": 1788, "South Dakota": 1889,
    "Tennessee": 1796, "Texas": 1845,
    "Utah": 1896,
    "Vermont": 1791, "Virginia": 1788,
    "Washington": 1889, "West Virginia": 1863, "Wisconsin": 1848, "Wyoming": 1890,
}

# Convert to DataFrame

In [None]:
# Column labels for the USA table; the same names are later handed to the
# plotter as location_col / value_col.
location_col_usa, value_col_usa = 'State', 'Admission Year'

# Two-column frame built column-wise: dict keys become the location column,
# dict values the value column, in the dict's insertion order.
usa_df = pd.DataFrame(
    {
        location_col_usa: list(admission_data.keys()),
        value_col_usa: list(admission_data.values()),
    }
)

In [None]:
# Show a heading followed by the first rows of the USA table.
print("USA Data:", usa_df.head(), sep="\n")

# --- Instantiate Plotter and Plot ---

In [None]:
# Build the USA choropleth, save it as a PNG and display it inline.
# The listed exception types are logged with a full traceback instead of
# propagating, so the remaining cells can still be run.
try:
    logging.info("Instantiating plotter for USA states...")
    usa_plotter = ChoroplethPlotter(
        geography_key='usa_states',
        data=usa_df,
        location_col=location_col_usa,
        value_col=value_col_usa,
    )

    logging.info("Generating USA plot...")
    # No geo_join_column is passed, so plot() falls back to its default
    # (reported as 'name' by the original author of this cell — TODO confirm).
    # If the shapefile keys on another column (e.g. 'postal'), pass it
    # explicitly: geo_join_column='postal'.
    fig_usa, ax_usa = usa_plotter.plot(title="USA State Admission Year")

    output_filename_usa = "my_choropleth_map_usa.png"
    # Save through the returned figure handle rather than plt.savefig(), which
    # writes whichever figure pyplot currently treats as active.
    # NOTE(review): assumes plot() returns a matplotlib Figure first — confirm.
    fig_usa.savefig(output_filename_usa)
    # Lazy %-style arguments: the message is only formatted if it is emitted.
    logging.info("USA map saved to %s", output_filename_usa)
    plt.show()  # Display the plot in the notebook

except (ValueError, TypeError, FileNotFoundError, RuntimeError, KeyError) as e:
    # logging.exception logs at ERROR level and attaches the traceback.
    logging.exception("Failed to generate USA plot: %s", e)

## 2. Canadian Provinces - Confederation Year

# --- Define Sample Data ---

In [None]:
# Sample data: Canadian province/territory name -> year (becomes the
# 'Confederation Year' column). Insertion order determines the row order of
# the DataFrame built from it.
confederation_data = {
    # Provinces
    "Ontario": 1867,
    "Québec": 1867,  # accented spelling (marked "corrected" by the original author)
    "Nova Scotia": 1867,
    "New Brunswick": 1867,
    "Manitoba": 1870,
    "British Columbia": 1871,
    "Prince Edward Island": 1873,
    "Saskatchewan": 1905,
    "Alberta": 1905,
    "Newfoundland and Labrador": 1949,
    # Territories
    "Yukon": 1898,
    "Northwest Territories": 1870,
    "Nunavut": 1999,
}

# Convert to DataFrame

In [None]:
# Column labels for the Canada table; the same names are later handed to the
# plotter as location_col / value_col.
location_col_can, value_col_can = 'Province/Territory', 'Confederation Year'

# Two-column frame built column-wise from the dict's keys and values, in
# insertion order.
can_df = pd.DataFrame(
    {
        location_col_can: list(confederation_data.keys()),
        value_col_can: list(confederation_data.values()),
    }
)

In [None]:
# Show a heading (preceded by a blank line) and the first rows of the Canada table.
print("\nCanada Data:", can_df.head(), sep="\n")

In [None]:
# --- Instantiate Plotter and Plot ---
# Build the Canada choropleth, save it as a PNG and display it inline.
# The listed exception types are logged with a full traceback instead of
# propagating, so the remaining cells can still be run.
try:
    logging.info("Instantiating plotter for Canadian provinces...")
    can_plotter = ChoroplethPlotter(
        geography_key='canada_provinces',
        data=can_df,
        location_col=location_col_can,
        value_col=value_col_can,
    )

    logging.info("Generating Canada plot...")
    # No geo_join_column is passed: this assumes the canada_provinces config
    # and shapefile use 'name' for joining — TODO confirm.
    fig_can, ax_can = can_plotter.plot(title="Canadian Province/Territory Confederation Year")

    output_filename_can = "my_choropleth_map_canada.png"
    # Save through the returned figure handle rather than plt.savefig(), which
    # writes whichever figure pyplot currently treats as active.
    # NOTE(review): assumes plot() returns a matplotlib Figure first — confirm.
    fig_can.savefig(output_filename_can)
    # Lazy %-style arguments: the message is only formatted if it is emitted.
    logging.info("Canada map saved to %s", output_filename_can)
    plt.show()  # Display the plot in the notebook

except (ValueError, TypeError, FileNotFoundError, RuntimeError, KeyError) as e:
    # logging.exception logs at ERROR level and attaches the traceback.
    logging.exception("Failed to generate Canada plot: %s", e)

In [None]:
## 3. China Provinces - Population (Approx. 2020/2021)
# --- Define Sample Data ---
# Province name -> approximate population, for demonstration only.
# Hong Kong, Macau (SARs) and Taiwan are left out to keep the example simple.
# Insertion order determines the row order of the DataFrame built from it.
china_population_data = {
    "Beijing": 21_500_000,
    "Tianjin": 13_900_000,
    "Hebei": 74_600_000,
    "Shanxi": 34_900_000,
    "Inner Mongolia": 24_000_000,
    "Liaoning": 42_600_000,
    "Jilin": 24_000_000,
    "Heilongjiang": 31_800_000,
    "Shanghai": 24_900_000,
    "Jiangsu": 84_800_000,
    "Zhejiang": 64_600_000,
    "Anhui": 61_000_000,
    "Fujian": 41_500_000,
    "Jiangxi": 45_200_000,
    "Shandong": 101_500_000,
    "Henan": 99_400_000,
    "Hubei": 57_800_000,
    "Hunan": 66_400_000,
    "Guangdong": 126_000_000,
    "Guangxi": 50_100_000,
    "Hainan": 10_100_000,
    "Chongqing": 32_100_000,
    "Sichuan": 83_600_000,
    "Guizhou": 38_500_000,
    "Yunnan": 47_200_000,
    "Tibet": 3_600_000,
    "Shaanxi": 39_500_000,
    "Gansu": 25_000_000,
    "Qinghai": 5_900_000,
    "Ningxia": 7_200_000,
    "Xinjiang": 25_900_000,
}

# Convert to DataFrame
# Column labels for the China table. The 'Province' values are meant to line
# up with an English-name column in the shapefile (possibly 'name_en' — unverified).
location_col_chn, value_col_chn = 'Province', 'Population'

# Two-column frame built column-wise from the dict's keys and values, in
# insertion order.
chn_df = pd.DataFrame(
    {
        location_col_chn: list(china_population_data.keys()),
        value_col_chn: list(china_population_data.values()),
    }
)

# Show a heading (preceded by a blank line) and the first rows of the China table.
print("\nChina Data:", chn_df.head(), sep="\n")

# --- Instantiate Plotter and Plot ---
# Build the China choropleth, save it as a PNG and display it inline.
# The listed exception types are logged with a full traceback instead of
# propagating, so the remaining cells can still be run.
try:
    logging.info("Instantiating plotter for China provinces...")

    # Shapefile (GeoDataFrame) column to join on. 'name_en' is an ASSUMPTION
    # taken from china_provinces.yaml (label_settings.level1_code_column); it
    # must exist in the actual shapefile — TODO confirm, and change this value
    # if the shapefile uses a different column.
    # Per the original author's notes, the plotter merges `geo_join_column`
    # (shapefile side) with `location_col` (DataFrame side).
    geo_join_col_chn = 'name_en'

    chn_plotter = ChoroplethPlotter(
        geography_key='china_provinces',
        data=chn_df,
        location_col=location_col_chn,  # 'Province' from our DataFrame
        value_col=value_col_chn,        # 'Population' from our DataFrame
    )

    logging.info("Generating China plot...")
    # Pass the assumed shapefile join column explicitly rather than relying on
    # plot()'s default.
    fig_chn, ax_chn = chn_plotter.plot(
        title="China Province Population (Approx. 2020/21)",
        geo_join_column=geo_join_col_chn,
    )

    output_filename_chn = "my_choropleth_map_china.png"
    # Save through the returned figure handle rather than plt.savefig(), which
    # writes whichever figure pyplot currently treats as active.
    # NOTE(review): assumes plot() returns a matplotlib Figure first — confirm.
    fig_chn.savefig(output_filename_chn)
    # Lazy %-style arguments: the message is only formatted if it is emitted.
    logging.info("China map saved to %s", output_filename_chn)
    plt.show()  # Display the plot in the notebook

except (ValueError, TypeError, FileNotFoundError, RuntimeError, KeyError) as e:
    # logging.exception logs at ERROR level and attaches the traceback.
    logging.exception("Failed to generate China plot: %s", e)


In [None]:
# Closing marker printed by the last cell of the notebook.
closing_message = "\nNotebook finished."
print(closing_message)