In [4]:
#pip install vega_datasets altair


In [None]:
import pandas as pd
import altair as alt
from vega_datasets import data as vega_data
import os


df = pd.read_csv("shopping_behavior_updated.csv")

# FIPS mapping 
state_fips = {
    "Alabama": 1, "Alaska": 2, "Arizona": 4, "Arkansas": 5, "California": 6,
    "Colorado": 8, "Connecticut": 9, "Delaware": 10, "Florida": 12, "Georgia": 13,
    "Hawaii": 15, "Idaho": 16, "Illinois": 17, "Indiana": 18, "Iowa": 19,
    "Kansas": 20, "Kentucky": 21, "Louisiana": 22, "Maine": 23, "Maryland": 24,
    "Massachusetts": 25, "Michigan": 26, "Minnesota": 27, "Mississippi": 28,
    "Missouri": 29, "Montana": 30, "Nebraska": 31, "Nevada": 32, "New Hampshire": 33,
    "New Jersey": 34, "New Mexico": 35, "New York": 36, "North Carolina": 37,
    "North Dakota": 38, "Ohio": 39, "Oklahoma": 40, "Oregon": 41, "Pennsylvania": 42,
    "Rhode Island": 44, "South Carolina": 45, "South Dakota": 46, "Tennessee": 47,
    "Texas": 48, "Utah": 49, "Vermont": 50, "Virginia": 51, "Washington": 53,
    "West Virginia": 54, "Wisconsin": 55, "Wyoming": 56,
}

# regions for US 
region_map = {
    "Maine": "Northeast", "New Hampshire": "Northeast", "Vermont": "Northeast",
    "Massachusetts": "Northeast", "Rhode Island": "Northeast", "Connecticut": "Northeast",
    "New York": "Northeast", "New Jersey": "Northeast", "Pennsylvania": "Northeast",
    "Wisconsin": "Midwest", "Michigan": "Midwest", "Illinois": "Midwest",
    "Indiana": "Midwest", "Ohio": "Midwest", "North Dakota": "Midwest",
    "South Dakota": "Midwest", "Nebraska": "Midwest", "Kansas": "Midwest",
    "Minnesota": "Midwest", "Iowa": "Midwest", "Missouri": "Midwest",
    "Delaware": "South", "Maryland": "South", "Virginia": "South",
    "West Virginia": "South", "Kentucky": "South", "North Carolina": "South",
    "South Carolina": "South", "Tennessee": "South", "Georgia": "South",
    "Florida": "South", "Alabama": "South", "Mississippi": "South",
    "Arkansas": "South", "Louisiana": "South", "Texas": "South", "Oklahoma": "South",
    "Washington": "West", "Oregon": "West", "California": "West", "Nevada": "West",
    "Arizona": "West", "New Mexico": "West", "Idaho": "West", "Montana": "West",
    "Wyoming": "West", "Utah": "West", "Colorado": "West", "Alaska": "West",
    "Hawaii": "West"
}

# Aggregate to state level
state_stats = (
    df.groupby("Location")
    .agg(
        avg_purchase=("Purchase Amount (USD)", "mean"),
        total_customers=("Customer ID", "nunique"),
        transaction_count=("Customer ID", "size"),
    )
    .reset_index()
)

# Add FIPS state id and Region
state_stats["state_id"] = state_stats["Location"].map(state_fips)
state_stats["Region"] = state_stats["Location"].map(region_map)

# Drop any rows that did not map correctly
state_stats = state_stats.dropna(subset=["state_id", "Region"])

# --- Altair Visualization ---
# Topojson for US states
states = alt.topo_feature(vega_data.us_10m.url, "states")

# 1. Create the interactive selection widget for regions
region_selector = alt.selection_point(
    fields=['Region'],
    bind=alt.binding_select(
        options=[None] + sorted(list(set(region_map.values()))),
        name='Filter by Region: ',
        labels=['All'] + sorted(list(set(region_map.values())))
    ),
    name="RegionSelector",
    empty='all'
)

# Hover selection for visual highlight
hover = alt.selection_single(
    fields=["id"],
    on="mouseover",
    empty="none",
    clear="mouseout",
)

# Build the choropleth
choropleth_us = (
    alt.Chart(states)
    .mark_geoshape(stroke='white')
    .encode(
        color=alt.condition(
            "(!isDefined(RegionSelector_Region) || RegionSelector_Region === null) || datum.Region == RegionSelector_Region",
            alt.Color(
                "avg_purchase:Q",
                title="Average purchase (USD)",
                scale=alt.Scale(scheme="blues"),
            ),
            alt.value("lightgray")
        ),
        tooltip=[
            alt.Tooltip("Location:N", title="State"),
            alt.Tooltip("avg_purchase:Q", title="Average purchase", format=".2f"),
            alt.Tooltip("total_customers:Q", title="Total customers"),
            alt.Tooltip("Region:N"),
            alt.Tooltip("transaction_count:Q", title="Transaction count"),
        ],
        stroke=alt.condition(hover, alt.value("black"), alt.value("white")),
        strokeWidth=alt.condition(hover, alt.value(1), alt.value(0.5)),
    )
    .transform_lookup(
        lookup="id",
        from_=alt.LookupData(
            state_stats,
            key="state_id",
            fields=["Location", "avg_purchase", "total_customers", "transaction_count", "Region"],
        ),
    )
    .add_params(region_selector)
    .add_selection(hover)
    .project(type="albersUsa")
    .properties(
        width=600,
        height=450,
        title="Average Purchase Amount in each State",
    )
)

# Display the final chart
choropleth_us

output_file = 'choropleth.html'
choropleth_us.save(output_file)
print(f"Saved chart to: {os.path.abspath(output_file)}")

Saved chart to: /Users/hariniavula/Documents/DS 4200/FinalWebsite-main/DS4200_Project-main/choropleth.html


Deprecated since `altair=5.0.0`. Use selection_point instead.
  hover = alt.selection_single(
Deprecated since `altair=5.0.0`. Use add_params instead.
  .add_selection(hover)
