In [None]:
import pandas as pd
import altair as alt
import webbrowser
import os

# Load the raw purchase records.
df = pd.read_csv('shopping_behavior_updated.csv')

# Restrict the analysis to rows whose Location is Massachusetts.
df_ma = df.loc[df['Location'] == 'Massachusetts']

# Count MA rows for every (Category, Season) pair that occurs in the data.
heat_df = (
    df_ma.groupby(['Category', 'Season'])
         .size()
         .rename('count')
         .reset_index()
)


def _share_of_group(counts):
    """Return each value as a percentage of the sum of its group."""
    return counts / counts.sum() * 100


# Express each cell as a percentage of its season's MA total.
heat_df['percent_of_season'] = (
    heat_df.groupby('Season')['count'].transform(_share_of_group)
)

# Explicit x-axis order for the seasons (overrides the default sort).
season_order = ['Winter', 'Spring', 'Summer', 'Fall']

# Click-based selection on Category.
# NOTE: this was previously bound to the legend (bind='legend'), but the only
# legend in this chart is the gradient legend for `count`. A legend binding
# needs a legend for the projected field, and binding to a legend disables
# clicks on the marks themselves, so the selection could never be triggered.
# Without the binding, clicking a cell highlights every cell of its Category.
cat_sel = alt.selection_point(fields=['Category'])

# Category x Season heatmap: cell colour encodes the purchase count, and
# cells outside the selected Category are faded.
heatmap = (
    alt.Chart(heat_df)
    .mark_rect()
    .encode(
        x=alt.X('Season:N',
                title='Season',
                sort=season_order),
        y=alt.Y('Category:N',
                title='Product Category'),
        color=alt.Color('count:Q',
                        title='Number of Purchases (MA only)',
                        scale=alt.Scale(scheme='oranges')),
        # Full opacity for cells matching the selection (or when nothing is
        # selected); dimmed otherwise.
        opacity=alt.condition(cat_sel, alt.value(1), alt.value(0.25)),
        tooltip=[
            alt.Tooltip('Category:N', title='Category'),
            alt.Tooltip('Season:N', title='Season'),
            alt.Tooltip('count:Q', title='Exact count'),
            alt.Tooltip('percent_of_season:Q',
                        title='Percent of MA season total',
                        format='.2f'),
        ],
    )
    .add_params(cat_sel)
    .properties(
        title='Product Categories by Season for Massachusetts Customers',
        width=500,
        height=400,
    )
)

# Write the chart to a standalone HTML file, then print its absolute path.
out = 'seasonal_category_heatmap_ma.html'
heatmap.save(out)
saved_to = os.path.abspath(out)
print(saved_to)



/Users/hariniavula/Documents/DS 4200/FinalWebsite-main/DS4200_Project-main/seasonal_category_heatmap_ma.html
