In [1]:
import pandas as pd

# Read both dataset versions; only the "cluster" column of v23 is used below.
df_v14 = pd.read_csv("../datasets/customer_shopping_data_v14.csv")
df_v23 = pd.read_csv("../datasets/customer_shopping_data_v23.csv")

# Work on a copy of v14 with v23's cluster labels attached as a new column.
# NOTE(review): the assignment aligns on the row index, so this presumably
# relies on both files listing the same rows in the same order -- confirm.
df_merged = df_v14.assign(cluster=df_v23["cluster"])

# Parse the invoice dates, then derive a monthly period to group on.
df_merged["invoice_date"] = pd.to_datetime(df_merged["invoice_date"])
df_merged["month"] = df_merged["invoice_date"].dt.to_period("M")

# Output column -> (source column, aggregation) for the monthly roll-up.
summary_spec = {
    "total_transactions": ("invoice_no", "count"),
    "total_spend": ("total_price", "sum"),
    "ipsos_mean": ("ipsos_confidence", "mean"),
    "cons_conf_mean": ("Cons_Conf", "mean"),
}

# One summary row per (cluster, month) pair, with the keys as plain columns.
rows_by_cluster_month = df_merged.groupby(["cluster", "month"])
monthly_summary = rows_by_cluster_month.agg(**summary_spec).reset_index()

# Write one CSV per cluster containing that cluster's monthly series.
cluster_labels = monthly_summary["cluster"]
for cluster_id in cluster_labels.unique():
    belongs_to_cluster = cluster_labels == cluster_id
    df_cluster = monthly_summary.loc[belongs_to_cluster]
    df_cluster.to_csv(
        f"../datasets/cluster_{cluster_id}_series.csv",
        index=False,
    )
