In [2]:
import pandas as pd
import numpy as np

# Files
RAW_FILE = r"E:\c drive\amazon\data\online_retail\online_retail_II.xlsx"
CLTV_FILE = r"E:\c drive\amazon\notebooks\cltv_with_predictions.csv"
OUT_FILE = "promotion_hourly_triggers.csv"

# ---- Load Data ----
raw = pd.read_excel(RAW_FILE, parse_dates=['InvoiceDate'])
raw = raw[raw['Quantity'] > 0]
raw = raw.dropna(subset=['Customer ID'])
raw.rename(columns={'Customer ID': 'Customer_ID'}, inplace=True)

cltv = pd.read_csv(CLTV_FILE)
cltv.columns = cltv.columns.str.replace(' ', '_')

# Merge CLTV segment into raw data
df = raw.merge(cltv[['Customer_ID', 'CLTV_Segment']], on='Customer_ID', how='left')
df.rename(columns={'CLTV_Segment': 'CLTV_Segment_Label'}, inplace=True)


df['Hour'] = df['InvoiceDate'].dt.hour

# ---- Aggregate ----
hourly = (
    df.groupby(['CLTV_Segment_Label', 'Hour'])
      .agg(Total_Orders=('Invoice', 'nunique'),
           Total_Revenue=('Price', 'sum'))
      .reset_index()
)

# ---- Trigger Class ----
def classify_triggers(segment_df):
    # Determine 80th percentile for HOT
    threshold_hot = np.percentile(segment_df['Total_Orders'], 80)
    threshold_warm = np.percentile(segment_df['Total_Orders'], 50)
    def trigger(x):
        if x >= threshold_hot:
            return "HOT_HOUR"
        elif x >= threshold_warm:
            return "WARM_HOUR"
        return "COLD_HOUR"
    segment_df['Trigger_Class'] = segment_df['Total_Orders'].apply(trigger)
    return segment_df

hourly = hourly.groupby('CLTV_Segment_Label').apply(classify_triggers).reset_index(drop=True)

hourly.to_csv(OUT_FILE, index=False)
print("Saved:", OUT_FILE)
print(hourly.head())


Saved: promotion_hourly_triggers.csv
  CLTV_Segment_Label  Hour  Total_Orders  Total_Revenue Trigger_Class
0                  A     7            31       1344.840     COLD_HOUR
1                  A     8           182      10952.250     COLD_HOUR
2                  A     9           505      33605.220     WARM_HOUR
3                  A    10           900      54116.012     WARM_HOUR
4                  A    11           966      74449.311      HOT_HOUR


  hourly = hourly.groupby('CLTV_Segment_Label').apply(classify_triggers).reset_index(drop=True)
