<a href="https://colab.research.google.com/github/chrisfurlong03/addon_boost/blob/main/Add_on_Bundling_Modeling.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Add-on Recommender and Pricing

This notebook imports a small addon_bundling package to:
1) generate synthetic data,
2) train a price-elasticity model with XGBoost (alongside a propensity model),
3) inspect feature importances and a sample tree,
4) run the optimizer to get top-K offers (probability-ranked, price ≤ per-add-on list price).

In [None]:
# NOTE(review): none of the names imported on the next line are referenced under
# these exact names in the visible cells (later cells use the np/pd/plt aliases);
# presumably this is a dependency-availability check — confirm before pruning.
import os, shutil, glob, pathlib, xgboost, sklearn, pandas, matplotlib, numpy, graphviz
# Download the shared Google Drive folder into /content/tmp_dl. The next cell imports
# its modules as the `tmp_dl` package, which assumes the kernel's working directory
# is /content (as on Colab) — TODO confirm for other environments.
!gdown --folder "https://drive.google.com/drive/folders/1u0O2rjMQNjKUw-P_wwRylxab13Uglj5G" -O /content/tmp_dl --remaining-ok

In [None]:
# Short aliases (np / pd / plt) used by the analysis and plotting cells below.
import numpy as np, pandas as pd, matplotlib.pyplot as plt
# Project modules, loaded from the tmp_dl folder fetched by the download cell.
from tmp_dl.config import ADDON_META, PRICE_BUCKETS, Policy
from tmp_dl.data_gen import generate_synthetic_training
from tmp_dl.models import train_propensity_model, train_price_elasticity_model
from tmp_dl.features import TARGET
from tmp_dl.optimizer import optimize_offers
# Visible confirmation that every import above resolved.
print('Imports OK')


In [None]:
# Number of synthetic bookings to generate; named so it is easy to find and tune.
N_BOOKINGS = 4000
df = generate_synthetic_training(n_bookings=N_BOOKINGS)
# Preview the first rows of the generated data.
df.head()


In [None]:
# 1) Days to departure: histogram drawn on an explicitly created axes
fig_days, ax_days = plt.subplots()
df['days_to_departure'].plot(kind='hist', bins=30, ax=ax_days)
ax_days.set_title('Days to departure (histogram)')
ax_days.set_xlabel('days_to_departure')
ax_days.set_ylabel('count')
plt.show()

# 2) Mean of TARGET per add-on, sorted from highest to lowest
addon_rate = df.groupby('addon_id')[TARGET].mean().sort_values(ascending=False)
fig_rate, ax_rate = plt.subplots()
addon_rate.plot(kind='bar', ax=ax_rate)
ax_rate.set_title('Purchase rate by add-on')
ax_rate.set_xlabel('addon_id')
ax_rate.set_ylabel('conversion rate')
plt.show()



In [None]:
# Fit the two models that optimize_offers consumes later in the notebook.
# Both are trained on the same synthetic data produced above.
propensity_model = train_propensity_model(df)
# price_model is the object inspected as "M2" in the feature-importance and tree cells.
price_model = train_price_elasticity_model(df)
print('Models trained')


In [None]:
# Map feature importances to post-transformer feature names (M2).
# The 'prep' step supplies the transformed column names and the 'clf' step the
# fitted estimator; the two arrays are paired positionally below.
preproc = price_model.named_steps['prep']
feature_names = preproc.get_feature_names_out()
clf = price_model.named_steps['clf']
importances = clf.feature_importances_

# Positional pairing is only meaningful when both arrays have the same length;
# fail early with a message that names the mismatch.
if len(feature_names) != len(importances):
    raise ValueError(
        f"feature name / importance length mismatch: "
        f"{len(feature_names)} names vs {len(importances)} importances"
    )

# `pd` comes from the notebook's import cell, so pandas is not re-imported here.
# Keep the 30 most important features, highest first.
imp = (
    pd.DataFrame({'feature': feature_names, 'importance': importances})
    .sort_values('importance', ascending=False)
    .head(30)
)
imp


In [None]:
# `imp` is sorted by descending importance, so head(20) keeps the 20 largest.
# Re-sorting ascending puts the largest bar at the top of the horizontal chart.
top = imp.head(20).sort_values('importance', ascending=True)
fig_imp, ax_imp = plt.subplots()
ax_imp.barh(top['feature'], top['importance'])
ax_imp.set_title('M2 feature importance (top 20)')
ax_imp.set_xlabel('importance')
ax_imp.set_ylabel('feature')
fig_imp.tight_layout()
plt.show()


In [None]:
from xgboost import plot_tree

# Attach readable feature names to a *copy* of the booster so that plotting does
# not modify the fitted model inside price_model, which is used again later.
booster = price_model.named_steps['clf'].get_booster().copy()
feature_names = price_model.named_steps["prep"].get_feature_names_out().tolist()
booster.feature_names = feature_names

# Create one figure at its final size and pass its axes to plot_tree.
# Without `ax`, plot_tree opens its own figure, so a figure created beforehand
# with plt.figure(...) is left empty and rendered as a blank output.
tree_fig, tree_ax = plt.subplots(figsize=(150, 100))
plot_tree(booster, tree_idx=0, ax=tree_ax)
tree_ax.set_title('M2 — Tree 0')
plt.show()

In [None]:
# Optimize offers (probability-ranked; one price per add-on; price ≤ list per add-on)
ctx = pd.DataFrame([{
    'booking_id': 'B_demo',
    'route_od': 'ORD_SFO',
    'flight_duration_min': 270,
    'dep_hour_local': 9,
    'pax_count': 2,
    'days_to_departure': 14,
    'payment_type': 'credit_card',
    'loyalty_tier': 'Gold',
    'season': 'Q4',
    'purchased_any_addon': 0,
    'used_upgrade': 0,
}])
addon_costs = {k: v['cost'] for k, v in ADDON_META.items()}
price_list_map = {k: float(v['base_price']) for k, v in ADDON_META.items()}
offers = optimize_offers(
    context_rows=ctx,
    propensity_model=propensity_model,
    price_model=price_model,
    price_grid=PRICE_BUCKETS,
    policy=Policy(min_margin_pct=0.1, max_discount_pct=0.5),
    addon_costs=addon_costs,
    addon_candidates=list(ADDON_META.keys()),
    top_k=2,
    list_price_map=price_list_map,
)
offers


Notes to self
- TODO: the payment method field should capture the type of card.
- TODO: why is `booking_id` needed in the optimization step?
- TODO: what if we wanted separate pricing buckets for each item — or defined the buckets by discount rate rather than by absolute price?