# 📈 CTR/CVR Modeling for Meal Ads

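This notebook trains two classifiers on simulated meal-ad interaction data: a Click-Through Rate (CTR) model that predicts whether a shown ad is clicked, and a Conversion Rate (CVR) model that predicts whether a click leads to a purchase. The CVR model is trained only on interactions that were clicked.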

In [2]:
import pandas as pd

# Read the meal catalogue and the logged ad interactions from the data folder.
meals = pd.read_csv("../data/meals.csv")
interactions = pd.read_csv("../data/interactions.csv")

# Quick sanity check: report (rows, columns) for each frame.
for label, frame in (("Meals shape:", meals), ("Interactions shape:", interactions)):
    print(label, frame.shape)

# Preview the first few interaction rows.
interactions.head()

Meals shape: (20, 6)
Interactions shape: (300, 6)


Unnamed: 0,user_id,meal_id,category,price,clicked,purchased
0,user_37,20,Dessert,56.21,1,0
1,user_36,5,Pizza,44.04,1,0
2,user_48,13,Cajun,104.92,1,0
3,user_36,2,Cajun,115.56,1,0
4,user_31,5,Pizza,44.04,0,0


In [3]:
# Merge meal features into interactions.
# `category` and `price` exist in both frames, so pandas suffixes them:
# `category_x` / `price_x` come from `interactions`, `category_y` / `price_y` from `meals`.
# Later cells read the `_x` columns, so the suffixed names are kept for now.
# TODO: Clean this up to not duplicate categories, prices, etc.
#
# validate="many_to_one" makes pandas raise a MergeError if `meal_id` is not unique
# in `meals`; without it a duplicated meal would silently multiply interaction rows
# and distort both the CTR and CVR training sets.
data = interactions.merge(meals, on="meal_id", how="left", validate="many_to_one")
data.head()

Unnamed: 0,user_id,meal_id,category_x,price_x,clicked,purchased,title,category_y,price_y,rating,restaurant
0,user_37,20,Dessert,56.21,1,0,Falafel Platter,Dessert,56.21,4.4,Restaurant T
1,user_36,5,Pizza,44.04,1,0,Chicago Deep Dish Pizza,Pizza,44.04,4.5,Restaurant E
2,user_48,13,Cajun,104.92,1,0,Sourdough Bread,Cajun,104.92,4.1,Restaurant M
3,user_36,2,Cajun,115.56,1,0,Vegan Lasagna,Cajun,115.56,4.1,Restaurant B
4,user_31,5,Pizza,44.04,0,0,Chicago Deep Dish Pizza,Pizza,44.04,4.5,Restaurant E


In [8]:
# Simple feature engineering
from sklearn.preprocessing import LabelEncoder

# Learn the category -> integer mapping from the interaction-side category column,
# then store the resulting codes as a new numeric column on `data`.
le = LabelEncoder()
le.fit(data['category_x'])
data['category_encoded'] = le.transform(data['category_x'])

# The same three input columns feed both the CTR and the CVR model.
features = [
    "price_x",
    "rating",
    "category_encoded",
]


In [9]:
from sklearn.model_selection import train_test_split

# CTR uses every interaction row: the target is whether the ad was clicked.
y_ctr = data["clicked"]
X_ctr = data[features]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
(
    X_train_ctr,
    X_test_ctr,
    y_train_ctr,
    y_test_ctr,
) = train_test_split(X_ctr, y_ctr, test_size=0.2, random_state=42)


In [10]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import roc_auc_score

# Seed the estimator (same seed as the train/test split) so that re-running the
# notebook reproduces the same fitted model and AUC. Without a fixed random_state,
# ties between equally good splits can be resolved differently from run to run.
ctr_model = GradientBoostingClassifier(random_state=42)
ctr_model.fit(X_train_ctr, y_train_ctr)

# predict_proba returns one column per class; with 0/1 labels, column 1 is the
# predicted probability of a click.
ctr_preds = ctr_model.predict_proba(X_test_ctr)[:, 1]

# ROC AUC on the held-out rows; 0.5 corresponds to ranking no better than chance.
ctr_auc = roc_auc_score(y_test_ctr, ctr_preds)
print(f"CTR AUC: {ctr_auc:.4f}")


CTR AUC: 0.4188


In [11]:
# CVR: conversion is only defined after a click, so keep the clicked rows only.
data_cvr = data.loc[data["clicked"].eq(1)]

# Target is whether the clicked ad led to a purchase; inputs are the shared feature set.
y_cvr = data_cvr["purchased"]
X_cvr = data_cvr[features]

# Hold out 20% of the clicked rows for evaluation, using a fixed seed.
(
    X_train_cvr,
    X_test_cvr,
    y_train_cvr,
    y_test_cvr,
) = train_test_split(X_cvr, y_cvr, test_size=0.2, random_state=42)


In [12]:
# Seed the estimator (same seed as the train/test split) so that re-running the
# notebook reproduces the same fitted model and AUC. Without a fixed random_state,
# ties between equally good splits can be resolved differently from run to run.
cvr_model = GradientBoostingClassifier(random_state=42)
cvr_model.fit(X_train_cvr, y_train_cvr)

# predict_proba returns one column per class; with 0/1 labels, column 1 is the
# predicted probability of a purchase given a click.
cvr_preds = cvr_model.predict_proba(X_test_cvr)[:, 1]

# ROC AUC on the held-out clicked rows; 0.5 corresponds to ranking no better than chance.
cvr_auc = roc_auc_score(y_test_cvr, cvr_preds)
print(f"CVR AUC: {cvr_auc:.4f}")


CVR AUC: 0.6641


In [13]:
import os

import joblib

# Save models for inference on the streamlit application.
# Create the output folder first: joblib.dump does not create missing parent
# directories, so on a fresh checkout without ../models the save would fail.
os.makedirs("../models", exist_ok=True)

# NOTE(review): the models consume `category_encoded`, which is produced by the
# LabelEncoder `le` fitted earlier in this notebook. That encoder is not saved here,
# so the app presumably has to reproduce the same category -> integer mapping - confirm.
joblib.dump(ctr_model, "../models/ctr_model.pkl")
joblib.dump(cvr_model, "../models/cvr_model.pkl")
print("Models saved.")


Models saved.
