In [1]:
import tensorflow as tf
from tensorflow import keras
from keras import layers
import tensorflow.keras.backend as K
import pandas as pd
import numpy as np
import pickle as pkl

In [22]:
data_path = 'C:/Users/40107904/OneDrive - Anheuser-Busch InBev/ABI/WORK/hackathon_power/hackathon_lt_equity/dummy_data/processed_data/preprocessed_data_2.pkl'

with open(data_path, 'rb') as f:
    data_dict = pkl.load(f)

print(data_dict.keys())

dict_keys(['input_data', 'output_data'])


# Data processing

In [23]:
print(data_dict["input_data"]["vehicle_id"].shape)

(6, 1)


In [32]:
# train ds
num_price_control_features = data_dict["input_data"]["price_controls"].shape[1]
num_vehicle_control_features = data_dict["input_data"]["vehicle_controls"].shape[1]
num_macro_control_features = data_dict["input_data"]["macro_controls"].shape[1]

inputs_dict = {
    "time_idx": data_dict["input_data"]["time_idx"],
    "month_sin": data_dict["input_data"]["month_sin"],
    "month_cos": data_dict["input_data"]["month_cos"],
    "country_id": data_dict["input_data"]["country_id"],
    "brand_id": data_dict["input_data"]["brand_id"],
    "price_controls": data_dict["input_data"]["price_controls"],
    "vehicle_mix": data_dict["input_data"]["vehicle_controls"],
    "total_spend": data_dict["input_data"]["total_spend"],   
    "macro_controls": data_dict["input_data"]["macro_controls"],
}

targets = (data_dict["input_data"]["y_true"]/100, data_dict["input_data"]["group_id"])

BATCH_SIZE = 15
train_ds = tf.data.Dataset.from_tensor_slices((inputs_dict, targets))
train_ds = train_ds.batch(BATCH_SIZE) 

In [33]:
# predict_x
predict_x = {
    "time_idx": data_dict["output_data"]["time_idx"],
    "month_sin": data_dict["output_data"]["month_sin"],
    "month_cos": data_dict["output_data"]["month_cos"],
    "country_id": data_dict["output_data"]["country_id"],
    "brand_id": data_dict["output_data"]["brand_id"],
    "price_controls": data_dict["output_data"]["price_controls"],
    "vehicle_mix": data_dict["output_data"]["vehicle_controls"],
    "total_spend": data_dict["output_data"]["total_spend"],
    "macro_controls": data_dict["output_data"]["macro_controls"],
}

test_targets = (data_dict["output_data"]["y_true"]/100, data_dict["output_data"]["group_id"])

# Model Arch

In [64]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np

# -----------------------
# Hyperparams
# -----------------------
COUNTRY_VOCAB = 2
BRAND_VOCAB = 10
COUNTRY_EMB = 4
BRAND_EMB = 10
TIME_HIDDEN = 128
CAT_HIDDEN = 32
CONTROL_HIDDEN = 256
DROPOUT = 0.1
VEHICLE_COUNT = 6
VEHICLE_EMB = 8


# -----------------------
# Inputs
# -----------------------
time_idx_in = layers.Input(shape=(1,), dtype="int32", name="time_idx")
month_sin_in = layers.Input(shape=(1,), dtype="float32", name="month_sin")
month_cos_in = layers.Input(shape=(1,), dtype="float32", name="month_cos")

country_in = layers.Input(shape=(1,), dtype="int32", name="country_id")
brand_in = layers.Input(shape=(1,), dtype="int32", name="brand_id")

# vehicle id not required if we pass mix; keep only if you have per-row vehicle id
vehicle_id_in = layers.Input(shape=(1,), dtype="int32", name="vehicle_id")  # optional

# controls split by logical groups
price_controls_in = layers.Input(shape=(num_price_control_features,), dtype="float32", name="price_controls")
macro_controls_in = layers.Input(shape=(num_macro_control_features,), dtype="float32", name="macro_controls")

# total spend and vehicle mix separate
total_spend_in = layers.Input(shape=(1,), dtype="float32", name="total_spend")
vehicle_mix_in = layers.Input(shape=(VEHICLE_COUNT,), dtype="float32", name="vehicle_mix")  # fractions sum to 1

# -----------------------
# Time subnet
# -----------------------
time_float = layers.Lambda(lambda x: tf.cast(x, tf.float32))(time_idx_in)
t = layers.Concatenate()([time_float, month_sin_in, month_cos_in])
t = layers.Dense(TIME_HIDDEN, activation="relu")(t)
t = layers.Dropout(DROPOUT)(t)
t = layers.Dense(TIME_HIDDEN // 2, activation="relu")(t)
time_out = layers.Dense(1, activation=None, name="time_out")(t)  # (batch,1)

# -----------------------
# Country subnet
# -----------------------
c_emb = layers.Embedding(input_dim=COUNTRY_VOCAB, output_dim=COUNTRY_EMB, name="country_emb")(country_in)
c = layers.Flatten()(c_emb)
c = layers.Dense(CAT_HIDDEN, activation="relu")(c)
country_out = layers.Dense(1, activation=None, name="country_out")(c)

# -----------------------
# Brand subnet
# -----------------------
b_emb = layers.Embedding(input_dim=BRAND_VOCAB, output_dim=BRAND_EMB, name="brand_emb")(brand_in)
b = layers.Flatten()(b_emb)
b = layers.Dense(CAT_HIDDEN, activation="relu")(b)
brand_out = layers.Dense(1, activation=None, name="brand_out")(b)

# -----------------------
# Controls subnets (price / macro controls) -> scalar each
# -----------------------
# Price controls
p = layers.Dense(CONTROL_HIDDEN, activation="relu")(price_controls_in)
p = layers.Dropout(DROPOUT)(p)
p = layers.Dense(CONTROL_HIDDEN // 2, activation="relu")(p)
p_out = layers.Dense(1, activation=None, name="price_ctrl_out")(p)

# Macro controls
m = layers.Dense(CONTROL_HIDDEN, activation="relu")(macro_controls_in)
m = layers.Dropout(DROPOUT)(m)
m = layers.Dense(CONTROL_HIDDEN // 2, activation="relu")(m)
m_out = layers.Dense(1, activation=None, name="macro_ctrl_out")(m)

# -----------------------
# Vehicle mix -> learned effect
# (simple approach: learn mapping from mix vector to an effect scalar)
# -----------------------
vm = layers.Dense(VEHICLE_EMB, activation="relu", name="vehicle_mix_emb")(vehicle_mix_in)  # (batch, emb)
vehicle_effect = layers.Dense(1, activation=None, name="vehicle_effect")(vm)              # (batch,1)

# combine total_spend and vehicle_effect -> per-row vehicle contribution
vehicle_spend_out = layers.Multiply(name="vehicle_spend_out")([total_spend_in, vehicle_effect])  # (batch,1)

# combine p and vehicle controls
business_ctrl_concat = layers.Concatenate(name="business_ctrl_concat")([p_out, vehicle_spend_out])  # (batch,2)
business_ctrl_hidden = layers.Dense(8, activation=None, name="business_ctrl_hidden")(business_ctrl_concat)  # (batch,1)
business_ctrl_out = layers.Dense(1, activation=None, name="business_ctrl_out")(business_ctrl_hidden)  # (batch,1)


# -----------------------
# Baseline and variability split (explicit 90/10)
# -----------------------
baseline = layers.Add(name="baseline")([time_out, country_out, brand_out, m_out])  # (batch,1)
variability = layers.Lambda(lambda x: 0.1 * x, name="variability")(business_ctrl_out)  # enforce ~10% effect scale

# final additive logit
add_logit = layers.Add(name="add_logit")([baseline, variability])  # (batch,1)

# -----------------------
# Build model
# -----------------------
model = keras.Model(
    inputs=[
        time_idx_in, month_sin_in, month_cos_in,
        country_in, brand_in,
        price_controls_in, vehicle_mix_in, macro_controls_in,
        total_spend_in
    ],
    outputs=[add_logit, business_ctrl_out],
    name="gam_vehicle_mix_model"
)

model.summary()

In [65]:
def fractions_to_logits_tf(y_true, group_ids, eps=1e-8):
    """
    Convert true fractions (per group) to centered logits.
    Args:
        y_true:    (N,) float32 tensor, fractions per row (sum=1 per group)
        group_ids: (N,) int32 tensor, group index per row
        eps:       small constant to avoid log(0)
    Returns:
        logits: (N,) float32 tensor
    """
    y_true = tf.reshape(y_true, [-1])
    group_ids = tf.reshape(group_ids, [-1])

    # clip to avoid log(0)
    y_true = tf.clip_by_value(y_true, eps, 1.0)

    # raw logits = log(p)
    raw_logits = tf.math.log(y_true)

    # subtract group mean so logits are centered (softmax is shift-invariant)
    num_groups = tf.reduce_max(group_ids) + 1
    group_means = tf.math.unsorted_segment_mean(raw_logits, group_ids, num_groups)
    centered_logits = raw_logits - tf.gather(group_means, group_ids)

    return centered_logits


def grouped_loss(y_true, logits, group_ids, ctrl_out, num_brands, ctrl_reg=1e-3, smooth=0.05, logit_reg=1e-4):
    """
    Grouped softmax loss with:
      - label smoothing to avoid 0/100 collapse
      - logit penalty to prevent extreme values
      - control penalty to keep controls small

    y_true:   (N,) float32, original KPI values (e.g. 0–100 scale)
    logits:   (N,) float32, model outputs before softmax
    group_ids:(N,) int32, group index for each row
    ctrl_out: (N,1) float32, control contribution
    num_brands: int, number of brands per group
    """

    y_true = tf.reshape(y_true, [-1])
    logits = tf.reshape(logits, [-1])
    group_ids = tf.reshape(group_ids, [-1])

    num_groups = tf.reduce_max(group_ids) + 1
    eps = 0

    # normalize y_true within each group → fractions
    group_sum_true = tf.math.unsorted_segment_sum(y_true, group_ids, num_groups)
    true_frac = y_true / (tf.gather(group_sum_true, group_ids) + eps)

    # label smoothing
    true_frac = true_frac * (1.0 - smooth) + smooth / float(num_brands)

    # predicted fractions (per-group softmax)
    logits = tf.clip_by_value(logits, -20.0, 20.0)  # stability
    exp_logits = tf.exp(logits - tf.reduce_max(logits))
    seg_sum = tf.math.unsorted_segment_sum(exp_logits, group_ids, num_groups)
    pred_frac = exp_logits / (tf.gather(seg_sum, group_ids) + eps)

    # mean squared error on fractions
    # mse = tf.reduce_mean(tf.square(pred_frac - true_frac))
    mse = tf.reduce_mean(tf.square(logits - fractions_to_logits_tf(y_true, group_ids)))

    # penalties
    ctrl_pen = tf.reduce_mean(tf.square(ctrl_out)) * ctrl_reg
    logit_pen = tf.reduce_mean(tf.square(logits)) * logit_reg

    return 100 * mse + ctrl_pen + logit_pen

In [66]:
# train
optimizer = keras.optimizers.Adam(learning_rate=0.005)

@tf.function
def train_step(batch_x, y_true, group_ids):
    with tf.GradientTape() as tape:
        logits, business_ctrl_out = model(batch_x, training=True)
        # loss_value = grouped_loss_old(y_true, logits, group_ids, ctrl_out, ctrl_reg=1e-3)
        # loss_value = simple_loss(y_true, logits, ctrl_out, ctrl_reg=1e-1)
        # loss_value = mse_loss(y_true, logits)
        loss_value = grouped_loss(
            y_true=y_true,
            logits=logits,
            group_ids=group_ids,
            ctrl_out=business_ctrl_out,
            num_brands=5,
            ctrl_reg=1e-3,
            smooth=0.005,
            logit_reg=1e-4
        )
    grads = tape.gradient(loss_value, model.trainable_weights)
    optimizer.apply_gradients(zip(grads, model.trainable_weights))
    return loss_value


for epoch in range(100):
    epoch_losses = []
    for (batch_x, (y_true, group_ids)) in train_ds:  # your tf.data must yield this structure
        loss_value = train_step(batch_x, y_true, group_ids)
        epoch_losses.append(loss_value.numpy())
    print(f"Epoch {epoch+1}: loss = {np.mean(epoch_losses):.4f}", flush=True)

Epoch 1: loss = 118.4314
Epoch 2: loss = 46.0285
Epoch 3: loss = 48.0769
Epoch 4: loss = 79.5705
Epoch 5: loss = 75.3216
Epoch 6: loss = 56.9324
Epoch 7: loss = 7.6695
Epoch 8: loss = 7.4509
Epoch 9: loss = 4.4700
Epoch 10: loss = 6.6304
Epoch 11: loss = 3.8364
Epoch 12: loss = 3.8822
Epoch 13: loss = 3.3279
Epoch 14: loss = 3.7241
Epoch 15: loss = 3.2489
Epoch 16: loss = 2.5452
Epoch 17: loss = 2.3982
Epoch 18: loss = 2.7982
Epoch 19: loss = 2.2117
Epoch 20: loss = 2.6856
Epoch 21: loss = 2.4501
Epoch 22: loss = 3.4054
Epoch 23: loss = 2.8695
Epoch 24: loss = 2.6455
Epoch 25: loss = 2.3469
Epoch 26: loss = 2.5170
Epoch 27: loss = 2.3339
Epoch 28: loss = 1.9923
Epoch 29: loss = 2.2700
Epoch 30: loss = 1.9231
Epoch 31: loss = 1.7204
Epoch 32: loss = 1.8190
Epoch 33: loss = 1.7038
Epoch 34: loss = 2.2344
Epoch 35: loss = 1.8107
Epoch 36: loss = 1.3918
Epoch 37: loss = 1.8315
Epoch 38: loss = 1.8320
Epoch 39: loss = 1.5131
Epoch 40: loss = 1.6677
Epoch 41: loss = 1.5614
Epoch 42: loss = 1

In [67]:
# predict
logits, ctrl_out = model(predict_x, training=False)
logits = logits.numpy().reshape(-1)
print(logits)

[ 1.1822087   0.24918742 -0.26936042 -0.7267201  -0.23957261  1.2047836
  0.2522481  -0.26516527 -0.72145385 -0.25170484  1.181187    0.26207957
 -0.27584302 -0.7078216  -0.23885056]


In [68]:
def logits_to_percentages(logits, group_ids):
    """
    Convert logits -> per-group percentages (summing to 100).
    
    logits:    (N,) array of raw model outputs
    group_ids: (N,) array of group IDs (int)
    """
    logits = np.asarray(logits).reshape(-1)
    group_ids = np.asarray(group_ids).reshape(-1)

    softmax = np.zeros_like(logits, dtype=float)

    for gid in np.unique(group_ids):
        mask = (group_ids == gid)
        group_logits = logits[mask]

        # stable softmax within group
        exp_logits = np.exp(group_logits - np.max(group_logits))
        softmax[mask] = exp_logits / exp_logits.sum()



    return (softmax * 100)  # percentages, rounded to 1 decimal


pred_pct = logits_to_percentages(logits, data_dict["output_data"]["group_id"])

In [69]:
num_brands = data_dict["output_data"]["brand_id"].max() + 1

In [70]:
pred_pct = pred_pct.reshape(-1,num_brands)
for i in range(len(pred_pct)):
    pred_pct[i] = pred_pct[i] / pred_pct[i].sum() * 100

pred_pct = pred_pct.reshape(-1)

In [71]:
# model predicted powers
print(pred_pct.reshape(-1,num_brands))  

[[49.57639575 19.50154305 11.61091775  7.34916106 11.9619824 ]
 [50.13919659 19.34178246 11.52887015  7.30504942 11.68510138]
 [49.38933735 19.70016732 11.50411261  7.4687372  11.93764552]]


In [72]:
print(pred_pct.reshape(-1,num_brands).sum(axis=1))  # should be all 100s

[100. 100. 100.]


In [73]:
# true powers
y_pred_true = data_dict["output_data"]["y_true"]
print(y_pred_true.reshape(-1,num_brands))

[[46.9 19.2 13.3  9.1 11.5]
 [46.9 19.2 13.3  9.1 11.5]
 [46.4 19.  13.5  9.3 11.8]]


In [74]:
# rmse skill
rmse = np.sqrt(np.mean((pred_pct - y_pred_true)**2))
print(f"Test RMSE: {rmse:.4f}")
standard_deviation = np.std(y_pred_true)
print(f"Standard Deviation of true values: {standard_deviation:.4f}")
rmse_skill = max(0, min(1, 1-(rmse/standard_deviation)))
print(rmse_skill)

Test RMSE: 1.7715
Standard Deviation of true values: 13.7655
0.8713080094110072


In [75]:
# trend hit by brand
num_brands = data_dict["output_data"]["brand_id"].max() + 1

trend_hits = 0
total_trends = (len(y_pred_true) / num_brands - 1 ) * num_brands

reshaped_pred = pred_pct.reshape(-1, num_brands)
reshaped_true = y_pred_true.reshape(-1, num_brands)

reshaped_pred_diff = np.diff(reshaped_pred, axis=0)
reshaped_true_diff = np.diff(reshaped_true, axis=0)

reshaped_pred_sign = np.sign(reshaped_pred_diff)
reshaped_true_sign = np.sign(reshaped_true_diff)

trend_hits = np.sum(reshaped_pred_sign == reshaped_true_sign)

trend_hit_rate = trend_hits / total_trends
print(f"Total Trends: {total_trends}")
print(f"Trend Hits: {trend_hits}")
print(f"Trend Hit Rate: {trend_hit_rate:.4f}")

Total Trends: 10.0
Trend Hits: 3
Trend Hit Rate: 0.3000


In [76]:
# final score
final_score = 0.5 * (rmse_skill + trend_hit_rate)
print(f"Final Score: {final_score:.4f}")

Final Score: 0.5857
