In [None]:
import random
import os
import numpy as np
import torch

def seed_everything(seed):
    """
    Seed every random number generator used in this notebook.

    Seeds Python's ``random`` module, NumPy's global RNG and PyTorch (the
    default generator plus both CUDA seeding calls), exports
    ``PYTHONHASHSEED`` and switches cuDNN to deterministic, non-benchmark
    mode. A confirmation line is printed at the end.

    Args:
        seed: Value passed unchanged to each library's seeding function.
    """
    # Python's own RNG first, then the hash seed environment variable.
    random.seed(seed)
    # NOTE(review): presumably this only affects processes started after
    # this point, not the already-running interpreter — confirm.
    os.environ['PYTHONHASHSEED'] = str(seed)

    # NumPy and PyTorch (CPU, current CUDA device, all CUDA devices).
    remaining_seeders = (
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in remaining_seeders:
        apply_seed(seed)

    # cuDNN: deterministic kernels on, auto-tuner off.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

    print(f"üîí Locked Random Seed: {seed}")

# --- Call the function (seed_everything is invoked in a later cell) ---


In [None]:
#import
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import torch
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [None]:
# Load the Hillstrom-Men CSV and print a quick structural summary.
# The file location can be overridden through the HILLSTROM_MEN_CSV
# environment variable so the notebook is not tied to a single machine;
# the original local path remains the default.
HILLSTROM_MEN_CSV = os.environ.get(
    "HILLSTROM_MEN_CSV",
    r"C:\Users\Lenovo\Documents\Neu 2025-2026\Lab\Hillstrom-Men.csv",
)
df_men = pd.read_csv(HILLSTROM_MEN_CSV)
# NOTE(review): "Unnamed: 0" is presumably a row index written out when the
# CSV was saved — confirm. It is dropped so it is not used as a feature.
df_men = df_men.drop(columns="Unnamed: 0")

SECTION_RULE = "---------------------------"

# Missing values per column.
print(SECTION_RULE)
print("null count:")
print(df_men.isnull().sum())

# Column dtypes (object columns are the ones that need encoding later).
print(SECTION_RULE)
print(df_men.dtypes)

# Column names.
print(SECTION_RULE)
print("labels:")
print(df_men.columns.tolist())

# (rows, columns).
print(SECTION_RULE)
print("data shape:")
print(df_men.shape)


---------------------------
null count:
recency            0
history_segment    0
history            0
mens               0
womens             0
zip_code           0
newbie             0
channel            0
visit              0
conversion         0
spend              0
treatment          0
dtype: int64
---------------------------
recency              int64
history_segment     object
history            float64
mens                 int64
womens               int64
zip_code            object
newbie               int64
channel             object
visit                int64
conversion           int64
spend              float64
treatment            int64
dtype: object
---------------------------
labels:
['recency', 'history_segment', 'history', 'mens', 'womens', 'zip_code', 'newbie', 'channel', 'visit', 'conversion', 'spend', 'treatment']
---------------------------
data shape:
(42613, 12)


In [None]:
# Hillstrom-Men feature preparation: encode -> split -> scale -> numpy.

# Feature groups.
cate_cols = ['zip_code', 'channel']                    # one-hot encoded below
num_cols = ['recency', 'history', 'history_segment']   # standardized below

# Ordinal code for each history_segment label. All codes are integers
# (the previous mapping mixed str and int values).
history_segment_codes = {
    "1) $0 - $100": 1,
    "2) $100 - $200": 2,
    "3) $200 - $350": 3,
    "4) $350 - $500": 4,
    "5) $500 - $750": 5,
    "6) $750 - $1,000": 6,
    "7) $1,000 +": 7,
}
# Values that are not label keys are passed through unchanged, so re-running
# this cell on an already-encoded column is a no-op instead of producing NaN.
# An unknown, non-numeric label still fails loudly in astype(int).
df_men["history_segment"] = (
    df_men["history_segment"]
    .map(lambda label: history_segment_codes.get(label, label))
    .astype(int)
)

# Split into outcome (y), treatment flag (t) and features (x).
# "visit" and "conversion" are removed from the features along with
# the outcome and treatment columns.
y_men = df_men["spend"]
t_men = df_men["treatment"]
x_men = df_men.drop(columns=["spend", "treatment", "visit", "conversion"])

# One-hot encode the categorical columns. Cast to float rather than int:
# "history" is a float column and an int cast would truncate it before
# scaling.
x_men_encode = pd.get_dummies(x_men, columns=cate_cols, drop_first=True)
x_men_encode = x_men_encode.astype(float)

# Train / validation / test split, stratified on treatment at both stages:
# 60% train, then the remaining 40% is split 25/75 -> 10% val, 30% test.
x_men_train, x_men_test_val, t_men_train, t_men_test_val, y_men_train, y_men_test_val = train_test_split(
    x_men_encode, t_men.values, y_men.values,
    test_size=0.4, random_state=42, stratify=t_men,
)
x_men_val, x_men_test, t_men_val, t_men_test, y_men_val, y_men_test = train_test_split(
    x_men_test_val, t_men_test_val, y_men_test_val,
    test_size=0.75, random_state=42, stratify=t_men_test_val,
)

# Standardize the numeric columns. The scaler is fitted on the training
# split only and then applied to val/test. Explicit copies make sure the
# assignments below write to independent frames.
scaler = StandardScaler()
x_men_train = x_men_train.copy()
x_men_val = x_men_val.copy()
x_men_test = x_men_test.copy()
x_men_train[num_cols] = scaler.fit_transform(x_men_train[num_cols])
x_men_val[num_cols] = scaler.transform(x_men_val[num_cols])
x_men_test[num_cols] = scaler.transform(x_men_test[num_cols])

# Plain float arrays for the tensor conversion in the next cell.
x_men_train = x_men_train.values.astype(float)
x_men_val = x_men_val.values.astype(float)
x_men_test = x_men_test.values.astype(float)
print(x_men_train)

[[ 6.36764744e-01  9.81582400e-01  9.35637990e-01 ...  0.00000000e+00
   1.00000000e+00  0.00000000e+00]
 [ 6.36764744e-01 -9.60390610e-01 -8.34987151e-01 ...  0.00000000e+00
   1.00000000e+00  0.00000000e+00]
 [ 1.49199505e+00 -3.13066274e-01 -4.45449620e-01 ...  1.00000000e+00
   1.00000000e+00  0.00000000e+00]
 ...
 [-1.07369588e+00  3.34258063e-01 -6.37815337e-02 ...  1.00000000e+00
   0.00000000e+00  0.00000000e+00]
 [-1.35877265e+00  3.34258063e-01 -8.25973163e-04 ...  0.00000000e+00
   1.00000000e+00  0.00000000e+00]
 [-5.03542338e-01 -9.60390610e-01 -7.75966313e-01 ...  1.00000000e+00
   1.00000000e+00  0.00000000e+00]]


In [None]:
#Transform to tensor
def to_tensor(df):
    """Build a new ``torch.float32`` tensor from ``df``.

    Args:
        df: Data accepted by ``torch.tensor``; in this notebook the callers
            pass NumPy arrays.

    Returns:
        torch.Tensor: Tensor with dtype ``torch.float32`` holding the values
        of ``df``.
    """
    target_dtype = torch.float32
    converted = torch.tensor(df, dtype=target_dtype)
    return converted

# Features -> float32 tensors.
x_men_train_t = to_tensor(x_men_train)
x_men_val_t = to_tensor(x_men_val)
x_men_test_t = to_tensor(x_men_test)

# Outcome -> float32 tensors; unsqueeze(1) adds a trailing dimension so each
# split has shape (n, 1).
y_men_train_t = to_tensor(y_men_train).unsqueeze(1)
y_men_val_t = to_tensor(y_men_val).unsqueeze(1)
y_men_test_t = to_tensor(y_men_test).unsqueeze(1)

# Treatment flag -> float32 tensors of shape (n, 1).
t_men_train_t = to_tensor(t_men_train.astype(float)).unsqueeze(1)
t_men_val_t = to_tensor(t_men_val.astype(float)).unsqueeze(1)
t_men_test_t = to_tensor(t_men_test.astype(float)).unsqueeze(1)

# Datasets: every item is an (x, t, y) triple.
train_dataset = TensorDataset(x_men_train_t, t_men_train_t, y_men_train_t)
val_dataset = TensorDataset(x_men_val_t, t_men_val_t, y_men_val_t)
test_dataset = TensorDataset(x_men_test_t, t_men_test_t, y_men_test_t)

# Data loaders.
# With the split sizes printed below, batch_size is larger than the training
# set, so every loader yields a single batch per pass.
batch_size = 26000
# The tensors already live in memory, so loading happens in the main process.
# Worker processes (previously num_workers=4) would be started on every pass
# over a loader just to hand over that one batch.
num_workers = 0
train_loader = DataLoader(train_dataset, batch_size=batch_size, num_workers=num_workers, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, num_workers=num_workers, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, num_workers=num_workers, shuffle=False)

print ("-------------------------------------------------------------")
print ("‚úÖCompleted tranform to tensor‚úÖ")
print (f"Shape of train: x={x_men_train_t.shape}; y ={y_men_train_t.shape}; t={t_men_train_t.shape}")
print (f"Shape of val: x={x_men_val_t.shape}; y={y_men_val_t.shape}; t={t_men_val_t.shape}")
print (f"Shape of test: x={x_men_test_t.shape}; y={y_men_test_t.shape}; t={t_men_test_t.shape}")



-------------------------------------------------------------
‚úÖCompleted tranform to tensor‚úÖ
Shape of train: x=torch.Size([25567, 10]); y =torch.Size([25567, 1]); t=torch.Size([25567, 1])
Shape of val: x=torch.Size([4261, 10]); y=torch.Size([4261, 1]); t=torch.Size([4261, 1])
Shape of test: x=torch.Size([12785, 10]); y=torch.Size([12785, 1]); t=torch.Size([12785, 1])


Evaluation metrics

In [None]:
from metrics import auuc, auqc, lift, krcc

Build Model

In [None]:
from dragonnet import Dragonnet

In [None]:
# Sanity check before training: outcome mean/std, share of zero outcomes in
# the training split, and treated/control counts for train and test.
print(
    "üìä Data Distribution Check:",
    f"Y train: mean={y_men_train.mean():.4f}, std={y_men_train.std():.4f}",
    f"Y train zeros: {(y_men_train == 0).sum()} / {len(y_men_train)} ({(y_men_train == 0).sum()/len(y_men_train)*100:.1f}%)",
    f"\nTreatment balance:",
    f"  Train: {(t_men_train == 1).sum()} treated, {(t_men_train == 0).sum()} control",
    f"  Test:  {(t_men_test == 1).sum()} treated, {(t_men_test == 0).sum()} control",
    sep="\n",
)

üìä Data Distribution Check:
Y train: mean=1.0188, std=14.8554
Y train zeros: 25342 / 25567 (99.1%)

Treatment balance:
  Train: 12784 treated, 12783 control
  Test:  6392 treated, 6393 control


In [None]:
# Fix all RNG seeds right before the model is built and trained.
# NOTE(review): the saved output of this cell reports seed 5 while the code
# passes 6 — re-run the cell so output and code agree.
seed_everything(seed=6)

üîí Locked Random Seed: 5


In [None]:
# Build the Dragonnet model and fit it on the training loader, passing the
# validation loader as the second argument.
dragonnet = Dragonnet(
    input_dim=x_men_train_t.shape[1],  # number of feature columns
    epochs=100,
    ranking_lambda=10.0,
    alpha=1.0,
    beta=1.0,
    learning_rate=0.001,
)
dragonnet.fit(train_loader, val_loader)

print("Complete training")

üîÉüîÉüîÉBegin training DragonnetüîÉüîÉüîÉ
Epoch 1 | Train Loss: 146312544256.0000 | VAL LOSS: 7253051392.0000
Epoch 2 | Train Loss: 146231721984.0000 | VAL LOSS: 7250505216.0000
Epoch 3 | Train Loss: 146192678912.0000 | VAL LOSS: 7248637440.0000
Epoch 4 | Train Loss: 146167906304.0000 | VAL LOSS: 7247253504.0000
Epoch 5 | Train Loss: 146143444992.0000 | VAL LOSS: 7246117888.0000
Epoch 6 | Train Loss: 146112151552.0000 | VAL LOSS: 7245166080.0000
Epoch 7 | Train Loss: 146077777920.0000 | VAL LOSS: 7244262912.0000
Epoch 8 | Train Loss: 146043289600.0000 | VAL LOSS: 7243251712.0000
Epoch 9 | Train Loss: 146007965696.0000 | VAL LOSS: 7241920000.0000
Epoch 10 | Train Loss: 145968349184.0000 | VAL LOSS: 7240128000.0000
Epoch 11 | Train Loss: 145920999424.0000 | VAL LOSS: 7237747200.0000
Epoch 12 | Train Loss: 145863114752.0000 | VAL LOSS: 7234675712.0000
Epoch 13 | Train Loss: 145792565248.0000 | VAL LOSS: 7230633984.0000
Epoch 14 | Train Loss: 145702649856.0000 | VAL LOSS: 7224537088

In [None]:
# After training: predict on the full test tensor and inspect the range of
# each returned output. The fourth returned value is not used here.
(y0_pred, y1_pred, t_pred, _) = dragonnet.predict(x_men_test_t)

print(
    "\nüìä Model Output Check:",
    f"y0_pred: min={y0_pred.min():.4f}, max={y0_pred.max():.4f}, mean={y0_pred.mean():.4f}",
    f"y1_pred: min={y1_pred.min():.4f}, max={y1_pred.max():.4f}, mean={y1_pred.mean():.4f}",
    f"propensity_score:  min={t_pred.min():.4f}, max={t_pred.max():.4f}, mean={t_pred.mean():.4f}",
    sep="\n",
)

# Predicted uplift per row: y1 prediction minus y0 prediction, as a flat
# NumPy array.
uplift = y1_pred - y0_pred
uplift = uplift.numpy().flatten()
print(f"Uplift:  min={uplift.min():.4f}, max={uplift.max():.4f}, std={uplift.std():.4f}")

# A near-zero spread means every row gets practically the same uplift.
if 0.01 > uplift.std():
    print("‚ö†Ô∏è WARNING: Model is predicting almost constant uplift!")


üìä Model Output Check:
y0_pred: min=0.6358, max=0.6358, mean=0.6358
y1_pred: min=3.5270, max=7.8734, mean=5.2306
propensity_score:  min=0.0434, max=0.9951, mean=0.1809
Uplift:  min=2.8912, max=7.2377, std=0.4598


In [None]:
# Evaluate the trained model on the test split with the four uplift metrics
# imported from the local `metrics` module.
print ("Evaluating baselineüîÉüîÉüîÉ")
y0_pred, y1_pred, _, _ = dragonnet.predict(x_men_test_t)

# Predicted uplift per row, as a flat NumPy array.
uplift_pred = (y1_pred - y0_pred).numpy().flatten()

# Ground-truth outcome and treatment flag as flat NumPy arrays.
y_true = y_men_test_t.numpy().flatten()
t_true = t_men_test_t.numpy().flatten()

# Results go into *_score names. Assigning them to `auuc`, `auqc`, `lift`
# and `krcc` would overwrite the imported metric functions, and the next run
# of this cell would fail with "'numpy.float64' object is not callable".
# If the kernel already holds the overwritten names, re-run the
# `from metrics import ...` cell first.
auuc_score = auuc(y_true, t_true, uplift_pred, bins=100, plot=True)
auqc_score = auqc(y_true, t_true, uplift_pred, bins=100, plot=True)
lift_score = lift(y_true, t_true, uplift_pred, h=0.3, bins=100)
krcc_score = krcc(y_true, t_true, uplift_pred, bins=100)

print ("-"*40)
print ("AUUC: ", auuc_score)
print ("AUQC: ", auqc_score)
print ("Lift: ", lift_score)
print ("KRCC: ", krcc_score)

Evaluating baselineüîÉüîÉüîÉ


TypeError: 'numpy.float64' object is not callable