# 🎯 强推推荐排序实验说明
本实验模拟一个“强推场景”（平台有较强干预推荐曝光），构建真实兴趣与从众点击的标签，结合无偏排序方法进行训练。

### 模型流程概述
1. **构造模拟数据**：包含点击、停留时长、是否加入购物车、曝光概率等。
2. **生成去噪标签（denoised_label）**：仅保留真实兴趣点击为正例。
3. **构造无偏点击对（Debiased Pairwise Training）**：保留 interest > non-interest 的排序对。
4. **加入 IPW（曝光反比权重）样本权重**：用于降低曝光偏差。
5. **使用 LightGBM + LambdaMART 训练模型**：排序目标函数为 NDCG，评估 AUC/NDCG/MAP。

⚠️ 若直接训练排序模型而不去偏，会学习到“从众”信号导致排序偏差。
→ 所以我们先清洗出更准确的训练对，再做 LambdaMART 排序建模。

In [1]:

import numpy as np
import pandas as pd
import lightgbm as lgb
from sklearn.metrics import roc_auc_score, average_precision_score

np.random.seed(42)
n_users, n_items_per_user = 100, 8
rows = []
for user_id in range(n_users):
    for item_id in range(n_items_per_user):
        is_interest = np.random.rand() < 0.3
        is_conformity = not is_interest and np.random.rand() < 0.4
        click = int(is_interest or is_conformity)
        stay_time = np.random.randint(5, 300) if click else np.random.randint(0, 20)
        add_to_cart = int(is_interest and np.random.rand() < 0.5)
        exposure_prob = np.random.uniform(0.2, 1.0)
        rows.append({
            "user_id": user_id,
            "item_id": item_id,
            "click": click,
            "stay_time": stay_time,
            "add_to_cart": add_to_cart,
            "exposure_prob": exposure_prob,
            "is_interest": int(is_interest),
            "is_conformity": int(is_conformity)
        })
df = pd.DataFrame(rows)
df["denoised_label"] = df["is_interest"]
df["ipw_weight"] = 1.0 / df["exposure_prob"]
df.head()


Unnamed: 0,user_id,item_id,click,stay_time,add_to_cart,exposure_prob,is_interest,is_conformity,denoised_label,ipw_weight
0,0,0,0,10,0,0.823753,0,0,0,1.213956
1,0,1,0,10,0,0.567399,0,0,0,1.762428
2,0,2,1,135,0,0.216468,0,1,0,4.619629
3,0,3,0,5,0,0.200623,0,0,0,4.984473
4,0,4,0,11,0,0.21845,0,0,0,4.577708


In [2]:

def get_group(df):
    return df.groupby("user_id").size().to_list()

def ndcg_at_k(r, k):
    r = np.asarray(r, dtype=np.float32)[:k]
    if r.size:
        dcg = np.sum(r / np.log2(np.arange(2, r.size + 2)))
        idcg = np.sum(sorted(r, reverse=True) / np.log2(np.arange(2, r.size + 2)))
        return dcg / idcg if idcg > 0 else 0.
    return 0.

def evaluate(model, df, label_col):
    df["score"] = model.predict(df[["user_id", "item_id"]])
    auc = roc_auc_score(df[label_col], df["score"])
    ap = average_precision_score(df[label_col], df["score"])
    ndcgs = []
    for _, group in df.groupby("user_id"):
        ranked = group.sort_values("score", ascending=False)
        ndcgs.append(ndcg_at_k(ranked[label_col].values, 5))
    return auc, np.mean(ndcgs), ap

train_df = df.sample(frac=0.8, random_state=42)
test_df = df.drop(train_df.index)
group_train = get_group(train_df)
group_test = get_group(test_df)

params = {
    "objective": "lambdarank",
    "metric": "ndcg",
    "learning_rate": 0.1,
    "boosting_type": "gbdt",
    "verbosity": -1
}

# Baseline
train_set = lgb.Dataset(train_df[["user_id", "item_id"]], label=train_df["click"], group=group_train, free_raw_data=False)
test_set = lgb.Dataset(test_df[["user_id", "item_id"]], label=test_df["click"], group=group_test, reference=train_set)
model_baseline = lgb.train(params, train_set, num_boost_round=100, valid_sets=[test_set],
                           callbacks=[lgb.early_stopping(stopping_rounds=10)])
auc1, ndcg1, map1 = evaluate(model_baseline, test_df, "click")

# Debiased
train_set_denoised = lgb.Dataset(train_df[["user_id", "item_id"]], label=train_df["denoised_label"], group=group_train, free_raw_data=False)
test_set_denoised = lgb.Dataset(test_df[["user_id", "item_id"]], label=test_df["denoised_label"], group=group_test, free_raw_data=False)
model_denoised = lgb.train(params, train_set_denoised, num_boost_round=100, valid_sets=[test_set_denoised],
                           callbacks=[lgb.early_stopping(stopping_rounds=10)])
auc2, ndcg2, map2 = evaluate(model_denoised, test_df, "denoised_label")

# Debiased + IPW
train_set_ipw = lgb.Dataset(train_df[["user_id", "item_id"]], label=train_df["denoised_label"],
                            group=group_train, weight=train_df["ipw_weight"], free_raw_data=False)
model_ipw = lgb.train(params, train_set_ipw, num_boost_round=100, valid_sets=[test_set_denoised],
                      callbacks=[lgb.early_stopping(stopping_rounds=10)])
auc3, ndcg3, map3 = evaluate(model_ipw, test_df, "denoised_label")

print(f"Baseline (Click Label):       AUC = {auc1:.4f}, NDCG@5 = {ndcg1:.4f}, MAP = {map1:.4f}")
print(f"Debiased (Denoised Label):    AUC = {auc2:.4f}, NDCG@5 = {ndcg2:.4f}, MAP = {map2:.4f}")
print(f"Debiased + IPW (Our Method):  AUC = {auc3:.4f}, NDCG@5 = {ndcg3:.4f}, MAP = {map3:.4f}")


Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[3]	valid_0's ndcg@1: 0.8625	valid_0's ndcg@2: 0.911086	valid_0's ndcg@3: 0.921683	valid_0's ndcg@4: 0.938016	valid_0's ndcg@5: 0.939904
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[3]	valid_0's ndcg@1: 0.8125	valid_0's ndcg@2: 0.869491	valid_0's ndcg@3: 0.91607	valid_0's ndcg@4: 0.921453	valid_0's ndcg@5: 0.921453
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's ndcg@1: 0.85	valid_0's ndcg@2: 0.892484	valid_0's ndcg@3: 0.918898	valid_0's ndcg@4: 0.932966	valid_0's ndcg@5: 0.932966
Baseline (Click Label):       AUC = 0.4992, NDCG@5 = 0.7418, MAP = 0.5910
Debiased (Denoised Label):    AUC = 0.5144, NDCG@5 = 0.4098, MAP = 0.3195
Debiased + IPW (Our Method):  AUC = 0.4759, NDCG@5 = 0.4216, MAP = 0.2883
