In [None]:
# Auto Reload Modules
%load_ext autoreload

In [5]:
import numpy as np
import torch
from torch.utils.data import DataLoader

from utils.data_loader import LogConfig, ItemConfig, load_log_table, load_item_table
from dataset.rl4rs_dataset import RL4RSDataset9Step
from train.train_dqn_basic import train_dqn_basic, TrainConfig

from eval.evaluate import evaluate


def split_df_log_train_forget_test(df_log, train_ratio=0.6, forget_ratio=0.2, test_ratio=0.2, seed=42):
    """Randomly partition the rows of ``df_log`` into train / forget / test frames.

    Row positions are shuffled with a NumPy ``Generator`` seeded by ``seed`` and
    then cut into three consecutive slices. The train and forget sizes are the
    truncated products ``int(len(df_log) * ratio)``; the test slice takes every
    remaining row, so truncation leftovers end up in the test split.

    Args:
        df_log: Table to split; only ``len()`` and ``.iloc`` are used.
        train_ratio: Fraction of rows for the train split.
        forget_ratio: Fraction of rows for the forget split.
        test_ratio: Fraction of rows for the test split (only used in the sum check).
        seed: Seed of the shuffling generator.

    Returns:
        Tuple ``(train, forget, test)``; each frame has a fresh 0..n-1 index.

    Raises:
        AssertionError: If the three ratios do not sum to 1 (tolerance 1e-9).
    """
    ratio_total = train_ratio + forget_ratio + test_ratio
    assert abs(ratio_total - 1.0) < 1e-9

    total_rows = len(df_log)
    order = np.arange(total_rows)
    np.random.default_rng(seed).shuffle(order)

    # Cumulative cut points inside the shuffled order.
    train_end = int(total_rows * train_ratio)
    forget_end = train_end + int(total_rows * forget_ratio)
    bounds = ((0, train_end), (train_end, forget_end), (forget_end, total_rows))

    return tuple(
        df_log.iloc[order[lo:hi]].reset_index(drop=True)
        for lo, hi in bounds
    )


def make_dataset(df_log_part, df_item):
    """Wrap one log partition and the item table in an ``RL4RSDataset9Step``.

    Every partition is built with the same fixed options (slate size 9, tier
    flags on, tier weights (1.0, 2.0, 4.0), history candidates excluded); their
    exact meaning is defined by ``RL4RSDataset9Step``.

    Args:
        df_log_part: Log rows for this partition.
        df_item: Item table shared by all partitions.

    Returns:
        The constructed ``RL4RSDataset9Step`` instance.
    """
    dataset_options = dict(
        slate_size=9,
        use_tier_flags=True,
        tier_weights=(1.0, 2.0, 4.0),
        exclude_history_candidates=True,
    )
    return RL4RSDataset9Step(df_log=df_log_part, df_item=df_item, **dataset_options)


def train_on(df_log_train, df_item, cfg):
    """Build a dataset/loader from ``df_log_train`` and run ``train_dqn_basic`` on it.

    Args:
        df_log_train: Log rows to train on.
        df_item: Item table passed to the dataset.
        cfg: Training config; ``cfg.batch_size`` sets the loader batch size and
            the whole object is forwarded to ``train_dqn_basic``.

    Returns:
        Whatever ``train_dqn_basic`` returns (used by the caller as the model).
    """
    train_ds = make_dataset(df_log_train, df_item)
    train_loader = DataLoader(
        train_ds,
        batch_size=cfg.batch_size,
        shuffle=True,
        num_workers=4,
        pin_memory=True,
    )

    # The network input sizes are read off the first sample's flattened tensors.
    first = train_ds[0]
    dims = {
        "state_dim": first["state"].numel(),
        "item_dim": first["item_vec"].numel(),
    }

    return train_dqn_basic(train_loader=train_loader, hidden_dim=256, cfg=cfg, **dims)


def eval_report(tag, model, test_ds, forget_ds, device, ks=(1, 3, 5, 9)):
    """Print ``evaluate`` results for ``model`` on the test and forget datasets.

    Args:
        tag: Label shown in the ``=== ... ===`` header line.
        model: Model handed to ``evaluate``.
        test_ds: Dataset reported under "Test:".
        forget_ds: Dataset reported under "Forget:".
        device: Forwarded to ``evaluate`` as ``device``.
        ks: Forwarded to ``evaluate`` as ``ks``.
    """
    print(f"\n=== {tag} ===")
    # Test is evaluated and printed before the forget set is evaluated.
    for label, dataset in (("Test:", test_ds), ("Forget:", forget_ds)):
        metrics = evaluate(model, dataset, device=device, ks=ks)
        print(label, metrics)


def main(
    log_path=r"E:\Kuliah\Kuliah\Kuliah\PRODI\Semester 7\ProSkripCode\data\raw\trainset.csv",
    item_path=r"E:\Kuliah\Kuliah\Kuliah\PRODI\Semester 7\ProSkripCode\data\raw\item_info.csv",
    max_rows=200,
    seed=42,
):
    """Compare an untrained and a trained basic DQN on test and forget splits.

    Loads the log and item tables, keeps the first ``max_rows`` log rows, splits
    them 60/20/20 into train/forget/test, then reports metrics for
    (A) the model returned with ``num_epochs=0`` and (B) the model trained with
    the base config.

    Args:
        log_path: CSV path given to ``LogConfig``. The default is the original
            machine-specific location; pass your own path on other machines.
        item_path: CSV path given to ``ItemConfig``; same caveat as ``log_path``.
        max_rows: Number of leading log rows to keep (quick-experiment cap);
            ``None`` keeps the whole table.
        seed: Seed used for the split and for torch's global RNG.
    """
    # Seed torch's global RNG so torch-driven randomness (e.g. DataLoader
    # shuffling) is repeatable between runs of this cell.
    torch.manual_seed(seed)

    log_cfg = LogConfig(path=log_path, slate_size=9, max_click_history=50)
    item_cfg = ItemConfig(path=item_path, item_vec_dim=None)

    df_log = load_log_table(log_cfg).reset_index(drop=True)
    df_item = load_item_table(item_cfg)

    if max_rows is not None:
        df_log = df_log.iloc[:max_rows].reset_index(drop=True)

    df_train, df_forget, df_test = split_df_log_train_forget_test(df_log, 0.6, 0.2, 0.2, seed=seed)

    test_ds = make_dataset(df_test, df_item)
    forget_ds = make_dataset(df_forget, df_item)

    cfg = TrainConfig(
        device="cuda" if torch.cuda.is_available() else "cpu",
        lr=1e-3,
        gamma=0.99,
        batch_size=256,
        num_epochs=15,
        target_update="hard",
        hard_update_interval=500,
        save_dir="weights",
        save_name="dqn_basic.pt"
    )

    # A: untrained baseline (skipped if train_dqn_basic does not support 0 epochs).
    # NOTE: any exception raised while training or evaluating A lands in this
    # handler, not only an unsupported num_epochs=0; the printed repr tells which.
    try:
        cfg_A = TrainConfig(
            device=cfg.device, lr=cfg.lr, gamma=cfg.gamma, batch_size=cfg.batch_size,
            num_epochs=0,
            target_update=cfg.target_update, hard_update_interval=cfg.hard_update_interval,
            save_dir=cfg.save_dir, save_name="dqn_untrained.pt"
        )
        q_A = train_on(df_train, df_item, cfg_A)
        eval_report("A (untrained)", q_A, test_ds, forget_ds, cfg.device)
    except Exception as e:
        print("\n(A untrained diskip) train_dqn_basic tidak support num_epochs=0:", repr(e))

    # B: trained
    q_B = train_on(df_train, df_item, cfg)
    eval_report("B (trained)", q_B, test_ds, forget_ds, cfg.device)


# Entry point: run the experiment when this code executes as the main module.
if __name__ == "__main__":
    main()


Saved model to weights\dqn_untrained.pt

=== A (untrained) ===




Test: {'num_steps': 360.0, 'hit@1': 0.6138888888888889, 'ndcg@1': 0.6820987654320988, 'ndcg@1_num_valid': 324.0, 'hit@3': 0.7805555555555556, 'ndcg@3': 0.7044235983012636, 'ndcg@3_num_valid': 324.0, 'hit@5': 0.8472222222222222, 'ndcg@5': 0.7547887637291426, 'ndcg@5_num_valid': 324.0, 'hit@9': 0.9, 'ndcg@9': 0.8593136822735822, 'ndcg@9_num_valid': 324.0}
Forget: {'num_steps': 360.0, 'hit@1': 0.5027777777777778, 'ndcg@1': 0.5156695156695157, 'ndcg@1_num_valid': 351.0, 'hit@3': 0.7777777777777778, 'ndcg@3': 0.5572555411575187, 'ndcg@3_num_valid': 351.0, 'hit@5': 0.8833333333333333, 'ndcg@5': 0.6307790381276709, 'ndcg@5_num_valid': 351.0, 'hit@9': 0.975, 'ndcg@9': 0.7840613558081819, 'ndcg@9_num_valid': 351.0}

=== Epoch 1/15 ===
[step 0] loss=1.003390 avg_loss=1.003390

=== Epoch 2/15 ===

=== Epoch 3/15 ===

=== Epoch 4/15 ===

=== Epoch 5/15 ===

=== Epoch 6/15 ===

=== Epoch 7/15 ===

=== Epoch 8/15 ===

=== Epoch 9/15 ===

=== Epoch 10/15 ===

=== Epoch 11/15 ===
[step 50] loss=0.4108

In [None]:
import numpy as np
import torch
from torch.utils.data import DataLoader

from utils.data_loader import LogConfig, ItemConfig, load_log_table, load_item_table
from dataset.rl4rs_dataset import RL4RSDataset9Step
from train.train_dqn_basic import train_dqn_basic, TrainConfig
from eval.evaluate import evaluate


def split_df_log_train_forget_test(df_log, train_ratio=0.6, forget_ratio=0.2, test_ratio=0.2, seed=42):
    """Randomly partition the rows of ``df_log`` into train / forget / test frames.

    Row positions are shuffled with a NumPy ``Generator`` seeded by ``seed`` and
    then cut into three consecutive slices. The train and forget sizes are the
    truncated products ``int(len(df_log) * ratio)``; the test slice takes every
    remaining row, so truncation leftovers end up in the test split.

    Args:
        df_log: Table to split; only ``len()`` and ``.iloc`` are used.
        train_ratio: Fraction of rows for the train split.
        forget_ratio: Fraction of rows for the forget split.
        test_ratio: Fraction of rows for the test split (only used in the sum check).
        seed: Seed of the shuffling generator.

    Returns:
        Tuple ``(train, forget, test)``; each frame has a fresh 0..n-1 index.

    Raises:
        AssertionError: If the three ratios do not sum to 1 (tolerance 1e-9).
    """
    ratio_total = train_ratio + forget_ratio + test_ratio
    assert abs(ratio_total - 1.0) < 1e-9

    total_rows = len(df_log)
    order = np.arange(total_rows)
    np.random.default_rng(seed).shuffle(order)

    # Cumulative cut points inside the shuffled order.
    train_end = int(total_rows * train_ratio)
    forget_end = train_end + int(total_rows * forget_ratio)
    bounds = ((0, train_end), (train_end, forget_end), (forget_end, total_rows))

    return tuple(
        df_log.iloc[order[lo:hi]].reset_index(drop=True)
        for lo, hi in bounds
    )


def make_dataset(df_log_part, df_item):
    """Wrap one log partition and the item table in an ``RL4RSDataset9Step``.

    Every partition is built with the same fixed options (slate size 9, tier
    flags on, tier weights (1.0, 2.0, 4.0), history candidates excluded); their
    exact meaning is defined by ``RL4RSDataset9Step``.

    Args:
        df_log_part: Log rows for this partition.
        df_item: Item table shared by all partitions.

    Returns:
        The constructed ``RL4RSDataset9Step`` instance.
    """
    dataset_options = dict(
        slate_size=9,
        use_tier_flags=True,
        tier_weights=(1.0, 2.0, 4.0),
        exclude_history_candidates=True,
    )
    return RL4RSDataset9Step(df_log=df_log_part, df_item=df_item, **dataset_options)


def train_on(df_log_train, df_item, cfg, forget_loader=None):
    """Build a dataset/loader from ``df_log_train`` and run ``train_dqn_basic`` on it.

    Args:
        df_log_train: Log rows to train on.
        df_item: Item table passed to the dataset.
        cfg: Training config; ``cfg.batch_size`` sets the loader batch size and
            the whole object is forwarded to ``train_dqn_basic``.
        forget_loader: Optional loader forwarded unchanged to ``train_dqn_basic``
            (``None`` by default). Presumably consumed by its decremental
            phase; confirm against the trainer.

    Returns:
        Whatever ``train_dqn_basic`` returns (used by the caller as the model).
    """
    train_ds = make_dataset(df_log_train, df_item)
    train_loader = DataLoader(
        train_ds,
        batch_size=cfg.batch_size,
        shuffle=True,
        num_workers=4,
        pin_memory=True,
    )

    # The network input sizes are read off the first sample's flattened tensors.
    first = train_ds[0]
    dims = {
        "state_dim": first["state"].numel(),
        "item_dim": first["item_vec"].numel(),
    }

    return train_dqn_basic(
        train_loader=train_loader,
        hidden_dim=256,
        cfg=cfg,
        forget_loader=forget_loader,  # forwarded for decremental RL
        **dims,
    )


def eval_report(tag, model, test_ds, forget_ds, device, ks=(1, 3, 5, 9)):
    """Print ``evaluate`` results for ``model`` on the test and forget datasets.

    Args:
        tag: Label shown in the ``=== ... ===`` header line.
        model: Model handed to ``evaluate``.
        test_ds: Dataset reported under "Test:".
        forget_ds: Dataset reported under "Forget:".
        device: Forwarded to ``evaluate`` as ``device``.
        ks: Forwarded to ``evaluate`` as ``ks``.
    """
    print(f"\n=== {tag} ===")
    # Test is evaluated and printed before the forget set is evaluated.
    for label, dataset in (("Test:", test_ds), ("Forget:", forget_ds)):
        metrics = evaluate(model, dataset, device=device, ks=ks)
        print(label, metrics)


def main(
    log_path=r"E:\Kuliah\Kuliah\Kuliah\PRODI\Semester 7\ProSkripCode\data\raw\trainset.csv",
    item_path=r"E:\Kuliah\Kuliah\Kuliah\PRODI\Semester 7\ProSkripCode\data\raw\item_info.csv",
    max_rows=200,
    seed=42,
):
    """Compare an untrained DQN with one trained and then decrementally updated.

    Loads the log and item tables, keeps the first ``max_rows`` log rows, splits
    them 60/20/20 into train/forget/test, then reports metrics for
    (A) the model returned with ``num_epochs=0`` and (C) the model returned by
    training with the decremental options enabled and a forget-set loader.

    Args:
        log_path: CSV path given to ``LogConfig``. The default is the original
            machine-specific location; pass your own path on other machines.
        item_path: CSV path given to ``ItemConfig``; same caveat as ``log_path``.
        max_rows: Number of leading log rows to keep (quick-experiment cap);
            ``None`` keeps the whole table.
        seed: Seed used for the split and for torch's global RNG.
    """
    # Seed torch's global RNG so torch-driven randomness (e.g. DataLoader
    # shuffling) is repeatable between runs of this cell.
    torch.manual_seed(seed)

    log_cfg = LogConfig(path=log_path, slate_size=9, max_click_history=50)
    item_cfg = ItemConfig(path=item_path, item_vec_dim=None)

    df_log = load_log_table(log_cfg).reset_index(drop=True)
    df_item = load_item_table(item_cfg)

    if max_rows is not None:
        df_log = df_log.iloc[:max_rows].reset_index(drop=True)

    df_train, df_forget, df_test = split_df_log_train_forget_test(df_log, 0.6, 0.2, 0.2, seed=seed)

    test_ds = make_dataset(df_test, df_item)
    forget_ds = make_dataset(df_forget, df_item)

    cfg = TrainConfig(
        device="cuda" if torch.cuda.is_available() else "cpu",
        lr=1e-3,
        gamma=0.99,
        batch_size=256,
        num_epochs=5,
        target_update="hard",
        hard_update_interval=500,
        save_dir="weights",
        save_name="dqn_basic.pt"
    )

    # Loader over the forget split (used during decremental unlearning).
    # Its batch size follows the training config instead of a second literal.
    forget_loader = DataLoader(
        forget_ds,
        batch_size=cfg.batch_size,
        shuffle=True,
        num_workers=4,
        pin_memory=True
    )

    # A: untrained baseline (optional).
    # NOTE: any exception raised while training or evaluating A lands in this
    # handler, not only an unsupported num_epochs=0; the printed repr tells which.
    try:
        cfg_A = TrainConfig(
            device=cfg.device, lr=cfg.lr, gamma=cfg.gamma, batch_size=cfg.batch_size,
            num_epochs=0,
            target_update=cfg.target_update, hard_update_interval=cfg.hard_update_interval,
            save_dir=cfg.save_dir, save_name="dqn_untrained.pt"
        )
        q_A = train_on(df_train, df_item, cfg_A)
        eval_report("A (untrained)", q_A, test_ds, forget_ds, cfg.device)
    except Exception as e:
        print("\n(A untrained diskip) train_dqn_basic tidak support num_epochs=0:", repr(e))

    # B: trained (basic) -- disabled in this cell; uncomment to include it.
    # q_B = train_on(df_train, df_item, cfg)
    # eval_report("B (trained)", q_B, test_ds, forget_ds, cfg.device)

    # C: trained + decremental RL unlearning on forget
    cfg_C = TrainConfig(
        device=cfg.device,
        lr=cfg.lr,
        gamma=cfg.gamma,
        batch_size=cfg.batch_size,
        num_epochs=cfg.num_epochs,
        target_update=cfg.target_update,
        hard_update_interval=cfg.hard_update_interval,
        save_dir=cfg.save_dir,
        save_name="dqn_basic_then_dec.pt"
    )

    # --- enable decremental ---
    # These options are attached by attribute assignment after construction;
    # presumably train_dqn_basic reads them -- TODO confirm TrainConfig declares them.
    cfg_C.do_decremental = True
    cfg_C.dec_epochs = 3
    cfg_C.dec_lr = 1e-4
    cfg_C.dec_alpha = 0.5
    cfg_C.dec_save_name = "dqn_decremental.pt"

    q_C = train_on(df_train, df_item, cfg_C, forget_loader=forget_loader)
    eval_report("C (basic + decremental)", q_C, test_ds, forget_ds, cfg.device)


# Entry point: run the experiment when this code executes as the main module.
if __name__ == "__main__":
    main()


In [None]:
# Completion marker printed when this cell is executed.
print("Script executed successfully.")