# 9) Session-based Ranking (GRU4Rec-style)

In [None]:

%%capture
!pip -q install --upgrade pip
!pip -q install datasets transformers sentence-transformers faiss-cpu rank-bm25 torchmetrics scikit-learn lightgbm langdetect unidecode pandas matplotlib tqdm nltk

In [None]:

import numpy as np, torch, torch.nn as nn, torch.optim as optim
from datasets import load_dataset
from tqdm.auto import tqdm
# Use the GPU when torch reports one as available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Seed NumPy's and torch's global random generators with the same fixed value.
np.random.seed(42)
torch.manual_seed(42)

In [None]:

# ---- Load the raw interactions -------------------------------------------------
# NOTE(review): confirm that this dataset id is still downloadable from the Hub
# before relying on this cell.
ds = load_dataset("amazon_reviews_multi", "en", split="train[:20%]")
df = ds.to_pandas()

# The fallback has to run BEFORE the column selection below: selecting a missing
# "reviewer_id" column raises KeyError, so a check placed after it can never fire.
if "reviewer_id" not in df.columns:
    # No reviewer ids: treat every 5 consecutive rows as one pseudo-session.
    df["reviewer_id"] = np.arange(len(df)) // 5
df = df[["reviewer_id", "product_id"]].dropna()

# ---- One item sequence per reviewer ----------------------------------------------
# Keep reviewers with at least 5 interactions and cap each sequence at 20 items.
seqs = []
for _, g in df.groupby("reviewer_id"):
    items = g["product_id"].astype(str).tolist()
    if len(items) >= 5:
        seqs.append(items[:20])
if not seqs:
    raise ValueError(
        "No reviewer has at least 5 interactions in this split; "
        "load a larger slice or lower the minimum sequence length."
    )

# ---- Integer item ids -------------------------------------------------------------
# Id 0 is reserved for left-padding, so real items are numbered from 1. Numbering
# items from 0 would make a padded position indistinguishable from the first item.
PAD_ID = 0
all_items = {p: i for i, p in enumerate(sorted({x for s in seqs for x in s}), start=1)}
int_seqs = [[all_items[x] for x in s] for s in seqs]

# ---- (prefix -> next item) training examples ----------------------------------------
# Every proper prefix of a sequence predicts the item that follows it; only the
# most recent `maxlen` items of the prefix are kept.
maxlen = 15
X, Y = [], []
for s in int_seqs:
    for t in range(1, len(s)):
        X.append(s[:t][-maxlen:])
        Y.append(s[t])

# Left-pad so the most recent item always sits in the last column.
Xpad = np.full((len(X), maxlen), PAD_ID, dtype=np.int64)
for i, seq in enumerate(X):
    Xpad[i, -len(seq):] = seq
Y = np.array(Y, dtype=np.int64)

# The first 80% of the examples are used for training, the remainder is held out.
TR = int(0.8 * len(Xpad))
# Number of distinct ids the model must handle: every real item plus the pad id.
I = len(all_items) + 1

In [None]:

class GRURec(nn.Module):
    """Next-item scorer: item embedding -> GRU -> linear layer over all items.

    Args:
        n_items: number of embedding rows and number of output logits.
        d: width shared by the item embeddings and the GRU hidden state.
    """

    def __init__(self, n_items, d=128):
        super().__init__()
        self.emb = nn.Embedding(num_embeddings=n_items, embedding_dim=d)
        self.gru = nn.GRU(input_size=d, hidden_size=d, batch_first=True)
        self.fc = nn.Linear(in_features=d, out_features=n_items)

    def forward(self, x):
        """Score every item from the GRU output at the last time step.

        Args:
            x: integer item ids indexed as (batch, time).

        Returns:
            A (batch, n_items) tensor of logits.
        """
        embedded = self.emb(x)
        step_outputs, _ = self.gru(embedded)
        # Only the output at the final position feeds the classifier.
        final_step = step_outputs[:, -1, :]
        return self.fc(final_step)

# Training / evaluation settings.
N_EPOCHS = 3
BATCH_SIZE = 512     # training mini-batch size
EVAL_SIZE = 1024     # number of held-out examples scored after each epoch
TOP_K = 20           # cut-off for the recall metric

model = GRURec(I).to(device)
opt = optim.AdamW(model.parameters(), lr=3e-3)
loss = nn.CrossEntropyLoss()

# torch.topk raises when k exceeds the size of the scored dimension, so clamp the
# cut-off for catalogues with fewer than TOP_K items. The printed label stays
# "Recall@20" either way.
k_eval = min(TOP_K, I)

for ep in range(N_EPOCHS):
    # ---- one pass over the shuffled training examples ----
    model.train()
    perm = np.random.permutation(TR)
    for i in range(0, TR, BATCH_SIZE):
        batch_idx = perm[i:i + BATCH_SIZE]
        xb = torch.tensor(Xpad[batch_idx], dtype=torch.long).to(device)
        yb = torch.tensor(Y[batch_idx], dtype=torch.long).to(device)
        opt.zero_grad()
        logits = model(xb)
        batch_loss = loss(logits, yb)
        batch_loss.backward()
        opt.step()

    # ---- recall on the first EVAL_SIZE held-out examples ----
    model.eval()
    with torch.no_grad():
        xb = torch.tensor(Xpad[TR:TR + EVAL_SIZE], dtype=torch.long).to(device)
        yb = torch.tensor(Y[TR:TR + EVAL_SIZE], dtype=torch.long).to(device)
        topk = torch.topk(model(xb), k=k_eval, dim=1).indices
        # An example counts as a hit when its true next item is among the top-k ids.
        rec20 = (topk == yb.unsqueeze(1)).any(dim=1).float().mean().item()
    print("epoch", ep, "Recall@20", rec20)