In [None]:
import os
import torch
import polars as pl

from recbole.config import Config
from recbole.trainer import Trainer
from recbole.utils import init_seed
from recbole.data import create_dataset, data_preparation
from recbole.model.general_recommender.lightgcn import LightGCN

In [None]:
# --- Experiment configuration ------------------------------------------------
# Dataset switch: True selects "ml-1m", False selects the Instacart export.
USE_MOVIELENS = False
# Root directory holding one sub-folder per dataset; passed to RecBole as data_path.
DATA_ROOT = "./data/"
# Seed passed to init_seed and stored in the RecBole config.
SEED = 2025

# Model hyper-parameters (embedding_size / n_layers / reg_weight in the config).
EMB_SIZE = 128
N_LAYERS = 3
REG_WEIGHT = 1e-5

# Optimisation settings (learning_rate / batch sizes / epochs in the config).
LR = 1e-3
BATCH_SIZE = 1024
EPOCHS = 20

# Pick the dataset name used by the rest of the notebook. For Instacart, also
# export the de-duplicated (user, product) pairs as a flat tab-separated file.
if USE_MOVIELENS:
    dataset_name = "ml-1m"
else:
    dataset_name = "instacart"
    instacart_folder = os.path.join(DATA_ROOT, dataset_name)
    os.makedirs(instacart_folder, exist_ok=True)

    # Read the raw CSVs from the dataset folder derived from DATA_ROOT instead
    # of a second hard-coded copy of the same path, so the two cannot drift.
    orders = pl.read_csv(os.path.join(instacart_folder, "orders.csv"))
    order_prods = pl.read_csv(
        os.path.join(instacart_folder, "order_products__train.csv")
    )

    inter = (
        orders
        .join(order_prods, on="order_id")
        .select(["user_id", "product_id"])
        .unique()
        # unique() does not promise a row order; sort so the export is
        # identical from one run to the next.
        .sort(["user_id", "product_id"])
        # Every observed (user, product) pair is an implicit positive.
        .with_columns(pl.lit(1).alias("rating"))
    )

    # The file carries a .tsv extension, so write it tab-delimited
    # (write_csv defaults to commas).
    inter.select(["user_id", "product_id", "rating"]).write_csv(
        os.path.join(instacart_folder, "interactions.tsv"),
        separator="\t",
    )

In [None]:
# Export the Instacart interactions as "<dataset_name>.inter": a tab-separated
# file whose header row carries "name:type" column labels.
if not USE_MOVIELENS:
    instacart_folder = os.path.join(DATA_ROOT, dataset_name)
    os.makedirs(instacart_folder, exist_ok=True)

    # Raw inputs live in the same dataset folder that DATA_ROOT resolves to.
    orders = pl.read_csv(os.path.join(instacart_folder, "orders.csv"))
    order_prods = pl.read_csv(
        os.path.join(instacart_folder, "order_products__train.csv")
    )

    inter = (
        orders
        .join(order_prods, on="order_id")
        .select(["user_id", "product_id"])
        .unique()
        # unique() does not promise a row order; sorting keeps the exported
        # file (and anything seeded downstream of it) stable across runs.
        .sort(["user_id", "product_id"])
        .with_columns(pl.lit(1).alias("rating"))
    )

    inter_file = os.path.join(instacart_folder, f"{dataset_name}.inter")

    # Let polars write the whole frame in one call instead of formatting each
    # row in a Python loop; renaming the columns yields the typed header line.
    (
        inter
        .select(["user_id", "product_id", "rating"])
        .rename(
            {
                "user_id": "user_id:token",
                "product_id": "product_id:token",
                "rating": "rating:float",
            }
        )
        .write_csv(inter_file, separator="\t")
    )

In [None]:
# Build the RecBole configuration for the dataset selected by USE_MOVIELENS,
# then load the dataset and split it into train / validation / test loaders.
recbole_config = {
    # Data location and reproducibility.
    "data_path": DATA_ROOT,
    "dataset": dataset_name,
    "seed": SEED,
    # Optimisation.
    "train_batch_size": BATCH_SIZE,
    "eval_batch_size": BATCH_SIZE,
    "learning_rate": LR,
    "epochs": EPOCHS,
    # LightGCN hyper-parameters.
    "embedding_size": EMB_SIZE,
    "n_layers": N_LAYERS,
    "reg_weight": REG_WEIGHT,
    # Validation: track Recall@10 (higher is better) after every epoch.
    "valid_metric": "Recall@10",
    "valid_metric_bigger": True,
    # RecBole's setting for the validation interval is "eval_step"; the former
    # "valid_every" key is not a RecBole setting and was silently ignored.
    "eval_step": 1,
    # NOTE(review): "save_model" does not appear among RecBole's documented
    # settings (checkpointing is driven by Trainer.fit(saved=...)) - confirm.
    "save_model": True,
    "checkpoint_dir": "./saved_checkpoints",
}

if not USE_MOVIELENS:
    # The Instacart .inter file uses its own column names, so tell RecBole
    # which columns to load and which play the user / item / rating roles.
    recbole_config.update(
        {
            "load_col": {
                "inter": ["user_id", "product_id", "rating"]
            },
            "USER_ID_FIELD": "user_id",
            "ITEM_ID_FIELD": "product_id",
            "RATING_FIELD": "rating",
        }
    )

config = Config(model="LightGCN", config_dict=recbole_config)
init_seed(SEED, reproducibility=True)
dataset = create_dataset(config)
train_data, valid_data, test_data = data_preparation(config, dataset)

In [None]:
# Train LightGCN on the prepared data and persist the user embedding table.

# Place the model on the device RecBole resolved for this config, so the model
# and the Trainer (which reads the device from the same config) always agree.
device = config["device"]
model = LightGCN(config, dataset).to(device)
trainer = Trainer(config, model)

# Trainer.fit returns (best_valid_score, best_valid_result), not a model.
# The historical name `best_model` is kept bound in case later cells use it.
best_model = trainer.fit(
    train_data=train_data,
    valid_data=valid_data,
    show_progress=True,
)
best_valid_score, best_valid_result = best_model

# After fit() the in-memory model holds the weights of the LAST epoch. Reload
# the checkpoint written at the best validation score before exporting.
# weights_only=False: RecBole checkpoints bundle non-tensor state with the
# weights, and this file was produced by this very run, so it is trusted.
checkpoint = torch.load(
    trainer.saved_model_file, map_location=device, weights_only=False
)
model.load_state_dict(checkpoint["state_dict"])
model.load_other_parameter(checkpoint.get("other_parameter"))

# NOTE(review): this is the raw embedding table; if downstream consumers need
# the graph-propagated user representations, take them from the model's
# forward pass instead - confirm which one is expected.
user_embs = model.user_embedding.weight.detach().cpu()
torch.save(user_embs, "user_embeddings.pt")

  SparseL = torch.sparse.FloatTensor(i, data, torch.Size(L.shape))
  scaler = amp.GradScaler(enabled=self.enable_scaler)
[1;35mTrain     0[0m: 100%|██████████████████████| 1075/1075 [08:46<00:00,  2.04it/s, [1;33mGPU RAM: 1.44 G/5.93 G[0m][0m
[1;35mEvaluate   [0m: 100%|█████████████████| 116996/116996 [07:12<00:00, 270.76it/s, [1;33mGPU RAM: 1.44 G/5.93 G[0m][0m
[1;35mTrain     1[0m: 100%|██████████████████████| 1075/1075 [08:31<00:00,  2.10it/s, [1;33mGPU RAM: 1.44 G/5.93 G[0m][0m
[1;35mEvaluate   [0m: 100%|█████████████████| 116996/116996 [07:11<00:00, 271.29it/s, [1;33mGPU RAM: 1.44 G/5.93 G[0m][0m
[1;35mTrain     2[0m: 100%|██████████████████████| 1075/1075 [08:31<00:00,  2.10it/s, [1;33mGPU RAM: 1.44 G/5.93 G[0m][0m
[1;35mEvaluate   [0m: 100%|█████████████████| 116996/116996 [07:13<00:00, 270.16it/s, [1;33mGPU RAM: 1.44 G/5.93 G[0m][0m
[1;35mTrain     3[0m: 100%|██████████████████████| 1075/1075 [08:31<00:00,  2.10it/s, [1;33mGPU RAM: 1.44 G/5.93 G

✅ Saved user_embeddings.pt


In [None]:
# Build the RecBole configuration for MovieLens-1M and prepare its data.
# The plain settings dict gets its own name so that `ml_config` only ever
# refers to the resulting Config object.
ml_config_dict = {
    # Data location and reproducibility.
    "data_path": DATA_ROOT,
    "dataset": "ml-1m",
    "seed": SEED,
    # Optimisation.
    "train_batch_size": BATCH_SIZE,
    "eval_batch_size": BATCH_SIZE,
    "learning_rate": LR,
    "epochs": EPOCHS,
    # LightGCN hyper-parameters.
    "embedding_size": EMB_SIZE,
    "n_layers": N_LAYERS,
    "reg_weight": REG_WEIGHT,
    # Validation: track Recall@10 (higher is better) after every epoch.
    "valid_metric": "Recall@10",
    "valid_metric_bigger": True,
    # RecBole's setting for the validation interval is "eval_step"; the former
    # "valid_every" key is not a RecBole setting and was silently ignored.
    "eval_step": 1,
    # NOTE(review): "save_model" does not appear among RecBole's documented
    # settings (checkpointing is driven by Trainer.fit(saved=...)) - confirm.
    "save_model": True,
    "checkpoint_dir": "./saved_checkpoints",
}

ml_config = Config(model="LightGCN", config_dict=ml_config_dict)
init_seed(SEED, reproducibility=True)
# These rebind `dataset` / `train_data` / `valid_data` / `test_data`, replacing
# the objects created for the previously configured dataset.
dataset = create_dataset(ml_config)
train_data, valid_data, test_data = data_preparation(ml_config, dataset)

In [None]:
# Train LightGCN on MovieLens-1M and persist the best-epoch user embeddings.

# Take the device from this run's own config rather than relying on a `device`
# variable left behind by an earlier cell.
ml_device = ml_config["device"]
model = LightGCN(ml_config, dataset).to(ml_device)
trainer = Trainer(ml_config, model)

# Trainer.fit returns (best_valid_score, best_valid_result), not a model.
# The historical name `best_model` is kept bound in case later cells use it.
best_model = trainer.fit(
    train_data=train_data,
    valid_data=valid_data,
    show_progress=True,
)
best_valid_score, best_valid_result = best_model

# The output file is named "..._best.pt", but after fit() the in-memory model
# holds the LAST epoch's weights. Reload the checkpoint written at the best
# validation score so the file contains what its name promises.
# weights_only=False: RecBole checkpoints bundle non-tensor state with the
# weights, and this file was produced by this very run, so it is trusted.
checkpoint = torch.load(
    trainer.saved_model_file, map_location=ml_device, weights_only=False
)
model.load_state_dict(checkpoint["state_dict"])
model.load_other_parameter(checkpoint.get("other_parameter"))

# NOTE(review): this is the raw embedding table; if downstream consumers need
# the graph-propagated user representations, take them from the model's
# forward pass instead - confirm which one is expected.
user_embs = model.user_embedding.weight.detach().cpu()
torch.save(user_embs, "mv_user_embeddings_best.pt")

  scaler = amp.GradScaler(enabled=self.enable_scaler)
[1;35mTrain     0[0m: 100%|████████████████████████| 787/787 [02:09<00:00,  6.06it/s, [1;33mGPU RAM: 1.44 G/5.93 G[0m][0m
[1;35mEvaluate   [0m: 100%|█████████████████████| 6040/6040 [00:23<00:00, 259.10it/s, [1;33mGPU RAM: 1.44 G/5.93 G[0m][0m
[1;35mTrain     1[0m: 100%|████████████████████████| 787/787 [02:08<00:00,  6.12it/s, [1;33mGPU RAM: 1.44 G/5.93 G[0m][0m
[1;35mEvaluate   [0m: 100%|█████████████████████| 6040/6040 [00:23<00:00, 251.71it/s, [1;33mGPU RAM: 1.44 G/5.93 G[0m][0m
[1;35mTrain     2[0m: 100%|████████████████████████| 787/787 [02:06<00:00,  6.24it/s, [1;33mGPU RAM: 1.44 G/5.93 G[0m][0m
[1;35mEvaluate   [0m: 100%|█████████████████████| 6040/6040 [00:23<00:00, 262.59it/s, [1;33mGPU RAM: 1.44 G/5.93 G[0m][0m
[1;35mTrain     3[0m: 100%|████████████████████████| 787/787 [02:06<00:00,  6.23it/s, [1;33mGPU RAM: 1.44 G/5.93 G[0m][0m
[1;35mEvaluate   [0m: 100%|█████████████████████| 6040/