In [1]:
import os
import tqdm
import wandb
import warnings
import numpy as np
import pandas as pd
import polars as pl
import matplotlib.pyplot as plt
import seaborn as sns
import concurrent.futures

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # Suppress TensorFlow logging
warnings.filterwarnings('ignore', category=UserWarning, module='google.protobuf')

from matplotlib.axes import Axes
from wandb.apis.public import Run

from typing import Union, List, Dict
from src.visualization import set_themes

set_themes() # Set custom themes for plots
pl.Config.set_tbl_rows(20) # Set Polars table display rows limit

# Opt in to pandas' future behaviour of not silently downcasting object columns.
# NOTE(review): presumably here for the `DataFrame.replace` call on the W&B run
# history further down, which would otherwise emit a FutureWarning - confirm.
pd.set_option('future.no_silent_downcasting', True)

In [2]:
# Parquet file holding a previously fetched run table; when it exists it is
# loaded instead of querying Weights & Biases.
cache_file = "wandb/summary.parquet"

# "model" selects the W&B runs (via the `config.model` filter) and the local
# ./models/<model>/ directory; "ensure_available_locally" toggles keeping only
# runs that are found in that directory.
config = dict(
    model="matrix_factorization",
    ensure_available_locally=False,
)

# History column -> weight. The weighted sum of these columns is the per-step
# score used to pick each run's best step.
sorting_criterion = dict([
    ("epoch/test_hitrate@50", 0.5),
    ("epoch/test_ndcg@50", 0.25),
])

if cache_file is not None and os.path.exists(cache_file):
    print(f"Loading cached experiment runs from {cache_file}...")
    experiment_runs = pl.read_parquet(cache_file)
    print(f"Loaded {len(experiment_runs)} runs from cache.")
else:
    print("No cache file found. Fetching experiment runs from Weights & Biases...")
    api = wandb.Api() # Initialize Weights & Biases API, used for fetching run data

    def fetch_run_metadata(run: Run, considered_metrics: Union[str, Dict[str, float]] = "epoch/epoch") -> Dict:
        """Build one flat record (config, metric history, best step) for a run.

        Args:
            run: The Weights & Biases run to summarise.
            considered_metrics: Either the name of a single history column, or a
                mapping ``{history column: weight}``. The per-step "score" is that
                column, or the weighted sum of the mapped columns.

        Returns:
            A dict with the run identifiers, the run config (list/dict values
            converted to strings), every history column as a list (including the
            computed "score"), the history row with the highest score stored
            under ``best:<column>`` keys, and "gpu_type" / "cpu_count" taken from
            the run metadata (None when unavailable).

        Raises:
            ValueError: If ``considered_metrics`` is neither a string nor a dict.
        """
        # Convert lists and dicts to strings; every other config value is kept as is
        run_config = {
            key: str(value) if isinstance(value, (list, dict)) else value
            for key, value in run.config.items()
        }

        run_history = run.history()
        # The history may carry the strings "Infinity" / "NaN"; map them to floats
        run_history = run_history.replace({"Infinity": np.inf, "NaN": np.nan})

        if isinstance(considered_metrics, str):
            run_history["score"] = run_history[considered_metrics]
        elif isinstance(considered_metrics, dict):
            run_history["score"] = sum(
                run_history[metric] * weight for metric, weight in considered_metrics.items()
            )
        else:
            raise ValueError("considered_metrics must be either a string or a dictionary")

        # History row with the highest score, re-keyed as "best:<column>"
        best_row = run_history.iloc[run_history["score"].argmax()]
        best_summary = {f"best:{key}": val for key, val in best_row.items()}

        # NOTE(review): `run.metadata` is presumably None when W&B has no metadata
        # for the run - fall back to an empty dict so such runs yield None values
        # instead of raising AttributeError.
        run_metadata = run.metadata or {}

        return {
            "run_id": run.id,
            "run_name": run.name,
            "sweep_id": run.sweep.id if run.sweep else None,
            "model": run.config.get("model"),
            **run_config,
            **{metric: run_history[metric].to_list() for metric in run_history},
            **best_summary,
            "gpu_type": run_metadata.get("gpu"),
            "cpu_count": run_metadata.get("cpu_count"),
        }

    batch_size = 16
    records = []
    runs:List[Run] = api.runs("feedr/peppermint-matrix", per_page=2*batch_size-1, filters={"config.model": config["model"]})

    # Futures that were submitted but whose result has not been collected yet
    pending = set()
    # The context manager shuts the worker threads down, also when a fetch raises
    with concurrent.futures.ThreadPoolExecutor(max_workers=batch_size) as executor:
        with tqdm.tqdm(total=len(runs), ncols=128) as pbar:
            # Iterating `runs` directly ends the loop when the collection is
            # exhausted, independent of what len(runs) reports.
            for current_run in runs:
                pending.add(executor.submit(fetch_run_metadata, current_run, sorting_criterion))

                # Keep at most `batch_size` fetches in flight: once the window is
                # full, block until at least one of them has finished.
                if len(pending) >= batch_size:
                    finished, pending = concurrent.futures.wait(
                        pending, return_when=concurrent.futures.FIRST_COMPLETED
                    )
                    for finished_future in finished:
                        records.append(finished_future.result())
                        pbar.update(1)

            # Collect whatever is still running after the last submission
            for finished_future in concurrent.futures.as_completed(pending):
                records.append(finished_future.result())
                pbar.update(1)

    # Create a Polars DataFrame from the records
    experiment_runs = pl.DataFrame(records, infer_schema_length=None)

    # Persist the fetched table so the next execution can take the cache branch
    if cache_file is not None:
        os.makedirs(os.path.dirname(cache_file) or ".", exist_ok=True)
        experiment_runs.write_parquet(cache_file)
        print(f"Cached {len(experiment_runs)} runs to {cache_file}.")
    
# Tag run as available locally if the model files exist.
# Expected layout: ./models/<model>/<sweep_id>/<run_id>. A missing models directory
# or stray non-directory entries simply mean "nothing available locally" instead of
# raising from os.listdir.
models_root = f"./models/{config['model']}/"
local_sweep_ids = []
local_run_ids = []
if os.path.isdir(models_root):
    for sweep_id in os.listdir(models_root):
        sweep_dir = os.path.join(models_root, sweep_id)
        if os.path.isdir(sweep_dir):
            local_sweep_ids.append(sweep_id)
            local_run_ids.extend(os.listdir(sweep_dir))

experiment_runs = experiment_runs.with_columns(
    available_locally=pl.col("run_id").is_in(local_run_ids)
)

if config["ensure_available_locally"]:
    # The column is already boolean, so it can be used as the predicate directly
    experiment_runs = experiment_runs.filter(pl.col("available_locally"))

experiment_runs = experiment_runs.sort("_timestamp", descending=False)
# `_runtime` holds one list per run; its maximum is used as the run duration
experiment_runs = experiment_runs.with_columns(
    run_duration_second=pl.col("_runtime").list.max(),
    run_duration_minute=(pl.col("_runtime").list.max() / 60)
)
# Preview of the identifying columns and the best-step metrics
experiment_runs.select(
    pl.col("run_id"),
    pl.col("run_name"),
    pl.col("sweep_id"),
    pl.col("model"),
    pl.col("embedding_dimension"),
    pl.col("shuffle"),
    pl.col("best:epoch/epoch"),
    pl.col("best:epoch/train_loss"),
    pl.col("best:epoch/test_loss"),
    pl.col("best:epoch/test_recall@10"),
    pl.col("best:epoch/test_ndcg@10"),
)

Loading cached experiment runs from wandb/summary.parquet...
Loaded 1636 runs from cache.


run_id,run_name,sweep_id,model,embedding_dimension,shuffle,best:epoch/epoch,best:epoch/train_loss,best:epoch/test_loss,best:epoch/test_recall@10,best:epoch/test_ndcg@10
str,str,str,str,i64,bool,f64,f64,f64,f64,f64
"""o94q0juk""","""logical-sweep-1""","""nbysw136""","""matrix_factorization""",256,false,52.0,0.330564,0.372714,0.026077,0.100539
"""4ftaae0p""","""stilted-sweep-3""","""nbysw136""","""matrix_factorization""",4,false,59.0,0.693148,0.693148,0.006594,0.02556
"""fway5u2z""","""breezy-sweep-4""","""nbysw136""","""matrix_factorization""",512,false,4.0,0.693147,0.693147,0.0026,0.012059
"""bphcl2xf""","""clean-sweep-2""","""nbysw136""","""matrix_factorization""",1024,false,1.0,0.237272,0.239171,0.024744,0.095121
"""fftz1dek""","""trim-sweep-5""","""nbysw136""","""matrix_factorization""",256,true,57.0,0.319744,0.366027,0.025562,0.099834
"""otb8suw9""","""scarlet-sweep-6""","""nbysw136""","""matrix_factorization""",4,true,63.0,0.133806,0.217083,0.021467,0.082659
"""lvre7srl""","""solar-sweep-7""","""nbysw136""","""matrix_factorization""",256,true,8.0,0.036125,0.194509,0.021456,0.084617
"""dcbj92eg""","""ruby-sweep-8""","""nbysw136""","""matrix_factorization""",256,false,3.0,0.078173,0.179263,0.024295,0.094158
"""x17mnyw8""","""breezy-sweep-9""","""nbysw136""","""matrix_factorization""",8,false,61.0,0.111218,0.207902,0.023699,0.089898
"""7jrm756b""","""super-sweep-10""","""nbysw136""","""matrix_factorization""",4,false,10.0,0.693147,0.693147,0.002425,0.010531


# Parameter Comparison

## L2 Regularization | Shuffle = False

In [3]:
# L2 sweep without shuffling: keep runs with shuffle off, no L1 penalty and no
# embedding dropout, then average the best-step metrics per
# (embedding_dimension, l2_regularization) combination.
experiment_summary = (
    experiment_runs
    .filter(
        pl.col("shuffle").eq(False)
        & pl.col("l1_regularization").eq(0.0)
        & pl.col("embedding_dropout_rate").eq(0.0)
    )
    .group_by(["embedding_dimension", "l2_regularization"])
    .agg(
        pl.col("run_id").count().alias("num_runs"),
        *(
            pl.col(metric).mean()
            for metric in (
                "best:epoch/epoch",
                "best:epoch/test_recall@10",
                "best:epoch/test_ndcg@10",
                "best:epoch/test_recall@20",
                "best:epoch/test_ndcg@20",
            )
        ),
    )
    .sort(["embedding_dimension", "l2_regularization"])
)
experiment_summary

embedding_dimension,l2_regularization,num_runs,best:epoch/epoch,best:epoch/test_recall@10,best:epoch/test_ndcg@10,best:epoch/test_recall@20,best:epoch/test_ndcg@20
i64,f64,u32,f64,f64,f64,f64,f64
2,0.0,6,52.0,0.015826,0.061035,0.028021,0.080881
2,1.0000e-10,4,58.5,0.015172,0.059716,0.027479,0.079558
2,1.0000e-9,4,49.25,0.014855,0.057413,0.026413,0.075915
2,1.0000e-8,5,55.6,0.015249,0.060109,0.027161,0.079271
2,0.0000001,5,55.6,0.014978,0.05851,0.026745,0.077531
2,0.000001,3,34.666667,0.014409,0.057094,0.025906,0.075099
2,0.00001,6,56.5,0.005557,0.023788,0.009413,0.030217
2,0.0001,4,9.5,0.002482,0.010624,0.004251,0.014389
2,0.001,5,11.6,0.000337,0.001539,0.000714,0.002484
2,0.01,6,9.666667,0.000527,0.002127,0.00117,0.003616


### Run Counts

In [4]:
# Wide view of `experiment_summary`: one row per embedding_dimension, one column
# per l2_regularization value, cells hold the number of runs.
# `on=` is the current name of the keyword formerly called `columns=`; pivot only
# reads the on/index/values columns, so no pre-selection is needed.
experiment_summary.pivot(
    on="l2_regularization",
    index="embedding_dimension",
    values=["num_runs"],
)

  experiment_summary[["embedding_dimension", "l2_regularization", "num_runs"]].pivot(


embedding_dimension,0.0,1e-10,1e-9,1e-8,1e-7,1e-6,0.00001,0.0001,0.001,0.01
i64,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32
2,6,4,4,5,5,3,6,4,5,6
4,8,2,4,2,2,8,4,6,4,7
8,9,9,4,3,7,4,3,4,5,2
16,8,2,2,8,2,5,3,9,4,4
32,4,3,3,5,5,6,4,6,9,8
64,6,3,2,4,8,5,3,5,5,2
128,10,2,3,4,7,4,4,3,7,3
256,10,3,4,4,5,4,4,3,5,6
512,6,2,2,5,2,3,3,7,4,4
1024,8,5,2,2,4,3,5,6,3,5


### Best Epoch

In [5]:
# Wide view of `experiment_summary`: one row per embedding_dimension, one column
# per l2_regularization value, cells hold the mean best epoch.
# `on=` is the current name of the keyword formerly called `columns=`; pivot only
# reads the on/index/values columns, so no pre-selection is needed.
experiment_summary.pivot(
    on="l2_regularization",
    index="embedding_dimension",
    values=["best:epoch/epoch"],
)

  experiment_summary[["embedding_dimension", "l2_regularization", "best:epoch/epoch"]].pivot(


embedding_dimension,0.0,1e-10,1e-9,1e-8,1e-7,1e-6,0.00001,0.0001,0.001,0.01
i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
2,52.0,58.5,49.25,55.6,55.6,34.666667,56.5,9.5,11.6,9.666667
4,40.125,54.0,48.25,50.5,60.0,57.625,59.5,7.166667,46.0,10.285714
8,49.555556,45.444444,41.5,38.333333,60.857143,55.25,61.0,6.5,37.4,9.5
16,32.25,23.0,31.5,36.25,58.0,59.2,61.666667,6.222222,46.0,9.75
32,16.75,13.333333,18.666667,23.0,58.8,60.333333,60.5,5.666667,57.666667,8.625
64,7.666667,9.0,8.5,16.0,59.625,53.2,62.0,5.4,60.6,8.5
128,7.5,3.0,3.333333,5.0,55.714286,58.5,62.0,5.333333,57.714286,8.666667
256,6.0,4.666667,3.25,3.75,55.6,49.25,61.5,5.333333,57.6,8.5
512,1.0,1.0,1.0,2.0,31.0,53.0,62.666667,5.571429,62.25,8.5
1024,1.0,1.0,1.0,1.0,5.0,39.666667,12.6,5.5,53.0,8.2


### Best Test Recall@20

In [6]:
# Wide view of `experiment_summary`: one row per embedding_dimension, one column
# per l2_regularization value, cells hold the mean best test recall@20.
# `on=` is the current name of the keyword formerly called `columns=`; pivot only
# reads the on/index/values columns, so no pre-selection is needed.
experiment_summary.pivot(
    on="l2_regularization",
    index="embedding_dimension",
    values=["best:epoch/test_recall@20"],
)

  experiment_summary[["embedding_dimension", "l2_regularization", "best:epoch/test_recall@20"]].pivot(


embedding_dimension,0.0,1e-10,1e-9,1e-8,1e-7,1e-6,0.00001,0.0001,0.001,0.01
i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
2,0.028021,0.027479,0.026413,0.027161,0.026745,0.025906,0.009413,0.004251,0.000714,0.00117
4,0.035513,0.036021,0.034871,0.035595,0.036566,0.036103,0.010423,0.004288,0.00062,0.001387
8,0.038919,0.039219,0.039121,0.040172,0.042089,0.041671,0.011141,0.004165,0.000604,0.001154
16,0.039954,0.040318,0.04048,0.041284,0.045753,0.04373,0.011708,0.004219,0.000635,0.001355
32,0.04054,0.041019,0.040565,0.041324,0.047913,0.044841,0.011938,0.004357,0.000637,0.001499
64,0.04002,0.041457,0.041391,0.041036,0.048462,0.045193,0.012688,0.004215,0.000671,0.001687
128,0.039109,0.042331,0.041844,0.042047,0.048909,0.0452,0.012909,0.004322,0.000708,0.001859
256,0.038196,0.041366,0.04183,0.04228,0.047799,0.045084,0.013638,0.004303,0.000725,0.001742
512,0.040534,0.04213,0.041295,0.041465,0.044598,0.04552,0.013945,0.004337,0.000747,0.001457
1024,0.042053,0.042826,0.043019,0.043283,0.042829,0.044551,0.015028,0.004374,0.000742,0.001633


### Best Test NDCG@20

In [7]:
# Wide view of `experiment_summary`: one row per embedding_dimension, one column
# per l2_regularization value, cells hold the mean best test NDCG@20.
# `on=` is the current name of the keyword formerly called `columns=`; pivot only
# reads the on/index/values columns, so no pre-selection is needed.
experiment_summary.pivot(
    on="l2_regularization",
    index="embedding_dimension",
    values=["best:epoch/test_ndcg@20"],
)

  experiment_summary[["embedding_dimension", "l2_regularization", "best:epoch/test_ndcg@20"]].pivot(


embedding_dimension,0.0,1e-10,1e-9,1e-8,1e-7,1e-6,0.00001,0.0001,0.001,0.01
i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
2,0.080881,0.079558,0.075915,0.079271,0.077531,0.075099,0.030217,0.014389,0.002484,0.003616
4,0.10118,0.1025,0.098071,0.100571,0.102822,0.102952,0.033224,0.015206,0.002254,0.004341
8,0.110166,0.110855,0.110785,0.112272,0.11766,0.117419,0.033542,0.014861,0.002137,0.003596
16,0.112943,0.114327,0.114361,0.116159,0.128185,0.122634,0.036012,0.014989,0.002359,0.003994
32,0.11407,0.115382,0.114562,0.11644,0.132597,0.125658,0.037479,0.015193,0.00237,0.004821
64,0.113347,0.116776,0.115964,0.116176,0.13426,0.12657,0.038883,0.014786,0.002622,0.005648
128,0.111862,0.119628,0.118013,0.118536,0.134724,0.126154,0.040305,0.015177,0.002728,0.006084
256,0.108185,0.116493,0.11857,0.119785,0.132158,0.126971,0.041902,0.015182,0.002757,0.00487
512,0.112974,0.11756,0.114751,0.116177,0.125115,0.126861,0.042879,0.015423,0.002837,0.004465
1024,0.117659,0.119106,0.118587,0.12,0.119165,0.125853,0.044897,0.015453,0.002889,0.005158


## L2 Regularization | Shuffle = True

In [8]:
# L2 sweep with shuffling: keep runs with shuffle on, no L1 penalty and no
# embedding dropout, then average the best-step metrics per
# (embedding_dimension, l2_regularization) combination.
experiment_summary = (
    experiment_runs
    .filter(
        pl.col("shuffle").eq(True)
        & pl.col("l1_regularization").eq(0.0)
        & pl.col("embedding_dropout_rate").eq(0.0)
    )
    .group_by(["embedding_dimension", "l2_regularization"])
    .agg(
        pl.col("run_id").count().alias("num_runs"),
        *(
            pl.col(metric).mean()
            for metric in (
                "best:epoch/epoch",
                "best:epoch/test_recall@10",
                "best:epoch/test_ndcg@10",
                "best:epoch/test_recall@20",
                "best:epoch/test_ndcg@20",
            )
        ),
    )
    .sort(["embedding_dimension", "l2_regularization"])
)
experiment_summary

embedding_dimension,l2_regularization,num_runs,best:epoch/epoch,best:epoch/test_recall@10,best:epoch/test_ndcg@10,best:epoch/test_recall@20,best:epoch/test_ndcg@20
i64,f64,u32,f64,f64,f64,f64,f64
2,0.0,7,49.428571,0.014908,0.058728,0.026836,0.078058
2,1.0000e-10,3,48.666667,0.014938,0.060945,0.027117,0.080488
2,1.0000e-9,1,61.0,0.013314,0.054988,0.024519,0.073735
2,1.0000e-8,5,55.6,0.015516,0.060763,0.027582,0.080168
2,0.0000001,2,53.0,0.015778,0.061681,0.027732,0.080954
2,0.000001,3,50.666667,0.01473,0.058977,0.026763,0.078462
2,0.00001,5,45.6,0.012452,0.051055,0.022443,0.066975
2,0.0001,2,11.5,0.002893,0.013134,0.004749,0.016989
2,0.001,7,58.571429,0.000701,0.003661,0.001253,0.005061
2,0.01,4,9.0,0.000672,0.003313,0.001291,0.004804


### Run Counts

In [9]:
# Wide view of `experiment_summary`: one row per embedding_dimension, one column
# per l2_regularization value, cells hold the number of runs.
# `on=` is the current name of the keyword formerly called `columns=`; pivot only
# reads the on/index/values columns, so no pre-selection is needed.
experiment_summary.pivot(
    on="l2_regularization",
    index="embedding_dimension",
    values=["num_runs"],
)

  experiment_summary[["embedding_dimension", "l2_regularization", "num_runs"]].pivot(


embedding_dimension,0.0,1e-10,1e-9,1e-8,1e-7,1e-6,0.00001,0.0001,0.001,0.01
i64,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32
2,7,3,1,5,2,3,5,2,7,4
4,7,3,5,6,5,4,5,8,5,3
8,9,4,2,5,2,6,5,5,4,7
16,8,3,2,10,4,4,9,4,5,6
32,11,2,2,3,5,10,3,3,3,7
64,6,5,4,4,6,5,3,3,4,7
128,6,3,4,5,10,4,6,5,4,4
256,7,2,2,5,5,5,3,4,3,5
512,5,3,4,5,5,7,2,2,5,7
1024,6,4,5,4,3,6,5,5,3,5


### Best Epoch

In [10]:
# Wide view of `experiment_summary`: one row per embedding_dimension, one column
# per l2_regularization value, cells hold the mean best epoch.
# `on=` is the current name of the keyword formerly called `columns=`; pivot only
# reads the on/index/values columns, so no pre-selection is needed.
experiment_summary.pivot(
    on="l2_regularization",
    index="embedding_dimension",
    values=["best:epoch/epoch"],
)

  experiment_summary[["embedding_dimension", "l2_regularization", "best:epoch/epoch"]].pivot(


embedding_dimension,0.0,1e-10,1e-9,1e-8,1e-7,1e-6,0.00001,0.0001,0.001,0.01
i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
2,49.428571,48.666667,61.0,55.6,53.0,50.666667,45.6,11.5,58.571429,9.0
4,54.285714,60.666667,48.6,55.5,61.8,60.0,43.2,8.75,61.4,8.333333
8,50.555556,53.0,62.5,55.2,60.0,59.166667,44.0,10.2,59.25,8.857143
16,44.125,33.333333,52.5,52.3,60.75,56.0,45.444444,11.0,63.0,8.5
32,21.454545,28.0,21.5,41.0,59.6,54.7,48.0,11.666667,61.666667,9.0
64,16.333333,13.8,15.5,21.0,58.5,52.2,53.0,12.0,62.5,8.0
128,14.166667,12.333333,13.75,14.8,57.8,53.25,58.5,10.6,61.75,8.75
256,10.0,11.0,10.0,12.8,53.8,54.6,0.0,9.75,61.666667,7.8
512,6.8,2.333333,9.75,10.0,38.0,49.571429,0.0,10.5,60.0,8.0
1024,0.0,0.0,0.0,0.0,12.333333,52.166667,0.0,12.0,58.666667,8.0


### Best Test Recall@20

In [11]:
# Wide view of `experiment_summary`: one row per embedding_dimension, one column
# per l2_regularization value, cells hold the mean best test recall@20.
# `on=` is the current name of the keyword formerly called `columns=`; pivot only
# reads the on/index/values columns, so no pre-selection is needed.
experiment_summary.pivot(
    on="l2_regularization",
    index="embedding_dimension",
    values=["best:epoch/test_recall@20"],
)

  experiment_summary[["embedding_dimension", "l2_regularization", "best:epoch/test_recall@20"]].pivot(


embedding_dimension,0.0,1e-10,1e-9,1e-8,1e-7,1e-6,0.00001,0.0001,0.001,0.01
i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
2,0.026836,0.027117,0.024519,0.027582,0.027732,0.026763,0.022443,0.004749,0.001253,0.001291
4,0.034983,0.035082,0.035621,0.035248,0.036723,0.035059,0.023605,0.004646,0.001419,0.001035
8,0.038897,0.039344,0.039087,0.04002,0.042581,0.041639,0.023367,0.004607,0.001258,0.001233
16,0.040105,0.039796,0.039979,0.041158,0.0465,0.04392,0.023699,0.004754,0.001472,0.001397
32,0.039802,0.039639,0.04,0.040843,0.047524,0.044698,0.02361,0.004856,0.001429,0.001452
64,0.039555,0.039531,0.039444,0.040625,0.048557,0.045053,0.024785,0.004896,0.001584,0.001608
128,0.038365,0.038835,0.038436,0.039411,0.048694,0.044757,0.021124,0.004801,0.001645,0.001317
256,0.037243,0.03723,0.038289,0.038295,0.047734,0.045243,0.017743,0.004785,0.001716,0.001571
512,0.035652,0.036799,0.036013,0.036662,0.044243,0.044985,0.017599,0.004849,0.001893,0.001534
1024,0.036986,0.037841,0.037785,0.037974,0.040105,0.044832,0.016695,0.00499,0.002026,0.001403


### Best Test NDCG@20

In [12]:
# Wide view of `experiment_summary`: one row per embedding_dimension, one column
# per l2_regularization value, cells hold the mean best test NDCG@20.
# `on=` is the current name of the keyword formerly called `columns=`; pivot only
# reads the on/index/values columns, so no pre-selection is needed.
experiment_summary.pivot(
    on="l2_regularization",
    index="embedding_dimension",
    values=["best:epoch/test_ndcg@20"],
)

  experiment_summary[["embedding_dimension", "l2_regularization", "best:epoch/test_ndcg@20"]].pivot(


embedding_dimension,0.0,1e-10,1e-9,1e-8,1e-7,1e-6,0.00001,0.0001,0.001,0.01
i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
2,0.078058,0.080488,0.073735,0.080168,0.080954,0.078462,0.066975,0.016989,0.005061,0.004804
4,0.099938,0.099181,0.101943,0.100334,0.103918,0.101334,0.070803,0.016738,0.005597,0.003315
8,0.10976,0.110979,0.109776,0.113028,0.119901,0.116816,0.071034,0.016199,0.005,0.004304
16,0.113425,0.112979,0.112719,0.115682,0.128256,0.122477,0.071606,0.017082,0.005616,0.004364
32,0.113035,0.112975,0.113315,0.114622,0.131433,0.125395,0.071054,0.016999,0.005687,0.004393
64,0.112397,0.112011,0.112694,0.114441,0.133791,0.126345,0.074572,0.016865,0.006101,0.004945
128,0.109865,0.110493,0.109764,0.112396,0.134253,0.125869,0.065612,0.017077,0.006286,0.004135
256,0.106906,0.107047,0.108992,0.10984,0.131401,0.125951,0.05364,0.017016,0.006449,0.004893
512,0.102407,0.106343,0.104065,0.105173,0.124356,0.126202,0.052732,0.017076,0.007156,0.004955
1024,0.106575,0.109767,0.108375,0.108747,0.113485,0.1262,0.049899,0.017348,0.007753,0.004392


## L1 Regularization | Shuffle = False

In [13]:
# L1 sweep without shuffling: keep runs with shuffle off, no L2 penalty and no
# embedding dropout, then average the best-step metrics per
# (embedding_dimension, l1_regularization) combination.
experiment_summary = (
    experiment_runs
    .filter(
        pl.col("shuffle").eq(False)
        & pl.col("l2_regularization").eq(0.0)
        & pl.col("embedding_dropout_rate").eq(0.0)
    )
    .group_by(["embedding_dimension", "l1_regularization"])
    .agg(
        pl.col("run_id").count().alias("num_runs"),
        *(
            pl.col(metric).mean()
            for metric in (
                "best:epoch/epoch",
                "best:epoch/test_recall@10",
                "best:epoch/test_ndcg@10",
                "best:epoch/test_recall@20",
                "best:epoch/test_ndcg@20",
            )
        ),
    )
    .sort(["embedding_dimension", "l1_regularization"])
)
experiment_summary

embedding_dimension,l1_regularization,num_runs,best:epoch/epoch,best:epoch/test_recall@10,best:epoch/test_ndcg@10,best:epoch/test_recall@20,best:epoch/test_ndcg@20
i64,f64,u32,f64,f64,f64,f64,f64
2,0.0,6,52.0,0.015826,0.061035,0.028021,0.080881
2,1.0000e-10,2,62.5,0.015539,0.059979,0.027852,0.079986
2,1.0000e-9,4,51.0,0.015298,0.059684,0.027656,0.079496
2,1.0000e-8,6,59.666667,0.014625,0.057231,0.026286,0.075915
2,0.0000001,4,60.0,0.014636,0.057811,0.026239,0.076528
2,0.000001,6,26.833333,0.000567,0.002743,0.000969,0.003734
2,0.00001,6,42.666667,0.000431,0.001875,0.000822,0.002814
2,0.0001,3,34.666667,0.000312,0.001591,0.000572,0.002277
2,0.001,4,41.5,0.000345,0.001615,0.000655,0.002369
2,0.01,7,13.285714,0.000335,0.001495,0.000667,0.002334


### Run Counts

In [14]:
# Wide view of `experiment_summary`: one row per embedding_dimension, one column
# per l1_regularization value, cells hold the number of runs (null where the
# combination has no run).
# `on=` is the current name of the keyword formerly called `columns=`; pivot only
# reads the on/index/values columns, so no pre-selection is needed.
experiment_summary.pivot(
    on="l1_regularization",
    index="embedding_dimension",
    values=["num_runs"],
)

  experiment_summary[["embedding_dimension", "l1_regularization", "num_runs"]].pivot(


embedding_dimension,0.0,1e-10,1e-9,1e-8,1e-7,1e-6,0.00001,0.0001,0.001,0.01
i64,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32
2,6,2,4.0,6,4.0,6,6,3,4,7
4,8,3,1.0,5,6.0,1,4,3,5,6
8,9,3,6.0,3,,4,7,1,5,3
16,8,1,5.0,2,4.0,4,4,6,4,4
32,4,5,5.0,4,4.0,3,5,5,3,4
64,6,3,2.0,4,4.0,3,3,2,4,2
128,10,3,4.0,1,8.0,2,4,2,4,1
256,10,5,,3,8.0,3,5,2,7,3
512,6,2,2.0,1,2.0,7,2,1,4,5
1024,8,4,7.0,5,5.0,4,5,3,5,3


### Best Epoch

In [15]:
# Wide view of `experiment_summary`: one row per embedding_dimension, one column
# per l1_regularization value, cells hold the mean best epoch.
# `on=` is the current name of the keyword formerly called `columns=`; pivot only
# reads the on/index/values columns, so no pre-selection is needed.
experiment_summary.pivot(
    on="l1_regularization",
    index="embedding_dimension",
    values=["best:epoch/epoch"],
)

  experiment_summary[["embedding_dimension", "l1_regularization", "best:epoch/epoch"]].pivot(


embedding_dimension,0.0,1e-10,1e-9,1e-8,1e-7,1e-6,0.00001,0.0001,0.001,0.01
i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
2,52.0,62.5,51.0,59.666667,60.0,26.833333,42.666667,34.666667,41.5,13.285714
4,40.125,55.333333,49.0,51.6,54.833333,61.0,39.5,14.0,42.4,41.166667
8,49.555556,56.666667,49.666667,45.333333,,39.25,44.428571,6.0,27.2,10.0
16,32.25,13.0,25.8,44.5,55.25,28.5,49.25,7.5,20.75,38.75
32,16.75,17.2,14.0,21.25,42.5,43.333333,42.0,6.0,28.666667,25.75
64,7.666667,5.666667,7.5,19.5,34.25,42.666667,50.666667,6.0,41.0,19.0
128,7.5,5.666667,5.5,3.0,28.125,23.0,34.0,31.5,35.75,55.0
256,6.0,4.6,,9.0,26.125,25.0,46.2,3.0,20.571429,28.666667
512,1.0,1.0,1.0,7.0,18.5,33.571429,51.0,1.0,15.25,37.0
1024,1.0,1.0,1.0,1.0,19.2,41.75,50.6,5.333333,17.0,27.666667


### Best Test Recall@20

In [16]:
# Wide view of `experiment_summary`: one row per embedding_dimension, one column
# per l1_regularization value, cells hold the mean best test recall@20.
# `on=` is the current name of the keyword formerly called `columns=`; pivot only
# reads the on/index/values columns, so no pre-selection is needed.
experiment_summary.pivot(
    on="l1_regularization",
    index="embedding_dimension",
    values=["best:epoch/test_recall@20"],
)

  experiment_summary[["embedding_dimension", "l1_regularization", "best:epoch/test_recall@20"]].pivot(


embedding_dimension,0.0,1e-10,1e-9,1e-8,1e-7,1e-6,0.00001,0.0001,0.001,0.01
i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
2,0.028021,0.027852,0.027656,0.026286,0.026239,0.000969,0.000822,0.000572,0.000655,0.000667
4,0.035513,0.03564,0.034404,0.034933,0.035129,0.000904,0.000714,0.000601,0.000606,0.000657
8,0.038919,0.039391,0.038882,0.039266,,0.000835,0.000679,0.0006,0.000629,0.00063
16,0.039954,0.040006,0.040462,0.040408,0.043224,0.00084,0.000676,0.000566,0.000589,0.000624
32,0.04054,0.040881,0.04125,0.041556,0.04476,0.000818,0.000683,0.000551,0.000624,0.000614
64,0.04002,0.041657,0.041703,0.041756,0.045351,0.000833,0.000711,0.000553,0.000629,0.000619
128,0.039109,0.041531,0.041809,0.043212,0.046793,0.000822,0.000702,0.000557,0.000607,0.000626
256,0.038196,0.041301,,0.042063,0.04729,0.000819,0.00066,0.000542,0.0006,0.000617
512,0.040534,0.041669,0.042037,0.040776,0.047714,0.000825,0.000674,0.000626,0.000633,0.000631
1024,0.042053,0.042571,0.043222,0.043268,0.047753,0.000795,0.000657,0.000562,0.000591,0.000607


### Best Test NDCG@20

In [17]:
# Wide view of `experiment_summary`: one row per embedding_dimension, one column
# per l1_regularization value, cells hold the mean best test NDCG@20.
# `on=` is the current name of the keyword formerly called `columns=`; pivot only
# reads the on/index/values columns, so no pre-selection is needed.
experiment_summary.pivot(
    on="l1_regularization",
    index="embedding_dimension",
    values=["best:epoch/test_ndcg@20"],
)

  experiment_summary[["embedding_dimension", "l1_regularization", "best:epoch/test_ndcg@20"]].pivot(


embedding_dimension,0.0,1e-10,1e-9,1e-8,1e-7,1e-6,0.00001,0.0001,0.001,0.01
i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
2,0.080881,0.079986,0.079496,0.075915,0.076528,0.003734,0.002814,0.002277,0.002369,0.002334
4,0.10118,0.101263,0.097113,0.099579,0.099798,0.003355,0.002529,0.00211,0.002201,0.002352
8,0.110166,0.11107,0.110459,0.111534,,0.003143,0.00246,0.002173,0.002198,0.002279
16,0.112943,0.113509,0.114139,0.114551,0.122376,0.002973,0.002539,0.002112,0.002235,0.002192
32,0.11407,0.115561,0.116807,0.116688,0.125089,0.00308,0.002405,0.002072,0.00226,0.002309
64,0.113347,0.116229,0.117257,0.117801,0.126617,0.003027,0.00243,0.001974,0.002197,0.002186
128,0.111862,0.117618,0.11712,0.1218,0.130339,0.002906,0.002506,0.002079,0.002249,0.002287
256,0.108185,0.116869,,0.118328,0.131573,0.002915,0.002428,0.001958,0.002204,0.00225
512,0.112974,0.116273,0.11675,0.115347,0.133339,0.002967,0.002427,0.002288,0.002256,0.002211
1024,0.117659,0.117839,0.11948,0.12039,0.132359,0.002954,0.002392,0.001925,0.002163,0.002152


## L1 Regularization | Shuffle = True

In [18]:
# L1 sweep with shuffling: keep runs with shuffle on, no L2 penalty and no
# embedding dropout, then average the best-step metrics per
# (embedding_dimension, l1_regularization) combination.
experiment_summary = (
    experiment_runs
    .filter(
        pl.col("shuffle").eq(True)
        & pl.col("l2_regularization").eq(0.0)
        & pl.col("embedding_dropout_rate").eq(0.0)
    )
    .group_by(["embedding_dimension", "l1_regularization"])
    .agg(
        pl.col("run_id").count().alias("num_runs"),
        *(
            pl.col(metric).mean()
            for metric in (
                "best:epoch/epoch",
                "best:epoch/test_recall@10",
                "best:epoch/test_ndcg@10",
                "best:epoch/test_recall@20",
                "best:epoch/test_ndcg@20",
            )
        ),
    )
    .sort(["embedding_dimension", "l1_regularization"])
)
experiment_summary

embedding_dimension,l1_regularization,num_runs,best:epoch/epoch,best:epoch/test_recall@10,best:epoch/test_ndcg@10,best:epoch/test_recall@20,best:epoch/test_ndcg@20
i64,f64,u32,f64,f64,f64,f64,f64
2,0.0,7,49.428571,0.014908,0.058728,0.026836,0.078058
2,1.0000e-10,3,46.333333,0.015697,0.061469,0.02811,0.08137
2,1.0000e-9,8,56.875,0.014706,0.057305,0.026352,0.076123
2,1.0000e-8,4,48.25,0.01544,0.060524,0.027689,0.080396
2,0.0000001,4,58.25,0.014599,0.056736,0.026472,0.076008
2,0.000001,9,20.222222,0.005546,0.02204,0.009493,0.028311
2,0.00001,2,44.0,0.000392,0.001819,0.000775,0.002751
2,0.0001,3,18.0,0.000319,0.001426,0.000652,0.002261
2,0.001,3,24.666667,0.000367,0.001795,0.00068,0.002593
2,0.01,7,38.285714,0.000377,0.001841,0.000718,0.002648


### Run Counts

In [19]:
# Wide view of `experiment_summary`: one row per embedding_dimension, one column
# per l1_regularization value, cells hold the number of runs (null where the
# combination has no run).
# `on=` is the current name of the keyword formerly called `columns=`; pivot only
# reads the on/index/values columns, so no pre-selection is needed.
experiment_summary.pivot(
    on="l1_regularization",
    index="embedding_dimension",
    values=["num_runs"],
)

  experiment_summary[["embedding_dimension", "l1_regularization", "num_runs"]].pivot(


embedding_dimension,0.0,1e-10,1e-9,1e-8,1e-7,1e-6,0.00001,0.0001,0.001,0.01
i64,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32
2,7,3.0,8,4,4,9,2,3.0,3,7
4,7,1.0,4,2,4,5,3,3.0,1,3
8,9,1.0,7,3,2,5,3,3.0,3,2
16,8,4.0,6,5,4,3,2,5.0,3,3
32,11,1.0,4,7,4,6,4,4.0,4,5
64,6,4.0,3,1,3,1,4,4.0,6,3
128,6,3.0,4,6,5,8,7,3.0,7,4
256,7,,5,6,3,2,4,2.0,5,5
512,5,6.0,5,5,5,5,4,6.0,5,4
1024,6,4.0,6,6,4,5,2,,3,5


### Best Epoch

In [20]:
# Wide view of `experiment_summary`: one row per embedding_dimension, one column
# per l1_regularization value, cells hold the mean best epoch.
# `on=` is the current name of the keyword formerly called `columns=`; pivot only
# reads the on/index/values columns, so no pre-selection is needed.
experiment_summary.pivot(
    on="l1_regularization",
    index="embedding_dimension",
    values=["best:epoch/epoch"],
)

  experiment_summary[["embedding_dimension", "l1_regularization", "best:epoch/epoch"]].pivot(


embedding_dimension,0.0,1e-10,1e-9,1e-8,1e-7,1e-6,0.00001,0.0001,0.001,0.01
i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
2,49.428571,46.333333,56.875,48.25,58.25,20.222222,44.0,18.0,24.666667,38.285714
4,54.285714,53.0,51.75,47.0,61.0,1.4,51.0,5.333333,63.0,38.333333
8,50.555556,46.0,52.285714,49.666667,62.5,26.0,42.666667,4.0,40.0,19.5
16,44.125,36.25,38.333333,45.8,57.25,33.666667,51.0,6.4,12.0,29.0
32,21.454545,31.0,20.0,28.0,51.75,48.5,37.5,11.5,23.0,33.4
64,16.333333,16.25,18.0,23.0,30.333333,62.0,47.5,6.75,27.166667,28.0
128,14.166667,15.0,11.0,15.833333,21.8,46.75,42.714286,4.666667,22.285714,21.75
256,10.0,,9.2,14.166667,16.666667,44.5,38.25,26.5,18.0,22.6
512,6.8,7.666667,9.2,10.2,16.4,41.4,42.0,10.166667,31.6,38.75
1024,0.0,1.25,0.0,0.0,13.5,34.2,49.0,,41.333333,39.2


### Best Test Recall@20

In [21]:
# Wide view of `experiment_summary`: one row per embedding_dimension, one column
# per l1_regularization value, cells hold the mean best test recall@20.
# `on=` is the current name of the keyword formerly called `columns=`; pivot only
# reads the on/index/values columns, so no pre-selection is needed.
experiment_summary.pivot(
    on="l1_regularization",
    index="embedding_dimension",
    values=["best:epoch/test_recall@20"],
)

  experiment_summary[["embedding_dimension", "l1_regularization", "best:epoch/test_recall@20"]].pivot(


embedding_dimension,0.0,1e-10,1e-9,1e-8,1e-7,1e-6,0.00001,0.0001,0.001,0.01
i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
2,0.026836,0.02811,0.026352,0.027689,0.026472,0.009493,0.000775,0.000652,0.00068,0.000718
4,0.034983,0.035544,0.034672,0.03455,0.035874,0.008691,0.000689,0.000596,0.000564,0.000645
8,0.038897,0.039112,0.03893,0.039577,0.040479,0.023306,0.000715,0.000625,0.000662,0.000591
16,0.040105,0.040053,0.039879,0.040207,0.043309,0.031818,0.000607,0.000554,0.000637,0.000606
32,0.039802,0.040156,0.039829,0.040821,0.044144,0.032159,0.000646,0.00054,0.000596,0.000592
64,0.039555,0.039763,0.039512,0.040515,0.044442,0.032444,0.000678,0.000568,0.000599,0.000616
128,0.038365,0.038617,0.038382,0.040188,0.045027,0.034147,0.000681,0.000545,0.000594,0.000623
256,0.037243,,0.037835,0.039457,0.045375,0.034354,0.000681,0.000541,0.0006,0.00061
512,0.035652,0.035984,0.036169,0.038023,0.045771,0.036023,0.000647,0.000568,0.000586,0.000611
1024,0.036986,0.03644,0.037874,0.037593,0.046157,0.034933,0.000667,,0.000602,0.000609


### Best Test NDCG@20

In [22]:
# Wide view of `experiment_summary`: one row per embedding_dimension, one column
# per l1_regularization value, cells hold the mean best test NDCG@20.
# `on=` is the current name of the keyword formerly called `columns=`; pivot only
# reads the on/index/values columns, so no pre-selection is needed.
experiment_summary.pivot(
    on="l1_regularization",
    index="embedding_dimension",
    values=["best:epoch/test_ndcg@20"],
)

  experiment_summary[["embedding_dimension", "l1_regularization", "best:epoch/test_ndcg@20"]].pivot(


embedding_dimension,0.0,1e-10,1e-9,1e-8,1e-7,1e-6,0.00001,0.0001,0.001,0.01
i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
2,0.078058,0.08137,0.076123,0.080396,0.076008,0.028311,0.002751,0.002261,0.002593,0.002648
4,0.099938,0.099277,0.09854,0.10081,0.101107,0.027767,0.002588,0.002179,0.002079,0.002373
8,0.10976,0.109467,0.110733,0.112334,0.114049,0.069098,0.002536,0.002041,0.002391,0.00225
16,0.113425,0.113423,0.112999,0.113826,0.122009,0.091434,0.002402,0.002032,0.002234,0.002268
32,0.113035,0.113218,0.113298,0.115517,0.123725,0.092424,0.002407,0.001938,0.00216,0.00215
64,0.112397,0.113374,0.112736,0.114183,0.124261,0.093156,0.002492,0.002012,0.002147,0.002292
128,0.109865,0.109903,0.109842,0.113995,0.126457,0.097509,0.002447,0.00198,0.002215,0.002298
256,0.106906,,0.108212,0.112283,0.127094,0.098175,0.002438,0.001979,0.002101,0.002236
512,0.102407,0.10411,0.103905,0.109402,0.127677,0.102365,0.002373,0.001998,0.002254,0.002301
1024,0.106575,0.105476,0.108838,0.109243,0.128846,0.099836,0.002408,,0.002175,0.002289


# Cross-GPU Training

In [None]:
# Mean run duration in minutes for each embedding dimension, over all loaded runs.
(
    experiment_runs
    .group_by(["embedding_dimension"])
    .agg([pl.col("run_duration_minute").mean()])
    .sort(by="embedding_dimension")
)

embedding_dimension,run_duration_minute
i64,f64
2,12.470927
4,12.521701
8,12.476034
16,12.452547
32,12.371943
64,12.583647
128,12.631644
256,12.549739
512,13.229172
1024,15.293637


In [19]:
# Mean run duration in minutes for each GPU type, over all loaded runs.
(
    experiment_runs
    .group_by(["gpu_type"])
    .agg([pl.col("run_duration_minute").mean()])
    .sort(by="gpu_type")
)

gpu_type,run_duration_minute
str,f64
"""NVIDIA A10G""",12.567288
"""NVIDIA L4""",17.202939


In [20]:
# Unregularized runs only (no L1, no L2, no embedding dropout): average the
# best-step metrics per (embedding_dimension, gpu_type) so the GPU types can be
# compared on otherwise identical settings.
experiment_summary = (
    experiment_runs
    .filter(
        pl.col("l2_regularization").eq(0)
        & pl.col("l1_regularization").eq(0.0)
        & pl.col("embedding_dropout_rate").eq(0.0)
    )
    .group_by(["embedding_dimension", "gpu_type"])
    .agg(
        pl.col("run_id").count().alias("num_runs"),
        *(
            pl.col(metric).mean()
            for metric in (
                "best:epoch/epoch",
                "best:epoch/test_recall@10",
                "best:epoch/test_ndcg@10",
                "best:epoch/test_recall@50",
                "best:epoch/test_ndcg@50",
            )
        ),
    )
    .sort(["embedding_dimension", "gpu_type"])
)
experiment_summary

embedding_dimension,gpu_type,num_runs,best:epoch/epoch,best:epoch/test_recall@10,best:epoch/test_ndcg@10,best:epoch/test_recall@50,best:epoch/test_ndcg@50
i64,str,u32,f64,f64,f64,f64,f64
2,"""NVIDIA A10G""",8,47.375,0.015245,0.059927,0.054253,0.106359
2,"""NVIDIA L4""",2,54.5,0.01585,0.061646,0.056766,0.1099
4,"""NVIDIA A10G""",13,47.769231,0.019881,0.077368,0.070876,0.132634
8,"""NVIDIA A10G""",14,49.571429,0.02198,0.085266,0.078396,0.143867
8,"""NVIDIA L4""",1,48.0,0.022055,0.08557,0.078698,0.144032
16,"""NVIDIA A10G""",9,39.222222,0.022658,0.087821,0.080241,0.147057
16,"""NVIDIA L4""",1,23.0,0.023082,0.089788,0.081257,0.148892
32,"""NVIDIA A10G""",12,19.25,0.022835,0.088561,0.080073,0.14754
64,"""NVIDIA A10G""",11,11.181818,0.022757,0.088169,0.079091,0.146335
128,"""NVIDIA A10G""",14,10.5,0.022307,0.086629,0.07742,0.144289


In [21]:
# Wide view of `experiment_summary`: one row per embedding_dimension, one column
# per gpu_type, cells hold the number of runs (null where a GPU type has no run).
# `on=` is the current name of the keyword formerly called `columns=`; pivot only
# reads the on/index/values columns, so no pre-selection is needed.
experiment_summary.pivot(
    on="gpu_type",
    index="embedding_dimension",
    values=["num_runs"],
)

  experiment_summary[["embedding_dimension", "gpu_type", "num_runs"]].pivot(


embedding_dimension,NVIDIA A10G,NVIDIA L4
i64,u32,u32
2,8,2.0
4,13,
8,14,1.0
16,9,1.0
32,12,
64,11,
128,14,1.0
256,11,
512,6,2.0
1024,11,


In [22]:
# Wide view of `experiment_summary`: one row per embedding_dimension, one column
# per gpu_type, cells hold the mean best test recall@50.
# `on=` is the current name of the keyword formerly called `columns=`; pivot only
# reads the on/index/values columns, so no pre-selection is needed.
experiment_summary.pivot(
    on="gpu_type",
    index="embedding_dimension",
    values=["best:epoch/test_recall@50"],
)

  experiment_summary[["embedding_dimension", "gpu_type", "best:epoch/test_recall@50"]].pivot(


embedding_dimension,NVIDIA A10G,NVIDIA L4
i64,f64,f64
2,0.054253,0.056766
4,0.070876,
8,0.078396,0.078698
16,0.080241,0.081257
32,0.080073,
64,0.079091,
128,0.07742,0.077215
256,0.074939,
512,0.07454,0.075376
1024,0.077792,


In [23]:
# Wide view of `experiment_summary`: one row per embedding_dimension, one column
# per gpu_type, cells hold the mean best test NDCG@50.
# `on=` is the current name of the keyword formerly called `columns=`; pivot only
# reads the on/index/values columns, so no pre-selection is needed.
experiment_summary.pivot(
    on="gpu_type",
    index="embedding_dimension",
    values=["best:epoch/test_ndcg@50"],
)

  experiment_summary[["embedding_dimension", "gpu_type", "best:epoch/test_ndcg@50"]].pivot(


embedding_dimension,NVIDIA A10G,NVIDIA L4
i64,f64,f64
2,0.106359,0.1099
4,0.132634,
8,0.143867,0.144032
16,0.147057,0.148892
32,0.14754,
64,0.146335,
128,0.144289,0.145288
256,0.139993,
512,0.138887,0.140036
1024,0.144141,
