In [25]:
import os
import tqdm
import wandb
import warnings
import numpy as np
import pandas as pd
import polars as pl
import matplotlib.pyplot as plt
import seaborn as sns
import concurrent.futures

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # Suppress TensorFlow logging
warnings.filterwarnings('ignore', category=UserWarning, module='google.protobuf')

from matplotlib.axes import Axes
from wandb.apis.public import Run

from typing import Union, List, Dict
from src.visualization import set_themes

set_themes() # Set custom themes for plots
pl.Config.set_tbl_rows(20) # Set Polars table display rows limit

# Opt in to pandas' future behaviour in which operations such as `DataFrame.replace`
# no longer silently downcast object columns (this also silences the related FutureWarning).
pd.set_option('future.no_silent_downcasting', True) 

In [2]:
# Parquet file holding a previously fetched run summary; when it is None or
# missing, the runs are fetched from Weights & Biases instead.
cache_file = "wandb/summary.parquet"

# Notebook-wide settings: which model's runs to analyse and whether to keep
# only runs whose model files are present locally.
config = dict(model="matrix_factorization", ensure_available_locally=False)

# Metric -> weight mapping handed to the run fetcher as its scoring criterion.
sorting_criterion = {"epoch/test_hitrate@50": 0.5, "epoch/test_ndcg@50": 0.25}

cache_is_available = cache_file is not None and os.path.exists(cache_file)
if cache_is_available:
    print(f"Loading cached experiment runs from {cache_file}...")
    experiment_runs = pl.read_parquet(cache_file)
    print(f"Loaded {len(experiment_runs)} runs from cache.")
else:
    print("No cache file found. Fetching experiment runs from Weights & Biases...")
    # Weights & Biases API client, used below for fetching run data
    api = wandb.Api()

    def fetch_run_metadata(run: Run, considered_metrics: Union[str, Dict[str, float]] = "epoch/epoch") -> Dict:
        """Flatten one W&B run into a single record for the summary table.

        The record contains the run identifiers, the run config, every history
        column as a list, and the history row with the highest score, whose
        values are stored under ``best:<column>`` keys.

        Args:
            run: The W&B run to summarize.
            considered_metrics: Either the name of a single history column that
                is used directly as the score, or a mapping from history column
                to weight whose weighted sum is the score.

        Returns:
            A flat dictionary describing the run. The ``best:*`` entries are
            omitted when no score can be computed (a considered metric was
            never logged, the history is empty, or every score is NaN).

        Raises:
            ValueError: If ``considered_metrics`` is neither a string nor a dictionary.
        """
        if isinstance(considered_metrics, str):
            required_metrics = [considered_metrics]
        elif isinstance(considered_metrics, dict):
            required_metrics = list(considered_metrics)
        else:
            raise ValueError("considered_metrics must be either a string or a dictionary")

        # Lists and dicts are stringified so that every config value is a scalar
        run_config = {
            key: str(value) if isinstance(value, (list, dict)) else value
            for key, value in run.config.items()
        }

        run_history = run.history()
        run_history = run_history.replace({"Infinity": np.inf, "NaN": np.nan})

        if all(metric in run_history.columns for metric in required_metrics):
            if isinstance(considered_metrics, str):
                score = run_history[considered_metrics]
            else:
                score = sum(
                    run_history[metric] * weight for metric, weight in considered_metrics.items()
                )
            # A column that held "NaN"/"Infinity" strings can still be object dtype
            # after the replace above; coerce so the arg-max below is well defined.
            run_history["score"] = pd.to_numeric(score, errors="coerce")
        else:
            # The run never logged (all of) the considered metrics, e.g. it crashed early
            run_history["score"] = np.nan

        score_values = run_history["score"].to_numpy(dtype=float)
        if np.isnan(score_values).all():  # also true for an empty history
            best_summary = {}
        else:
            best_row = run_history.iloc[int(np.nanargmax(score_values))]
            best_summary = {f"best:{key}": val for key, val in best_row.items()}

        # run.metadata may be None when the run has no metadata file
        run_metadata = run.metadata or {}

        return {
            "run_id": run.id,
            "run_name": run.name,
            "sweep_id": run.sweep.id if run.sweep else None,
            "model": run.config.get("model"),
            **run_config,
            **{metric: run_history[metric].to_list() for metric in run_history},
            **best_summary,
            "gpu_type": run_metadata.get("gpu"),
            "cpu_count": run_metadata.get("cpu_count"),
        }

    batch_size = 16  # maximum number of runs fetched concurrently
    records = []
    runs:List[Run] = api.runs("feedr/peppermint-matrix", per_page=2*batch_size-1, filters={"config.model": config["model"]})
    run_iterator = iter(runs)
    pending_futures = set()
    runs_exhausted = False

    # The context managers shut the worker threads down and close the progress
    # bar even when fetching a run raises.
    with concurrent.futures.ThreadPoolExecutor(max_workers=batch_size) as executor:
        with tqdm.tqdm(total=len(runs), ncols=128) as pbar:
            while True:
                # Top up the in-flight window; stop when the iterator is exhausted
                # instead of trusting len(runs) to match the number of yielded runs.
                while not runs_exhausted and len(pending_futures) < batch_size:
                    next_run = next(run_iterator, None)
                    if next_run is None:
                        runs_exhausted = True
                    else:
                        pending_futures.add(
                            executor.submit(fetch_run_metadata, next_run, sorting_criterion)
                        )

                if not pending_futures:
                    break

                # Block until at least one fetch finishes, then collect everything that is done
                finished_futures, pending_futures = concurrent.futures.wait(
                    pending_futures, return_when=concurrent.futures.FIRST_COMPLETED
                )
                for finished_future in finished_futures:
                    records.append(finished_future.result())  # re-raises worker exceptions
                    pbar.update(1)

    # Create a Polars DataFrame from the records
    experiment_runs = pl.DataFrame(records, infer_schema_length=None)

    # Persist the summary so the next execution can take the cache branch
    if cache_file is not None:
        cache_directory = os.path.dirname(cache_file)
        if cache_directory:
            os.makedirs(cache_directory, exist_ok=True)
        experiment_runs.write_parquet(cache_file)
        print(f"Cached {len(experiment_runs)} runs to {cache_file}.")
    
# Tag run as available locally if the model files exist.
# The directory is read as ./models/<model>/<sweep_id>/<run_id>; a missing
# directory or a stray file simply means "nothing available" instead of crashing.
models_directory = f"./models/{config['model']}/"
local_sweep_ids = os.listdir(models_directory) if os.path.isdir(models_directory) else []
local_run_ids = []
for sweep_id in local_sweep_ids:
    sweep_directory = os.path.join(models_directory, sweep_id)
    if os.path.isdir(sweep_directory):
        local_run_ids.extend(os.listdir(sweep_directory))

experiment_runs = experiment_runs.with_columns(
    available_locally=pl.col("run_id").is_in(local_run_ids)
)

if config["ensure_available_locally"]:
    experiment_runs = experiment_runs.filter(pl.col("available_locally"))

experiment_runs = experiment_runs.sort("_timestamp", descending=False)

# The run duration is the largest logged `_runtime` value of the run's history
longest_runtime = pl.col("_runtime").list.max()
experiment_runs = experiment_runs.with_columns(
    run_duration_second=longest_runtime,
    run_duration_minute=longest_runtime / 60,
)

# Preview the identifying columns and the headline metrics of each run
experiment_runs.select(
    "run_id",
    "run_name",
    "sweep_id",
    "model",
    "embedding_dimension",
    "shuffle",
    "best:epoch/epoch",
    "best:epoch/train_loss",
    "best:epoch/test_loss",
    "best:epoch/test_recall@10",
    "best:epoch/test_ndcg@10",
)

Loading cached experiment runs from wandb/summary.parquet...
Loaded 2343 runs from cache.


run_id,run_name,sweep_id,model,embedding_dimension,shuffle,best:epoch/epoch,best:epoch/train_loss,best:epoch/test_loss,best:epoch/test_recall@10,best:epoch/test_ndcg@10
str,str,str,str,i64,bool,f64,f64,f64,f64,f64
"""o94q0juk""","""logical-sweep-1""","""nbysw136""","""matrix_factorization""",256,false,52.0,0.330564,0.372714,0.026077,0.100539
"""4ftaae0p""","""stilted-sweep-3""","""nbysw136""","""matrix_factorization""",4,false,59.0,0.693148,0.693148,0.006594,0.02556
"""fway5u2z""","""breezy-sweep-4""","""nbysw136""","""matrix_factorization""",512,false,4.0,0.693147,0.693147,0.0026,0.012059
"""bphcl2xf""","""clean-sweep-2""","""nbysw136""","""matrix_factorization""",1024,false,1.0,0.237272,0.239171,0.024744,0.095121
"""fftz1dek""","""trim-sweep-5""","""nbysw136""","""matrix_factorization""",256,true,57.0,0.319744,0.366027,0.025562,0.099834
"""otb8suw9""","""scarlet-sweep-6""","""nbysw136""","""matrix_factorization""",4,true,63.0,0.133806,0.217083,0.021467,0.082659
"""lvre7srl""","""solar-sweep-7""","""nbysw136""","""matrix_factorization""",256,true,8.0,0.036125,0.194509,0.021456,0.084617
"""dcbj92eg""","""ruby-sweep-8""","""nbysw136""","""matrix_factorization""",256,false,3.0,0.078173,0.179263,0.024295,0.094158
"""x17mnyw8""","""breezy-sweep-9""","""nbysw136""","""matrix_factorization""",8,false,61.0,0.111218,0.207902,0.023699,0.089898
"""7jrm756b""","""super-sweep-10""","""nbysw136""","""matrix_factorization""",4,false,10.0,0.693147,0.693147,0.002425,0.010531


# High Level Parameter Comparison
We pick one parameter and then see its impact on the overall performance, setting the other parameters to their best values.

## Global Leaderboard

In [3]:
# One row per hyper-parameter configuration, averaged over the runs that share it
# (i.e. over random seeds). Insert a `.filter(...)` before `group_by` to restrict
# the leaderboard to a subset of configurations.
leaderboard_parameters = [
    "shuffle",
    "embedding_dimension",
    "l2_regularization",
    "l1_regularization",
    "embedding_dropout_rate",
]
leaderboard_metrics = [
    "best:epoch/epoch",
    "best:epoch/test_recall@10",
    "best:epoch/test_ndcg@10",
    "best:epoch/test_recall@20",
    "best:epoch/test_ndcg@20",
]

experiment_summary = (
    experiment_runs
    .group_by(leaderboard_parameters)
    .agg(
        pl.col("run_id").count().alias("num_runs"),
        *[pl.col(metric).mean() for metric in leaderboard_metrics],
    )
    .sort("best:epoch/test_recall@20", descending=True)
    # Regularization strengths are shown as strings in the pandas table
    .with_columns(pl.col("l1_regularization", "l2_regularization").cast(pl.Utf8))
)
experiment_summary.to_pandas()

Unnamed: 0,shuffle,embedding_dimension,l2_regularization,l1_regularization,embedding_dropout_rate,num_runs,best:epoch/epoch,best:epoch/test_recall@10,best:epoch/test_ndcg@10,best:epoch/test_recall@20,best:epoch/test_ndcg@20
0,False,128,1e-7,1e-7,0,7,53.285714,0.028866,0.109814,0.050247,0.137913
1,True,512,1e-7,1e-7,0,1,28.000000,0.029088,0.109141,0.050239,0.137186
2,False,256,1e-7,1e-7,0,2,54.500000,0.028998,0.110754,0.050139,0.138627
3,False,256,1e-7,1e-8,0,1,61.000000,0.028682,0.108945,0.049684,0.137072
4,True,128,1e-7,1e-7,0,2,54.000000,0.028584,0.107302,0.049522,0.135343
...,...,...,...,...,...,...,...,...,...,...,...
552,True,128,0.00001,0.00001,0,6,9.500000,0.000227,0.001042,0.000466,0.001649
553,True,512,0.0001,1e-6,0,1,0.000000,0.000210,0.001027,0.000458,0.001626
554,False,128,0.00001,0.00001,0,3,2.000000,0.000211,0.001040,0.000440,0.001603
555,False,512,0.0001,1e-6,0,1,1.000000,0.000203,0.001030,0.000424,0.001650


## Shuffle Impact

In [4]:
shuffle_ranking_metrics = [
    "best:epoch/test_recall@10",
    "best:epoch/test_ndcg@10",
    "best:epoch/test_recall@20",
    "best:epoch/test_ndcg@20",
]

# Stage 1: average over the runs that share a configuration (random seeds)
per_configuration = experiment_runs.group_by(
    ["shuffle", "embedding_dimension", "l2_regularization", "l1_regularization"]
).agg(
    pl.col("run_id").count().alias("num_runs"),
    pl.col("best:epoch/epoch").mean(),
    *[pl.col(metric).mean() for metric in shuffle_ranking_metrics],
)

# Stage 2: collapse the parameters that are irrelevant here (the regularization
# strengths) by taking, independently for each metric, its maximum over the
# stage-1 configurations.
experiment_summary = (
    per_configuration
    .group_by(["shuffle", "embedding_dimension"])
    .agg(
        pl.col("num_runs").sum(),
        pl.col("best:epoch/epoch").mean(),
        *[pl.col(metric).max() for metric in shuffle_ranking_metrics],
    )
    .sort("shuffle", "embedding_dimension")
)

### Run Counts

In [5]:
# Number of runs behind each (embedding_dimension, shuffle) cell.
# `on=` replaces the deprecated `columns=` argument of `DataFrame.pivot`.
experiment_summary.select("embedding_dimension", "shuffle", "num_runs").pivot(
    on="shuffle",
    index="embedding_dimension",
    values=["num_runs"],
)

  experiment_summary[["embedding_dimension", "shuffle", "num_runs"]].pivot(


embedding_dimension,false,true
i64,u32,u32
2,91,88
4,86,84
8,88,82
16,82,90
32,91,90
64,92,100
128,295,350
256,112,104
512,89,116
1024,104,109


### Best Test Recall@20

In [7]:
# Recall@20 with embedding dimensions as rows and the shuffle setting as columns;
# axis=None puts the whole table on one shared colour scale.
(
    experiment_summary
    .select("embedding_dimension", "shuffle", "best:epoch/test_recall@20")
    .pivot(on="shuffle", index="embedding_dimension", values=["best:epoch/test_recall@20"])
    .to_pandas()
    .set_index("embedding_dimension")
    .style
    .background_gradient(cmap="RdYlGn", axis=None)
)

Unnamed: 0_level_0,false,true
embedding_dimension,Unnamed: 1_level_1,Unnamed: 2_level_1
2,0.028021,0.02811
4,0.036566,0.036723
8,0.042089,0.042581
16,0.045753,0.0465
32,0.047913,0.047524
64,0.048462,0.048557
128,0.050247,0.049522
256,0.047799,0.047734
512,0.047714,0.045771
1024,0.047753,0.046157


### Best Test NDCG@20

In [8]:
# NDCG@20 with embedding dimensions as rows and the shuffle setting as columns;
# axis=None puts the whole table on one shared colour scale.
(
    experiment_summary
    .select("embedding_dimension", "shuffle", "best:epoch/test_ndcg@20")
    .pivot(on="shuffle", index="embedding_dimension", values=["best:epoch/test_ndcg@20"])
    .to_pandas()
    .set_index("embedding_dimension")
    .style
    .background_gradient(cmap="RdYlGn", axis=None)
)

Unnamed: 0_level_0,false,true
embedding_dimension,Unnamed: 1_level_1,Unnamed: 2_level_1
2,0.080881,0.08137
4,0.102952,0.103918
8,0.11766,0.119901
16,0.128185,0.128256
32,0.132597,0.131433
64,0.13426,0.133791
128,0.137913,0.13656
256,0.132158,0.131401
512,0.133339,0.127677
1024,0.132359,0.128846


# Parameter Comparison Deep Dive
Here we select only two free variables and fix the rest to see what effect these two variables have.

## L2 Regularization, Shuffle = False, L1 = 0.0, Dropout = 0.0

In [8]:
# L2 sweep without shuffling: L1 and dropout are pinned to 0.0, then runs sharing
# an (embedding_dimension, l2_regularization) pair are averaged.
l2_no_shuffle_metrics = [
    "best:epoch/epoch",
    "best:epoch/test_recall@10",
    "best:epoch/test_ndcg@10",
    "best:epoch/test_recall@20",
    "best:epoch/test_ndcg@20",
]

experiment_summary = (
    experiment_runs
    .filter(shuffle=False, l1_regularization=0.0, embedding_dropout_rate=0.0)
    .group_by(["embedding_dimension", "l2_regularization"])
    .agg(
        pl.col("run_id").count().alias("num_runs"),
        *[pl.col(metric).mean() for metric in l2_no_shuffle_metrics],
    )
    .sort("embedding_dimension", "l2_regularization")
)

### Run Counts

In [10]:
# Number of runs behind each (embedding_dimension, l2_regularization) cell.
# `on=` replaces the deprecated `columns=` argument of `DataFrame.pivot`.
experiment_summary.select("embedding_dimension", "l2_regularization", "num_runs").pivot(
    on="l2_regularization",
    index="embedding_dimension",
    values=["num_runs"],
)

  experiment_summary[["embedding_dimension", "l2_regularization", "num_runs"]].pivot(


embedding_dimension,0.0,1e-10,1e-9,1e-8,1e-7,1e-6,0.00001,0.0001,0.001,0.01
i64,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32
2,6,4,4,5,5,3,6,4,5,6
4,9,2,4,2,2,8,4,6,4,7
8,9,9,4,3,7,4,3,4,5,3
16,8,2,2,8,2,5,3,9,4,4
32,4,3,3,5,5,6,4,6,9,8
64,6,3,3,4,8,5,4,5,5,2
128,15,2,7,8,13,8,8,6,7,3
256,10,3,4,4,5,4,5,4,5,6
512,6,2,3,6,2,4,4,7,4,4
1024,8,5,2,2,4,3,5,8,3,5


### Best Epoch

In [12]:
# Mean best epoch per (embedding_dimension, l2_regularization), shown with two
# decimals on a single colour scale shared by the whole table (axis=None).
(
    experiment_summary
    .select("embedding_dimension", "l2_regularization", "best:epoch/epoch")
    .pivot(on="l2_regularization", index="embedding_dimension", values=["best:epoch/epoch"])
    .to_pandas()
    .set_index("embedding_dimension")
    .style
    .background_gradient(cmap="Blues", axis=None)
    .format("{:.2f}")
)

Unnamed: 0_level_0,0.0,1e-10,1e-9,1e-8,1e-7,1e-6,0.00001,0.0001,0.001,0.01
embedding_dimension,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2,52.0,58.5,49.25,55.6,55.6,34.67,56.5,9.5,11.6,9.67
4,42.67,54.0,48.25,50.5,60.0,57.62,59.5,7.17,46.0,10.29
8,49.56,45.44,41.5,38.33,60.86,55.25,61.0,6.5,37.4,9.67
16,32.25,23.0,31.5,36.25,58.0,59.2,61.67,6.22,46.0,9.75
32,16.75,13.33,18.67,23.0,58.8,60.33,60.5,5.67,57.67,8.62
64,7.67,9.0,7.33,16.0,59.62,53.2,62.0,5.4,60.6,8.5
128,7.27,3.0,4.0,4.62,55.54,51.5,62.12,6.17,57.71,8.67
256,6.0,4.67,3.25,3.75,55.6,49.25,61.6,5.75,57.6,8.5
512,1.0,1.0,1.67,2.5,31.0,48.5,62.25,5.57,62.25,8.5
1024,1.0,1.0,1.0,1.0,5.0,39.67,12.6,5.5,53.0,8.2


### Best Test Recall@20

In [14]:
# Recall@20 per (embedding_dimension, l2_regularization); axis=None puts the
# whole table on one shared colour scale.
(
    experiment_summary
    .select("embedding_dimension", "l2_regularization", "best:epoch/test_recall@20")
    .pivot(on="l2_regularization", index="embedding_dimension", values=["best:epoch/test_recall@20"])
    .to_pandas()
    .set_index("embedding_dimension")
    .style
    .background_gradient(cmap="RdYlGn", axis=None)
)

Unnamed: 0_level_0,0.0,1e-10,1e-9,1e-8,1e-7,1e-6,0.00001,0.0001,0.001,0.01
embedding_dimension,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2,0.028021,0.027479,0.026413,0.027161,0.026745,0.025906,0.009413,0.004251,0.000714,0.00117
4,0.035421,0.036021,0.034871,0.035595,0.036566,0.036103,0.010423,0.004288,0.00062,0.001387
8,0.038919,0.039219,0.039121,0.040172,0.042089,0.041671,0.011141,0.004165,0.000604,0.001469
16,0.039954,0.040318,0.04048,0.041284,0.045753,0.04373,0.011708,0.004219,0.000635,0.001355
32,0.04054,0.041019,0.040565,0.041324,0.047913,0.044841,0.011938,0.004357,0.000637,0.001499
64,0.04002,0.041457,0.04145,0.041036,0.048462,0.045193,0.012537,0.004215,0.000671,0.001687
128,0.039105,0.042331,0.041816,0.041981,0.048783,0.045145,0.013157,0.004336,0.000708,0.001859
256,0.038196,0.041366,0.04183,0.04228,0.047799,0.045084,0.013644,0.004295,0.000725,0.001742
512,0.040534,0.04213,0.041043,0.041388,0.044598,0.045467,0.013858,0.004337,0.000747,0.001457
1024,0.042053,0.042826,0.043019,0.043283,0.042829,0.044551,0.015028,0.004382,0.000742,0.001633


### Best Test NDCG@20

In [15]:
# NDCG@20 per (embedding_dimension, l2_regularization); axis=None puts the
# whole table on one shared colour scale.
(
    experiment_summary
    .select("embedding_dimension", "l2_regularization", "best:epoch/test_ndcg@20")
    .pivot(on="l2_regularization", index="embedding_dimension", values=["best:epoch/test_ndcg@20"])
    .to_pandas()
    .set_index("embedding_dimension")
    .style
    .background_gradient(cmap="RdYlGn", axis=None)
)

Unnamed: 0_level_0,0.0,1e-10,1e-9,1e-8,1e-7,1e-6,0.00001,0.0001,0.001,0.01
embedding_dimension,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2,0.080881,0.079558,0.075915,0.079271,0.077531,0.075099,0.030217,0.014389,0.002484,0.003616
4,0.101048,0.1025,0.098071,0.100571,0.102822,0.102952,0.033224,0.015206,0.002254,0.004341
8,0.110166,0.110855,0.110785,0.112272,0.11766,0.117419,0.033542,0.014861,0.002137,0.004398
16,0.112943,0.114327,0.114361,0.116159,0.128185,0.122634,0.036012,0.014989,0.002359,0.003994
32,0.11407,0.115382,0.114562,0.11644,0.132597,0.125658,0.037479,0.015193,0.00237,0.004821
64,0.113347,0.116776,0.11636,0.116176,0.13426,0.12657,0.038292,0.014786,0.002622,0.005648
128,0.11218,0.119628,0.118182,0.118205,0.134612,0.126499,0.040775,0.015099,0.002728,0.006084
256,0.108185,0.116493,0.11857,0.119785,0.132158,0.126971,0.04198,0.015197,0.002757,0.00487
512,0.112974,0.11756,0.115094,0.116202,0.125115,0.126949,0.04269,0.015423,0.002837,0.004465
1024,0.117659,0.119106,0.118587,0.12,0.119165,0.125853,0.044897,0.01544,0.002889,0.005158


## L2 Regularization, Shuffle = True, L1 = 0.0, Dropout = 0.0

In [16]:
# L2 sweep with shuffling: L1 and dropout are pinned to 0.0, then runs sharing
# an (embedding_dimension, l2_regularization) pair are averaged.
l2_shuffle_metrics = [
    "best:epoch/epoch",
    "best:epoch/test_recall@10",
    "best:epoch/test_ndcg@10",
    "best:epoch/test_recall@20",
    "best:epoch/test_ndcg@20",
]

experiment_summary = (
    experiment_runs
    .filter(shuffle=True, l1_regularization=0.0, embedding_dropout_rate=0.0)
    .group_by(["embedding_dimension", "l2_regularization"])
    .agg(
        pl.col("run_id").count().alias("num_runs"),
        *[pl.col(metric).mean() for metric in l2_shuffle_metrics],
    )
    .sort("embedding_dimension", "l2_regularization")
)

### Run Counts

In [18]:
# Number of runs behind each (embedding_dimension, l2_regularization) cell.
# `on=` replaces the deprecated `columns=` argument of `DataFrame.pivot`.
experiment_summary.select("embedding_dimension", "l2_regularization", "num_runs").pivot(
    on="l2_regularization",
    index="embedding_dimension",
    values=["num_runs"],
)

  experiment_summary[["embedding_dimension", "l2_regularization", "num_runs"]].pivot(


embedding_dimension,0.0,1e-10,1e-9,1e-8,1e-7,1e-6,0.00001,0.0001,0.001,0.01
i64,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32
2,7,3,3,5,3,4,5,3,7,4
4,7,3,5,6,5,4,5,8,5,3
8,9,4,2,5,2,6,5,5,4,7
16,8,3,2,10,4,4,9,4,5,6
32,11,2,2,3,5,10,3,3,3,7
64,6,5,5,5,6,5,4,5,4,7
128,12,3,9,10,16,12,8,10,4,4
256,8,2,2,8,6,8,3,4,3,5
512,5,3,5,5,5,7,2,2,5,7
1024,6,4,6,4,3,6,6,7,3,5


### Best Epoch

In [19]:
# Mean best epoch per (embedding_dimension, l2_regularization), shown with two
# decimals on a single colour scale shared by the whole table (axis=None).
(
    experiment_summary
    .select("embedding_dimension", "l2_regularization", "best:epoch/epoch")
    .pivot(on="l2_regularization", index="embedding_dimension", values=["best:epoch/epoch"])
    .to_pandas()
    .set_index("embedding_dimension")
    .style
    .background_gradient(cmap="Blues", axis=None)
    .format("{:.2f}")
)

Unnamed: 0_level_0,0.0,1e-10,1e-9,1e-8,1e-7,1e-6,0.00001,0.0001,0.001,0.01
embedding_dimension,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2,49.43,48.67,61.67,55.6,52.67,44.0,45.6,9.67,58.57,9.0
4,54.29,60.67,48.6,55.5,61.8,60.0,43.2,8.75,61.4,8.33
8,50.56,53.0,62.5,55.2,60.0,59.17,44.0,10.2,59.25,8.86
16,44.12,33.33,52.5,52.3,60.75,56.0,45.44,11.0,63.0,8.5
32,21.45,28.0,21.5,41.0,59.6,54.7,48.0,11.67,61.67,9.0
64,16.33,13.8,16.0,19.6,58.5,52.2,50.75,12.0,62.5,8.0
128,14.58,12.33,12.56,15.4,58.06,51.33,57.5,11.2,61.75,8.75
256,10.25,11.0,10.0,13.5,54.17,51.88,0.0,9.75,61.67,7.8
512,6.8,2.33,9.2,10.0,38.0,49.57,0.0,10.5,60.0,8.0
1024,0.0,0.0,0.0,0.0,12.33,52.17,0.0,11.71,58.67,8.0


### Best Test Recall@20

In [20]:
# Recall@20 per (embedding_dimension, l2_regularization); axis=None puts the
# whole table on one shared colour scale.
(
    experiment_summary
    .select("embedding_dimension", "l2_regularization", "best:epoch/test_recall@20")
    .pivot(on="l2_regularization", index="embedding_dimension", values=["best:epoch/test_recall@20"])
    .to_pandas()
    .set_index("embedding_dimension")
    .style
    .background_gradient(cmap="RdYlGn", axis=None)
)

Unnamed: 0_level_0,0.0,1e-10,1e-9,1e-8,1e-7,1e-6,0.00001,0.0001,0.001,0.01
embedding_dimension,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2,0.026836,0.027117,0.026956,0.027582,0.027373,0.027062,0.022443,0.004824,0.001253,0.001291
4,0.034983,0.035082,0.035621,0.035248,0.036723,0.035059,0.023605,0.004646,0.001419,0.001035
8,0.038897,0.039344,0.039087,0.04002,0.042581,0.041639,0.023367,0.004607,0.001258,0.001233
16,0.040105,0.039796,0.039979,0.041158,0.0465,0.04392,0.023699,0.004754,0.001472,0.001397
32,0.039802,0.039639,0.04,0.040843,0.047524,0.044698,0.02361,0.004856,0.001429,0.001452
64,0.039555,0.039531,0.039474,0.040597,0.048557,0.045053,0.024692,0.00494,0.001584,0.001608
128,0.038418,0.038835,0.038806,0.039589,0.048684,0.044898,0.021054,0.004778,0.001645,0.001317
256,0.037247,0.03723,0.038289,0.038235,0.047758,0.045301,0.017743,0.004785,0.001716,0.001571
512,0.035652,0.036799,0.036026,0.036662,0.044243,0.044985,0.017599,0.004849,0.001893,0.001534
1024,0.036986,0.037841,0.037769,0.037974,0.040105,0.044832,0.016696,0.004946,0.002026,0.001403


### Best Test NDCG@20

In [21]:
# NDCG@20 per (embedding_dimension, l2_regularization); axis=None puts the
# whole table on one shared colour scale.
(
    experiment_summary
    .select("embedding_dimension", "l2_regularization", "best:epoch/test_ndcg@20")
    .pivot(on="l2_regularization", index="embedding_dimension", values=["best:epoch/test_ndcg@20"])
    .to_pandas()
    .set_index("embedding_dimension")
    .style
    .background_gradient(cmap="RdYlGn", axis=None)
)

Unnamed: 0_level_0,0.0,1e-10,1e-9,1e-8,1e-7,1e-6,0.00001,0.0001,0.001,0.01
embedding_dimension,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2,0.078058,0.080488,0.07889,0.080168,0.080319,0.079099,0.066975,0.016931,0.005061,0.004804
4,0.099938,0.099181,0.101943,0.100334,0.103918,0.101334,0.070803,0.016738,0.005597,0.003315
8,0.10976,0.110979,0.109776,0.113028,0.119901,0.116816,0.071034,0.016199,0.005,0.004304
16,0.113425,0.112979,0.112719,0.115682,0.128256,0.122477,0.071606,0.017082,0.005616,0.004364
32,0.113035,0.112975,0.113315,0.114622,0.131433,0.125395,0.071054,0.016999,0.005687,0.004393
64,0.112397,0.112011,0.112753,0.114397,0.133791,0.126345,0.074344,0.017074,0.006101,0.004945
128,0.109821,0.110493,0.11102,0.112997,0.134407,0.126195,0.065463,0.016941,0.006286,0.004135
256,0.106935,0.107047,0.108992,0.109369,0.131446,0.12617,0.05364,0.017016,0.006449,0.004893
512,0.102407,0.106343,0.103752,0.105173,0.124356,0.126202,0.052732,0.017076,0.007156,0.004955
1024,0.106575,0.109767,0.108463,0.108747,0.113485,0.1262,0.049968,0.017277,0.007753,0.004392


## L1 Regularization, Shuffle = False, L2 = 0.0, Dropout = 0.0

In [26]:
# L1 sweep without shuffling: L2 and dropout are pinned to 0.0, then runs sharing
# an (embedding_dimension, l1_regularization) pair are averaged.
l1_no_shuffle_metrics = [
    "best:epoch/epoch",
    "best:epoch/test_recall@10",
    "best:epoch/test_ndcg@10",
    "best:epoch/test_recall@20",
    "best:epoch/test_ndcg@20",
]

experiment_summary = (
    experiment_runs
    .filter(shuffle=False, l2_regularization=0.0, embedding_dropout_rate=0.0)
    .group_by(["embedding_dimension", "l1_regularization"])
    .agg(
        pl.col("run_id").count().alias("num_runs"),
        *[pl.col(metric).mean() for metric in l1_no_shuffle_metrics],
    )
    .sort("embedding_dimension", "l1_regularization")
)

### Run Counts

In [27]:
# Number of runs behind each (embedding_dimension, l1_regularization) cell.
# `on=` replaces the deprecated `columns=` argument of `DataFrame.pivot`.
experiment_summary.select("embedding_dimension", "l1_regularization", "num_runs").pivot(
    on="l1_regularization",
    index="embedding_dimension",
    values=["num_runs"],
)

  experiment_summary[["embedding_dimension", "l1_regularization", "num_runs"]].pivot(


embedding_dimension,0.0,1e-10,1e-9,1e-8,1e-7,1e-6,0.00001,0.0001,0.001,0.01
i64,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32
2,6,3,4,6,4,6,6,3,4,7
4,9,3,3,5,6,3,4,3,5,6
8,9,3,6,3,3,4,7,3,5,3
16,8,2,5,2,4,4,4,6,4,4
32,4,5,5,4,4,3,5,5,3,4
64,6,3,2,6,5,4,3,4,4,2
128,15,3,10,5,10,6,8,5,4,3
256,10,5,3,3,8,4,5,3,7,3
512,6,2,2,3,2,8,2,2,4,5
1024,8,4,7,5,5,4,5,3,5,3


### Best Epoch

In [28]:
# Mean best epoch per (embedding_dimension, l1_regularization), shown with two
# decimals on a single colour scale shared by the whole table (axis=None).
(
    experiment_summary
    .select("embedding_dimension", "l1_regularization", "best:epoch/epoch")
    .pivot(on="l1_regularization", index="embedding_dimension", values=["best:epoch/epoch"])
    .to_pandas()
    .set_index("embedding_dimension")
    .style
    .background_gradient(cmap="Blues", axis=None)
    .format("{:.2f}")
)


Unnamed: 0_level_0,0.0,1e-10,1e-9,1e-8,1e-7,1e-6,0.00001,0.0001,0.001,0.01
embedding_dimension,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2,52.0,62.0,51.0,59.67,60.0,26.83,42.67,34.67,41.5,13.29
4,42.67,55.33,55.33,51.6,54.83,46.0,39.5,14.0,42.4,41.17
8,49.56,56.67,49.67,45.33,62.0,39.25,44.43,4.0,27.2,10.0
16,32.25,25.0,25.8,44.5,55.25,28.5,49.25,7.5,20.75,38.75
32,16.75,17.2,14.0,21.25,42.5,43.33,42.0,6.0,28.67,25.75
64,7.67,5.67,7.5,18.83,33.6,37.25,50.67,7.0,41.0,19.0
128,7.27,5.67,4.5,7.4,27.6,29.5,41.62,15.8,35.75,31.33
256,6.0,4.6,3.33,9.0,26.12,31.25,46.2,3.67,20.57,28.67
512,1.0,1.0,1.0,5.0,18.5,36.5,51.0,6.0,15.25,37.0
1024,1.0,1.0,1.0,1.0,19.2,41.75,50.6,5.33,17.0,27.67


### Best Test Recall@20

In [29]:
# Recall@20 per (embedding_dimension, l1_regularization); axis=None puts the
# whole table on one shared colour scale.
(
    experiment_summary
    .select("embedding_dimension", "l1_regularization", "best:epoch/test_recall@20")
    .pivot(on="l1_regularization", index="embedding_dimension", values=["best:epoch/test_recall@20"])
    .to_pandas()
    .set_index("embedding_dimension")
    .style
    .background_gradient(cmap="RdYlGn", axis=None)
)

Unnamed: 0_level_0,0.0,1e-10,1e-9,1e-8,1e-7,1e-6,0.00001,0.0001,0.001,0.01
embedding_dimension,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2,0.028021,0.02683,0.027656,0.026286,0.026239,0.000969,0.000822,0.000572,0.000655,0.000667
4,0.035421,0.03564,0.035632,0.034933,0.035129,0.000856,0.000714,0.000601,0.000606,0.000657
8,0.038919,0.039391,0.038882,0.039266,0.040239,0.000835,0.000679,0.00059,0.000629,0.00063
16,0.039954,0.039759,0.040462,0.040408,0.043224,0.00084,0.000676,0.000566,0.000589,0.000624
32,0.04054,0.040881,0.04125,0.041556,0.04476,0.000818,0.000683,0.000551,0.000624,0.000614
64,0.04002,0.041657,0.041703,0.041594,0.045416,0.000831,0.000711,0.000541,0.000629,0.000619
128,0.039105,0.041531,0.041862,0.042525,0.046673,0.000802,0.000701,0.000554,0.000607,0.000636
256,0.038196,0.041301,0.041193,0.042063,0.04729,0.000828,0.00066,0.000532,0.0006,0.000617
512,0.040534,0.041669,0.042037,0.041253,0.047714,0.000831,0.000674,0.000585,0.000633,0.000631
1024,0.042053,0.042571,0.043222,0.043268,0.047753,0.000795,0.000657,0.000562,0.000591,0.000607


### Best Test NDCG@20

In [30]:
(
    experiment_summary[["embedding_dimension", "l1_regularization", "best:epoch/test_ndcg@20"]].pivot(
        values=["best:epoch/test_ndcg@20"],
        index="embedding_dimension",
        on="l1_regularization"
    )
    .to_pandas()
    .set_index("embedding_dimension")
    .style.background_gradient(cmap="RdYlGn", axis=None)
)

Unnamed: 0_level_0,0.0,1e-10,1e-9,1e-8,1e-7,1e-6,0.00001,0.0001,0.001,0.01
embedding_dimension,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2,0.080881,0.076956,0.079496,0.075915,0.076528,0.003734,0.002814,0.002277,0.002369,0.002334
4,0.101048,0.101263,0.100029,0.099579,0.099798,0.00316,0.002529,0.00211,0.002201,0.002352
8,0.110166,0.11107,0.110459,0.111534,0.112798,0.003143,0.00246,0.002072,0.002198,0.002279
16,0.112943,0.112572,0.114139,0.114551,0.122376,0.002973,0.002539,0.002112,0.002235,0.002192
32,0.11407,0.115561,0.116807,0.116688,0.125089,0.00308,0.002405,0.002072,0.00226,0.002309
64,0.113347,0.116229,0.117257,0.117662,0.126693,0.003021,0.00243,0.002054,0.002197,0.002186
128,0.11218,0.117618,0.117561,0.119577,0.13019,0.002912,0.002493,0.00205,0.002249,0.00216
256,0.108185,0.116869,0.116625,0.118328,0.131573,0.00296,0.002428,0.001917,0.002204,0.00225
512,0.112974,0.116273,0.11675,0.116771,0.133339,0.003005,0.002427,0.002101,0.002256,0.002211
1024,0.117659,0.117839,0.11948,0.12039,0.132359,0.002954,0.002392,0.001925,0.002163,0.002152


## L1 Regularization, Shuffle = True, L2 = 0.0, Dropout = 0.0

In [31]:
# L1 sweep with shuffling: L2 and dropout are pinned to 0.0, then runs sharing
# an (embedding_dimension, l1_regularization) pair are averaged.
l1_shuffle_metrics = [
    "best:epoch/epoch",
    "best:epoch/test_recall@10",
    "best:epoch/test_ndcg@10",
    "best:epoch/test_recall@20",
    "best:epoch/test_ndcg@20",
]

experiment_summary = (
    experiment_runs
    .filter(shuffle=True, l2_regularization=0.0, embedding_dropout_rate=0.0)
    .group_by(["embedding_dimension", "l1_regularization"])
    .agg(
        pl.col("run_id").count().alias("num_runs"),
        *[pl.col(metric).mean() for metric in l1_shuffle_metrics],
    )
    .sort("embedding_dimension", "l1_regularization")
)

### Run Counts

In [32]:
# Number of runs behind each (embedding_dimension, l1_regularization) cell.
# `on=` replaces the deprecated `columns=` argument of `DataFrame.pivot`.
experiment_summary.select("embedding_dimension", "l1_regularization", "num_runs").pivot(
    on="l1_regularization",
    index="embedding_dimension",
    values=["num_runs"],
)

  experiment_summary[["embedding_dimension", "l1_regularization", "num_runs"]].pivot(


embedding_dimension,0.0,1e-10,1e-9,1e-8,1e-7,1e-6,0.00001,0.0001,0.001,0.01
i64,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32
2,7,3,8,4,4,9,3,3,3,7
4,7,5,4,3,4,5,3,3,3,3
8,9,3,7,3,3,5,3,3,3,3
16,8,4,6,5,4,3,2,5,3,3
32,11,3,4,7,4,6,4,4,4,5
64,6,4,4,2,4,2,4,5,6,3
128,12,3,10,9,10,12,15,7,7,4
256,8,2,6,6,3,4,5,4,5,5
512,5,6,5,5,7,6,4,6,5,4
1024,6,4,6,6,4,5,3,4,3,5


### Best Epoch

In [33]:
# Mean best epoch per (embedding_dimension, l1_regularization), shown with two
# decimals on a single colour scale shared by the whole table (axis=None).
(
    experiment_summary
    .select("embedding_dimension", "l1_regularization", "best:epoch/epoch")
    .pivot(on="l1_regularization", index="embedding_dimension", values=["best:epoch/epoch"])
    .to_pandas()
    .set_index("embedding_dimension")
    .style
    .background_gradient(cmap="Blues", axis=None)
    .format("{:.2f}")
)

Unnamed: 0_level_0,0.0,1e-10,1e-9,1e-8,1e-7,1e-6,0.00001,0.0001,0.001,0.01
embedding_dimension,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2,49.43,46.33,56.88,48.25,58.25,20.22,45.67,18.0,24.67,38.29
4,54.29,52.4,51.75,46.33,61.0,1.4,51.0,5.33,47.0,38.33
8,50.56,47.67,52.29,49.67,61.33,26.0,42.67,4.0,40.0,21.33
16,44.12,36.25,38.33,45.8,57.25,33.67,51.0,6.4,12.0,29.0
32,21.45,23.67,20.0,28.0,51.75,48.5,37.5,11.5,23.0,33.4
64,16.33,16.25,19.75,19.0,30.25,45.5,47.5,7.4,27.17,28.0
128,14.58,15.0,11.6,16.0,23.4,48.92,44.07,3.57,22.29,21.75
256,10.25,10.0,9.33,14.17,16.67,51.25,40.0,21.5,18.0,22.6
512,6.8,7.67,9.2,10.2,16.57,45.0,42.0,10.17,31.6,38.75
1024,0.0,1.25,0.0,0.0,13.5,34.2,51.67,10.25,41.33,39.2


### Best Test Recall@20

In [34]:
# Recall@20 per (embedding_dimension, l1_regularization); axis=None puts the
# whole table on one shared colour scale.
(
    experiment_summary
    .select("embedding_dimension", "l1_regularization", "best:epoch/test_recall@20")
    .pivot(on="l1_regularization", index="embedding_dimension", values=["best:epoch/test_recall@20"])
    .to_pandas()
    .set_index("embedding_dimension")
    .style
    .background_gradient(cmap="RdYlGn", axis=None)
)

Unnamed: 0_level_0,0.0,1e-10,1e-9,1e-8,1e-7,1e-6,0.00001,0.0001,0.001,0.01
embedding_dimension,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2,0.026836,0.02811,0.026352,0.027689,0.026472,0.009493,0.000793,0.000652,0.00068,0.000718
4,0.034983,0.034839,0.034672,0.034054,0.035874,0.008691,0.000689,0.000596,0.000602,0.000645
8,0.038897,0.03897,0.03893,0.039577,0.040547,0.023306,0.000715,0.000625,0.000662,0.000577
16,0.040105,0.040053,0.039879,0.040207,0.043309,0.031818,0.000607,0.000554,0.000637,0.000606
32,0.039802,0.039859,0.039829,0.040821,0.044144,0.032159,0.000646,0.00054,0.000596,0.000592
64,0.039555,0.039763,0.039482,0.040673,0.044485,0.034216,0.000678,0.00057,0.000599,0.000616
128,0.038418,0.038617,0.038627,0.040153,0.044897,0.033991,0.000664,0.000552,0.000594,0.000623
256,0.037247,0.037592,0.037811,0.039457,0.045375,0.03432,0.000676,0.000549,0.0006,0.00061
512,0.035652,0.035984,0.036169,0.038023,0.045758,0.035485,0.000647,0.000568,0.000586,0.000611
1024,0.036986,0.03644,0.037874,0.037593,0.046157,0.034933,0.000645,0.000557,0.000602,0.000609


### Best Test NDCG@20

In [35]:
# Heatmap of `best:epoch/test_ndcg@20`: one row per embedding dimension,
# one column per L1 regularization value.
(
    experiment_summary
    .select("embedding_dimension", "l1_regularization", "best:epoch/test_ndcg@20")
    .pivot(
        on="l1_regularization",
        index="embedding_dimension",
        values=["best:epoch/test_ndcg@20"],
    )
    .to_pandas()
    .set_index("embedding_dimension")
    # axis=None computes the colour scale over the whole table, not per column.
    .style.background_gradient(cmap="RdYlGn", axis=None)
)

Unnamed: 0_level_0,0.0,1e-10,1e-9,1e-8,1e-7,1e-6,0.00001,0.0001,0.001,0.01
embedding_dimension,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2,0.078058,0.08137,0.076123,0.080396,0.076008,0.028311,0.00284,0.002261,0.002593,0.002648
4,0.099938,0.099085,0.09854,0.098458,0.101107,0.027767,0.002588,0.002179,0.002109,0.002373
8,0.10976,0.109884,0.110733,0.112334,0.114508,0.069098,0.002536,0.002041,0.002391,0.002208
16,0.113425,0.113423,0.112999,0.113826,0.122009,0.091434,0.002402,0.002032,0.002234,0.002268
32,0.113035,0.113029,0.113298,0.115517,0.123725,0.092424,0.002407,0.001938,0.00216,0.00215
64,0.112397,0.113374,0.112337,0.115521,0.124186,0.097775,0.002492,0.002032,0.002147,0.002292
128,0.109821,0.109903,0.110434,0.113905,0.126193,0.097293,0.002445,0.002011,0.002215,0.002298
256,0.106935,0.107466,0.108358,0.112283,0.127094,0.097747,0.002432,0.001961,0.002101,0.002236
512,0.102407,0.10411,0.103905,0.109402,0.127712,0.101037,0.002373,0.001998,0.002254,0.002301
1024,0.106575,0.105476,0.108838,0.109243,0.128846,0.099836,0.002379,0.002085,0.002175,0.002289


## ElasticNet, D=128, Shuffle = False
We want to dig deeper into what happens when L1 and L2 regularization are applied at the same time (better known as ElasticNet) to Matrix Factorization.

In [37]:
# ElasticNet grid summary for D=128 without shuffling.
experiment_summary = (
    experiment_runs
    .filter(
        # Unshuffled, 128-dimensional runs with no embedding dropout.
        pl.col("shuffle") == False,
        pl.col("embedding_dimension") == 128,
        pl.col("embedding_dropout_rate") == 0.0,
        # Drop the 1e-10 grid point and everything above 1e-4 for both penalties.
        pl.col("l1_regularization") != 1e-10,
        pl.col("l1_regularization") <= 1e-4,
        pl.col("l2_regularization") != 1e-10,
        pl.col("l2_regularization") <= 1e-4,
    )
    .group_by("l1_regularization", "l2_regularization")
    .agg(
        pl.col("run_id").count().alias("num_runs"),
        # Average every tracked "best" metric across the runs of each grid cell.
        pl.col(
            "best:epoch/epoch",
            "best:epoch/test_recall@10",
            "best:epoch/test_ndcg@10",
            "best:epoch/test_recall@20",
            "best:epoch/test_ndcg@20",
        ).mean(),
    )
    # Sort while the penalties are still numeric, then cast them to strings
    # (presumably so they can serve as row/column labels in the pivots).
    .sort("l1_regularization", "l2_regularization")
    .with_columns(
        pl.col("l1_regularization", "l2_regularization").cast(pl.Utf8)
    )
)

### Run Counts

In [38]:
# Number of runs in each (L1, L2) grid cell: rows are L1 values, columns are L2 values.
# `on=` replaces the deprecated `columns=` keyword of `DataFrame.pivot`, which
# triggered a deprecation warning in this cell's output.
experiment_summary[["l1_regularization", "l2_regularization", "num_runs"]].pivot(
    values=["num_runs"],
    index="l1_regularization",
    on="l2_regularization"
)

  experiment_summary[["l1_regularization", "l2_regularization", "num_runs"]].pivot(


l1_regularization,0.0,1e-9,1e-8,1e-7,1e-6,0.00001,0.0001
str,u32,u32,u32,u32,u32,u32,u32
"""0.0""",15,7,8,13,8,8,6
"""1e-9""",10,3,5,3,2,4,8
"""1e-8""",5,3,2,5,6,11,3
"""1e-7""",10,3,5,7,6,7,5
"""1e-6""",6,3,2,2,6,4,2
"""0.00001""",8,2,3,6,4,3,6
"""0.0001""",5,7,4,3,7,5,7


### Best Epoch

In [39]:
# Heatmap of `best:epoch/epoch` over the (L1, L2) grid:
# rows are L1 values, columns are L2 values.
(
    experiment_summary
    .select("l1_regularization", "l2_regularization", "best:epoch/epoch")
    .pivot(
        on="l2_regularization",
        index="l1_regularization",
        values=["best:epoch/epoch"],
    )
    .to_pandas()
    .set_index("l1_regularization")
    # axis=None computes the colour scale over the whole table, not per column.
    .style.background_gradient(cmap="Blues", axis=None)
    .format("{:.2f}")
)

Unnamed: 0_level_0,0.0,1e-9,1e-8,1e-7,1e-6,0.00001,0.0001
l1_regularization,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0.0,7.27,4.0,4.62,55.54,51.5,62.12,6.17
1e-09,4.5,3.0,5.6,58.33,50.0,1.0,1.0
1e-08,7.4,6.0,10.0,57.0,54.67,5.27,62.33
1e-07,27.6,27.67,32.2,53.29,53.17,39.71,42.0
1e-06,29.5,22.33,38.0,43.0,40.83,27.75,0.0
1e-05,41.62,40.0,45.0,42.0,49.5,2.0,40.5
0.0001,15.8,14.0,16.75,29.33,9.86,27.8,40.57


### Best Test Recall@20

In [40]:
# Heatmap of `best:epoch/test_recall@20` over the (L1, L2) grid:
# rows are L1 values, columns are L2 values.
(
    experiment_summary
    .select("l1_regularization", "l2_regularization", "best:epoch/test_recall@20")
    .pivot(
        on="l2_regularization",
        index="l1_regularization",
        values=["best:epoch/test_recall@20"],
    )
    .to_pandas()
    .set_index("l1_regularization")
    # axis=None computes the colour scale over the whole table, not per column.
    .style.background_gradient(cmap="RdYlGn", axis=None)
)

Unnamed: 0_level_0,0.0,1e-9,1e-8,1e-7,1e-6,0.00001,0.0001
l1_regularization,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0.0,0.039105,0.041816,0.041981,0.048783,0.045145,0.013157,0.004336
1e-09,0.041862,0.042282,0.041881,0.049284,0.045172,0.00833,0.00184
1e-08,0.042525,0.042375,0.042587,0.049477,0.045104,0.007187,0.002129
1e-07,0.046673,0.046121,0.047049,0.050247,0.044102,0.002697,0.004949
1e-06,0.000802,0.000807,0.000821,0.000773,0.000874,0.00081,0.000466
1e-05,0.000701,0.000655,0.000697,0.000664,0.000643,0.00044,0.000555
0.0001,0.000554,0.000548,0.00055,0.000559,0.000552,0.000612,0.000622


### Best Test NDCG@20

In [41]:
# Heatmap of `best:epoch/test_ndcg@20` over the (L1, L2) grid:
# rows are L1 values, columns are L2 values.
(
    experiment_summary
    .select("l1_regularization", "l2_regularization", "best:epoch/test_ndcg@20")
    .pivot(
        on="l2_regularization",
        index="l1_regularization",
        values=["best:epoch/test_ndcg@20"],
    )
    .to_pandas()
    .set_index("l1_regularization")
    # axis=None computes the colour scale over the whole table, not per column.
    .style.background_gradient(cmap="RdYlGn", axis=None)
)

Unnamed: 0_level_0,0.0,1e-9,1e-8,1e-7,1e-6,0.00001,0.0001
l1_regularization,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0.0,0.11218,0.118182,0.118205,0.134612,0.126499,0.040775,0.015099
1e-09,0.117561,0.11815,0.118094,0.135038,0.127528,0.027658,0.007161
1e-08,0.119577,0.119827,0.119294,0.136078,0.126447,0.02403,0.009092
1e-07,0.13019,0.128751,0.130131,0.137913,0.123769,0.010571,0.020474
1e-06,0.002912,0.003022,0.003037,0.003022,0.003115,0.003007,0.001618
1e-05,0.002493,0.002317,0.00236,0.002339,0.002446,0.001603,0.002047
0.0001,0.00205,0.002029,0.001982,0.002047,0.002009,0.00209,0.00222


## ElasticNet, D=128, Shuffle = True
We want to dig deeper into what happens when L1 and L2 regularization are applied at the same time (better known as ElasticNet) to Matrix Factorization.

In [42]:
# ElasticNet grid summary for D=128 with shuffling enabled.
experiment_summary = (
    experiment_runs
    .filter(
        # Shuffled, 128-dimensional runs with no embedding dropout.
        pl.col("shuffle") == True,
        pl.col("embedding_dimension") == 128,
        pl.col("embedding_dropout_rate") == 0.0,
        # Drop the 1e-10 grid point and everything above 1e-4 for both penalties.
        pl.col("l1_regularization") != 1e-10,
        pl.col("l1_regularization") <= 1e-4,
        pl.col("l2_regularization") != 1e-10,
        pl.col("l2_regularization") <= 1e-4,
    )
    .group_by("l1_regularization", "l2_regularization")
    .agg(
        pl.col("run_id").count().alias("num_runs"),
        # Average every tracked "best" metric across the runs of each grid cell.
        pl.col(
            "best:epoch/epoch",
            "best:epoch/test_recall@10",
            "best:epoch/test_ndcg@10",
            "best:epoch/test_recall@20",
            "best:epoch/test_ndcg@20",
        ).mean(),
    )
    # Sort while the penalties are still numeric, then cast them to strings
    # (presumably so they can serve as row/column labels in the pivots).
    .sort("l1_regularization", "l2_regularization")
    .with_columns(
        pl.col("l1_regularization", "l2_regularization").cast(pl.Utf8)
    )
)
experiment_summary

l1_regularization,l2_regularization,num_runs,best:epoch/epoch,best:epoch/test_recall@10,best:epoch/test_ndcg@10,best:epoch/test_recall@20,best:epoch/test_ndcg@20
str,str,u32,f64,f64,f64,f64,f64
"""0.0""","""0.0""",12,14.583333,0.021834,0.085193,0.038418,0.109821
"""0.0""","""1e-9""",9,12.555556,0.022136,0.086464,0.038806,0.11102
"""0.0""","""1e-8""",10,15.4,0.02265,0.088068,0.039589,0.112997
"""0.0""","""1e-7""",16,58.0625,0.028076,0.106437,0.048684,0.134407
"""0.0""","""1e-6""",12,51.333333,0.025898,0.10048,0.044898,0.126195
"""0.0""","""0.00001""",8,57.5,0.012032,0.051095,0.021054,0.065463
"""0.0""","""0.0001""",10,11.2,0.002792,0.012626,0.004778,0.016941
"""1e-9""","""0.0""",10,11.6,0.021983,0.085908,0.038627,0.110434
"""1e-9""","""1e-9""",7,17.285714,0.022203,0.085697,0.039023,0.110561
"""1e-9""","""1e-8""",4,19.75,0.022763,0.088144,0.039877,0.113323


### Run Counts

In [43]:
# Number of runs in each (L1, L2) grid cell: rows are L1 values, columns are L2 values.
# `on=` replaces the deprecated `columns=` keyword of `DataFrame.pivot`, which
# triggered a deprecation warning in this cell's output.
experiment_summary[["l1_regularization", "l2_regularization", "num_runs"]].pivot(
    values=["num_runs"],
    index="l1_regularization",
    on="l2_regularization"
)

  experiment_summary[["l1_regularization", "l2_regularization", "num_runs"]].pivot(


l1_regularization,0.0,1e-9,1e-8,1e-7,1e-6,0.00001,0.0001
str,u32,u32,u32,u32,u32,u32,u32
"""0.0""",12,9,10,16,12,8,10
"""1e-9""",10,7,4,4,2,5,5
"""1e-8""",9,9,7,3,7,3,5
"""1e-7""",10,6,7,2,6,4,6
"""1e-6""",12,2,4,6,4,6,9
"""0.00001""",15,4,2,6,3,6,7
"""0.0001""",7,7,6,5,8,3,5


### Best Epoch

In [44]:
# Heatmap of `best:epoch/epoch` over the (L1, L2) grid:
# rows are L1 values, columns are L2 values.
(
    experiment_summary
    .select("l1_regularization", "l2_regularization", "best:epoch/epoch")
    .pivot(
        on="l2_regularization",
        index="l1_regularization",
        values=["best:epoch/epoch"],
    )
    .to_pandas()
    .set_index("l1_regularization")
    # axis=None computes the colour scale over the whole table, not per column.
    .style.background_gradient(cmap="Blues", axis=None)
    .format("{:.2f}")
)

Unnamed: 0_level_0,0.0,1e-9,1e-8,1e-7,1e-6,0.00001,0.0001
l1_regularization,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0.0,14.58,12.56,15.4,58.06,51.33,57.5,11.2
1e-09,11.6,17.29,19.75,59.25,45.5,58.0,61.2
1e-08,16.0,17.67,22.29,58.67,59.57,57.33,59.4
1e-07,23.4,22.83,23.43,54.0,42.0,58.5,61.17
1e-06,48.92,24.5,49.0,46.33,22.5,35.67,0.56
1e-05,44.07,50.25,36.5,49.33,54.0,9.5,38.29
0.0001,3.57,12.43,5.5,12.4,13.88,21.0,44.8


### Best Test Recall@20

In [45]:
# Heatmap of `best:epoch/test_recall@20` over the (L1, L2) grid:
# rows are L1 values, columns are L2 values.
(
    experiment_summary
    .select("l1_regularization", "l2_regularization", "best:epoch/test_recall@20")
    .pivot(
        on="l2_regularization",
        index="l1_regularization",
        values=["best:epoch/test_recall@20"],
    )
    .to_pandas()
    .set_index("l1_regularization")
    # axis=None computes the colour scale over the whole table, not per column.
    .style.background_gradient(cmap="RdYlGn", axis=None)
)

Unnamed: 0_level_0,0.0,1e-9,1e-8,1e-7,1e-6,0.00001,0.0001
l1_regularization,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0.0,0.038418,0.038806,0.039589,0.048684,0.044898,0.021054,0.004778
1e-09,0.038627,0.039023,0.039877,0.048698,0.045148,0.020508,0.003351
1e-08,0.040153,0.040324,0.041442,0.049503,0.045028,0.020424,0.002356
1e-07,0.044897,0.044934,0.045462,0.049522,0.044442,0.022064,0.002213
1e-06,0.033991,0.031759,0.035241,0.035338,0.032858,0.000804,0.000484
1e-05,0.000664,0.000669,0.000641,0.000675,0.000693,0.000466,0.000559
0.0001,0.000552,0.000554,0.000537,0.000566,0.000531,0.000581,0.000642


### Best Test NDCG@20

In [46]:
# Heatmap of `best:epoch/test_ndcg@20` over the (L1, L2) grid:
# rows are L1 values, columns are L2 values.
(
    experiment_summary
    .select("l1_regularization", "l2_regularization", "best:epoch/test_ndcg@20")
    .pivot(
        on="l2_regularization",
        index="l1_regularization",
        values=["best:epoch/test_ndcg@20"],
    )
    .to_pandas()
    .set_index("l1_regularization")
    # axis=None computes the colour scale over the whole table, not per column.
    .style.background_gradient(cmap="RdYlGn", axis=None)
)

Unnamed: 0_level_0,0.0,1e-9,1e-8,1e-7,1e-6,0.00001,0.0001
l1_regularization,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0.0,0.109821,0.11102,0.112997,0.134407,0.126195,0.065463,0.016941
1e-09,0.110434,0.110561,0.113323,0.134506,0.126176,0.064525,0.012739
1e-08,0.113905,0.114235,0.116818,0.13656,0.125889,0.063955,0.00946
1e-07,0.126193,0.125974,0.127672,0.135343,0.124198,0.066533,0.008567
1e-06,0.097293,0.091937,0.099727,0.101127,0.09465,0.002864,0.001762
1e-05,0.002445,0.002444,0.002284,0.002413,0.002469,0.001649,0.002003
0.0001,0.002011,0.001973,0.001989,0.001972,0.001973,0.002106,0.002218


## ElasticNet, D=256, Shuffle = False

In [50]:
# ElasticNet grid summary for D=256 without shuffling.
experiment_summary = (
    experiment_runs
    .filter(
        # Unshuffled, 256-dimensional runs with no embedding dropout.
        pl.col("shuffle") == False,
        pl.col("embedding_dimension") == 256,
        pl.col("embedding_dropout_rate") == 0.0,
        # Drop the 1e-10 grid point and everything above 1e-4 for both penalties.
        pl.col("l1_regularization") != 1e-10,
        pl.col("l1_regularization") <= 1e-4,
        pl.col("l2_regularization") != 1e-10,
        pl.col("l2_regularization") <= 1e-4,
    )
    .group_by("l1_regularization", "l2_regularization")
    .agg(
        pl.col("run_id").count().alias("num_runs"),
        # Average every tracked "best" metric across the runs of each grid cell.
        pl.col(
            "best:epoch/epoch",
            "best:epoch/test_recall@10",
            "best:epoch/test_ndcg@10",
            "best:epoch/test_recall@20",
            "best:epoch/test_ndcg@20",
        ).mean(),
    )
    # Sort while the penalties are still numeric, then cast them to strings
    # (presumably so they can serve as row/column labels in the pivots).
    .sort("l1_regularization", "l2_regularization")
    .with_columns(
        pl.col("l1_regularization", "l2_regularization").cast(pl.Utf8)
    )
)

### Run Counts

In [51]:
# Number of runs in each (L1, L2) grid cell: rows are L1 values, columns are L2 values.
# Grid cells with no matching run come out as nulls.
# `on=` replaces the deprecated `columns=` keyword of `DataFrame.pivot`, which
# triggered a deprecation warning in this cell's output.
experiment_summary[["l1_regularization", "l2_regularization", "num_runs"]].pivot(
    values=["num_runs"],
    index="l1_regularization",
    on="l2_regularization"
)

  experiment_summary[["l1_regularization", "l2_regularization", "num_runs"]].pivot(


l1_regularization,0.0,1e-9,1e-8,1e-7,1e-6,0.00001,0.0001
str,u32,u32,u32,u32,u32,u32,u32
"""0.0""",10,4.0,4.0,5.0,4.0,5.0,4.0
"""1e-9""",3,,,,1.0,1.0,1.0
"""1e-8""",3,,,1.0,2.0,1.0,
"""1e-7""",8,,2.0,2.0,,,
"""1e-6""",4,,,,,1.0,
"""0.00001""",5,,,1.0,1.0,2.0,
"""0.0001""",3,,,,2.0,1.0,2.0


# Cross-GPU Training

In [47]:
# Mean run duration (minutes) for each embedding dimension, across all runs.
(
    experiment_runs
    .group_by("embedding_dimension")
    .agg(pl.col("run_duration_minute").mean())
    .sort("embedding_dimension")
)

embedding_dimension,run_duration_minute
i64,f64
2,12.446437
4,12.438744
8,12.333611
16,12.487377
32,12.445287
64,12.740832
128,12.348307
256,12.778
512,13.610148
1024,16.095069


In [42]:
# Mean run duration (minutes) for each GPU type, across all runs.
(
    experiment_runs
    .group_by("gpu_type")
    .agg(pl.col("run_duration_minute").mean())
    .sort("gpu_type")
)

gpu_type,run_duration_minute
str,f64
"""NVIDIA A10G""",12.554927
"""NVIDIA L4""",17.202939


In [43]:
# Unregularized baseline (no L1, no L2, no embedding dropout), summarized per
# embedding dimension and GPU type.
experiment_summary = (
    experiment_runs
    .filter(
        pl.col("l2_regularization") == 0,
        pl.col("l1_regularization") == 0.0,
        pl.col("embedding_dropout_rate") == 0.0,
    )
    .group_by("embedding_dimension", "gpu_type")
    .agg(
        pl.col("run_id").count().alias("num_runs"),
        # Average every tracked "best" metric across the runs of each group.
        pl.col(
            "best:epoch/epoch",
            "best:epoch/test_recall@10",
            "best:epoch/test_ndcg@10",
            "best:epoch/test_recall@50",
            "best:epoch/test_ndcg@50",
        ).mean(),
    )
    .sort("embedding_dimension", "gpu_type")
)
experiment_summary

embedding_dimension,gpu_type,num_runs,best:epoch/epoch,best:epoch/test_recall@10,best:epoch/test_ndcg@10,best:epoch/test_recall@50,best:epoch/test_ndcg@50
i64,str,u32,f64,f64,f64,f64,f64
2,"""NVIDIA A10G""",11,49.909091,0.015237,0.059456,0.054251,0.105763
2,"""NVIDIA L4""",2,54.5,0.01585,0.061646,0.056766,0.1099
4,"""NVIDIA A10G""",16,47.75,0.0199,0.077285,0.070973,0.132605
8,"""NVIDIA A10G""",17,50.176471,0.021985,0.085121,0.078469,0.143825
8,"""NVIDIA L4""",1,48.0,0.022055,0.08557,0.078698,0.144032
16,"""NVIDIA A10G""",15,39.2,0.022679,0.087946,0.080375,0.14723
16,"""NVIDIA L4""",1,23.0,0.023082,0.089788,0.081257,0.148892
32,"""NVIDIA A10G""",15,20.2,0.022781,0.088311,0.080075,0.147322
64,"""NVIDIA A10G""",12,12.0,0.022747,0.088153,0.079196,0.146453
128,"""NVIDIA A10G""",26,10.653846,0.022288,0.086847,0.077404,0.14446


In [44]:
# Number of runs per embedding dimension (rows) and GPU type (columns).
# Combinations with no run come out as nulls.
# `on=` replaces the deprecated `columns=` keyword of `DataFrame.pivot`, which
# triggered a deprecation warning in this cell's output.
experiment_summary[["embedding_dimension", "gpu_type", "num_runs"]].pivot(
    values=["num_runs"],
    index="embedding_dimension",
    on="gpu_type"
)

  experiment_summary[["embedding_dimension", "gpu_type", "num_runs"]].pivot(


embedding_dimension,NVIDIA A10G,NVIDIA L4
i64,u32,u32
2,11,2.0
4,16,
8,17,1.0
16,15,1.0
32,15,
64,12,
128,26,1.0
256,17,
512,9,2.0
1024,14,


In [45]:
# `best:epoch/test_recall@50` per embedding dimension (rows) and GPU type (columns).
# Combinations with no run come out as nulls.
# `on=` replaces the deprecated `columns=` keyword of `DataFrame.pivot`, which
# triggered a deprecation warning in this cell's output.
experiment_summary[["embedding_dimension", "gpu_type", "best:epoch/test_recall@50"]].pivot(
    values=["best:epoch/test_recall@50"],
    index="embedding_dimension",
    on="gpu_type"
)

  experiment_summary[["embedding_dimension", "gpu_type", "best:epoch/test_recall@50"]].pivot(


embedding_dimension,NVIDIA A10G,NVIDIA L4
i64,f64,f64
2,0.054251,0.056766
4,0.070973,
8,0.078469,0.078698
16,0.080375,0.081257
32,0.080075,
64,0.079196,
128,0.077404,0.077215
256,0.075118,
512,0.07512,0.075376
1024,0.077101,


In [46]:
# `best:epoch/test_ndcg@50` per embedding dimension (rows) and GPU type (columns).
# Combinations with no run come out as nulls.
# `on=` replaces the deprecated `columns=` keyword of `DataFrame.pivot`, which
# triggered a deprecation warning in this cell's output.
experiment_summary[["embedding_dimension", "gpu_type", "best:epoch/test_ndcg@50"]].pivot(
    values=["best:epoch/test_ndcg@50"],
    index="embedding_dimension",
    on="gpu_type"
)

  experiment_summary[["embedding_dimension", "gpu_type", "best:epoch/test_ndcg@50"]].pivot(


embedding_dimension,NVIDIA A10G,NVIDIA L4
i64,f64,f64
2,0.105763,0.1099
4,0.132605,
8,0.143825,0.144032
16,0.14723,0.148892
32,0.147322,
64,0.146453,
128,0.14446,0.145288
256,0.140154,
512,0.139369,0.140036
1024,0.143154,
