In [1]:
import datetime as dt

import pandas as pd
import numpy as np
from lenskit.algorithms import item_knn, user_knn
from sklearn.model_selection import ParameterGrid

from recsys4daos.datasets import to_lenskit
from recsys4daos.model_selection import cvtt_open, explore_hparams
from recsys4daos.evaluation import test_with_hparams_lenskit

import paths

  from tqdm.autonotebook import tqdm


# Parameters

In [2]:
# Default parameters. Each name is assigned exactly once here; the values are
# the ones that were effective before (the earlier duplicate assignments of
# SPLITS_FREQ, SPLITS_NORMALIZE and LAST_FOLDS were dead and have been removed).

# Dataset config
ORG_NAME = 'Decentraland'

# Evaluation
K_RECOMMENDATIONS: list[int] = [1,3,5,10,15,100]  # passed to the evaluation helper; the metric columns are named <metric>@<k>
SPLITS_FREQ: str = 'W-THU' # split weekly
SPLITS_NORMALIZE = True # Whether or not to move everything to 00:00
LAST_FOLDS = 10 # Use just last 10 splits
LAST_FOLD_DATE_STR: str = None  # forwarded as `last_fold` when building the folds; None by default

# Search space config
WINDOW_SIZES = ['7d', '14d', '21d', '30d', '60d', '90d', '10YE']  # candidate values for `window_size`
ITEMKNN_Ks = [1,2,3,4,5,6,7,8,9,10,15]  # candidate neighbourhood sizes `k` (used for both KNN models)

# Metric used to rank hyperparameter combinations
OPTIM_METRIC = 'map@10'

In [3]:
# Parameters
# NOTE(review): this cell looks like a tool-injected parameter override
# (e.g. papermill) - confirm before editing by hand.
# It runs after the defaults cell, so every name assigned here replaces the
# default of the same name for the rest of the notebook.
EXECUTION_ID = "2024-09-04T10:00"
ORG_NAME = "Frax"
SPLITS_FREQ = "5d"
LAST_FOLDS = 10
SPLITS_NORMALIZE = True
LAST_FOLD_DATE_STR = "2022-05-26"


# Load the dataset

In [4]:
# Load the proposals and votes tables of the selected organization
dfp = paths.load_proposals(ORG_NAME)
dfv = paths.load_votes(ORG_NAME)

# DataFrame.info() prints its own summary and returns None, so it is called
# directly: wrapping it in print() appended a stray "None" line to the output.
dfp.info()
dfv.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 359 entries, 0 to 358
Data columns (total 6 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   id                 359 non-null    object        
 1   author             359 non-null    object        
 2   date               359 non-null    datetime64[us]
 3   start              359 non-null    datetime64[us]
 4   end                359 non-null    datetime64[us]
 5   platform_proposal  359 non-null    object        
dtypes: datetime64[us](3), object(3)
memory usage: 17.0+ KB
None
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10184 entries, 0 to 10183
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   id        10184 non-null  object        
 1   proposal  10184 non-null  object        
 2   voter     10184 non-null  object        
 3   date      10184 non-null  datetime64[us]
dtypes:

In [5]:
# Convert the votes table with to_lenskit; the displayed result has the
# columns user, item, timestamp and rating.
df = to_lenskit(dfv)
df

Unnamed: 0,user,item,timestamp,rating
0,0xc049dd035e0171379fc608cef3260c1e6bceaaef,35601a29-c21f-5c8b-8176-a61cd9dc7d84,2021-02-10 22:33:15,1
1,0x3ce00d8677a44779fab24d897bfcafb4b96c9204,35601a29-c21f-5c8b-8176-a61cd9dc7d84,2021-02-11 03:36:59,1
2,0xb3eb02ac81c49a20290eeb7e8a5f66da6fb1e3f0,35601a29-c21f-5c8b-8176-a61cd9dc7d84,2021-02-11 08:01:51,1
3,0x09d57f361f794d150c04443a7b6e369120dc2bea,35601a29-c21f-5c8b-8176-a61cd9dc7d84,2021-02-11 08:02:27,1
4,0x3c28c42b24b7909c8292920929f083f60c4997a6,35601a29-c21f-5c8b-8176-a61cd9dc7d84,2021-02-11 11:51:03,1
...,...,...,...,...
10179,0xb1417c3a3d10102e57bc123827b942fb60dca1e4,1c6c0bf0-f2b4-5998-ab91-76e35a86a0c5,2023-07-17 12:59:55,1
10180,0x724061efdfef4a421e8be05133ad24922d07b5bf,57e9175b-41a6-5aaf-bd1c-6a13093b5419,2023-07-17 22:49:06,1
10181,0x337844866ce1f51be8eb9f3a4882b5cb891edba5,57e9175b-41a6-5aaf-bd1c-6a13093b5419,2023-07-19 00:02:32,1
10182,0x4b47052498d469ac9521606a67d6f02afeb0985a,57e9175b-41a6-5aaf-bd1c-6a13093b5419,2023-07-19 12:31:54,1


## Split in folds

In [6]:
# Index every fold produced by cvtt_open by its `end` attribute
all_folds = {
    fold.end: fold
    for fold in cvtt_open(
        df,
        SPLITS_FREQ,
        dfp.reset_index(),
        remove_not_in_train_col='item',
        col_item='item',
        last_fold=LAST_FOLD_DATE_STR,
    )
}
# Keep only the keys of the LAST_FOLDS most recent folds (dict order = generation order)
last_folds_idx = list(all_folds)[-LAST_FOLDS:]
last_folds_idx

[Timestamp('2022-04-11 00:00:00'),
 Timestamp('2022-04-16 00:00:00'),
 Timestamp('2022-04-21 00:00:00'),
 Timestamp('2022-04-26 00:00:00'),
 Timestamp('2022-05-01 00:00:00'),
 Timestamp('2022-05-06 00:00:00'),
 Timestamp('2022-05-11 00:00:00'),
 Timestamp('2022-05-16 00:00:00'),
 Timestamp('2022-05-21 00:00:00'),
 Timestamp('2022-05-26 00:00:00')]

# Item-based KNN

In [7]:
def testHParamsItemKNN(fold, k: int, window_size=None):
    """Evaluate an implicit-feedback item-item KNN model on a single fold.

    Args:
        fold: Key of ``all_folds`` selecting the fold to evaluate.
        k: Passed as ``nnbrs``, the maximum number of neighbors used to score
            each item.
        window_size: Forwarded unchanged to ``test_with_hparams_lenskit``.

    Returns:
        The result of ``test_with_hparams_lenskit`` for this configuration.
    """
    knn_options = dict(
        feedback='implicit',  # VERY IMPORTANT
        min_sim=0,
        nnbrs=k,
    )
    # min_nbrs is intentionally not set (left at the library default)
    model = item_knn.ItemItem(**knn_options)
    return test_with_hparams_lenskit(model, all_folds[fold], K_RECOMMENDATIONS, window_size)

# Single run on the most recent fold to check that the pipeline works
pd.Series(testHParamsItemKNN(last_folds_idx[-1], k=5, window_size='14d'))

Numba is using threading layer omp - consider TBB


found 1 potential runtime problems - see https://boi.st/lkpy-perf


  b = blocks[bi]


fold_t             2022-05-26 00:00:00
time_train                    5.591835
time_rec                      0.153363
open_proposals                       1
min_recs                             1
avg_recs                           1.0
precision@1                        1.0
precision@3                   0.333333
precision@5                        0.2
precision@10                       0.1
precision@15                  0.066667
precision@100                     0.01
ndcg@1                        0.714286
ndcg@3                        0.714286
ndcg@5                        0.714286
ndcg@10                       0.714286
ndcg@15                       0.714286
ndcg@100                      0.714286
map@1                              1.0
map@3                              1.0
map@5                              1.0
map@10                             1.0
map@15                             1.0
map@100                            1.0
recall@1                           1.0
recall@3                 

## Exploring hparams

In [8]:
# Every (fold, k, window_size) combination to evaluate
itemknn_grid = ParameterGrid(dict(
    fold=last_folds_idx,
    k=ITEMKNN_Ks,
    window_size=WINDOW_SIZES,
))
# Location handed to explore_hparams as its third argument (the log below
# shows results being restored from / saved to a checkpoint at this path)
itemknn_checkpoint = paths.hparams_progress('itemknn', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE)

results = explore_hparams(testHParamsItemKNN, itemknn_grid, itemknn_checkpoint)
mdfi = pd.DataFrame(results)
mdfi

Restored checkpoint from ../.cache/Frax/hparams-itemknn_5d_normalize.pkl with 709 results


  0%|          | 0/770 [00:00<?, ?it/s]























[2024-09-16T14:10:13.659560] Saving checkpoint at ../.cache/Frax/hparams-itemknn_5d_normalize.pkl


Unnamed: 0,fold,k,window_size,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,...,recall@10,recall@15,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@15,r-precision@100,time_eval
0,2022-04-11,1,7d,2022-04-11,0.013103,0.002009,1,,,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.077649
1,2022-04-11,1,14d,2022-04-11,0.001106,0.002922,1,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.118867
2,2022-04-11,1,21d,2022-04-11,0.001377,0.003306,1,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.118277
3,2022-04-11,1,30d,2022-04-11,0.003108,0.003563,1,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.118301
4,2022-04-11,1,60d,2022-04-11,0.001658,0.004535,1,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.119437
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
765,2022-05-26,15,21d,2022-05-26,0.001243,0.020446,1,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.123978
766,2022-05-26,15,30d,2022-05-26,0.002648,0.022339,1,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.124773
767,2022-05-26,15,60d,2022-05-26,0.002052,0.024480,1,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.124873
768,2022-05-26,15,90d,2022-05-26,0.001645,0.025330,1,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.124096


### Best overall hparams

In [9]:
# Columns shown in the summary tables: timings, recommendation count and the @5/@10/@100 metrics
display_columns = ['time_train', 'avg_recs'] + [
    col for col in mdfi.columns if col.endswith(('@5', '@10', '@100'))
]
# Average every (window_size, k) pair over the folds after the first selected
# one, and rank the pairs by OPTIM_METRIC (best first)
overall_hparams = (
    mdfi[mdfi['fold'] > last_folds_idx[0]]
    .groupby(['window_size', 'k'])
    .mean()
    .sort_values(OPTIM_METRIC, ascending=False)
)
overall_hparams[display_columns]

Unnamed: 0_level_0,Unnamed: 1_level_0,time_train,avg_recs,precision@5,precision@10,precision@100,ndcg@5,ndcg@10,ndcg@100,map@5,map@10,map@100,recall@5,recall@10,recall@100,r-precision@5,r-precision@10,r-precision@100
window_size,k,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
14d,2,0.001746,1.758069,0.270137,0.135068,0.013507,0.578422,0.578422,0.578422,0.910319,0.910319,0.910319,1.0,1.0,1.0,0.968519,0.968519,0.968519
14d,10,0.003524,1.758069,0.270137,0.135068,0.013507,0.577903,0.577903,0.577903,0.909437,0.909437,0.909437,1.0,1.0,1.0,0.968519,0.968519,0.968519
14d,1,0.002390,1.758069,0.270137,0.135068,0.013507,0.577903,0.577903,0.577903,0.909437,0.909437,0.909437,1.0,1.0,1.0,0.968519,0.968519,0.968519
14d,15,0.002124,1.758069,0.270137,0.135068,0.013507,0.577903,0.577903,0.577903,0.909437,0.909437,0.909437,1.0,1.0,1.0,0.968519,0.968519,0.968519
14d,9,0.003063,1.758069,0.270137,0.135068,0.013507,0.577903,0.577903,0.577903,0.909437,0.909437,0.909437,1.0,1.0,1.0,0.968519,0.968519,0.968519
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
90d,4,0.002354,1.868358,0.251691,0.125845,0.012585,0.794279,0.794279,0.794279,0.851281,0.851281,0.851281,1.0,1.0,1.0,0.961844,0.961844,0.961844
90d,9,0.005211,1.868358,0.251691,0.125845,0.012585,0.794279,0.794279,0.794279,0.851281,0.851281,0.851281,1.0,1.0,1.0,0.961844,0.961844,0.961844
60d,4,0.003459,1.861626,0.253632,0.126816,0.012682,0.783779,0.783779,0.783779,0.851063,0.851063,0.851063,1.0,1.0,1.0,0.963624,0.963624,0.963624
60d,2,0.002948,1.861626,0.253632,0.126816,0.012682,0.783611,0.783611,0.783611,0.850810,0.850810,0.850810,1.0,1.0,1.0,0.963624,0.963624,0.963624


Now let's see the behaviour in each fold

In [10]:
# (window_size, k) pair ranked first in overall_hparams
best_window_size, best_k = overall_hparams.index[0]
# Select the rows of that pair with a boolean mask. The previous `.loc` lookup
# on an unsorted, non-unique MultiIndex triggered a pandas warning on this line
# (likely the lexsort-depth PerformanceWarning); the mask keeps the same rows
# in the same order without it.
is_best_pair = (mdfi['window_size'] == best_window_size) & (mdfi['k'] == best_k)
best_avg_hparams = mdfi[is_best_pair].set_index(['fold', 'window_size', 'k'])
paths.save_model_results(best_avg_hparams, 'itemknn-best-avg', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE, K_RECOMMENDATIONS)
best_avg_hparams

Saved dataframe into /home/daviddavo/recsys4daos/data/output/Frax/models/itemknn-best-avg_5d_normalize.parquet


  best_avg_hparams = mdfi.set_index(['window_size', 'k']).loc[overall_hparams.iloc[0].name].reset_index().set_index(['fold', 'window_size', 'k'])


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,precision@3,precision@5,precision@10,...,recall@10,recall@15,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@15,r-precision@100,time_eval
fold,window_size,k,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
2022-04-11,14d,2,2022-04-11,0.001011,0.002849,1,1.0,1.0,1.0,0.333333,0.2,0.1,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.12108
2022-04-16,14d,2,2022-04-16,0.001164,0.013211,5,2.0,4.0,0.727273,0.515152,0.490909,0.245455,...,1.0,1.0,1.0,0.458333,0.666667,0.716667,0.716667,0.716667,0.716667,0.12892
2022-04-21,14d,2,2022-04-21,0.002187,0.011854,1,1.0,1.0,1.0,0.333333,0.2,0.1,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.125453
2022-04-26,14d,2,2022-04-26,0.002077,0.031267,2,1.0,1.566667,0.633333,0.388889,0.233333,0.116667,...,1.0,1.0,1.0,0.868421,1.0,1.0,1.0,1.0,1.0,0.129071
2022-05-01,14d,2,2022-05-01,0.001196,0.028131,2,1.0,1.666667,0.814815,0.432099,0.259259,0.12963,...,1.0,1.0,1.0,0.818182,1.0,1.0,1.0,1.0,1.0,0.129877
2022-05-06,14d,2,2022-05-06,0.003479,0.011163,2,1.0,1.875,1.0,0.625,0.375,0.1875,...,1.0,1.0,1.0,0.5625,1.0,1.0,1.0,1.0,1.0,0.125087
2022-05-11,14d,2,2022-05-11,0.001192,0.013972,2,2.0,2.0,0.909091,0.454545,0.272727,0.136364,...,1.0,1.0,1.0,0.8,1.0,1.0,1.0,1.0,1.0,0.127244
2022-05-16,14d,2,2022-05-16,0.00121,0.004812,1,1.0,1.0,1.0,0.333333,0.2,0.1,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.123614
2022-05-21,14d,2,2022-05-21,0.002174,0.023129,3,1.0,1.714286,0.52381,0.333333,0.2,0.1,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.127586
2022-05-26,14d,2,2022-05-26,0.001035,0.016832,1,1.0,1.0,1.0,0.333333,0.2,0.1,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.124733


### Best hparams by fold

These are the parameters used to check which model is the best (validation)

In [11]:
print("Best hyperparams by fold", OPTIM_METRIC)
# For each fold keep only the row with the highest OPTIM_METRIC, then restore
# chronological order and index by (fold, k, window_size)
best_hparams = (
    mdfi
    .sort_values(OPTIM_METRIC, ascending=False)
    .drop_duplicates(subset=['fold'], keep='first')
    .sort_values('fold')
    .set_index(['fold', 'k', 'window_size'])
)
paths.save_model_results(
    best_hparams, 'itemknn-best-val', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE, K_RECOMMENDATIONS,
)
best_hparams[display_columns]

Best hyperparams by fold map@10
Saved dataframe into /home/daviddavo/recsys4daos/data/output/Frax/models/itemknn-best-val_5d_normalize.parquet


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,time_train,avg_recs,precision@5,precision@10,precision@100,ndcg@5,ndcg@10,ndcg@100,map@5,map@10,map@100,recall@5,recall@10,recall@100,r-precision@5,r-precision@10,r-precision@100
fold,k,window_size,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2022-04-11,1,14d,0.001106,1.0,0.2,0.1,0.01,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2022-04-16,15,7d,0.001273,2.8,0.36,0.18,0.018,0.157608,0.157608,0.157608,0.866667,0.866667,0.866667,1.0,1.0,1.0,0.8,0.8,0.8
2022-04-21,10,7d,0.001105,1.0,0.2,0.1,0.01,0.636364,0.636364,0.636364,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2022-04-26,7,7d,0.002919,1.5,0.223077,0.111538,0.011154,0.493008,0.493008,0.493008,0.826923,0.826923,0.826923,1.0,1.0,1.0,1.0,1.0,1.0
2022-05-01,4,7d,0.002811,1.5,0.255556,0.127778,0.012778,0.511903,0.511903,0.511903,0.916667,0.916667,0.916667,1.0,1.0,1.0,1.0,1.0,1.0
2022-05-06,1,7d,0.001259,1.75,0.35,0.175,0.0175,0.173913,0.173913,0.173913,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2022-05-11,1,60d,0.001436,2.0,0.257143,0.128571,0.012857,0.851933,0.851933,0.851933,0.964286,0.964286,0.964286,1.0,1.0,1.0,1.0,1.0,1.0
2022-05-16,3,14d,0.00405,1.0,0.2,0.1,0.01,0.5,0.5,0.5,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2022-05-21,15,7d,0.002326,1.5,0.2,0.1,0.01,0.541237,0.541237,0.541237,0.787037,0.787037,0.787037,1.0,1.0,1.0,1.0,1.0,1.0
2022-05-26,6,90d,0.015604,1.0,0.2,0.1,0.01,0.904762,0.904762,0.904762,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [12]:
# Summary statistics over every fold except the first one
best_hparams.iloc[1:].describe()

Unnamed: 0,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,precision@3,precision@5,precision@10,...,recall@10,recall@15,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@15,r-precision@100,time_eval
count,9,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,...,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0
mean,2022-05-06 00:00:00,0.003643,0.015212,2.111111,1.222222,1.561111,0.869651,0.408477,0.249531,0.124765,...,1.0,1.0,1.0,0.87125,0.977778,0.977778,0.977778,0.977778,0.977778,0.12636
min,2022-04-16 00:00:00,0.001105,0.004829,1.0,1.0,1.0,0.611111,0.333333,0.2,0.1,...,1.0,1.0,1.0,0.625,0.8,0.8,0.8,0.8,0.8,0.12317
25%,2022-04-26 00:00:00,0.001273,0.006999,1.0,1.0,1.0,0.8,0.333333,0.2,0.1,...,1.0,1.0,1.0,0.833333,1.0,1.0,1.0,1.0,1.0,0.125827
50%,2022-05-06 00:00:00,0.002326,0.018656,2.0,1.0,1.5,0.928571,0.371795,0.223077,0.111538,...,1.0,1.0,1.0,0.911765,1.0,1.0,1.0,1.0,1.0,0.126491
75%,2022-05-16 00:00:00,0.002919,0.01935,2.0,1.0,1.75,1.0,0.428571,0.257143,0.128571,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.127544
max,2022-05-26 00:00:00,0.015604,0.026299,5.0,2.0,2.8,1.0,0.583333,0.36,0.18,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.128148
std,,0.004594,0.008377,1.269296,0.440959,0.582976,0.154523,0.094077,0.064133,0.032067,...,0.0,0.0,0.0,0.154634,0.066667,0.066667,0.066667,0.066667,0.066667,0.001602


### Results of using best hparams in next fold

Kind of like the cvtt from [the LightGCN notebook](./11_microsoft_tuning.ipynb).

In [13]:
# Pair each fold's best (k, window_size) with the *following* fold: the `fold`
# column is shifted up by one row, and the last row (which has no next fold)
# is dropped.
next_hparams_idx = (
    best_hparams.index.to_frame(index=False)
    .assign(fold=lambda idx: idx['fold'].shift(-1))
    .dropna()
)
assert len(next_hparams_idx) == len(best_hparams)-1
# Look up the results obtained with those hyperparameters on the following fold
results_by_hparams = mdfi.set_index(['fold', 'k', 'window_size'])
next_hparams = results_by_hparams.loc[pd.MultiIndex.from_frame(next_hparams_idx)]
paths.save_model_results(
    next_hparams, 'itemknn-best-test', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE, K_RECOMMENDATIONS,
)
next_hparams[display_columns]

Saved dataframe into /home/daviddavo/recsys4daos/data/output/Frax/models/itemknn-best-test_5d_normalize.parquet


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,time_train,avg_recs,precision@5,precision@10,precision@100,ndcg@5,ndcg@10,ndcg@100,map@5,map@10,map@100,recall@5,recall@10,recall@100,r-precision@5,r-precision@10,r-precision@100
fold,k,window_size,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2022-04-16,1,14d,0.001133,4.0,0.490909,0.245455,0.024545,0.326319,0.326319,0.326319,0.784091,0.784091,0.784091,1.0,1.0,1.0,0.716667,0.716667,0.716667
2022-04-21,15,7d,0.001124,1.0,0.2,0.1,0.01,0.636364,0.636364,0.636364,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2022-04-26,10,7d,0.001151,1.5,0.223077,0.111538,0.011154,0.493008,0.493008,0.493008,0.826923,0.826923,0.826923,1.0,1.0,1.0,1.0,1.0,1.0
2022-05-01,7,7d,0.005849,1.5,0.255556,0.127778,0.012778,0.511903,0.511903,0.511903,0.916667,0.916667,0.916667,1.0,1.0,1.0,1.0,1.0,1.0
2022-05-06,4,7d,0.001351,1.75,0.35,0.175,0.0175,0.173913,0.173913,0.173913,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2022-05-11,1,7d,0.001423,2.0,0.3,0.15,0.015,0.305799,0.305799,0.305799,0.75,0.75,0.75,1.0,1.0,1.0,1.0,1.0,1.0
2022-05-16,1,60d,0.001435,1.0,0.2,0.1,0.01,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2022-05-21,3,14d,0.001143,1.714286,0.2,0.1,0.01,0.594809,0.594809,0.594809,0.722222,0.722222,0.722222,1.0,1.0,1.0,1.0,1.0,1.0
2022-05-26,15,7d,0.00149,1.0,0.2,0.1,0.01,0.52381,0.52381,0.52381,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [14]:
# Summary statistics of the results obtained on each fold with the previous fold's best hyperparameters
next_hparams.describe()

Unnamed: 0,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,precision@3,precision@5,precision@10,...,recall@10,recall@15,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@15,r-precision@100,time_eval
count,9,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,...,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0
mean,2022-05-06 00:00:00,0.001789,0.01419,2.111111,1.222222,1.718254,0.804251,0.414393,0.268838,0.134419,...,1.0,1.0,1.0,0.81427,0.962963,0.968519,0.968519,0.968519,0.968519,0.126014
min,2022-04-16 00:00:00,0.001124,0.006023,1.0,1.0,1.0,0.5,0.333333,0.2,0.1,...,1.0,1.0,1.0,0.458333,0.666667,0.716667,0.716667,0.716667,0.716667,0.122234
25%,2022-04-26 00:00:00,0.001143,0.009543,1.0,1.0,1.0,0.653846,0.333333,0.2,0.1,...,1.0,1.0,1.0,0.625,1.0,1.0,1.0,1.0,1.0,0.124874
50%,2022-05-06 00:00:00,0.001351,0.012568,2.0,1.0,1.5,0.833333,0.371795,0.223077,0.111538,...,1.0,1.0,1.0,0.911765,1.0,1.0,1.0,1.0,1.0,0.126045
75%,2022-05-16 00:00:00,0.001435,0.018716,2.0,1.0,1.75,1.0,0.5,0.3,0.15,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.127412
max,2022-05-26 00:00:00,0.005849,0.026358,5.0,2.0,4.0,1.0,0.583333,0.490909,0.245455,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.128736
std,,0.00153,0.006944,1.269296,0.440959,0.930706,0.210321,0.096341,0.098805,0.049402,...,0.0,0.0,0.0,0.226145,0.111111,0.094444,0.094444,0.094444,0.094444,0.002055


# User-based KNN

In [15]:
def testHParamsUserKNN(fold, k: int, window_size=None):
    """Evaluate an implicit-feedback user-user KNN model on a single fold.

    Args:
        fold: Key of ``all_folds`` selecting the fold to evaluate.
        k: Passed as ``nnbrs``, the maximum number of neighbors used for
            scoring.
        window_size: Forwarded unchanged to ``test_with_hparams_lenskit``.

    Returns:
        The result of ``test_with_hparams_lenskit`` for this configuration.
    """
    knn_options = dict(
        feedback='implicit',  # VERY IMPORTANT
        min_sim=0,
        nnbrs=k,
    )
    # min_nbrs is intentionally not set (left at the library default)
    model = user_knn.UserUser(**knn_options)
    return test_with_hparams_lenskit(model, all_folds[fold], K_RECOMMENDATIONS, window_size)

# Single run on the most recent fold to check that the pipeline works
pd.Series(testHParamsUserKNN(last_folds_idx[-1], k=5, window_size='14d'))

fold_t             2022-05-26 00:00:00
time_train                    0.702219
time_rec                      0.755017
open_proposals                       1
min_recs                             1
avg_recs                           1.0
precision@1                        1.0
precision@3                   0.333333
precision@5                        0.2
precision@10                       0.1
precision@15                  0.066667
precision@100                     0.01
ndcg@1                        0.714286
ndcg@3                        0.714286
ndcg@5                        0.714286
ndcg@10                       0.714286
ndcg@15                       0.714286
ndcg@100                      0.714286
map@1                              1.0
map@3                              1.0
map@5                              1.0
map@10                             1.0
map@15                             1.0
map@100                            1.0
recall@1                           1.0
recall@3                 

## Exploring hparams

In [16]:
# Every (fold, k, window_size) combination to evaluate; the neighbourhood
# sizes are the same ITEMKNN_Ks list used for the item-based model
userknn_grid = ParameterGrid(dict(
    fold=last_folds_idx,
    k=ITEMKNN_Ks,
    window_size=WINDOW_SIZES,
))
# Location handed to explore_hparams as its third argument (the log below
# shows a checkpoint being saved at this path)
userknn_checkpoint = paths.hparams_progress('userknn', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE)

results = explore_hparams(testHParamsUserKNN, userknn_grid, userknn_checkpoint)
mdfu = pd.DataFrame(results)
mdfu

  0%|          | 0/770 [00:00<?, ?it/s]























[2024-09-16T14:12:11.090726] Saving checkpoint at ../.cache/Frax/hparams-userknn_5d_normalize.pkl


Unnamed: 0,fold,k,window_size,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,...,recall@10,recall@15,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@15,r-precision@100,time_eval
0,2022-04-11,1,7d,2022-04-11,0.000459,0.001247,1,,,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.072377
1,2022-04-11,1,14d,2022-04-11,0.000319,0.001968,1,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.119966
2,2022-04-11,1,21d,2022-04-11,0.000407,0.002395,1,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.122374
3,2022-04-11,1,30d,2022-04-11,0.000593,0.002497,1,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.121201
4,2022-04-11,1,60d,2022-04-11,0.000668,0.003372,1,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.122352
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
765,2022-05-26,15,21d,2022-05-26,0.000484,0.013904,1,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.124318
766,2022-05-26,15,30d,2022-05-26,0.000577,0.014912,1,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.123987
767,2022-05-26,15,60d,2022-05-26,0.000742,0.016373,1,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.126220
768,2022-05-26,15,90d,2022-05-26,0.000889,0.016916,1,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.124962


### Best overall hparams

In [17]:
# Columns shown in the summary tables: timings, recommendation count and the @5/@10/@100 metrics
display_columns = ['time_train', 'avg_recs'] + [
    col for col in mdfu.columns if col.endswith(('@5', '@10', '@100'))
]
# Average every (window_size, k) pair over the folds after the first selected
# one, and rank the pairs by OPTIM_METRIC (best first)
overall_hparams = (
    mdfu[mdfu['fold'] > last_folds_idx[0]]
    .groupby(['window_size', 'k'])
    .mean()
    .sort_values(OPTIM_METRIC, ascending=False)
)
overall_hparams[display_columns]

Unnamed: 0_level_0,Unnamed: 1_level_0,time_train,avg_recs,precision@5,precision@10,precision@100,ndcg@5,ndcg@10,ndcg@100,map@5,map@10,map@100,recall@5,recall@10,recall@100,r-precision@5,r-precision@10,r-precision@100
window_size,k,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
7d,5,0.000483,1.561111,0.254292,0.127146,0.012715,0.399294,0.399294,0.399294,0.905255,0.905255,0.905255,1.0,1.0,1.0,0.977778,0.977778,0.977778
7d,15,0.000446,1.561111,0.254292,0.127146,0.012715,0.399294,0.399294,0.399294,0.905255,0.905255,0.905255,1.0,1.0,1.0,0.977778,0.977778,0.977778
7d,10,0.000443,1.561111,0.254292,0.127146,0.012715,0.399294,0.399294,0.399294,0.905255,0.905255,0.905255,1.0,1.0,1.0,0.977778,0.977778,0.977778
7d,9,0.000414,1.561111,0.254292,0.127146,0.012715,0.399294,0.399294,0.399294,0.905255,0.905255,0.905255,1.0,1.0,1.0,0.977778,0.977778,0.977778
7d,8,0.000451,1.561111,0.254292,0.127146,0.012715,0.399294,0.399294,0.399294,0.905255,0.905255,0.905255,1.0,1.0,1.0,0.977778,0.977778,0.977778
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10YE,9,0.002241,1.872147,0.252231,0.126116,0.012612,0.805325,0.805325,0.805325,0.847139,0.847139,0.847139,1.0,1.0,1.0,0.964693,0.964693,0.964693
90d,8,0.000826,1.868358,0.251691,0.125845,0.012585,0.789956,0.789956,0.789956,0.844579,0.844579,0.844579,1.0,1.0,1.0,0.961640,0.961640,0.961640
10YE,6,0.002127,1.872147,0.252231,0.126116,0.012612,0.803219,0.803219,0.803219,0.843512,0.843512,0.843512,1.0,1.0,1.0,0.970391,0.970391,0.970391
10YE,7,0.002147,1.872147,0.252231,0.126116,0.012612,0.802374,0.802374,0.802374,0.842509,0.842509,0.842509,1.0,1.0,1.0,0.973240,0.973240,0.973240


In [18]:
# (window_size, k) pair ranked first in overall_hparams
best_window_size, best_k = overall_hparams.index[0]
# Select the rows of that pair with a boolean mask. The previous `.loc` lookup
# on an unsorted, non-unique MultiIndex triggered a pandas warning on this line
# (likely the lexsort-depth PerformanceWarning); the mask keeps the same rows
# in the same order without it.
is_best_pair = (mdfu['window_size'] == best_window_size) & (mdfu['k'] == best_k)
best_avg_hparams = mdfu[is_best_pair].set_index(['fold', 'window_size', 'k'])
paths.save_model_results(best_avg_hparams, 'userknn-best-avg', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE, K_RECOMMENDATIONS)
best_avg_hparams

Saved dataframe into /home/daviddavo/recsys4daos/data/output/Frax/models/userknn-best-avg_5d_normalize.parquet


  best_avg_hparams = mdfu.set_index(['window_size', 'k']).loc[overall_hparams.iloc[0].name].reset_index().set_index(['fold', 'window_size', 'k'])


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,precision@3,precision@5,precision@10,...,recall@10,recall@15,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@15,r-precision@100,time_eval
fold,window_size,k,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
2022-04-11,7d,5,2022-04-11,0.000347,0.001214,1,,,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.069396
2022-04-16,7d,5,2022-04-16,0.000478,0.004844,5,2.0,2.8,0.8,0.533333,0.36,0.18,...,1.0,1.0,1.0,0.625,0.8,0.8,0.8,0.8,0.8,0.136697
2022-04-21,7d,5,2022-04-21,0.000976,0.018888,1,1.0,1.0,1.0,0.333333,0.2,0.1,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.14756
2022-04-26,7d,5,2022-04-26,0.000505,0.019494,2,1.0,1.5,0.653846,0.371795,0.223077,0.111538,...,1.0,1.0,1.0,0.911765,1.0,1.0,1.0,1.0,1.0,0.128543
2022-05-01,7d,5,2022-05-01,0.000408,0.013079,2,1.0,1.5,0.833333,0.425926,0.255556,0.127778,...,1.0,1.0,1.0,0.833333,1.0,1.0,1.0,1.0,1.0,0.12641
2022-05-06,7d,5,2022-05-06,0.000407,0.004195,2,1.0,1.75,1.0,0.583333,0.35,0.175,...,1.0,1.0,1.0,0.625,1.0,1.0,1.0,1.0,1.0,0.128155
2022-05-11,7d,5,2022-05-11,0.000424,0.005393,2,2.0,2.0,0.5,0.5,0.3,0.15,...,1.0,1.0,1.0,0.5,1.0,1.0,1.0,1.0,1.0,0.125804
2022-05-16,7d,5,2022-05-16,0.000409,0.001956,1,1.0,1.0,1.0,0.333333,0.2,0.1,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.120033
2022-05-21,7d,5,2022-05-21,0.000393,0.013128,3,1.0,1.5,0.611111,0.333333,0.2,0.1,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.126763
2022-05-26,7d,5,2022-05-26,0.000346,0.0082,1,1.0,1.0,1.0,0.333333,0.2,0.1,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.124398


### Best hparams by fold

In [19]:
print("Best hyperparams by fold", OPTIM_METRIC)
# For each fold keep only the row with the highest OPTIM_METRIC, then restore
# chronological order and index by (fold, k, window_size)
best_hparams = (
    mdfu
    .sort_values(OPTIM_METRIC, ascending=False)
    .drop_duplicates(subset=['fold'], keep='first')
    .sort_values('fold')
    .set_index(['fold', 'k', 'window_size'])
)
paths.save_model_results(
    best_hparams, 'userknn-best-val', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE, K_RECOMMENDATIONS,
)
best_hparams[display_columns]

Best hyperparams by fold map@10
Saved dataframe into /home/daviddavo/recsys4daos/data/output/Frax/models/userknn-best-val_5d_normalize.parquet


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,time_train,avg_recs,precision@5,precision@10,precision@100,ndcg@5,ndcg@10,ndcg@100,map@5,map@10,map@100,recall@5,recall@10,recall@100,r-precision@5,r-precision@10,r-precision@100
fold,k,window_size,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2022-04-11,1,14d,0.000319,1.0,0.2,0.1,0.01,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2022-04-16,15,7d,0.000807,2.8,0.36,0.18,0.018,0.157608,0.157608,0.157608,0.866667,0.866667,0.866667,1.0,1.0,1.0,0.8,0.8,0.8
2022-04-21,10,90d,0.000761,1.0,0.2,0.1,0.01,0.909091,0.909091,0.909091,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2022-04-26,1,14d,0.000429,1.566667,0.233333,0.116667,0.011667,0.571941,0.571941,0.571941,0.833333,0.833333,0.833333,1.0,1.0,1.0,1.0,1.0,1.0
2022-05-01,6,7d,0.000428,1.5,0.255556,0.127778,0.012778,0.511903,0.511903,0.511903,0.916667,0.916667,0.916667,1.0,1.0,1.0,1.0,1.0,1.0
2022-05-06,1,7d,0.000416,1.75,0.35,0.175,0.0175,0.173913,0.173913,0.173913,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2022-05-11,1,10YE,0.002097,2.0,0.257143,0.128571,0.012857,0.851933,0.851933,0.851933,0.964286,0.964286,0.964286,1.0,1.0,1.0,1.0,1.0,1.0
2022-05-16,8,7d,0.000438,1.0,0.2,0.1,0.01,0.25,0.25,0.25,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2022-05-21,1,90d,0.000958,1.961538,0.238462,0.119231,0.011923,0.819832,0.819832,0.819832,0.842949,0.842949,0.842949,1.0,1.0,1.0,0.975,0.975,0.975
2022-05-26,8,7d,0.000322,1.0,0.2,0.1,0.01,0.52381,0.52381,0.52381,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [20]:
# Summary statistics over every fold except the first one, restricted to the displayed columns
best_hparams.iloc[1:].describe()[display_columns]

Unnamed: 0,time_train,avg_recs,precision@5,precision@10,precision@100,ndcg@5,ndcg@10,ndcg@100,map@5,map@10,map@100,recall@5,recall@10,recall@100,r-precision@5,r-precision@10,r-precision@100
count,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0
mean,0.00074,1.619801,0.254944,0.127472,0.012747,0.530003,0.530003,0.530003,0.935989,0.935989,0.935989,1.0,1.0,1.0,0.975,0.975,0.975
min,0.000322,1.0,0.2,0.1,0.01,0.157608,0.157608,0.157608,0.833333,0.833333,0.833333,1.0,1.0,1.0,0.8,0.8,0.8
25%,0.000428,1.0,0.2,0.1,0.01,0.25,0.25,0.25,0.866667,0.866667,0.866667,1.0,1.0,1.0,1.0,1.0,1.0
50%,0.000438,1.566667,0.238462,0.119231,0.011923,0.52381,0.52381,0.52381,0.964286,0.964286,0.964286,1.0,1.0,1.0,1.0,1.0,1.0
75%,0.000807,1.961538,0.257143,0.128571,0.012857,0.819832,0.819832,0.819832,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
max,0.002097,2.8,0.36,0.18,0.018,0.909091,0.909091,0.909091,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
std,0.000554,0.595902,0.061107,0.030554,0.003055,0.290979,0.290979,0.290979,0.07208,0.07208,0.07208,0.0,0.0,0.0,0.066144,0.066144,0.066144


### Results of using best hparams in next fold

Kind of like the cvtt from [the LightGCN notebook](./11_microsoft_tuning.ipynb).

In [21]:
# Pair each fold's best (k, window_size) with the *following* fold: the `fold`
# column is shifted up by one row, and the last row (which has no next fold)
# is dropped.
next_hparams_idx = (
    best_hparams.index.to_frame(index=False)
    .assign(fold=lambda idx: idx['fold'].shift(-1))
    .dropna()
)
assert len(next_hparams_idx) == len(best_hparams)-1
# Look up the results obtained with those hyperparameters on the following fold
results_by_hparams = mdfu.set_index(['fold', 'k', 'window_size'])
next_hparams = results_by_hparams.loc[pd.MultiIndex.from_frame(next_hparams_idx)]
paths.save_model_results(
    next_hparams, 'userknn-best-test', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE, K_RECOMMENDATIONS,
)
next_hparams

Saved dataframe into /home/daviddavo/recsys4daos/data/output/Frax/models/userknn-best-test_5d_normalize.parquet


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,precision@3,precision@5,precision@10,...,recall@10,recall@15,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@15,r-precision@100,time_eval
fold,k,window_size,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
2022-04-16,1,14d,2022-04-16,0.000355,0.008783,5,2.0,4.0,0.272727,0.484848,0.490909,0.245455,...,1.0,1.0,1.0,0.5,0.592593,0.648148,0.648148,0.648148,0.648148,0.127457
2022-04-21,15,7d,2022-04-21,0.000413,0.006228,1,1.0,1.0,1.0,0.333333,0.2,0.1,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.124831
2022-04-26,10,90d,2022-04-26,0.000785,0.029025,2,1.0,1.657895,0.578947,0.403509,0.242105,0.121053,...,1.0,1.0,1.0,0.818182,1.0,1.0,1.0,1.0,1.0,0.131301
2022-05-01,1,14d,2022-05-01,0.000503,0.020255,2,1.0,1.666667,0.814815,0.432099,0.259259,0.12963,...,1.0,1.0,1.0,0.818182,1.0,1.0,1.0,1.0,1.0,0.127908
2022-05-06,6,7d,2022-05-06,0.000437,0.003887,2,1.0,1.75,1.0,0.583333,0.35,0.175,...,1.0,1.0,1.0,0.625,1.0,1.0,1.0,1.0,1.0,0.124938
2022-05-11,1,7d,2022-05-11,0.000352,0.005297,2,2.0,2.0,0.5,0.5,0.3,0.15,...,1.0,1.0,1.0,0.5,1.0,1.0,1.0,1.0,1.0,0.124941
2022-05-16,1,10YE,2022-05-16,0.002176,0.012181,1,1.0,1.0,1.0,0.333333,0.2,0.1,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.120273
2022-05-21,8,7d,2022-05-21,0.000437,0.013149,3,1.0,1.5,0.611111,0.333333,0.2,0.1,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.126553
2022-05-26,1,90d,2022-05-26,0.000847,0.016798,1,1.0,1.0,1.0,0.333333,0.2,0.1,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.124909


In [22]:
# Summary statistics of the results obtained on each fold with the previous fold's best hyperparameters
next_hparams.describe()

Unnamed: 0,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,precision@3,precision@5,precision@10,...,recall@10,recall@15,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@15,r-precision@100,time_eval
count,9,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,...,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0
mean,2022-05-06 00:00:00,0.000701,0.012845,2.111111,1.222222,1.730507,0.753067,0.415236,0.271364,0.135682,...,1.0,1.0,1.0,0.806818,0.954733,0.960905,0.960905,0.960905,0.960905,0.125901
min,2022-04-16 00:00:00,0.000352,0.003887,1.0,1.0,1.0,0.272727,0.333333,0.2,0.1,...,1.0,1.0,1.0,0.5,0.592593,0.648148,0.648148,0.648148,0.648148,0.120273
25%,2022-04-26 00:00:00,0.000413,0.006228,1.0,1.0,1.0,0.578947,0.333333,0.2,0.1,...,1.0,1.0,1.0,0.625,1.0,1.0,1.0,1.0,1.0,0.124909
50%,2022-05-06 00:00:00,0.000437,0.012181,2.0,1.0,1.657895,0.814815,0.403509,0.242105,0.121053,...,1.0,1.0,1.0,0.818182,1.0,1.0,1.0,1.0,1.0,0.124941
75%,2022-05-16 00:00:00,0.000785,0.016798,2.0,1.0,1.75,1.0,0.484848,0.3,0.15,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.127457
max,2022-05-26 00:00:00,0.002176,0.029025,5.0,2.0,4.0,1.0,0.583333,0.490909,0.245455,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.131301
std,,0.000582,0.008142,1.269296,0.440959,0.927834,0.272249,0.091884,0.097838,0.048919,...,0.0,0.0,0.0,0.21531,0.135802,0.117284,0.117284,0.117284,0.117284,0.002993
