In [1]:
import datetime as dt

import pandas as pd
import numpy as np
from lenskit.algorithms import item_knn, user_knn
from sklearn.model_selection import ParameterGrid

from recsys4daos.datasets import to_lenskit
from recsys4daos.model_selection import cvtt_open, explore_hparams
from recsys4daos.evaluation import test_with_hparams_lenskit

import paths

  from tqdm.autonotebook import tqdm


# Parameters

In [2]:
# Dataset config
ORG_NAME = 'Decentraland'
SPLITS_FREQ = 'W-THU'  # Split weekly
LAST_FOLDS = 20  # Use just last 10 splits
SPLITS_NORMALIZE = True

# Evaluation
K_RECOMMENDATIONS: list[int] = [1,3,5,10,100]
SPLITS_FREQ: str = 'W-THU' # split weekly
SPLITS_NORMALIZE = True # Wether or not to move everything to 00:00
LAST_FOLDS = 10 # Use just last 10 splits
LAST_FOLD_DATE_STR: str = None

# Search space config
WINDOW_SIZES = ['7d', '14d', '21d', '30d', '60d', '90d', '10YE']
ITEMKNN_Ks = [1,2,3,4,5,6,7,8,9,10,15]

OPTIM_METRIC = 'map@10'

In [3]:
# Parameters
EXECUTION_ID = "2024-07-03"
ORG_NAME = "Plaza"
SPLITS_FREQ = "3d"
LAST_FOLDS = 20
SPLITS_NORMALIZE = True
LAST_FOLD_DATE_STR = "2022-06-29"


# Load the dataset

In [4]:
dfp = paths.load_proposals(ORG_NAME)
dfv = paths.load_votes(ORG_NAME)

print(dfp.info())
print(dfv.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 419 entries, 0 to 418
Data columns (total 6 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   id                 419 non-null    object        
 1   author             419 non-null    object        
 2   date               419 non-null    datetime64[us]
 3   start              419 non-null    datetime64[us]
 4   end                416 non-null    datetime64[us]
 5   platform_proposal  419 non-null    object        
dtypes: datetime64[us](3), object(3)
memory usage: 19.8+ KB
None
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 534 entries, 0 to 533
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   id        534 non-null    object        
 1   proposal  534 non-null    object        
 2   voter     534 non-null    object        
 3   date      534 non-null    datetime64[us]
dtypes: dat

In [5]:
df = to_lenskit(dfv)
df

Unnamed: 0,user,item,timestamp,rating
0,0xd1629474d25a63b1018fcc965e1d218a00f6cbd3,93673fce-71b4-5182-b6ec-e178735412c6,2021-08-06 00:09:50,1
1,0xd1629474d25a63b1018fcc965e1d218a00f6cbd3,28d70b46-cb6e-523d-8a61-61e9ef4ba177,2021-08-07 13:33:45,1
2,0xd1629474d25a63b1018fcc965e1d218a00f6cbd3,3978518b-82b8-5391-b005-f901adc496e5,2021-08-08 12:20:05,1
3,0xd1629474d25a63b1018fcc965e1d218a00f6cbd3,5e18612e-cb1f-5d5c-b919-ae7b22b61913,2021-08-08 20:04:05,1
4,0xd1629474d25a63b1018fcc965e1d218a00f6cbd3,07b44ec6-c87b-5fb7-9661-0a0204b69f58,2021-08-08 20:04:15,1
...,...,...,...,...
529,0x1e9c89aff77215f3ad26bffe0c50d4fdeba6a352,fbfb0022-9f70-5ced-8d91-e907dd141a21,2023-04-25 17:07:30,1
530,0x1e9c89aff77215f3ad26bffe0c50d4fdeba6a352,d761260c-ff6b-568c-b1b7-d0715469b834,2023-04-25 17:08:15,1
531,0x1e9c89aff77215f3ad26bffe0c50d4fdeba6a352,b354250b-af5b-5caf-ba86-914269d2a25d,2023-06-21 10:10:30,1
532,0x1e9c89aff77215f3ad26bffe0c50d4fdeba6a352,b6a2a953-6beb-511f-8c73-8fe1385282a5,2023-07-19 01:34:15,1


## Split in folds

In [6]:
all_folds = { f.end:f for f in cvtt_open(
    df, SPLITS_FREQ, dfp.reset_index(), remove_not_in_train_col='item', col_item='item', last_fold=LAST_FOLD_DATE_STR,
)}
last_folds_idx = list(all_folds.keys())[-LAST_FOLDS:]
last_folds_idx

[Timestamp('2022-05-03 00:00:00'),
 Timestamp('2022-05-06 00:00:00'),
 Timestamp('2022-05-09 00:00:00'),
 Timestamp('2022-05-12 00:00:00'),
 Timestamp('2022-05-15 00:00:00'),
 Timestamp('2022-05-18 00:00:00'),
 Timestamp('2022-05-21 00:00:00'),
 Timestamp('2022-05-24 00:00:00'),
 Timestamp('2022-05-27 00:00:00'),
 Timestamp('2022-05-30 00:00:00'),
 Timestamp('2022-06-02 00:00:00'),
 Timestamp('2022-06-05 00:00:00'),
 Timestamp('2022-06-08 00:00:00'),
 Timestamp('2022-06-11 00:00:00'),
 Timestamp('2022-06-14 00:00:00'),
 Timestamp('2022-06-17 00:00:00'),
 Timestamp('2022-06-20 00:00:00'),
 Timestamp('2022-06-23 00:00:00'),
 Timestamp('2022-06-26 00:00:00'),
 Timestamp('2022-06-29 00:00:00')]

# Item-based KNN

In [7]:
def testHParamsItemKNN(fold, k: int, window_size=None):
    # Create algorithm
    algo = item_knn.ItemItem(
        feedback='implicit', # VERY IMPORTANT
        min_sim=0,
        # min_nbrs=0,
        nnbrs=k,  # the maximum number of neighbors for scoring each item (None for unlimited)
    )
    return test_with_hparams_lenskit(algo, all_folds[fold], K_RECOMMENDATIONS, window_size)

pd.Series(testHParamsItemKNN(last_folds_idx[-1],5,'14d'))

Numba is using threading layer omp - consider TBB


found 1 potential runtime problems - see https://boi.st/lkpy-perf


  b = blocks[bi]




fold_t             2022-06-29 00:00:00
time_train                    5.395721
time_rec                      0.001574
open_proposals                       1
min_recs                           NaN
avg_recs                           NaN
precision@1                        0.0
precision@3                        0.0
precision@5                        0.0
precision@10                       0.0
precision@100                      0.0
ndcg@1                             0.0
ndcg@3                             0.0
ndcg@5                             0.0
ndcg@10                            0.0
ndcg@100                           0.0
map@1                              0.0
map@3                              0.0
map@5                              0.0
map@10                             0.0
map@100                            0.0
recall@1                           0.0
recall@3                           0.0
recall@5                           0.0
recall@10                          0.0
recall@100               

## Exploring hparams

In [8]:
results = explore_hparams(
    testHParamsItemKNN, 
    ParameterGrid({
    'fold': last_folds_idx,
    'k': ITEMKNN_Ks, # ,20,25,30,35,40,45,50,60,70,80,90,100],
    'window_size': WINDOW_SIZES,
    }),
    paths.hparams_progress('itemknn', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE),
)
mdfi = pd.DataFrame(results)
mdfi

  0%|          | 0/1540 [00:00<?, ?it/s]



























































































































































































































































































































































[2024-07-04T08:32:20.404786] Saving checkpoint at ../.cache/Plaza/hparams-itemknn_3d_normalize.pkl






























































































































































































































































































































































Unnamed: 0,fold,k,window_size,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,...,recall@3,recall@5,recall@10,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@100,time_eval
0,2022-05-03,1,7d,2022-05-03,0.006026,0.000860,2,,,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.059266
1,2022-05-03,1,14d,2022-05-03,0.001036,0.000737,2,,,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.057863
2,2022-05-03,1,21d,2022-05-03,0.004788,0.000748,2,,,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058366
3,2022-05-03,1,30d,2022-05-03,0.000961,0.000696,2,,,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058682
4,2022-05-03,1,60d,2022-05-03,0.015133,0.000781,2,,,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.057171
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1535,2022-06-29,15,21d,2022-06-29,0.002101,0.000674,1,,,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058982
1536,2022-06-29,15,30d,2022-06-29,0.015015,0.000718,1,,,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.057405
1537,2022-06-29,15,60d,2022-06-29,0.011482,0.000709,1,,,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.057332
1538,2022-06-29,15,90d,2022-06-29,0.001312,0.000715,1,,,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.057171


### Best overall hparams

In [9]:
display_columns = ['time_train', 'avg_recs'] + [ c for c in mdfi.columns if c.endswith('@5') or c.endswith('@10') or c.endswith('@100') ]
overall_hparams = mdfi[mdfi['fold'] > last_folds_idx[0]].groupby(['window_size', 'k']).mean().sort_values(OPTIM_METRIC, ascending=False)
overall_hparams[display_columns]

Unnamed: 0_level_0,Unnamed: 1_level_0,time_train,avg_recs,precision@5,precision@10,precision@100,ndcg@5,ndcg@10,ndcg@100,map@5,map@10,map@100,recall@5,recall@10,recall@100,r-precision@5,r-precision@10,r-precision@100
window_size,k,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
30d,1,0.005258,3.666667,0.063158,0.031579,0.003158,0.146486,0.146486,0.146486,0.134503,0.134503,0.134503,0.157895,0.157895,0.157895,0.114035,0.114035,0.114035
30d,6,0.006938,3.666667,0.063158,0.031579,0.003158,0.137371,0.137371,0.137371,0.125731,0.125731,0.125731,0.157895,0.157895,0.157895,0.114035,0.114035,0.114035
30d,15,0.007949,3.666667,0.063158,0.031579,0.003158,0.137371,0.137371,0.137371,0.125731,0.125731,0.125731,0.157895,0.157895,0.157895,0.114035,0.114035,0.114035
30d,10,0.006352,3.666667,0.063158,0.031579,0.003158,0.137371,0.137371,0.137371,0.125731,0.125731,0.125731,0.157895,0.157895,0.157895,0.114035,0.114035,0.114035
30d,9,0.005326,3.666667,0.063158,0.031579,0.003158,0.137371,0.137371,0.137371,0.125731,0.125731,0.125731,0.157895,0.157895,0.157895,0.114035,0.114035,0.114035
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21d,7,0.004975,3.666667,0.063158,0.031579,0.003158,0.111055,0.111055,0.111055,0.090643,0.090643,0.090643,0.157895,0.157895,0.157895,0.064815,0.064815,0.064815
21d,6,0.006054,3.666667,0.063158,0.031579,0.003158,0.111055,0.111055,0.111055,0.090643,0.090643,0.090643,0.157895,0.157895,0.157895,0.064815,0.064815,0.064815
21d,5,0.005794,3.666667,0.063158,0.031579,0.003158,0.111055,0.111055,0.111055,0.090643,0.090643,0.090643,0.157895,0.157895,0.157895,0.064815,0.064815,0.064815
7d,2,0.007141,3.666667,0.063158,0.031579,0.003158,0.111055,0.111055,0.111055,0.090643,0.090643,0.090643,0.157895,0.157895,0.157895,0.064815,0.064815,0.064815


Now let's see the behaviour in each fold

In [10]:
best_avg_hparams = mdfi.set_index(['window_size', 'k']).loc[overall_hparams.iloc[0].name].reset_index().set_index(['fold', 'window_size', 'k'])
paths.save_model_results(best_avg_hparams, 'itemknn-best-avg', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE, K_RECOMMENDATIONS)
best_avg_hparams

  best_avg_hparams = mdfi.set_index(['window_size', 'k']).loc[overall_hparams.iloc[0].name].reset_index().set_index(['fold', 'window_size', 'k'])


Saved dataframe into /home/daviddavo/recsys4daos/data/output/Plaza/models/itemknn-best-avg_3d_normalize.pq


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,precision@3,precision@5,precision@10,...,recall@3,recall@5,recall@10,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@100,time_eval
fold,window_size,k,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
2022-05-03,30d,1,2022-05-03,0.000961,0.000696,2,,,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058682
2022-05-06,30d,1,2022-05-06,0.014102,0.001861,5,4.0,4.0,1.0,0.333333,0.4,0.2,...,0.5,1.0,1.0,1.0,0.5,0.5,0.5,0.5,0.5,0.103045
2022-05-09,30d,1,2022-05-09,0.003796,0.001742,4,3.0,3.0,1.0,0.333333,0.2,0.1,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.099495
2022-05-12,30d,1,2022-05-12,0.000903,0.000681,1,,,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.059741
2022-05-15,30d,1,2022-05-15,0.005011,0.00068,5,,,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.057011
2022-05-18,30d,1,2022-05-18,0.000898,0.000673,2,,,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.056969
2022-05-21,30d,1,2022-05-21,0.016069,0.000732,9,,,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05764
2022-05-24,30d,1,2022-05-24,0.00569,0.000715,4,,,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.057524
2022-05-27,30d,1,2022-05-27,0.005731,0.000702,4,,,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.060308
2022-05-30,30d,1,2022-05-30,0.000889,0.000673,4,,,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058641


### Best hparams by fold

In [11]:
print("Best hyperparams by fold", OPTIM_METRIC)
best_hparams = mdfi.sort_values(OPTIM_METRIC,ascending=False).drop_duplicates(['fold'], keep='first').sort_values('fold').set_index(['fold', 'k', 'window_size'])
paths.save_model_results(best_hparams, 'itemknn-best-test', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE, K_RECOMMENDATIONS)
best_hparams[display_columns]

Best hyperparams by fold map@10
Saved dataframe into /home/daviddavo/recsys4daos/data/output/Plaza/models/itemknn-best-test_3d_normalize.pq


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,time_train,avg_recs,precision@5,precision@10,precision@100,ndcg@5,ndcg@10,ndcg@100,map@5,map@10,map@100,recall@5,recall@10,recall@100,r-precision@5,r-precision@10,r-precision@100
fold,k,window_size,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2022-05-03,1,7d,0.006026,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2022-05-06,10,10YE,0.01956,4.0,0.4,0.2,0.02,0.919721,0.919721,0.919721,0.833333,0.833333,0.833333,1.0,1.0,1.0,0.5,0.5,0.5
2022-05-09,2,30d,0.003775,3.0,0.2,0.1,0.01,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2022-05-12,2,7d,0.000899,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2022-05-15,15,30d,0.01614,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2022-05-18,1,60d,0.005728,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2022-05-21,1,10YE,0.012126,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2022-05-24,3,21d,0.011394,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2022-05-27,9,21d,0.000893,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2022-05-30,6,7d,0.000905,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [12]:
best_hparams.tail(len(best_hparams)-1).describe()

Unnamed: 0,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,precision@3,precision@5,precision@10,...,recall@3,recall@5,recall@10,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@100,time_eval
count,19,19.0,19.0,19.0,3.0,3.0,19.0,19.0,19.0,19.0,...,19.0,19.0,19.0,19.0,19.0,19.0,19.0,19.0,19.0,19.0
mean,2022-06-02 00:00:00,0.007443,0.001035,3.947368,3.666667,3.666667,0.157895,0.087719,0.063158,0.031579,...,0.140351,0.157895,0.157895,0.157895,0.096491,0.114035,0.114035,0.114035,0.114035,0.064616
min,2022-05-06 00:00:00,0.000893,0.000627,0.0,3.0,3.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055819
25%,2022-05-19 12:00:00,0.000943,0.000675,2.0,3.5,3.5,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.057061
50%,2022-06-02 00:00:00,0.005607,0.000694,4.0,4.0,4.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.057287
75%,2022-06-15 12:00:00,0.013561,0.000772,5.0,4.0,4.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05973
max,2022-06-29 00:00:00,0.01956,0.003962,9.0,4.0,4.0,1.0,0.666667,0.6,0.3,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.101838
std,,0.006527,0.000876,2.414624,0.57735,0.57735,0.374634,0.217792,0.164014,0.082007,...,0.339131,0.374634,0.374634,0.374634,0.256495,0.283565,0.283565,0.283565,0.283565,0.01618


### Results of using best hparams in next fold

Kind of like the cvtt from [the LightGCN notebook](./11_microsoft_tuning.ipynb)

In [13]:
next_hparams_idx = best_hparams.index.to_frame(False)
next_hparams_idx['fold'] = next_hparams_idx['fold'].shift()
next_hparams_idx = next_hparams_idx.dropna()
assert len(next_hparams_idx) == len(best_hparams)-1
next_hparams = mdfi.set_index(['fold', 'k', 'window_size']).loc[pd.MultiIndex.from_frame(next_hparams_idx)]
paths.save_model_results(next_hparams, 'itemknn-best-valid', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE, K_RECOMMENDATIONS)
next_hparams[display_columns]

Saved dataframe into /home/daviddavo/recsys4daos/data/output/Plaza/models/itemknn-best-valid_3d_normalize.pq


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,time_train,avg_recs,precision@5,precision@10,precision@100,ndcg@5,ndcg@10,ndcg@100,map@5,map@10,map@100,recall@5,recall@10,recall@100,r-precision@5,r-precision@10,r-precision@100
fold,k,window_size,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2022-05-03,10,10YE,0.01094,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2022-05-06,2,30d,0.002517,4.0,0.4,0.2,0.02,0.877215,0.877215,0.877215,0.75,0.75,0.75,1.0,1.0,1.0,0.5,0.5,0.5
2022-05-09,2,7d,0.0104,3.0,0.2,0.1,0.01,0.5,0.5,0.5,0.333333,0.333333,0.333333,1.0,1.0,1.0,,,
2022-05-12,15,30d,0.000939,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2022-05-15,1,60d,0.001756,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2022-05-18,1,10YE,0.012087,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2022-05-21,3,21d,0.000917,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2022-05-24,9,21d,0.005694,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2022-05-27,6,7d,0.011445,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2022-05-30,1,7d,0.016083,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [14]:
next_hparams.describe()

Unnamed: 0,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,precision@3,precision@5,precision@10,...,recall@3,recall@5,recall@10,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@100,time_eval
count,19,19.0,19.0,19.0,3.0,3.0,19.0,19.0,19.0,19.0,...,19.0,19.0,19.0,19.0,19.0,18.0,18.0,18.0,18.0,19.0
mean,2022-05-30 00:00:00,0.008048,0.000893,4.0,3.666667,3.666667,0.105263,0.070175,0.063158,0.031579,...,0.114035,0.157895,0.157895,0.157895,0.04386,0.064815,0.064815,0.064815,0.064815,0.06442
min,2022-05-03 00:00:00,0.000917,0.000662,0.0,3.0,3.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.056775
25%,2022-05-16 12:00:00,0.002797,0.000688,2.0,3.5,3.5,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05708
50%,2022-05-30 00:00:00,0.0104,0.000703,4.0,4.0,4.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.057868
75%,2022-06-12 12:00:00,0.011573,0.000823,5.0,4.0,4.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05955
max,2022-06-26 00:00:00,0.016083,0.002005,9.0,4.0,4.0,1.0,0.666667,0.6,0.3,...,1.0,1.0,1.0,1.0,0.5,0.666667,0.666667,0.666667,0.666667,0.102641
std,,0.005208,0.000411,2.357023,0.57735,0.57735,0.315302,0.178434,0.164014,0.082007,...,0.283565,0.374634,0.374634,0.374634,0.13428,0.190792,0.190792,0.190792,0.190792,0.015503


# User-based KNN

In [15]:
def testHParamsUserKNN(fold, k: int, window_size=None):
    # Create algorithm
    algo = user_knn.UserUser(
        feedback='implicit', # VERY IMPORTANT
        min_sim=0,
        # min_nbrs=0,
        nnbrs=k,  # the maximum number of neighbors for scoring each item (None for unlimited)
    )
    return test_with_hparams_lenskit(algo, all_folds[fold], K_RECOMMENDATIONS, window_size)

pd.Series(testHParamsUserKNN(last_folds_idx[-1],5,'14d'))



fold_t             2022-06-29 00:00:00
time_train                    0.589368
time_rec                      0.001045
open_proposals                       1
min_recs                           NaN
avg_recs                           NaN
precision@1                        0.0
precision@3                        0.0
precision@5                        0.0
precision@10                       0.0
precision@100                      0.0
ndcg@1                             0.0
ndcg@3                             0.0
ndcg@5                             0.0
ndcg@10                            0.0
ndcg@100                           0.0
map@1                              0.0
map@3                              0.0
map@5                              0.0
map@10                             0.0
map@100                            0.0
recall@1                           0.0
recall@3                           0.0
recall@5                           0.0
recall@10                          0.0
recall@100               

## Exploring hparams

In [16]:
results = explore_hparams(
    testHParamsUserKNN, 
    ParameterGrid({
        'fold': last_folds_idx,
        'k': ITEMKNN_Ks, # ,20,25,30,35,40,45,50,60,70,80,90,100],
        'window_size': WINDOW_SIZES,
    }),
    paths.hparams_progress('userknn', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE),
)
mdfu = pd.DataFrame(results)
mdfu

  0%|          | 0/1540 [00:00<?, ?it/s]



















































































































































































































































































































































































[2024-07-04T08:34:13.488277] Saving checkpoint at ../.cache/Plaza/hparams-userknn_3d_normalize.pkl
































































































































































































































































Unnamed: 0,fold,k,window_size,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,...,recall@3,recall@5,recall@10,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@100,time_eval
0,2022-05-03,1,7d,2022-05-03,0.000413,0.000731,2,,,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.059008
1,2022-05-03,1,14d,2022-05-03,0.000345,0.000749,2,,,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.057004
2,2022-05-03,1,21d,2022-05-03,0.000292,0.000640,2,,,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.057365
3,2022-05-03,1,30d,2022-05-03,0.000274,0.000581,2,,,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055360
4,2022-05-03,1,60d,2022-05-03,0.000315,0.000812,2,,,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.056328
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1535,2022-06-29,15,21d,2022-06-29,0.000290,0.000585,1,,,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.057455
1536,2022-06-29,15,30d,2022-06-29,0.000339,0.000586,1,,,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055527
1537,2022-06-29,15,60d,2022-06-29,0.000278,0.000511,1,,,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.054837
1538,2022-06-29,15,90d,2022-06-29,0.000369,0.000661,1,,,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055246


### Best overall hparams

In [17]:
display_columns = ['time_train', 'avg_recs'] + [ c for c in mdfu.columns if c.endswith('@5') or c.endswith('@10') or c.endswith('@100') ]
overall_hparams = mdfu[mdfu['fold'] > last_folds_idx[0]].groupby(['window_size', 'k']).mean().sort_values(OPTIM_METRIC, ascending=False)
overall_hparams[display_columns]

Unnamed: 0_level_0,Unnamed: 1_level_0,time_train,avg_recs,precision@5,precision@10,precision@100,ndcg@5,ndcg@10,ndcg@100,map@5,map@10,map@100,recall@5,recall@10,recall@100,r-precision@5,r-precision@10,r-precision@100
window_size,k,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
90d,15,0.000339,3.666667,0.063158,0.031579,0.003158,0.146486,0.146486,0.146486,0.134503,0.134503,0.134503,0.157895,0.157895,0.157895,0.114035,0.114035,0.114035
90d,6,0.000348,3.666667,0.063158,0.031579,0.003158,0.146486,0.146486,0.146486,0.134503,0.134503,0.134503,0.157895,0.157895,0.157895,0.114035,0.114035,0.114035
90d,1,0.000358,3.666667,0.063158,0.031579,0.003158,0.146486,0.146486,0.146486,0.134503,0.134503,0.134503,0.157895,0.157895,0.157895,0.114035,0.114035,0.114035
90d,3,0.000348,3.666667,0.063158,0.031579,0.003158,0.146486,0.146486,0.146486,0.134503,0.134503,0.134503,0.157895,0.157895,0.157895,0.114035,0.114035,0.114035
90d,4,0.000344,3.666667,0.063158,0.031579,0.003158,0.146486,0.146486,0.146486,0.134503,0.134503,0.134503,0.157895,0.157895,0.157895,0.114035,0.114035,0.114035
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21d,2,0.000299,3.666667,0.063158,0.031579,0.003158,0.111055,0.111055,0.111055,0.090643,0.090643,0.090643,0.157895,0.157895,0.157895,0.064815,0.064815,0.064815
14d,6,0.000292,3.666667,0.063158,0.031579,0.003158,0.111055,0.111055,0.111055,0.090643,0.090643,0.090643,0.157895,0.157895,0.157895,0.064815,0.064815,0.064815
14d,5,0.000291,3.666667,0.063158,0.031579,0.003158,0.111055,0.111055,0.111055,0.090643,0.090643,0.090643,0.157895,0.157895,0.157895,0.064815,0.064815,0.064815
14d,4,0.000295,3.666667,0.063158,0.031579,0.003158,0.111055,0.111055,0.111055,0.090643,0.090643,0.090643,0.157895,0.157895,0.157895,0.064815,0.064815,0.064815


In [18]:
best_avg_hparams = mdfu.set_index(['window_size', 'k']).loc[overall_hparams.iloc[0].name].reset_index().set_index(['fold', 'window_size', 'k'])
paths.save_model_results(best_avg_hparams, 'userknn-best-avg', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE, K_RECOMMENDATIONS)
best_avg_hparams

Saved dataframe into /home/daviddavo/recsys4daos/data/output/Plaza/models/userknn-best-avg_3d_normalize.pq


  best_avg_hparams = mdfu.set_index(['window_size', 'k']).loc[overall_hparams.iloc[0].name].reset_index().set_index(['fold', 'window_size', 'k'])


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,precision@3,precision@5,precision@10,...,recall@3,recall@5,recall@10,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@100,time_eval
fold,window_size,k,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
2022-05-03,90d,15,2022-05-03,0.000294,0.000549,2,,,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055143
2022-05-06,90d,15,2022-05-06,0.000364,0.001264,5,4.0,4.0,1.0,0.333333,0.4,0.2,...,0.5,1.0,1.0,1.0,0.5,0.5,0.5,0.5,0.5,0.099999
2022-05-09,90d,15,2022-05-09,0.000296,0.001084,4,3.0,3.0,1.0,0.333333,0.2,0.1,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.096658
2022-05-12,90d,15,2022-05-12,0.000362,0.000671,1,,,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055327
2022-05-15,90d,15,2022-05-15,0.000292,0.000572,5,,,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.054716
2022-05-18,90d,15,2022-05-18,0.000355,0.00064,2,,,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.056034
2022-05-21,90d,15,2022-05-21,0.000304,0.000571,9,,,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055656
2022-05-24,90d,15,2022-05-24,0.000364,0.000711,4,,,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055477
2022-05-27,90d,15,2022-05-27,0.000299,0.000548,4,,,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055023
2022-05-30,90d,15,2022-05-30,0.000336,0.000744,4,,,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055602


### Best hparams by fold

In [19]:
print("Best hyperparams by fold", OPTIM_METRIC)
best_hparams = mdfu.sort_values(OPTIM_METRIC,ascending=False).drop_duplicates(['fold'], keep='first').sort_values('fold').set_index(['fold', 'k', 'window_size'])
paths.save_model_results(best_hparams, 'userknn-best-test', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE, K_RECOMMENDATIONS)
best_hparams[display_columns]

Best hyperparams by fold map@10
Saved dataframe into /home/daviddavo/recsys4daos/data/output/Plaza/models/userknn-best-test_3d_normalize.pq


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,time_train,avg_recs,precision@5,precision@10,precision@100,ndcg@5,ndcg@10,ndcg@100,map@5,map@10,map@100,recall@5,recall@10,recall@100,r-precision@5,r-precision@10,r-precision@100
fold,k,window_size,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2022-05-03,1,7d,0.000413,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2022-05-06,9,10YE,0.000468,4.0,0.4,0.2,0.02,0.919721,0.919721,0.919721,0.833333,0.833333,0.833333,1.0,1.0,1.0,0.5,0.5,0.5
2022-05-09,3,90d,0.000298,3.0,0.2,0.1,0.01,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2022-05-12,2,7d,0.000317,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2022-05-15,15,30d,0.000269,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2022-05-18,1,60d,0.000282,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2022-05-21,1,10YE,0.000488,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2022-05-24,3,21d,0.000259,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2022-05-27,9,21d,0.000323,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2022-05-30,6,7d,0.000289,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [20]:
best_hparams.tail(len(best_hparams)-1).describe()[display_columns]

Unnamed: 0,time_train,avg_recs,precision@5,precision@10,precision@100,ndcg@5,ndcg@10,ndcg@100,map@5,map@10,map@100,recall@5,recall@10,recall@100,r-precision@5,r-precision@10,r-precision@100
count,19.0,3.0,19.0,19.0,19.0,19.0,19.0,19.0,19.0,19.0,19.0,19.0,19.0,19.0,19.0,19.0,19.0
mean,0.000333,3.666667,0.063158,0.031579,0.003158,0.151957,0.151957,0.151957,0.144737,0.144737,0.144737,0.157895,0.157895,0.157895,0.114035,0.114035,0.114035
min,0.000259,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.000281,3.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.000299,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,0.000367,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,0.000494,4.0,0.6,0.3,0.03,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
std,7.8e-05,0.57735,0.164014,0.082007,0.008201,0.360798,0.360798,0.360798,0.344536,0.344536,0.344536,0.374634,0.374634,0.374634,0.283565,0.283565,0.283565


### Results of using best hparams in next fold

Kind of like the cvtt from [the LightGCN notebook](./11_microsoft_tuning.ipynb)

In [21]:
next_hparams_idx = best_hparams.index.to_frame(False)
next_hparams_idx['fold'] = next_hparams_idx['fold'].shift()
next_hparams_idx = next_hparams_idx.dropna()
assert len(next_hparams_idx) == len(best_hparams)-1
next_hparams = mdfu.set_index(['fold', 'k', 'window_size']).loc[pd.MultiIndex.from_frame(next_hparams_idx)]
paths.save_model_results(next_hparams, 'userknn-best-valid', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE, K_RECOMMENDATIONS)
next_hparams

Saved dataframe into /home/daviddavo/recsys4daos/data/output/Plaza/models/userknn-best-valid_3d_normalize.pq


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,precision@3,precision@5,precision@10,...,recall@3,recall@5,recall@10,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@100,time_eval
fold,k,window_size,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
2022-05-03,9,10YE,2022-05-03,0.000458,0.000734,2,,,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.056408
2022-05-06,3,90d,2022-05-06,0.00034,0.001288,5,4.0,4.0,1.0,0.333333,0.4,0.2,...,0.5,1.0,1.0,1.0,0.5,0.5,0.5,0.5,0.5,0.100909
2022-05-09,2,7d,2022-05-09,0.000291,0.001093,4,3.0,3.0,0.0,0.333333,0.2,0.1,...,1.0,1.0,1.0,1.0,0.0,,,,,0.09332
2022-05-12,15,30d,2022-05-12,0.000321,0.000588,1,,,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055762
2022-05-15,1,60d,2022-05-15,0.000495,0.000591,5,,,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055836
2022-05-18,1,10YE,2022-05-18,0.000407,0.000624,2,,,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.054936
2022-05-21,3,21d,2022-05-21,0.000414,0.000615,9,,,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055446
2022-05-24,9,21d,2022-05-24,0.000261,0.000509,4,,,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.054719
2022-05-27,6,7d,2022-05-27,0.000254,0.000483,4,,,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055412
2022-05-30,1,7d,2022-05-30,0.000259,0.000541,4,,,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.056


In [22]:
next_hparams.describe()

Unnamed: 0,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,precision@3,precision@5,precision@10,...,recall@3,recall@5,recall@10,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@100,time_eval
count,19,19.0,19.0,19.0,3.0,3.0,19.0,19.0,19.0,19.0,...,19.0,19.0,19.0,19.0,19.0,18.0,18.0,18.0,18.0,19.0
mean,2022-05-30 00:00:00,0.000359,0.000704,4.0,3.666667,3.666667,0.105263,0.070175,0.063158,0.031579,...,0.114035,0.157895,0.157895,0.157895,0.04386,0.064815,0.064815,0.064815,0.064815,0.062395
min,2022-05-03 00:00:00,0.000254,0.000483,0.0,3.0,3.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.054719
25%,2022-05-16 12:00:00,0.000303,0.000585,2.0,3.5,3.5,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055429
50%,2022-05-30 00:00:00,0.00034,0.000622,4.0,4.0,4.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055836
75%,2022-06-12 12:00:00,0.000411,0.000681,5.0,4.0,4.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.056503
max,2022-06-26 00:00:00,0.000495,0.001326,9.0,4.0,4.0,1.0,0.666667,0.6,0.3,...,1.0,1.0,1.0,1.0,0.5,0.666667,0.666667,0.666667,0.666667,0.100909
std,,7.9e-05,0.000248,2.357023,0.57735,0.57735,0.315302,0.178434,0.164014,0.082007,...,0.283565,0.374634,0.374634,0.374634,0.13428,0.190792,0.190792,0.190792,0.190792,0.015979
