In [1]:
import datetime as dt

import pandas as pd
import numpy as np
from lenskit.algorithms import item_knn, user_knn
from sklearn.model_selection import ParameterGrid

from recsys4daos.datasets import to_lenskit
from recsys4daos.model_selection import cvtt_open, explore_hparams
from recsys4daos.evaluation import test_with_hparams_lenskit

import paths

  from tqdm.autonotebook import tqdm


# Parameters

In [2]:
# Dataset config
ORG_NAME = 'Decentraland'
SPLITS_FREQ = 'W-THU'  # Split weekly
LAST_FOLDS = 20  # Use just last 10 splits
SPLITS_NORMALIZE = True

# Evaluation
K_RECOMMENDATIONS: list[int] = [1,3,5,10,15,100]
SPLITS_FREQ: str = 'W-THU' # split weekly
SPLITS_NORMALIZE = True # Wether or not to move everything to 00:00
LAST_FOLDS = 10 # Use just last 10 splits
LAST_FOLD_DATE_STR: str = None

# Search space config
WINDOW_SIZES = ['7d', '14d', '21d', '30d', '60d', '90d', '10YE']
ITEMKNN_Ks = [1,2,3,4,5,6,7,8,9,10,15]

OPTIM_METRIC = 'map@10'

In [3]:
# Parameters
EXECUTION_ID = "2024-09-04T10:00"
ORG_NAME = "MetaCartel - MetaCartel Ventures"
SPLITS_FREQ = "W-THU"
LAST_FOLDS = 10
SPLITS_NORMALIZE = True
LAST_FOLD_DATE_STR = "2022-01-06"


# Load the dataset

In [4]:
dfp = paths.load_proposals(ORG_NAME)
dfv = paths.load_votes(ORG_NAME)

print(dfp.info())
print(dfv.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1088 entries, 0 to 1087
Data columns (total 6 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   id                 1088 non-null   object        
 1   author             1088 non-null   object        
 2   date               1088 non-null   datetime64[us]
 3   start              1088 non-null   datetime64[us]
 4   end                807 non-null    datetime64[us]
 5   platform_proposal  1088 non-null   object        
dtypes: datetime64[us](3), object(3)
memory usage: 51.1+ KB
None
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3287 entries, 0 to 3286
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   id        3287 non-null   object        
 1   proposal  3287 non-null   object        
 2   voter     3287 non-null   object        
 3   date      3287 non-null   datetime64[us]
dtypes:

In [5]:
df = to_lenskit(dfv)
df

Unnamed: 0,user,item,timestamp,rating
0,0x865c2f85c9fea1c6ac7f53de07554d68cb92ed88,a3cd7b01-435c-50cd-8d8c-75102df1027c,2019-06-06 09:49:38,1
1,0x865c2f85c9fea1c6ac7f53de07554d68cb92ed88,83841f4c-18bf-57ef-bb9f-f0383b99fd32,2019-06-07 07:03:42,1
2,0x865c2f85c9fea1c6ac7f53de07554d68cb92ed88,5c99eeb2-9854-5d2d-ba20-1ff1307ea924,2019-06-07 07:04:48,1
3,0x865c2f85c9fea1c6ac7f53de07554d68cb92ed88,fc0c49e1-5e4a-528b-92e3-42b3fb560d16,2019-06-10 10:49:11,1
4,0x865c2f85c9fea1c6ac7f53de07554d68cb92ed88,7179b537-bb7e-5677-adcd-3f44fcc78805,2019-06-10 10:49:31,1
...,...,...,...,...
3282,0xe2a82cdccbfa6ebf9817b0c4aed45264bd41fbec,52983d9e-5fd1-5afd-8c2a-936b38d65f06,2023-06-23 16:28:23,1
3283,0x6dc43be93a8b5fd37dc16f24872babc6da5e5e3e,06f50c00-d777-55c6-917b-ff9c0cf7f3ce,2023-06-28 20:30:25,1
3284,0xbaf6e57a3940898fd21076b139d4ab231dcbbc5f,06f50c00-d777-55c6-917b-ff9c0cf7f3ce,2023-06-28 22:00:45,1
3285,0xe2a82cdccbfa6ebf9817b0c4aed45264bd41fbec,6d79288e-6319-565e-9316-3b9027f6ec8e,2023-07-07 17:52:59,1


## Split in folds

In [6]:
all_folds = { f.end:f for f in cvtt_open(
    df, SPLITS_FREQ, dfp.reset_index(), remove_not_in_train_col='item', col_item='item', last_fold=LAST_FOLD_DATE_STR,
)}
last_folds_idx = list(all_folds.keys())[-LAST_FOLDS:]
last_folds_idx

[Timestamp('2021-11-04 00:00:00'),
 Timestamp('2021-11-11 00:00:00'),
 Timestamp('2021-11-18 00:00:00'),
 Timestamp('2021-11-25 00:00:00'),
 Timestamp('2021-12-02 00:00:00'),
 Timestamp('2021-12-09 00:00:00'),
 Timestamp('2021-12-16 00:00:00'),
 Timestamp('2021-12-23 00:00:00'),
 Timestamp('2021-12-30 00:00:00'),
 Timestamp('2022-01-06 00:00:00')]

# Item-based KNN

In [7]:
def testHParamsItemKNN(fold, k: int, window_size=None):
    # Create algorithm
    algo = item_knn.ItemItem(
        feedback='implicit', # VERY IMPORTANT
        min_sim=0,
        # min_nbrs=0,
        nnbrs=k,  # the maximum number of neighbors for scoring each item (None for unlimited)
    )
    return test_with_hparams_lenskit(algo, all_folds[fold], K_RECOMMENDATIONS, window_size)

pd.Series(testHParamsItemKNN(last_folds_idx[-1],5,'14d'))

Numba is using threading layer omp - consider TBB


found 1 potential runtime problems - see https://boi.st/lkpy-perf


  b = blocks[bi]




fold_t             2022-01-06 00:00:00
time_train                    5.556716
time_rec                      0.001689
open_proposals                       7
min_recs                           NaN
avg_recs                           NaN
precision@1                        0.0
precision@3                        0.0
precision@5                        0.0
precision@10                       0.0
precision@15                       0.0
precision@100                      0.0
ndcg@1                             0.0
ndcg@3                             0.0
ndcg@5                             0.0
ndcg@10                            0.0
ndcg@15                            0.0
ndcg@100                           0.0
map@1                              0.0
map@3                              0.0
map@5                              0.0
map@10                             0.0
map@15                             0.0
map@100                            0.0
recall@1                           0.0
recall@3                 

## Exploring hparams

In [8]:
results = explore_hparams(
    testHParamsItemKNN, 
    ParameterGrid({
    'fold': last_folds_idx,
    'k': ITEMKNN_Ks, # ,20,25,30,35,40,45,50,60,70,80,90,100],
    'window_size': WINDOW_SIZES,
    }),
    paths.hparams_progress('itemknn', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE),
)
mdfi = pd.DataFrame(results)
mdfi

Restored checkpoint from ../.cache/MetaCartel - MetaCartel Ventures/hparams-itemknn_W-THU_normalize.pkl with 770 results


  0%|          | 0/770 [00:00<?, ?it/s]

Unnamed: 0,fold,k,window_size,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,...,recall@10,recall@15,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@15,r-precision@100,time_eval
0,2021-11-04,1,7d,2021-11-04,0.005911,0.143014,18,12.0,15.25,0.0,...,0.4,1.0,1.0,0.000000,0.266667,0.266667,0.266667,0.266667,0.266667,0.122811
1,2021-11-04,1,14d,2021-11-04,0.005941,0.005618,18,8.0,14.25,0.5,...,0.4,1.0,1.0,0.266667,0.266667,0.366667,0.366667,0.366667,0.366667,0.128361
2,2021-11-04,1,21d,2021-11-04,0.005557,0.005569,18,8.0,14.25,0.5,...,0.4,1.0,1.0,0.266667,0.266667,0.366667,0.366667,0.366667,0.366667,0.128029
3,2021-11-04,1,30d,2021-11-04,0.005695,0.005853,18,8.0,14.25,0.5,...,0.5,1.0,1.0,0.266667,0.266667,0.366667,0.366667,0.366667,0.366667,0.127931
4,2021-11-04,1,60d,2021-11-04,0.015146,0.006509,18,8.0,14.25,0.5,...,0.5,1.0,1.0,0.266667,0.266667,0.366667,0.366667,0.366667,0.366667,0.127583
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
765,2022-01-06,15,21d,2022-01-06,0.001532,0.000923,7,,,0.0,...,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.069880
766,2022-01-06,15,30d,2022-01-06,0.015064,0.002070,7,7.0,7.00,0.0,...,1.0,1.0,1.0,0.000000,,,,,,0.113355
767,2022-01-06,15,60d,2022-01-06,0.006074,0.002437,7,7.0,7.00,0.0,...,1.0,1.0,1.0,0.000000,,,,,,0.116733
768,2022-01-06,15,90d,2022-01-06,0.001753,0.002529,7,7.0,7.00,0.0,...,1.0,1.0,1.0,0.000000,,,,,,0.112714


### Best overall hparams

In [9]:
display_columns = ['time_train', 'avg_recs'] + [ c for c in mdfi.columns if c.endswith('@5') or c.endswith('@10') or c.endswith('@100') ]
overall_hparams = mdfi[mdfi['fold'] > last_folds_idx[0]].groupby(['window_size', 'k']).mean().sort_values(OPTIM_METRIC, ascending=False)
overall_hparams[display_columns]

Unnamed: 0_level_0,Unnamed: 1_level_0,time_train,avg_recs,precision@5,precision@10,precision@100,ndcg@5,ndcg@10,ndcg@100,map@5,map@10,map@100,recall@5,recall@10,recall@100,r-precision@5,r-precision@10,r-precision@100
window_size,k,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
60d,10,0.004796,18.950000,0.167778,0.102778,0.011389,0.427928,0.458101,0.476505,0.403488,0.422094,0.428867,0.646296,0.715278,0.777778,0.452381,0.527778,0.527778
90d,2,0.006177,18.978571,0.174444,0.102778,0.011389,0.459412,0.485861,0.504122,0.405247,0.421803,0.428520,0.655093,0.715278,0.777778,0.519444,0.519444,0.519444
10YE,3,0.034444,18.996429,0.163333,0.102778,0.011389,0.467952,0.505343,0.523412,0.397840,0.421649,0.428184,0.627315,0.715278,0.777778,0.519444,0.519444,0.519444
90d,1,0.002224,18.978571,0.170000,0.100556,0.011389,0.459227,0.485214,0.504300,0.406481,0.421345,0.427963,0.647685,0.707870,0.777778,0.541667,0.541667,0.541667
60d,2,0.002688,18.950000,0.173333,0.102778,0.011389,0.430035,0.456484,0.474745,0.401790,0.419140,0.425857,0.653241,0.715278,0.777778,0.513889,0.513889,0.513889
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7d,5,0.008257,18.972222,0.140741,0.070370,0.010370,0.156758,0.156758,0.171065,0.265741,0.265741,0.284350,0.537037,0.537037,0.666667,0.300000,0.300000,0.300000
7d,4,0.004323,18.972222,0.140741,0.070370,0.010370,0.156758,0.156758,0.171065,0.265741,0.265741,0.284350,0.537037,0.537037,0.666667,0.300000,0.300000,0.300000
7d,3,0.004558,18.972222,0.140741,0.070370,0.010370,0.156758,0.156758,0.171065,0.265741,0.265741,0.284350,0.537037,0.537037,0.666667,0.300000,0.300000,0.300000
7d,2,0.004317,18.972222,0.140741,0.070370,0.010370,0.156758,0.156758,0.171065,0.265741,0.265741,0.284350,0.537037,0.537037,0.666667,0.300000,0.300000,0.300000


Now let's see the behaviour in each fold

In [10]:
best_avg_hparams = mdfi.set_index(['window_size', 'k']).loc[overall_hparams.iloc[0].name].reset_index().set_index(['fold', 'window_size', 'k'])
paths.save_model_results(best_avg_hparams, 'itemknn-best-avg', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE, K_RECOMMENDATIONS)
best_avg_hparams

Saved dataframe into /home/daviddavo/recsys4daos/data/output/MetaCartel - MetaCartel Ventures/models/itemknn-best-avg_W-THU_normalize.parquet


  best_avg_hparams = mdfi.set_index(['window_size', 'k']).loc[overall_hparams.iloc[0].name].reset_index().set_index(['fold', 'window_size', 'k'])


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,precision@3,precision@5,precision@10,...,recall@10,recall@15,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@15,r-precision@100,time_eval
fold,window_size,k,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
2021-11-04,60d,10,2021-11-04,0.002182,0.006433,18,8.0,14.25,0.5,0.166667,0.15,0.2,...,0.9,1.0,1.0,0.266667,0.266667,0.266667,0.266667,0.266667,0.266667,0.127181
2021-11-11,60d,10,2021-11-11,0.005806,0.007196,52,20.0,42.25,0.0,0.0,0.0,0.125,...,0.4375,0.75,1.0,0.0,0.0,0.0,,,,0.106728
2021-11-18,60d,10,2021-11-18,0.005628,0.006576,40,38.0,39.5,0.75,0.416667,0.25,0.15,...,1.0,1.0,1.0,1.0,0.916667,0.916667,0.916667,0.916667,0.916667,0.127501
2021-11-25,60d,10,2021-11-25,0.015131,0.001548,9,,,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.067993
2021-12-02,60d,10,2021-12-02,0.001514,0.007248,8,6.0,7.4,0.4,0.333333,0.28,0.14,...,1.0,1.0,1.0,0.5,0.5,0.5,0.5,0.5,0.5,0.126054
2021-12-09,60d,10,2021-12-09,0.004316,0.004568,9,6.0,7.5,0.5,0.333333,0.2,0.1,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.121485
2021-12-16,60d,10,2021-12-16,0.001619,0.003588,16,14.0,15.0,0.0,0.0,0.3,0.15,...,1.0,1.0,1.0,0.0,0.0,,,,,0.109778
2021-12-23,60d,10,2021-12-23,0.001645,0.007199,16,10.0,14.0,0.8,0.466667,0.28,0.16,...,1.0,1.0,1.0,0.625,0.75,0.75,0.75,0.75,0.75,0.124262
2021-12-30,60d,10,2021-12-30,0.001624,0.00148,10,,,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.068758
2022-01-06,60d,10,2022-01-06,0.005885,0.002398,7,7.0,7.0,0.0,0.333333,0.2,0.1,...,1.0,1.0,1.0,0.0,,,,,,0.113529


### Best hparams by fold

This are the parameters used to check which model is the best (validation)

In [11]:
print("Best hyperparams by fold", OPTIM_METRIC)
best_hparams = mdfi.sort_values(OPTIM_METRIC,ascending=False).drop_duplicates(['fold'], keep='first').sort_values('fold').set_index(['fold', 'k', 'window_size'])
paths.save_model_results(best_hparams, 'itemknn-best-val', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE, K_RECOMMENDATIONS)
best_hparams[display_columns]

Best hyperparams by fold map@10
Saved dataframe into /home/daviddavo/recsys4daos/data/output/MetaCartel - MetaCartel Ventures/models/itemknn-best-val_W-THU_normalize.parquet


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,time_train,avg_recs,precision@5,precision@10,precision@100,ndcg@5,ndcg@10,ndcg@100,map@5,map@10,map@100,recall@5,recall@10,recall@100,r-precision@5,r-precision@10,r-precision@100
fold,k,window_size,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2021-11-04,5,60d,0.001139,14.25,0.2,0.225,0.025,0.190292,0.36349,0.378766,0.195,0.34506,0.365893,0.266667,0.95,1.0,0.366667,0.366667,0.366667
2021-11-11,6,7d,0.006505,35.0,0.2,0.1,0.02,0.173357,0.173357,0.255041,0.291667,0.291667,0.350962,0.5,0.5,1.0,0.5,0.5,0.5
2021-11-18,15,90d,0.001766,39.6,0.24,0.14,0.014,0.906144,0.939576,0.939576,0.877778,0.911111,0.911111,0.933333,1.0,1.0,0.933333,0.933333,0.933333
2021-11-25,15,90d,0.001664,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2021-12-02,2,21d,0.002901,7.25,0.3,0.15,0.015,0.344304,0.344304,0.344304,0.541667,0.541667,0.541667,1.0,1.0,1.0,0.5,0.5,0.5
2021-12-09,7,90d,0.015212,7.5,0.2,0.1,0.01,0.815465,0.815465,0.815465,0.75,0.75,0.75,1.0,1.0,1.0,1.0,1.0,1.0
2021-12-16,1,7d,0.014991,14.0,0.2,0.1,0.01,0.25,0.25,0.25,0.333333,0.333333,0.333333,1.0,1.0,1.0,,,
2021-12-23,3,7d,0.005087,14.333333,0.266667,0.133333,0.013333,0.6,0.6,0.6,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2021-12-30,15,90d,0.001653,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2022-01-06,10,90d,0.001971,7.0,0.2,0.1,0.01,0.63093,0.63093,0.63093,0.5,0.5,0.5,1.0,1.0,1.0,,,


In [12]:
best_hparams.tail(len(best_hparams)-1).describe()

Unnamed: 0,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,precision@3,precision@5,precision@10,...,recall@10,recall@15,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@15,r-precision@100,time_eval
count,9,9.0,9.0,9.0,7.0,7.0,9.0,9.0,9.0,9.0,...,9.0,9.0,9.0,9.0,7.0,7.0,7.0,7.0,7.0,9.0
mean,2021-12-09 00:00:00,0.00575,0.003802,18.555556,15.428571,17.811905,0.311111,0.279012,0.178519,0.091481,...,0.722222,0.777778,0.777778,0.37037,0.561905,0.561905,0.561905,0.561905,0.561905,0.109727
min,2021-11-11 00:00:00,0.001653,0.001638,7.0,6.0,7.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.068647
25%,2021-11-25 00:00:00,0.001766,0.001889,9.0,6.5,7.375,0.0,0.333333,0.2,0.1,...,0.5,1.0,1.0,0.0,0.25,0.25,0.25,0.25,0.25,0.113147
50%,2021-12-09 00:00:00,0.002901,0.003531,10.0,13.0,14.0,0.0,0.333333,0.2,0.1,...,1.0,1.0,1.0,0.0,0.5,0.5,0.5,0.5,0.5,0.121119
75%,2021-12-23 00:00:00,0.006505,0.005431,16.0,19.0,24.666667,0.5,0.333333,0.24,0.133333,...,1.0,1.0,1.0,0.833333,0.966667,0.966667,0.966667,0.966667,0.966667,0.124755
max,2022-01-06 00:00:00,0.015212,0.007915,52.0,38.0,39.6,1.0,0.444444,0.3,0.15,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.125768
std,,0.005564,0.002148,16.17182,11.830147,13.732985,0.39826,0.162963,0.107175,0.055405,...,0.440959,0.440959,0.440959,0.462314,0.439877,0.439877,0.439877,0.439877,0.439877,0.023571


### Results of using best hparams in next fold

Kind of like the cvtt from [the LightGCN notebook](./11_microsoft_tuning.ipynb).

In [13]:
next_hparams_idx = best_hparams.index.to_frame(False)
next_hparams_idx['fold'] = next_hparams_idx['fold'].shift(-1)
next_hparams_idx = next_hparams_idx.dropna()
assert len(next_hparams_idx) == len(best_hparams)-1
next_hparams = mdfi.set_index(['fold', 'k', 'window_size']).loc[pd.MultiIndex.from_frame(next_hparams_idx)]
paths.save_model_results(next_hparams, 'itemknn-best-test', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE, K_RECOMMENDATIONS)
next_hparams[display_columns]

Saved dataframe into /home/daviddavo/recsys4daos/data/output/MetaCartel - MetaCartel Ventures/models/itemknn-best-test_W-THU_normalize.parquet


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,time_train,avg_recs,precision@5,precision@10,precision@100,ndcg@5,ndcg@10,ndcg@100,map@5,map@10,map@100,recall@5,recall@10,recall@100,r-precision@5,r-precision@10,r-precision@100
fold,k,window_size,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2021-11-11,5,60d,0.002875,42.25,0.0,0.125,0.0225,0.0,0.201204,0.363171,0.0,0.103571,0.161012,0.0,0.4375,1.0,0.0,,
2021-11-18,6,7d,0.012567,38.0,0.2,0.1,0.03,0.036308,0.036308,0.083388,0.066667,0.066667,0.174854,0.333333,0.333333,1.0,,,
2021-11-25,15,90d,0.001664,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2021-12-02,15,90d,0.001889,7.5,0.3,0.15,0.015,0.509738,0.509738,0.509738,0.525,0.525,0.525,1.0,1.0,1.0,0.5,0.5,0.5
2021-12-09,2,21d,0.012618,7.5,0.1,0.1,0.01,0.193426,0.351159,0.351159,0.1,0.1625,0.1625,0.5,1.0,1.0,,,
2021-12-16,7,90d,0.002058,15.0,0.3,0.15,0.015,0.465971,0.465971,0.465971,0.2875,0.2875,0.2875,1.0,1.0,1.0,,,
2021-12-23,1,7d,0.005987,14.333333,0.266667,0.133333,0.013333,0.6,0.6,0.6,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2021-12-30,3,7d,0.005626,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2022-01-06,15,90d,0.001753,7.0,0.2,0.1,0.01,0.63093,0.63093,0.63093,0.5,0.5,0.5,1.0,1.0,1.0,,,


In [14]:
next_hparams.describe()

Unnamed: 0,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,precision@3,precision@5,precision@10,...,recall@10,recall@15,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@15,r-precision@100,time_eval
count,9,9.0,9.0,9.0,7.0,7.0,9.0,9.0,9.0,9.0,...,9.0,9.0,9.0,9.0,8.0,5.0,4.0,4.0,4.0,9.0
mean,2021-12-09 00:00:00,0.005226,0.003826,18.555556,14.857143,18.797619,0.166667,0.123457,0.151852,0.09537,...,0.641204,0.699074,0.777778,0.148148,0.1875,0.3,0.375,0.375,0.375,0.104436
min,2021-11-11 00:00:00,0.001664,0.000737,7.0,6.0,7.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.068863
25%,2021-11-25 00:00:00,0.001889,0.001744,9.0,6.5,7.5,0.0,0.0,0.0,0.1,...,0.333333,0.625,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.107023
50%,2021-12-09 00:00:00,0.002875,0.003486,10.0,13.0,14.333333,0.0,0.0,0.2,0.1,...,1.0,1.0,1.0,0.0,0.0,0.0,0.25,0.25,0.25,0.109972
75%,2021-12-23 00:00:00,0.005987,0.004103,16.0,17.0,26.5,0.0,0.333333,0.266667,0.133333,...,1.0,1.0,1.0,0.0,0.125,0.5,0.625,0.625,0.625,0.112714
max,2022-01-06 00:00:00,0.012618,0.008949,52.0,38.0,42.25,1.0,0.444444,0.3,0.15,...,1.0,1.0,1.0,0.833333,1.0,1.0,1.0,1.0,1.0,0.125764
std,,0.004482,0.002684,16.17182,11.437199,14.986403,0.353553,0.187942,0.129219,0.057601,...,0.447526,0.423782,0.440959,0.305556,0.372012,0.447214,0.478714,0.478714,0.478714,0.020977


# User-based KNN

In [15]:
def testHParamsUserKNN(fold, k: int, window_size=None):
    # Create algorithm
    algo = user_knn.UserUser(
        feedback='implicit', # VERY IMPORTANT
        min_sim=0,
        # min_nbrs=0,
        nnbrs=k,  # the maximum number of neighbors for scoring each item (None for unlimited)
    )
    return test_with_hparams_lenskit(algo, all_folds[fold], K_RECOMMENDATIONS, window_size)

pd.Series(testHParamsUserKNN(last_folds_idx[-1],5,'14d'))



fold_t             2022-01-06 00:00:00
time_train                    0.595576
time_rec                      0.001143
open_proposals                       7
min_recs                           NaN
avg_recs                           NaN
precision@1                        0.0
precision@3                        0.0
precision@5                        0.0
precision@10                       0.0
precision@15                       0.0
precision@100                      0.0
ndcg@1                             0.0
ndcg@3                             0.0
ndcg@5                             0.0
ndcg@10                            0.0
ndcg@15                            0.0
ndcg@100                           0.0
map@1                              0.0
map@3                              0.0
map@5                              0.0
map@10                             0.0
map@15                             0.0
map@100                            0.0
recall@1                           0.0
recall@3                 

## Exploring hparams

In [16]:
results = explore_hparams(
    testHParamsUserKNN, 
    ParameterGrid({
        'fold': last_folds_idx,
        'k': ITEMKNN_Ks, # ,20,25,30,35,40,45,50,60,70,80,90,100],
        'window_size': WINDOW_SIZES,
    }),
    paths.hparams_progress('userknn', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE),
)
mdfu = pd.DataFrame(results)
mdfu

Restored checkpoint from ../.cache/MetaCartel - MetaCartel Ventures/hparams-userknn_W-THU_normalize.pkl with 770 results


  0%|          | 0/770 [00:00<?, ?it/s]

Unnamed: 0,fold,k,window_size,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,...,recall@10,recall@15,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@15,r-precision@100,time_eval
0,2021-11-04,1,7d,2021-11-04,0.000390,0.730654,18,12.0,15.25,0.0,...,0.4,1.0,1.0,0.0,0.000000,0.200000,0.200000,0.200000,0.200000,0.117282
1,2021-11-04,1,14d,2021-11-04,0.000325,0.003604,18,8.0,14.25,0.0,...,0.4,0.5,1.0,0.0,0.333333,0.266667,0.266667,0.266667,0.266667,0.122500
2,2021-11-04,1,21d,2021-11-04,0.000372,0.003543,18,8.0,14.25,0.0,...,0.4,0.5,1.0,0.0,0.333333,0.266667,0.266667,0.266667,0.266667,0.121771
3,2021-11-04,1,30d,2021-11-04,0.000318,0.003836,18,8.0,14.25,0.0,...,0.4,0.5,1.0,0.0,0.266667,0.366667,0.366667,0.366667,0.366667,0.122752
4,2021-11-04,1,60d,2021-11-04,0.000376,0.004128,18,8.0,14.25,0.0,...,0.4,1.0,1.0,0.0,0.333333,0.266667,0.266667,0.266667,0.266667,0.121385
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
765,2022-01-06,15,21d,2022-01-06,0.000259,0.000710,7,,,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.066738
766,2022-01-06,15,30d,2022-01-06,0.000375,0.001439,7,7.0,7.00,0.0,...,1.0,1.0,1.0,0.0,,,,,,0.113645
767,2022-01-06,15,60d,2022-01-06,0.000364,0.001590,7,7.0,7.00,0.0,...,1.0,1.0,1.0,0.0,,,,,,0.111589
768,2022-01-06,15,90d,2022-01-06,0.000417,0.001778,7,7.0,7.00,0.0,...,1.0,1.0,1.0,0.0,,,,,,0.111447


### Best overall hparams

In [17]:
display_columns = ['time_train', 'avg_recs'] + [ c for c in mdfu.columns if c.endswith('@5') or c.endswith('@10') or c.endswith('@100') ]
overall_hparams = mdfu[mdfu['fold'] > last_folds_idx[0]].groupby(['window_size', 'k']).mean().sort_values(OPTIM_METRIC, ascending=False)
overall_hparams[display_columns]

Unnamed: 0_level_0,Unnamed: 1_level_0,time_train,avg_recs,precision@5,precision@10,precision@100,ndcg@5,ndcg@10,ndcg@100,map@5,map@10,map@100,recall@5,recall@10,recall@100,r-precision@5,r-precision@10,r-precision@100
window_size,k,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
60d,1,0.000388,18.950000,0.150000,0.094444,0.011389,0.409484,0.455053,0.479724,0.403333,0.429292,0.436810,0.574074,0.694444,0.777778,0.484375,0.553571,0.553571
10YE,4,0.001309,18.996429,0.146667,0.094444,0.011389,0.454096,0.500773,0.525074,0.398009,0.422513,0.429669,0.568519,0.694444,0.777778,0.481250,0.550000,0.550000
60d,3,0.000406,18.950000,0.144444,0.094444,0.011389,0.399543,0.447982,0.472283,0.393148,0.420981,0.428136,0.564815,0.694444,0.777778,0.505208,0.577381,0.577381
60d,4,0.000401,18.950000,0.144444,0.094444,0.011389,0.399543,0.447982,0.472283,0.393148,0.420981,0.428136,0.564815,0.694444,0.777778,0.505208,0.577381,0.577381
60d,2,0.000407,18.950000,0.144444,0.094444,0.011389,0.399543,0.447982,0.472283,0.393148,0.420981,0.428136,0.564815,0.694444,0.777778,0.505208,0.577381,0.577381
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7d,6,0.000289,18.972222,0.118519,0.070370,0.010370,0.129392,0.141136,0.155443,0.209259,0.221825,0.240435,0.481481,0.537037,0.666667,0.200000,0.250000,0.250000
7d,4,0.000293,18.972222,0.118519,0.070370,0.010370,0.129392,0.141136,0.155443,0.209259,0.221825,0.240435,0.481481,0.537037,0.666667,0.200000,0.250000,0.250000
7d,3,0.000283,18.972222,0.118519,0.070370,0.010370,0.129392,0.141136,0.155443,0.209259,0.221825,0.240435,0.481481,0.537037,0.666667,0.200000,0.250000,0.250000
7d,2,0.000278,18.972222,0.118519,0.070370,0.010370,0.129392,0.141136,0.155443,0.209259,0.221825,0.240435,0.481481,0.537037,0.666667,0.200000,0.250000,0.250000


In [18]:
best_avg_hparams = mdfu.set_index(['window_size', 'k']).loc[overall_hparams.iloc[0].name].reset_index().set_index(['fold', 'window_size', 'k'])
paths.save_model_results(best_avg_hparams, 'userknn-best-avg', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE, K_RECOMMENDATIONS)
best_avg_hparams

Saved dataframe into /home/daviddavo/recsys4daos/data/output/MetaCartel - MetaCartel Ventures/models/userknn-best-avg_W-THU_normalize.parquet


  best_avg_hparams = mdfu.set_index(['window_size', 'k']).loc[overall_hparams.iloc[0].name].reset_index().set_index(['fold', 'window_size', 'k'])


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,precision@3,precision@5,precision@10,...,recall@10,recall@15,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@15,r-precision@100,time_eval
fold,window_size,k,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
2021-11-04,60d,1,2021-11-04,0.000376,0.004128,18,8.0,14.25,0.0,0.083333,0.1,0.15,...,0.4,1.0,1.0,0.0,0.333333,0.266667,0.266667,0.266667,0.266667,0.121385
2021-11-11,60d,1,2021-11-11,0.000394,0.004015,52,20.0,42.25,0.0,0.0,0.0,0.05,...,0.25,0.5,1.0,0.0,0.0,0.0,,,,0.106184
2021-11-18,60d,1,2021-11-18,0.000434,0.003988,40,38.0,39.5,1.0,0.333333,0.25,0.15,...,1.0,1.0,1.0,0.833333,0.833333,0.833333,0.833333,0.833333,0.833333,0.126463
2021-11-25,60d,1,2021-11-25,0.000394,0.001248,9,,,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.069428
2021-12-02,60d,1,2021-12-02,0.000334,0.004291,8,6.0,7.4,0.6,0.266667,0.28,0.14,...,1.0,1.0,1.0,0.666667,0.666667,0.666667,0.666667,0.666667,0.666667,0.125857
2021-12-09,60d,1,2021-12-09,0.00041,0.002456,9,6.0,7.5,0.5,0.166667,0.2,0.1,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.121665
2021-12-16,60d,1,2021-12-16,0.000367,0.002485,16,14.0,15.0,0.5,0.166667,0.1,0.15,...,1.0,1.0,1.0,0.5,0.5,0.5,0.5,0.5,0.5,0.122962
2021-12-23,60d,1,2021-12-23,0.000418,0.004628,16,10.0,14.0,0.6,0.4,0.32,0.16,...,1.0,1.0,1.0,0.833333,0.875,0.875,0.875,0.875,0.875,0.123187
2021-12-30,60d,1,2021-12-30,0.000395,0.001202,10,,,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066515
2022-01-06,60d,1,2022-01-06,0.000346,0.001545,7,7.0,7.0,0.0,0.333333,0.2,0.1,...,1.0,1.0,1.0,0.0,,,,,,0.110635


### Best hparams by fold

In [19]:
print("Best hyperparams by fold", OPTIM_METRIC)
best_hparams = mdfu.sort_values(OPTIM_METRIC,ascending=False).drop_duplicates(['fold'], keep='first').sort_values('fold').set_index(['fold', 'k', 'window_size'])
paths.save_model_results(best_hparams, 'userknn-best-val', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE, K_RECOMMENDATIONS)
best_hparams[display_columns]

Best hyperparams by fold map@10
Saved dataframe into /home/daviddavo/recsys4daos/data/output/MetaCartel - MetaCartel Ventures/models/userknn-best-val_W-THU_normalize.parquet


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,time_train,avg_recs,precision@5,precision@10,precision@100,ndcg@5,ndcg@10,ndcg@100,map@5,map@10,map@100,recall@5,recall@10,recall@100,r-precision@5,r-precision@10,r-precision@100
fold,k,window_size,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2021-11-04,3,30d,0.00037,14.25,0.15,0.15,0.025,0.140161,0.204199,0.316364,0.141667,0.212798,0.28146,0.216667,0.4,1.0,0.266667,0.266667,0.266667
2021-11-11,1,7d,0.000288,35.0,0.2,0.1,0.02,0.14266,0.14266,0.224345,0.208333,0.208333,0.267628,0.5,0.5,1.0,,,
2021-11-18,1,90d,0.000494,39.6,0.24,0.14,0.014,0.934277,0.963885,0.963885,0.9,0.925,0.925,0.933333,1.0,1.0,0.866667,0.866667,0.866667
2021-11-25,15,10YE,0.001251,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2021-12-02,6,10YE,0.001237,7.625,0.3,0.15,0.015,0.757397,0.757397,0.757397,0.627083,0.627083,0.627083,1.0,1.0,1.0,0.6,0.6,0.6
2021-12-09,4,60d,0.000407,7.5,0.2,0.1,0.01,0.715338,0.715338,0.715338,0.625,0.625,0.625,1.0,1.0,1.0,1.0,1.0,1.0
2021-12-16,2,21d,0.00036,15.0,0.1,0.15,0.015,0.306574,0.58139,0.58139,0.25,0.395833,0.395833,0.25,1.0,1.0,0.5,0.5,0.5
2021-12-23,5,7d,0.000302,14.333333,0.266667,0.133333,0.013333,0.6,0.6,0.6,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2021-12-30,15,30d,0.000286,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2022-01-06,9,30d,0.000344,7.0,0.2,0.1,0.01,0.63093,0.63093,0.63093,0.5,0.5,0.5,1.0,1.0,1.0,,,


In [20]:
best_hparams.tail(len(best_hparams)-1).describe()[display_columns]

Unnamed: 0,time_train,avg_recs,precision@5,precision@10,precision@100,ndcg@5,ndcg@10,ndcg@100,map@5,map@10,map@100,recall@5,recall@10,recall@100,r-precision@5,r-precision@10,r-precision@100
count,9.0,7.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,7.0,7.0,7.0
mean,0.000552,18.008333,0.167407,0.097037,0.010815,0.454131,0.487956,0.497032,0.456713,0.475694,0.482283,0.631481,0.722222,0.777778,0.566667,0.566667,0.566667
min,0.000286,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.000302,7.5625,0.1,0.1,0.01,0.14266,0.14266,0.224345,0.208333,0.208333,0.267628,0.25,0.5,1.0,0.25,0.25,0.25
50%,0.00036,14.333333,0.2,0.1,0.013333,0.6,0.6,0.6,0.5,0.5,0.5,0.933333,1.0,1.0,0.6,0.6,0.6
75%,0.000494,25.0,0.24,0.14,0.015,0.715338,0.715338,0.715338,0.627083,0.627083,0.627083,1.0,1.0,1.0,0.933333,0.933333,0.933333
max,0.001251,39.6,0.3,0.15,0.02,0.934277,0.963885,0.963885,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
std,0.000398,13.643647,0.10982,0.058771,0.006811,0.349022,0.35142,0.342324,0.366142,0.362802,0.357845,0.446471,0.440959,0.440959,0.430762,0.430762,0.430762


### Results of using best hparams in next fold

Kind of like the cvtt from [the LightGCN notebook](./11_microsoft_tuning.ipynb)

In [21]:
next_hparams_idx = best_hparams.index.to_frame(False)
next_hparams_idx['fold'] = next_hparams_idx['fold'].shift(-1)
next_hparams_idx = next_hparams_idx.dropna()
assert len(next_hparams_idx) == len(best_hparams)-1
next_hparams = mdfu.set_index(['fold', 'k', 'window_size']).loc[pd.MultiIndex.from_frame(next_hparams_idx)]
paths.save_model_results(next_hparams, 'userknn-best-test', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE, K_RECOMMENDATIONS)
next_hparams

Saved dataframe into /home/daviddavo/recsys4daos/data/output/MetaCartel - MetaCartel Ventures/models/userknn-best-test_W-THU_normalize.parquet


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,precision@3,precision@5,precision@10,...,recall@10,recall@15,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@15,r-precision@100,time_eval
fold,k,window_size,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
2021-11-11,3,30d,2021-11-11,0.000377,0.002302,52,20.0,32.5,0.0,0.0,0.0,0.1,...,0.5,0.5,1.0,0.0,0.0,0.0,,,,0.105319
2021-11-18,1,7d,2021-11-18,0.000278,0.001167,40,38.0,38.0,0.0,0.0,0.2,0.1,...,0.333333,0.666667,1.0,0.0,0.0,,,,,0.111607
2021-11-25,1,90d,2021-11-25,0.000389,0.001246,9,,,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.068466
2021-12-02,15,10YE,2021-12-02,0.001232,0.007964,8,6.0,7.625,0.625,0.333333,0.3,0.15,...,1.0,1.0,1.0,0.6,0.6,0.6,0.6,0.6,0.6,0.124278
2021-12-09,6,10YE,2021-12-09,0.001308,0.003826,9,6.0,7.5,0.5,0.166667,0.2,0.1,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.121154
2021-12-16,4,60d,2021-12-16,0.000431,0.002426,16,14.0,15.0,0.5,0.166667,0.1,0.15,...,1.0,1.0,1.0,0.5,0.5,0.5,0.5,0.5,0.5,0.123652
2021-12-23,2,21d,2021-12-23,0.000301,0.004283,16,13.0,14.6,0.6,0.533333,0.32,0.16,...,1.0,1.0,1.0,0.833333,0.8,0.8,0.8,0.8,0.8,0.122873
2021-12-30,5,7d,2021-12-30,0.000292,0.000633,10,,,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.067395
2022-01-06,15,30d,2022-01-06,0.000375,0.001439,7,7.0,7.0,0.0,0.333333,0.2,0.1,...,1.0,1.0,1.0,0.0,,,,,,0.113645


In [22]:
next_hparams.describe()

Unnamed: 0,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,precision@3,precision@5,precision@10,...,recall@10,recall@15,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@15,r-precision@100,time_eval
count,9,9.0,9.0,9.0,7.0,7.0,9.0,9.0,9.0,9.0,...,9.0,9.0,9.0,9.0,8.0,7.0,6.0,6.0,6.0,9.0
mean,2021-12-09 00:00:00,0.000554,0.00281,18.555556,14.857143,17.460714,0.247222,0.17037,0.146667,0.095556,...,0.648148,0.685185,0.777778,0.325926,0.3625,0.414286,0.483333,0.483333,0.483333,0.106488
min,2021-11-11 00:00:00,0.000278,0.000633,7.0,6.0,7.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.067395
25%,2021-11-25 00:00:00,0.000301,0.001246,9.0,6.5,7.5625,0.0,0.0,0.0,0.1,...,0.333333,0.5,1.0,0.0,0.0,0.0,0.125,0.125,0.125,0.105319
50%,2021-12-09 00:00:00,0.000377,0.002302,10.0,13.0,14.6,0.0,0.166667,0.2,0.1,...,1.0,1.0,1.0,0.0,0.25,0.5,0.55,0.55,0.55,0.113645
75%,2021-12-23 00:00:00,0.000431,0.003826,16.0,17.0,23.75,0.5,0.333333,0.2,0.15,...,1.0,1.0,1.0,0.6,0.65,0.7,0.75,0.75,0.75,0.122873
max,2022-01-06 00:00:00,0.001308,0.007964,52.0,38.0,38.0,0.625,0.533333,0.32,0.16,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.124278
std,,0.00041,0.00229,16.17182,11.437199,12.699507,0.295921,0.193968,0.126886,0.059605,...,0.444444,0.428535,0.440959,0.410548,0.413824,0.418045,0.411906,0.411906,0.411906,0.022769
