In [1]:
import datetime as dt

import pandas as pd
import numpy as np
from lenskit.algorithms import item_knn, user_knn
from sklearn.model_selection import ParameterGrid

from recsys4daos.datasets import to_lenskit
from recsys4daos.model_selection import cvtt_open, explore_hparams
from recsys4daos.evaluation import test_with_hparams_lenskit

import paths

  from tqdm.autonotebook import tqdm


# Parameters

In [2]:
# Dataset config
ORG_NAME = 'Decentraland'

# Evaluation
K_RECOMMENDATIONS: list[int] = [1,3,5,10,15,100]  # cutoffs at which every metric is computed
SPLITS_FREQ: str = 'W-THU'  # split weekly
SPLITS_NORMALIZE = True  # Whether or not to move everything to 00:00
LAST_FOLDS = 10  # Use just last 10 splits
# NOTE(review): forwarded as `last_fold=` when building the folds; None presumably
# means "no explicit last fold" -- confirm against cvtt_open.
LAST_FOLD_DATE_STR: str = None

# Search space config
WINDOW_SIZES = ['7d', '14d', '21d', '30d', '60d', '90d', '10YE']
ITEMKNN_Ks = [1,2,3,4,5,6,7,8,9,10,15]  # neighbourhood sizes explored (reused for the user-based model)

# Metric used to rank hyperparameter combinations
OPTIM_METRIC = 'map@10'

In [3]:
# Parameters
# Run-specific overrides: this cell executes after the defaults cell, so the
# values assigned here (ORG_NAME, SPLITS_FREQ, LAST_FOLDS, SPLITS_NORMALIZE,
# LAST_FOLD_DATE_STR) are the ones used by the rest of the notebook.
# NOTE(review): presumably injected by the notebook parameterization tool -- confirm
# before editing by hand.
# NOTE(review): EXECUTION_ID, MAX_BATCH_SIZE and GPUS are not referenced in the
# visible part of this notebook.
EXECUTION_ID = "2024-09-04T10:00"
MAX_BATCH_SIZE = 9
GPUS = 32
ORG_NAME = "DEAD FoundationsDAO"
SPLITS_FREQ = "2d"
LAST_FOLDS = 20
SPLITS_NORMALIZE = True
LAST_FOLD_DATE_STR = "2021-11-28"


# Load the dataset

In [4]:
# Load the proposals and the votes of the selected organization.
dfp = paths.load_proposals(ORG_NAME)
dfv = paths.load_votes(ORG_NAME)

# DataFrame.info() writes its summary to stdout itself and returns None, so it
# must not be wrapped in print() (that appended a stray "None" after each summary).
dfp.info()
dfv.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29681 entries, 0 to 29680
Data columns (total 6 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   id                 29681 non-null  object        
 1   author             29681 non-null  object        
 2   date               29681 non-null  datetime64[us]
 3   start              29681 non-null  datetime64[us]
 4   end                5799 non-null   datetime64[us]
 5   platform_proposal  29681 non-null  object        
dtypes: datetime64[us](3), object(3)
memory usage: 1.4+ MB
None
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17738 entries, 0 to 17737
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   id        17738 non-null  object        
 1   proposal  17738 non-null  object        
 2   voter     17738 non-null  object        
 3   date      17738 non-null  datetime64[us]
dtyp

In [5]:
# Convert the votes table into the interaction layout used below
# (columns user, item, timestamp, rating -- see the displayed frame).
df = to_lenskit(dfv)
df

Unnamed: 0,user,item,timestamp,rating
0,0x15c6ac4cf1b5e49c44332fb0a1043ccab19db80a,9b0951e8-192b-5b68-8e1a-1b392e5b704f,2020-11-25 14:31:40,1
1,0x15c6ac4cf1b5e49c44332fb0a1043ccab19db80a,9903573c-d495-5c45-8209-c16ac4527667,2020-11-25 14:34:40,1
2,0xa3564677fc4907a15c9a7eae1dbc1ae9ac57b8e1,83846b14-c310-5c84-a229-f8f88747a417,2020-12-04 01:16:40,1
3,0x15c6ac4cf1b5e49c44332fb0a1043ccab19db80a,83846b14-c310-5c84-a229-f8f88747a417,2020-12-04 01:41:40,1
4,0xa3564677fc4907a15c9a7eae1dbc1ae9ac57b8e1,e5b05bac-ebea-5d63-b09c-6721cb6c3b65,2020-12-05 00:14:50,1
...,...,...,...,...
17733,0x96e852000ec69fd7f1b5a552cb58a9d24f76e2c6,0d403a5d-a7ff-50ae-b3e9-7ab8020241eb,2022-06-28 08:18:00,1
17734,0x96e852000ec69fd7f1b5a552cb58a9d24f76e2c6,992cb72b-046d-55d8-81d0-f1e3f664c86a,2022-06-28 08:18:15,1
17735,0x96e852000ec69fd7f1b5a552cb58a9d24f76e2c6,6c40609b-866f-588e-947f-ae832a9e9af2,2022-06-28 08:18:30,1
17736,0x96e852000ec69fd7f1b5a552cb58a9d24f76e2c6,f4a42b8c-ee15-551f-8e65-86b17a237863,2022-06-28 08:19:45,1


## Split in folds

In [6]:
# Build every fold and key it by the fold's `end` attribute, so that a fold
# can later be looked up by the timestamps listed in `last_folds_idx`.
all_folds = dict(
    (fold_data.end, fold_data)
    for fold_data in cvtt_open(
        df,
        SPLITS_FREQ,
        dfp.reset_index(),
        remove_not_in_train_col='item',
        col_item='item',
        last_fold=LAST_FOLD_DATE_STR,
    )
)

# Only the last LAST_FOLDS folds (in insertion order) are used for the search.
last_folds_idx = list(all_folds)[-LAST_FOLDS:]
last_folds_idx

[Timestamp('2021-10-21 00:00:00'),
 Timestamp('2021-10-23 00:00:00'),
 Timestamp('2021-10-25 00:00:00'),
 Timestamp('2021-10-27 00:00:00'),
 Timestamp('2021-10-29 00:00:00'),
 Timestamp('2021-10-31 00:00:00'),
 Timestamp('2021-11-02 00:00:00'),
 Timestamp('2021-11-04 00:00:00'),
 Timestamp('2021-11-06 00:00:00'),
 Timestamp('2021-11-08 00:00:00'),
 Timestamp('2021-11-10 00:00:00'),
 Timestamp('2021-11-12 00:00:00'),
 Timestamp('2021-11-14 00:00:00'),
 Timestamp('2021-11-16 00:00:00'),
 Timestamp('2021-11-18 00:00:00'),
 Timestamp('2021-11-20 00:00:00'),
 Timestamp('2021-11-22 00:00:00'),
 Timestamp('2021-11-24 00:00:00'),
 Timestamp('2021-11-26 00:00:00'),
 Timestamp('2021-11-28 00:00:00')]

# Item-based KNN

In [7]:
def testHParamsItemKNN(fold, k: int, window_size=None):
    """Evaluate an item-based KNN recommender on a single fold.

    Args:
        fold: Key into ``all_folds`` (one of the entries of ``last_folds_idx``).
        k: Passed as ``nnbrs``, i.e. the maximum number of neighbours used
            when scoring (the library accepts None for unlimited).
        window_size: Forwarded unchanged to ``test_with_hparams_lenskit``.

    Returns:
        The result of ``test_with_hparams_lenskit`` for this configuration,
        evaluated at the cutoffs in ``K_RECOMMENDATIONS``.
    """
    # The model must run in implicit-feedback mode (flagged as very important
    # by the original author); min_nbrs is deliberately left at its default.
    knn_model = item_knn.ItemItem(
        nnbrs=k,
        min_sim=0,
        feedback='implicit',
    )
    fold_data = all_folds[fold]
    return test_with_hparams_lenskit(knn_model, fold_data, K_RECOMMENDATIONS, window_size)

# Smoke test: a single configuration on the most recent fold.
pd.Series(testHParamsItemKNN(last_folds_idx[-1], k=5, window_size='14d'))

Numba is using threading layer omp - consider TBB


found 1 potential runtime problems - see https://boi.st/lkpy-perf


  b = blocks[bi]


fold_t             2021-11-28 00:00:00
time_train                    7.556104
time_rec                      0.159692
open_proposals                      27
min_recs                            25
avg_recs                          26.6
precision@1                        0.0
precision@3                   0.066667
precision@5                       0.08
precision@10                      0.08
precision@15                  0.066667
precision@100                    0.014
ndcg@1                             0.0
ndcg@3                        0.011732
ndcg@5                        0.021837
ndcg@10                       0.057458
ndcg@15                       0.069958
ndcg@100                      0.086357
map@1                              0.0
map@3                         0.022222
map@5                         0.055556
map@10                        0.122222
map@15                        0.135556
map@100                        0.15417
recall@1                           0.0
recall@3                 

## Exploring hparams

In [8]:
# Evaluate ItemKNN on every (fold, k, window_size) combination. The helper is
# also given a progress path; the log below shows results being restored from it.
results = explore_hparams(
    testHParamsItemKNN,
    ParameterGrid(dict(
        fold=last_folds_idx,
        k=ITEMKNN_Ks,
        window_size=WINDOW_SIZES,
    )),
    paths.hparams_progress('itemknn', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE),
)

# One row per evaluated combination.
mdfi = pd.DataFrame(results)
mdfi

Restored checkpoint from ../.cache/DEAD FoundationsDAO/hparams-itemknn_2d_normalize.pkl with 1540 results


  0%|          | 0/1540 [00:00<?, ?it/s]

Unnamed: 0,fold,k,window_size,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,...,recall@10,recall@15,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@15,r-precision@100,time_eval
0,2021-10-21,1,7d,2021-10-21,0.005595,0.001176,1,,,0.0,...,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.070121
1,2021-10-21,1,14d,2021-10-21,0.001035,0.000741,1,,,0.0,...,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.068694
2,2021-10-21,1,21d,2021-10-21,0.005806,0.000827,1,,,0.0,...,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.069667
3,2021-10-21,1,30d,2021-10-21,0.005762,0.000809,1,,,0.0,...,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.068955
4,2021-10-21,1,60d,2021-10-21,0.011479,0.000864,1,,,0.0,...,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.068558
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1535,2021-11-28,15,21d,2021-11-28,3.050268,0.027494,27,25.0,26.6,0.0,...,0.533333,0.733333,1.0,0.0,0.333333,0.333333,0.333333,0.333333,0.333333,0.122075
1536,2021-11-28,15,30d,2021-11-28,4.549100,0.044403,27,25.0,26.6,0.0,...,0.533333,0.733333,1.0,0.0,0.333333,0.333333,0.333333,0.333333,0.333333,0.126053
1537,2021-11-28,15,60d,2021-11-28,4.773909,0.045297,27,25.0,26.6,0.0,...,0.533333,0.733333,1.0,0.0,0.333333,0.333333,0.333333,0.333333,0.333333,0.121795
1538,2021-11-28,15,90d,2021-11-28,4.776065,0.121643,27,25.0,26.6,0.0,...,0.533333,0.733333,1.0,0.0,0.333333,0.333333,0.333333,0.333333,0.333333,0.121782


### Best overall hparams

In [9]:
# Columns shown in the summary tables: training time, average number of
# recommendations, and every metric at cutoffs 5, 10 and 100.
display_columns = ['time_train', 'avg_recs'] + [
    col for col in mdfi.columns if col.endswith(('@5', '@10', '@100'))
]

# Average each (window_size, k) combination over all folds after the first of
# the selected ones, and rank the combinations by OPTIM_METRIC (best first).
overall_hparams = (
    mdfi.loc[mdfi['fold'] > last_folds_idx[0]]
    .groupby(['window_size', 'k'])
    .mean()
    .sort_values(OPTIM_METRIC, ascending=False)
)
overall_hparams[display_columns]

Unnamed: 0_level_0,Unnamed: 1_level_0,time_train,avg_recs,precision@5,precision@10,precision@100,ndcg@5,ndcg@10,ndcg@100,map@5,map@10,map@100,recall@5,recall@10,recall@100,r-precision@5,r-precision@10,r-precision@100
window_size,k,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
14d,2,1.622585,293.089979,0.027456,0.018941,0.016891,0.009768,0.012123,0.032991,0.030573,0.035769,0.051856,0.056165,0.101029,0.465888,0.104776,0.119744,0.242480
30d,2,2.421020,293.201144,0.027456,0.018868,0.016798,0.009768,0.012123,0.033359,0.030573,0.035695,0.051673,0.056165,0.100293,0.461581,0.104776,0.119744,0.242480
60d,2,2.385778,293.201144,0.027456,0.018868,0.016798,0.009768,0.012123,0.033359,0.030573,0.035695,0.051673,0.056165,0.100293,0.461581,0.104776,0.119744,0.242480
21d,2,2.041104,293.201144,0.027456,0.018868,0.016798,0.009768,0.012123,0.033359,0.030573,0.035695,0.051673,0.056165,0.100293,0.461581,0.104776,0.119744,0.242480
90d,2,2.376876,293.201144,0.027456,0.018868,0.016798,0.009768,0.012123,0.033359,0.030573,0.035695,0.051673,0.056165,0.100293,0.461581,0.104776,0.119744,0.242480
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7d,6,0.748019,292.817193,0.016228,0.015388,0.017734,0.005296,0.008743,0.028704,0.024725,0.029029,0.047631,0.048977,0.076487,0.461158,0.083333,0.095935,0.204472
7d,5,0.747621,292.817193,0.016228,0.015388,0.017734,0.005296,0.008743,0.028701,0.024725,0.029029,0.047624,0.048977,0.076487,0.461158,0.083333,0.095935,0.204472
7d,4,0.746603,292.817193,0.016228,0.015388,0.017725,0.005296,0.008743,0.028652,0.024725,0.029029,0.047593,0.048977,0.076487,0.460322,0.083333,0.095935,0.204472
7d,3,0.747829,292.817193,0.016228,0.015388,0.017024,0.005296,0.008607,0.028373,0.024725,0.028885,0.047221,0.048977,0.076487,0.459980,0.083333,0.095935,0.202033


Now let's see the behaviour in each fold

In [10]:
# Per-fold results of the (window_size, k) combination with the best average
# OPTIM_METRIC, i.e. the first row of `overall_hparams`.
best_window_size, best_k = overall_hparams.index[0]

# Select the rows with a boolean mask instead of a tuple lookup on an unsorted
# (window_size, k) MultiIndex: same rows in the same order, without the warning
# that lookup triggered and without the set_index/reset_index round trip.
best_avg_mask = (mdfi['window_size'] == best_window_size) & (mdfi['k'] == best_k)
best_avg_hparams = mdfi[best_avg_mask].set_index(['fold', 'window_size', 'k'])

paths.save_model_results(best_avg_hparams, 'itemknn-best-avg', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE, K_RECOMMENDATIONS)
best_avg_hparams

Saved dataframe into /home/daviddavo/recsys4daos/data/output/DEAD FoundationsDAO/models/itemknn-best-avg_2d_normalize.parquet


  best_avg_hparams = mdfi.set_index(['window_size', 'k']).loc[overall_hparams.iloc[0].name].reset_index().set_index(['fold', 'window_size', 'k'])


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,precision@3,precision@5,precision@10,...,recall@10,recall@15,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@15,r-precision@100,time_eval
fold,window_size,k,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
2021-10-21,14d,2,2021-10-21,0.005735,0.000718,1,,,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.068879
2021-10-23,14d,2,2021-10-23,0.000883,0.000676,0,,,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06795
2021-10-25,14d,2,2021-10-25,0.005569,0.000748,3,,,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.068702
2021-10-27,14d,2,2021-10-27,0.393104,0.015777,728,448.0,586.333333,0.0,0.111111,0.133333,0.066667,...,0.003252,0.003252,0.091057,0.0,0.004878,0.009756,0.009756,0.009756,0.273171,0.150657
2021-10-29,14d,2,2021-10-29,0.723749,0.017131,1118,1112.0,1114.857143,0.0,0.0,0.0,0.0,...,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,,0.17154
2021-10-31,14d,2,2021-10-31,1.223198,0.09156,550,509.0,547.380952,0.015873,0.005291,0.003175,0.004762,...,0.035714,0.059524,0.250722,0.25,0.25,0.25,0.25,0.25,0.25,0.537703
2021-11-02,14d,2,2021-11-02,1.513915,0.026896,204,194.0,199.0,0.0,0.0,0.033333,0.016667,...,0.055556,0.055556,0.343254,0.0,0.0,,,,,0.134899
2021-11-04,14d,2,2021-11-04,1.584422,0.031356,76,71.0,74.6,0.0,0.0,0.0,0.02,...,0.133333,0.233333,1.0,0.0,0.0,0.0,,,,0.116707
2021-11-06,14d,2,2021-11-06,1.654837,0.028364,32,25.0,29.142857,0.142857,0.142857,0.085714,0.057143,...,0.261905,0.452381,1.0,0.5,0.416667,0.416667,0.416667,0.416667,0.416667,0.136812
2021-11-08,14d,2,2021-11-08,1.790687,0.025214,115,110.0,112.666667,0.0,0.0,0.0,0.0,...,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,,0.100038


### Best hparams by fold

These are the parameters used to check which model is the best (validation)

In [11]:
print("Best hyperparams by fold", OPTIM_METRIC)

# For every fold keep the single row with the highest OPTIM_METRIC: rank all
# rows by the metric, keep the first occurrence of each fold, then restore
# chronological order.
best_hparams = (
    mdfi
    .sort_values(by=OPTIM_METRIC, ascending=False)
    .drop_duplicates(subset=['fold'], keep='first')
    .sort_values(by='fold')
    .set_index(['fold', 'k', 'window_size'])
)
paths.save_model_results(best_hparams, 'itemknn-best-val', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE, K_RECOMMENDATIONS)
best_hparams[display_columns]

Best hyperparams by fold map@10
Saved dataframe into /home/daviddavo/recsys4daos/data/output/DEAD FoundationsDAO/models/itemknn-best-val_2d_normalize.parquet


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,time_train,avg_recs,precision@5,precision@10,precision@100,ndcg@5,ndcg@10,ndcg@100,map@5,map@10,map@100,recall@5,recall@10,recall@100,r-precision@5,r-precision@10,r-precision@100
fold,k,window_size,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2021-10-21,1,7d,0.005595,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2021-10-23,15,10YE,0.002328,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2021-10-25,10,7d,0.001326,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2021-10-27,1,10YE,0.432727,586.333333,0.133333,0.066667,0.183333,0.06313,0.040967,0.098951,0.055556,0.027778,0.085992,0.003252,0.003252,0.089431,0.009756,0.009756,0.268293
2021-10-29,5,60d,0.72189,1114.857143,0.0,0.0,0.001429,0.0,0.0,0.003756,0.0,0.0,0.005495,0.0,0.0,0.142857,0.0,0.0,
2021-10-31,9,7d,1.24117,547.380952,0.003175,0.006349,0.00619,0.001705,0.005296,0.019949,0.003968,0.008995,0.015922,0.003968,0.043651,0.265152,0.25,0.25,0.25
2021-11-02,8,14d,1.481462,199.0,0.033333,0.016667,0.018333,0.003743,0.003743,0.01429,0.013889,0.013889,0.028347,0.055556,0.055556,0.367063,,,
2021-11-04,1,60d,1.58136,74.6,0.0,0.03,0.014,0.0,0.018118,0.054683,0.0,0.027262,0.050946,0.0,0.233333,1.0,0.0,,
2021-11-06,1,10YE,1.535865,29.142857,0.057143,0.085714,0.015714,0.041632,0.074527,0.111089,0.119048,0.176587,0.215458,0.119048,0.452381,1.0,0.416667,0.416667,0.416667
2021-11-08,5,7d,0.05054,112.666667,0.0,0.0,0.003333,0.0,0.0,0.018519,0.0,0.0,0.005291,0.0,0.0,0.333333,0.0,0.0,


In [12]:
# Summary statistics over every fold except the first one.
best_hparams.iloc[1:].describe()

Unnamed: 0,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,precision@3,precision@5,precision@10,...,recall@10,recall@15,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@15,r-precision@100,time_eval
count,19,19.0,19.0,19.0,17.0,17.0,19.0,19.0,19.0,19.0,...,19.0,19.0,19.0,19.0,17.0,16.0,14.0,12.0,8.0,19.0
mean,2021-11-10 00:00:00,2.080533,0.030557,271.789474,277.352941,293.099334,0.015873,0.02878,0.025952,0.021103,...,0.117214,0.160154,0.464318,0.035088,0.098326,0.104776,0.119744,0.139702,0.24187,0.143785
min,2021-10-23 00:00:00,0.001326,0.000684,0.0,25.0,26.6,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.068059
25%,2021-11-01 00:00:00,0.924806,0.018896,73.5,92.0,92.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.116144,0.0,0.0,0.0,0.0,0.0,0.0,0.109618
50%,2021-11-10 00:00:00,1.58136,0.028192,204.0,214.0,217.117647,0.0,0.0,0.0,0.006349,...,0.003252,0.059524,0.367063,0.0,0.0,0.0,0.0,0.0,0.259146,0.127815
75%,2021-11-19 00:00:00,3.23502,0.041359,376.5,373.0,376.625,0.0,0.07037,0.050794,0.031667,...,0.227778,0.236111,1.0,0.0,0.004878,0.069817,0.189939,0.270833,0.354167,0.147014
max,2021-11-28 00:00:00,4.820882,0.082268,1118.0,1112.0,1114.857143,0.285714,0.111111,0.133333,0.085714,...,0.533333,0.733333,1.0,0.416667,0.666667,0.666667,0.666667,0.666667,0.666667,0.517652
std,,1.631675,0.019159,285.130103,263.334849,273.343544,0.065446,0.04405,0.039308,0.029015,...,0.170048,0.22319,0.398796,0.108709,0.198011,0.202668,0.213221,0.225139,0.237739,0.094694


### Results of using best hparams in next fold

Kind of like the cvtt from [the LightGCN notebook](./11_microsoft_tuning.ipynb).

In [13]:
# Pair each fold's best (k, window_size) with the *following* fold: the fold
# column is shifted up by one row, and the last row (which has no following
# fold) is dropped.
next_hparams_idx = (
    best_hparams.index
    .to_frame(index=False)
    .assign(fold=lambda idx_frame: idx_frame['fold'].shift(-1))
    .dropna()
)
assert len(next_hparams_idx) == len(best_hparams)-1

# Look up the results those hyperparameters obtained on the following fold.
next_hparams = (
    mdfi
    .set_index(['fold', 'k', 'window_size'])
    .loc[pd.MultiIndex.from_frame(next_hparams_idx)]
)
paths.save_model_results(next_hparams, 'itemknn-best-test', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE, K_RECOMMENDATIONS)
next_hparams[display_columns]

Saved dataframe into /home/daviddavo/recsys4daos/data/output/DEAD FoundationsDAO/models/itemknn-best-test_2d_normalize.parquet


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,time_train,avg_recs,precision@5,precision@10,precision@100,ndcg@5,ndcg@10,ndcg@100,map@5,map@10,map@100,recall@5,recall@10,recall@100,r-precision@5,r-precision@10,r-precision@100
fold,k,window_size,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2021-10-23,1,7d,0.00089,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2021-10-25,15,10YE,0.015036,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2021-10-27,10,7d,0.379489,586.333333,0.0,0.066667,0.206667,0.0,0.028559,0.109817,0.0,0.013095,0.109925,0.0,0.003252,0.100813,0.0,0.009756,0.302439
2021-10-29,1,10YE,0.678786,1114.857143,0.0,0.0,0.001429,0.0,0.0,0.003799,0.0,0.0,0.005714,0.0,0.0,0.142857,0.0,0.0,
2021-10-31,5,60d,1.204925,547.380952,0.003175,0.006349,0.00619,0.001705,0.005296,0.019949,0.003968,0.008995,0.015922,0.003968,0.043651,0.265152,0.25,0.25,0.25
2021-11-02,9,7d,1.543672,199.0,0.033333,0.016667,0.018333,0.003743,0.003743,0.014288,0.013889,0.013889,0.028328,0.055556,0.055556,0.367063,,,
2021-11-04,8,14d,1.574131,74.6,0.0,0.03,0.014,0.0,0.015473,0.053359,0.0,0.021905,0.045267,0.0,0.183333,1.0,0.0,,
2021-11-06,1,60d,1.62962,29.142857,0.057143,0.085714,0.015714,0.041632,0.074527,0.111089,0.119048,0.176587,0.215458,0.119048,0.452381,1.0,0.416667,0.416667,0.416667
2021-11-08,1,10YE,1.669652,112.666667,0.0,0.0,0.003333,0.0,0.0,0.018252,0.0,0.0,0.004975,0.0,0.0,0.333333,0.0,0.0,
2021-11-10,5,7d,0.03477,92.0,0.0,0.0,0.01,0.0,0.0,0.009084,0.0,0.0,0.011364,0.0,0.0,1.0,0.0,0.0,


In [14]:
# Summary statistics of the results obtained with the previous fold's best hparams.
next_hparams.describe()

Unnamed: 0,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,precision@3,precision@5,precision@10,...,recall@10,recall@15,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@15,r-precision@100,time_eval
count,19,19.0,19.0,19.0,17.0,17.0,19.0,19.0,19.0,19.0,...,19.0,19.0,19.0,19.0,17.0,16.0,14.0,12.0,8.0,19.0
mean,2021-11-10 00:00:00,2.074137,0.030382,271.789474,277.352941,293.194224,0.015873,0.022932,0.018935,0.020981,...,0.113368,0.153933,0.464107,0.035088,0.098039,0.104167,0.119744,0.140921,0.246138,0.143891
min,2021-10-23 00:00:00,0.00089,0.000649,0.0,25.0,26.6,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.068809
25%,2021-11-01 00:00:00,0.941855,0.019958,73.5,92.0,92.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.121835,0.0,0.0,0.0,0.0,0.0,0.0,0.107183
50%,2021-11-10 00:00:00,1.62962,0.027367,204.0,214.0,217.0,0.0,0.0,0.0,0.006349,...,0.003252,0.059524,0.367063,0.0,0.0,0.0,0.0,0.0,0.27622,0.122614
75%,2021-11-19 00:00:00,3.115014,0.042582,376.5,373.0,376.625,0.0,0.035979,0.038889,0.031667,...,0.202778,0.227778,1.0,0.0,0.0,0.0625,0.189939,0.270833,0.354167,0.144602
max,2021-11-28 00:00:00,4.866562,0.087348,1118.0,1112.0,1114.857143,0.285714,0.111111,0.08,0.085714,...,0.533333,0.733333,1.0,0.416667,0.666667,0.666667,0.666667,0.666667,0.666667,0.534647
std,,1.60813,0.020085,285.130103,263.334849,273.375356,0.065446,0.039671,0.029832,0.029068,...,0.168725,0.220237,0.398365,0.108709,0.198158,0.202987,0.213221,0.224409,0.238586,0.098753


# User-based KNN

In [15]:
def testHParamsUserKNN(fold, k: int, window_size=None):
    """Evaluate a user-based KNN recommender on a single fold.

    Args:
        fold: Key into ``all_folds`` (one of the entries of ``last_folds_idx``).
        k: Passed as ``nnbrs``, i.e. the maximum number of neighbours used
            when scoring (the library accepts None for unlimited).
        window_size: Forwarded unchanged to ``test_with_hparams_lenskit``.

    Returns:
        The result of ``test_with_hparams_lenskit`` for this configuration,
        evaluated at the cutoffs in ``K_RECOMMENDATIONS``.
    """
    # The model must run in implicit-feedback mode (flagged as very important
    # by the original author); min_nbrs is deliberately left at its default.
    knn_model = user_knn.UserUser(
        nnbrs=k,
        min_sim=0,
        feedback='implicit',
    )
    fold_data = all_folds[fold]
    return test_with_hparams_lenskit(knn_model, fold_data, K_RECOMMENDATIONS, window_size)

# Smoke test: a single configuration on the most recent fold.
pd.Series(testHParamsUserKNN(last_folds_idx[-1], k=5, window_size='14d'))

fold_t             2021-11-28 00:00:00
time_train                    0.595143
time_rec                      0.854028
open_proposals                      27
min_recs                            25
avg_recs                          26.6
precision@1                        0.2
precision@3                   0.133333
precision@5                       0.16
precision@10                      0.08
precision@15                  0.066667
precision@100                    0.014
ndcg@1                            0.05
ndcg@3                        0.038268
ndcg@5                        0.081336
ndcg@10                       0.081336
ndcg@15                       0.093836
ndcg@100                      0.110235
map@1                              0.2
map@3                         0.133333
map@5                         0.233333
map@10                        0.233333
map@15                        0.246667
map@100                       0.265281
recall@1                      0.066667
recall@3                 

## Exploring hparams

In [16]:
# Evaluate UserKNN on every (fold, k, window_size) combination; the same
# neighbourhood sizes as for the item-based model (ITEMKNN_Ks) are explored.
# The helper is also given a progress path; the log below shows results being
# restored from it.
results = explore_hparams(
    testHParamsUserKNN,
    ParameterGrid(dict(
        fold=last_folds_idx,
        k=ITEMKNN_Ks,
        window_size=WINDOW_SIZES,
    )),
    paths.hparams_progress('userknn', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE),
)

# One row per evaluated combination.
mdfu = pd.DataFrame(results)
mdfu

Restored checkpoint from ../.cache/DEAD FoundationsDAO/hparams-userknn_2d_normalize.pkl with 1540 results


  0%|          | 0/1540 [00:00<?, ?it/s]

Unnamed: 0,fold,k,window_size,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,...,recall@10,recall@15,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@15,r-precision@100,time_eval
0,2021-10-21,1,7d,2021-10-21,0.000370,0.000700,1,,,0.0,...,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.070435
1,2021-10-21,1,14d,2021-10-21,0.000275,0.000555,1,,,0.0,...,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.066239
2,2021-10-21,1,21d,2021-10-21,0.000327,0.000684,1,,,0.0,...,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.068696
3,2021-10-21,1,30d,2021-10-21,0.000256,0.000604,1,,,0.0,...,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.067667
4,2021-10-21,1,60d,2021-10-21,0.000330,0.000781,1,,,0.0,...,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.066966
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1535,2021-11-28,15,21d,2021-11-28,0.004069,0.025322,27,25.0,26.6,0.2,...,0.533333,0.733333,1.0,0.333333,0.666667,0.666667,0.666667,0.666667,0.666667,0.128299
1536,2021-11-28,15,30d,2021-11-28,0.007285,0.040669,27,25.0,26.6,0.2,...,0.533333,0.733333,1.0,0.333333,0.666667,0.666667,0.666667,0.666667,0.666667,0.128477
1537,2021-11-28,15,60d,2021-11-28,0.009249,0.043554,27,25.0,26.6,0.2,...,0.533333,0.733333,1.0,0.333333,0.666667,0.666667,0.666667,0.666667,0.666667,0.128666
1538,2021-11-28,15,90d,2021-11-28,0.009326,0.043442,27,25.0,26.6,0.2,...,0.533333,0.733333,1.0,0.333333,0.666667,0.666667,0.666667,0.666667,0.666667,0.128325


### Best overall hparams

In [17]:
# Columns shown in the summary tables: training time, average number of
# recommendations, and every metric at cutoffs 5, 10 and 100.
display_columns = ['time_train', 'avg_recs'] + [
    col for col in mdfu.columns if col.endswith(('@5', '@10', '@100'))
]

# Average each (window_size, k) combination over all folds after the first of
# the selected ones, and rank the combinations by OPTIM_METRIC (best first).
overall_hparams = (
    mdfu.loc[mdfu['fold'] > last_folds_idx[0]]
    .groupby(['window_size', 'k'])
    .mean()
    .sort_values(OPTIM_METRIC, ascending=False)
)
overall_hparams[display_columns]

Unnamed: 0_level_0,Unnamed: 1_level_0,time_train,avg_recs,precision@5,precision@10,precision@100,ndcg@5,ndcg@10,ndcg@100,map@5,map@10,map@100,recall@5,recall@10,recall@100,r-precision@5,r-precision@10,r-precision@100
window_size,k,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
7d,2,0.001740,292.817193,0.040698,0.033394,0.018094,0.013532,0.019586,0.035061,0.097462,0.105365,0.118429,0.123344,0.160716,0.500259,0.249348,0.284221,0.411077
7d,3,0.001745,292.817193,0.040698,0.025499,0.017919,0.013877,0.016338,0.035032,0.099974,0.102960,0.120413,0.123344,0.161751,0.500173,0.249385,0.280558,0.397248
7d,4,0.001726,292.817193,0.040698,0.025499,0.017919,0.013469,0.015934,0.034650,0.097154,0.100173,0.117643,0.123344,0.161751,0.500173,0.205231,0.232595,0.342452
7d,5,0.001748,292.817193,0.038943,0.025499,0.017919,0.013211,0.015799,0.034533,0.095707,0.099068,0.116595,0.121882,0.161751,0.500173,0.204005,0.232595,0.342452
7d,10,0.001744,292.817193,0.038943,0.025499,0.017919,0.012985,0.015573,0.034308,0.094184,0.097545,0.115072,0.121882,0.161751,0.500173,0.204005,0.232595,0.342452
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14d,1,0.002974,293.089979,0.038496,0.024668,0.013638,0.016217,0.020013,0.039199,0.064962,0.070411,0.084302,0.138022,0.178645,0.490394,0.204167,0.218750,0.322835
90d,1,0.005250,293.201144,0.038496,0.024668,0.013623,0.016217,0.020013,0.039383,0.064962,0.070411,0.084368,0.138022,0.178645,0.491428,0.204167,0.218750,0.322835
21d,1,0.004015,293.201144,0.038496,0.024668,0.013623,0.016217,0.020013,0.039383,0.064962,0.070411,0.084368,0.138022,0.178645,0.491428,0.204167,0.218750,0.322835
30d,1,0.005102,293.201144,0.038496,0.024668,0.013623,0.016217,0.020013,0.039383,0.064962,0.070411,0.084368,0.138022,0.178645,0.491428,0.204167,0.218750,0.322835


In [18]:
# Per-fold results of the (window_size, k) combination with the best average
# OPTIM_METRIC, i.e. the first row of `overall_hparams`.
best_window_size, best_k = overall_hparams.index[0]

# Select the rows with a boolean mask instead of a tuple lookup on an unsorted
# (window_size, k) MultiIndex: same rows in the same order, without the warning
# that lookup triggered and without the set_index/reset_index round trip.
best_avg_mask = (mdfu['window_size'] == best_window_size) & (mdfu['k'] == best_k)
best_avg_hparams = mdfu[best_avg_mask].set_index(['fold', 'window_size', 'k'])

paths.save_model_results(best_avg_hparams, 'userknn-best-avg', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE, K_RECOMMENDATIONS)
best_avg_hparams

Saved dataframe into /home/daviddavo/recsys4daos/data/output/DEAD FoundationsDAO/models/userknn-best-avg_2d_normalize.parquet


  best_avg_hparams = mdfu.set_index(['window_size', 'k']).loc[overall_hparams.iloc[0].name].reset_index().set_index(['fold', 'window_size', 'k'])


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,precision@3,precision@5,precision@10,...,recall@10,recall@15,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@15,r-precision@100,time_eval
fold,window_size,k,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
2021-10-21,7d,2,2021-10-21,0.000252,0.000531,1,,,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066835
2021-10-23,7d,2,2021-10-23,0.000284,0.000599,0,,,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.065858
2021-10-25,7d,2,2021-10-25,0.000249,0.000501,3,,,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.065837
2021-10-27,7d,2,2021-10-27,0.000798,0.005757,728,448.0,586.333333,0.0,0.0,0.133333,0.233333,...,0.011382,0.01626,0.097561,0.0,0.0,0.009756,0.034146,0.04878,0.292683,0.136829
2021-10-29,7d,2,2021-10-29,0.002112,0.01221,1118,1112.0,1114.857143,0.0,0.0,0.0,0.0,...,0.0,0.0,0.285714,0.0,0.0,0.0,0.0,0.0,,0.17059
2021-10-31,7d,2,2021-10-31,0.003396,0.065004,550,509.0,547.380952,0.0,0.0,0.006349,0.004762,...,0.027778,0.037037,0.244067,0.0,0.0,,,,,0.49528
2021-11-02,7d,2,2021-11-02,0.00434,0.021231,204,194.0,199.0,0.0,0.055556,0.066667,0.033333,...,0.048611,0.069444,0.608135,0.0,0.125,0.145833,0.145833,0.145833,0.145833,0.134286
2021-11-04,7d,2,2021-11-04,0.003543,0.025016,76,71.0,74.6,0.1,0.1,0.08,0.05,...,0.266667,0.266667,1.0,0.5,0.416667,0.416667,0.416667,0.416667,0.416667,0.13634
2021-11-06,7d,2,2021-11-06,0.002519,0.01935,32,25.0,29.142857,0.0,0.0,0.028571,0.042857,...,0.285714,0.428571,1.0,0.0,0.0,,,,,0.114381
2021-11-08,7d,2,2021-11-08,0.001265,0.010282,115,110.0,112.666667,0.0,0.0,0.0,0.033333,...,0.333333,0.333333,1.0,0.0,0.0,0.0,,,,0.108011


### Best hparams by fold

In [19]:
print("Best hyperparams by fold", OPTIM_METRIC)

# For every fold keep the single row with the highest OPTIM_METRIC: rank all
# rows by the metric, keep the first occurrence of each fold, then restore
# chronological order.
best_hparams = (
    mdfu
    .sort_values(by=OPTIM_METRIC, ascending=False)
    .drop_duplicates(subset=['fold'], keep='first')
    .sort_values(by='fold')
    .set_index(['fold', 'k', 'window_size'])
)
paths.save_model_results(best_hparams, 'userknn-best-val', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE, K_RECOMMENDATIONS)
best_hparams[display_columns]

Best hyperparams by fold map@10
Saved dataframe into /home/daviddavo/recsys4daos/data/output/DEAD FoundationsDAO/models/userknn-best-val_2d_normalize.parquet


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,time_train,avg_recs,precision@5,precision@10,precision@100,ndcg@5,ndcg@10,ndcg@100,map@5,map@10,map@100,recall@5,recall@10,recall@100,r-precision@5,r-precision@10,r-precision@100
fold,k,window_size,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2021-10-21,7,10YE,0.000449,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2021-10-23,9,30d,0.000317,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2021-10-25,9,30d,0.000256,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2021-10-27,2,30d,0.000752,586.333333,0.133333,0.233333,0.2,0.055455,0.1062,0.11481,0.043333,0.12377,0.119751,0.003252,0.011382,0.097561,0.009756,0.034146,0.292683
2021-10-29,3,14d,0.002033,1114.857143,0.0,0.0,0.002857,0.0,0.0,0.00788,0.0,0.0,0.013039,0.0,0.0,0.285714,0.0,0.0,
2021-10-31,9,30d,0.003422,547.380952,0.006349,0.004762,0.004444,0.002858,0.004313,0.016946,0.005952,0.00822,0.013879,0.011905,0.027778,0.244067,0.25,0.25,0.25
2021-11-02,4,90d,0.004399,199.0,0.066667,0.05,0.028333,0.005135,0.005872,0.023292,0.015,0.016937,0.051963,0.048611,0.076389,0.608135,0.145833,0.145833,0.145833
2021-11-04,1,7d,0.003521,74.6,0.1,0.05,0.014,0.048782,0.048782,0.079947,0.15,0.15,0.16579,0.266667,0.266667,1.0,0.416667,0.416667,0.416667
2021-11-06,1,14d,0.004917,29.142857,0.057143,0.071429,0.015714,0.029387,0.056624,0.093815,0.065476,0.104592,0.148705,0.214286,0.47619,1.0,,,
2021-11-08,1,21d,0.00516,112.666667,0.0,0.033333,0.01,0.0,0.033448,0.071891,0.0,0.037037,0.049672,0.0,0.333333,1.0,0.0,,


In [20]:
# Summary statistics of the displayed columns over every fold except the first
# (row 0 of best_hparams is left out of the aggregate).
best_hparams.iloc[1:].describe()[display_columns]

Unnamed: 0,time_train,avg_recs,precision@5,precision@10,precision@100,ndcg@5,ndcg@10,ndcg@100,map@5,map@10,map@100,recall@5,recall@10,recall@100,r-precision@5,r-precision@10,r-precision@100
count,19.0,17.0,19.0,19.0,19.0,19.0,19.0,19.0,19.0,19.0,19.0,19.0,19.0,19.0,18.0,16.0,13.0
mean,0.003222,293.144248,0.050359,0.039985,0.018281,0.020258,0.026647,0.042044,0.119659,0.128982,0.140283,0.162003,0.200274,0.49783,0.295681,0.334165,0.431168
min,0.000256,26.6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.00184,92.0,0.0,0.0,0.002198,0.0,0.0,0.006836,0.0,0.0,0.008202,0.0,0.0,0.125704,0.0,0.0,0.0
50%,0.003218,217.0625,0.006349,0.033333,0.00875,0.002858,0.005872,0.023292,0.005952,0.016937,0.049672,0.003252,0.038462,0.444444,0.004878,0.08999,0.292683
75%,0.004658,376.625,0.083333,0.05625,0.014,0.042895,0.050094,0.075919,0.177431,0.183681,0.193082,0.3,0.333333,1.0,0.604167,0.708333,0.833333
max,0.008689,1114.857143,0.2,0.233333,0.2,0.081336,0.1062,0.11481,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
std,0.00223,273.39092,0.063172,0.056711,0.044639,0.026729,0.032551,0.040481,0.237321,0.235301,0.233071,0.262326,0.26899,0.406822,0.40748,0.417195,0.414845


### Results of using best hparams in next fold

Each fold is evaluated with the hyperparameters that scored best on the previous fold, similar to the cvtt approach used in [the LightGCN notebook](./11_microsoft_tuning.ipynb).

In [21]:
# Pair each fold's winning (k, window_size) with the *following* fold:
# shifting the 'fold' column up by one row makes row i carry fold i+1, and the
# last row (which has no following fold) ends up with a missing value and is dropped.
next_hparams_idx = (
    best_hparams.index
    .to_frame(index=False)
    .assign(fold=lambda idx_frame: idx_frame['fold'].shift(-1))
    .dropna()
)
# Exactly one row (the last fold) must have been removed.
assert len(next_hparams_idx) == len(best_hparams) - 1

# Look up, for each (next fold, k, window_size) triple, the metrics already computed in mdfu.
next_hparams = (
    mdfu
    .set_index(['fold', 'k', 'window_size'])
    .loc[pd.MultiIndex.from_frame(next_hparams_idx)]
)

# Save the results under the 'userknn-best-test' name.
paths.save_model_results(
    next_hparams,
    'userknn-best-test',
    ORG_NAME,
    SPLITS_FREQ,
    SPLITS_NORMALIZE,
    K_RECOMMENDATIONS,
)

next_hparams

Saved dataframe into /home/daviddavo/recsys4daos/data/output/DEAD FoundationsDAO/models/userknn-best-test_2d_normalize.parquet


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,precision@3,precision@5,precision@10,...,recall@10,recall@15,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@15,r-precision@100,time_eval
fold,k,window_size,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
2021-10-23,7,10YE,2021-10-23,0.000391,0.000951,0,,,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066375
2021-10-25,9,30d,2021-10-25,0.000256,0.000592,3,,,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.064856
2021-10-27,9,30d,2021-10-27,0.00077,0.005888,728,448.0,586.333333,0.0,0.0,0.133333,0.066667,...,0.003252,0.00813,0.095935,0.0,0.0,0.009756,0.009756,0.02439,0.287805,0.140314
2021-10-29,2,30d,2021-10-29,0.002011,0.012214,1118,1112.0,1114.857143,0.0,0.0,0.0,0.0,...,0.0,0.0,0.285714,0.0,0.0,0.0,0.0,0.0,,0.169801
2021-10-31,3,14d,2021-10-31,0.003466,0.065175,550,509.0,547.380952,0.0,0.005291,0.006349,0.004762,...,0.027778,0.037037,0.244067,0.0,0.25,0.25,0.25,0.25,0.25,0.507268
2021-11-02,9,30d,2021-11-02,0.004326,0.021152,204,194.0,199.0,0.0,0.0,0.033333,0.05,...,0.076389,0.125,0.608135,0.0,0.0,0.125,0.145833,0.145833,0.145833,0.127055
2021-11-04,4,90d,2021-11-04,0.004829,0.025957,76,71.0,74.6,0.1,0.1,0.08,0.05,...,0.266667,0.266667,1.0,0.5,0.416667,0.416667,0.416667,0.416667,0.416667,0.136489
2021-11-06,1,7d,2021-11-06,0.00247,0.019124,32,25.0,29.142857,0.0,0.047619,0.057143,0.042857,...,0.285714,0.428571,1.0,0.0,,,,,,0.121848
2021-11-08,1,14d,2021-11-08,0.005128,0.0222,115,110.0,112.666667,0.0,0.0,0.0,0.033333,...,0.333333,0.333333,1.0,0.0,0.0,0.0,,,,0.108116
2021-11-10,1,21d,2021-11-10,0.005219,0.022854,92,92.0,92.0,0.0,0.111111,0.133333,0.066667,...,0.666667,0.666667,1.0,0.0,,,,,,0.120949


In [22]:
# Summary statistics (count, mean, quartiles, std, ...) across all rows of next_hparams.
next_hparams.describe()

Unnamed: 0,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,precision@3,precision@5,precision@10,...,recall@10,recall@15,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@15,r-precision@100,time_eval
count,19,19.0,19.0,19.0,17.0,17.0,19.0,19.0,19.0,19.0,...,19.0,19.0,19.0,19.0,15.0,15.0,13.0,12.0,9.0,19.0
mean,2021-11-10 00:00:00,0.00319,0.021055,271.789474,277.352941,293.057928,0.005263,0.032171,0.036938,0.023716,...,0.144062,0.186212,0.499216,0.026316,0.088889,0.097873,0.114533,0.125296,0.19633,0.141114
min,2021-10-23 00:00:00,0.000256,0.000592,0.0,25.0,26.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.064856
25%,2021-11-01 00:00:00,0.001644,0.011762,73.5,92.0,92.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.138877,0.0,0.0,0.0,0.0,0.0,0.0,0.109699
50%,2021-11-10 00:00:00,0.002505,0.019541,204.0,214.0,217.117647,0.0,0.0,0.0,0.007143,...,0.027778,0.125,0.444444,0.0,0.0,0.0,0.0,0.0,0.145833,0.125104
75%,2021-11-19 00:00:00,0.004577,0.024405,376.5,373.0,376.625,0.0,0.07381,0.066667,0.046429,...,0.309524,0.333333,1.0,0.0,0.0,0.067378,0.145833,0.171875,0.287805,0.139631
max,2021-11-28 00:00:00,0.009141,0.065175,1118.0,1112.0,1114.857143,0.1,0.125,0.133333,0.066667,...,0.666667,0.666667,1.0,0.5,0.666667,0.666667,0.666667,0.666667,0.666667,0.507268
std,,0.002366,0.016068,285.130103,263.334849,273.401961,0.022942,0.050163,0.049979,0.026197,...,0.195038,0.210631,0.405646,0.114708,0.200363,0.198672,0.210434,0.216171,0.233919,0.092793
