In [1]:
import datetime as dt

import pandas as pd
import numpy as np
from lenskit.algorithms import item_knn, user_knn
from sklearn.model_selection import ParameterGrid

from recsys4daos.datasets import to_lenskit
from recsys4daos.model_selection import cvtt_open, explore_hparams
from recsys4daos.evaluation import test_with_hparams_lenskit

import paths

  from tqdm.autonotebook import tqdm


# Parameters

In [2]:
# Dataset config
ORG_NAME = 'Decentraland'
SPLITS_FREQ = 'W-THU'  # Split weekly
LAST_FOLDS = 20  # Use just last 10 splits
SPLITS_NORMALIZE = True

# Evaluation
K_RECOMMENDATIONS: list[int] = [1,3,5,10,15,100]
SPLITS_FREQ: str = 'W-THU' # split weekly
SPLITS_NORMALIZE = True # Wether or not to move everything to 00:00
LAST_FOLDS = 10 # Use just last 10 splits
LAST_FOLD_DATE_STR: str = None

# Search space config
WINDOW_SIZES = ['7d', '14d', '21d', '30d', '60d', '90d', '10YE']
ITEMKNN_Ks = [1,2,3,4,5,6,7,8,9,10,15]

OPTIM_METRIC = 'map@10'

In [3]:
# Parameters
EXECUTION_ID = "2024-09-04T10:00"
MAX_BATCH_SIZE = 9
ORG_NAME = "PancakeSwap"
SPLITS_FREQ = "3d"
LAST_FOLDS = 10
SPLITS_NORMALIZE = True
LAST_FOLD_DATE_STR = "2023-06-27"


# Load the dataset

In [4]:
dfp = paths.load_proposals(ORG_NAME)
dfv = paths.load_votes(ORG_NAME)

print(dfp.info())
print(dfv.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3119 entries, 0 to 3118
Data columns (total 6 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   id                 3119 non-null   object        
 1   author             3119 non-null   object        
 2   date               3119 non-null   datetime64[us]
 3   start              3119 non-null   datetime64[us]
 4   end                3119 non-null   datetime64[us]
 5   platform_proposal  3119 non-null   object        
dtypes: datetime64[us](3), object(3)
memory usage: 146.3+ KB
None
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 532830 entries, 0 to 532829
Data columns (total 4 columns):
 #   Column    Non-Null Count   Dtype         
---  ------    --------------   -----         
 0   id        532830 non-null  object        
 1   proposal  532830 non-null  object        
 2   voter     532830 non-null  object        
 3   date      532830 non-null  datetime64[

In [5]:
df = to_lenskit(dfv)
df

Unnamed: 0,user,item,timestamp,rating
0,0x8b017905dc96b38f817473dc885f84d4c76bc113,c3d4fe9b-5193-5b39-a85d-60cdf62f8ebe,2021-06-28 12:14:46,1
1,0x9e882c739493886ab2865803432d3140b6473bfc,c3d4fe9b-5193-5b39-a85d-60cdf62f8ebe,2021-06-28 12:41:42,1
2,0x1e5c8c04e56f88c09ad775e43a164de264d6dad7,c3d4fe9b-5193-5b39-a85d-60cdf62f8ebe,2021-06-28 12:42:56,1
3,0x977e0c1005dff8749f8cac22f4df0bd5f013d1a7,c3d4fe9b-5193-5b39-a85d-60cdf62f8ebe,2021-06-28 12:54:31,1
4,0x1ce49ed469b2c0edf50b574cea9a196bbe23655b,c3d4fe9b-5193-5b39-a85d-60cdf62f8ebe,2021-06-28 16:02:34,1
...,...,...,...,...
532825,0xd554664fb7e5c4ad17a5f32343843f30eb4c093d,718fe10e-25cf-5c87-b966-99e1ca411fb1,2023-07-20 03:06:58,1
532826,0x71c9137eb29ce52e7bdab9ee5cac25208f910665,718fe10e-25cf-5c87-b966-99e1ca411fb1,2023-07-20 03:16:03,1
532827,0x843146d71d57af0e731f85db1f094643776c8e91,718fe10e-25cf-5c87-b966-99e1ca411fb1,2023-07-20 03:33:17,1
532828,0x8dcad3b779415f519c21bb42c9a6743e8cef1c5d,718fe10e-25cf-5c87-b966-99e1ca411fb1,2023-07-20 03:47:32,1


## Split in folds

In [6]:
all_folds = { f.end:f for f in cvtt_open(
    df, SPLITS_FREQ, dfp.reset_index(), remove_not_in_train_col='item', col_item='item', last_fold=LAST_FOLD_DATE_STR,
)}
last_folds_idx = list(all_folds.keys())[-LAST_FOLDS:]
last_folds_idx

[Timestamp('2023-05-31 00:00:00'),
 Timestamp('2023-06-03 00:00:00'),
 Timestamp('2023-06-06 00:00:00'),
 Timestamp('2023-06-09 00:00:00'),
 Timestamp('2023-06-12 00:00:00'),
 Timestamp('2023-06-15 00:00:00'),
 Timestamp('2023-06-18 00:00:00'),
 Timestamp('2023-06-21 00:00:00'),
 Timestamp('2023-06-24 00:00:00'),
 Timestamp('2023-06-27 00:00:00')]

# Item-based KNN

In [7]:
def testHParamsItemKNN(fold, k: int, window_size=None):
    # Create algorithm
    algo = item_knn.ItemItem(
        feedback='implicit', # VERY IMPORTANT
        min_sim=0,
        # min_nbrs=0,
        nnbrs=k,  # the maximum number of neighbors for scoring each item (None for unlimited)
    )
    return test_with_hparams_lenskit(algo, all_folds[fold], K_RECOMMENDATIONS, window_size)

pd.Series(testHParamsItemKNN(last_folds_idx[-1],5,'14d'))

Numba is using threading layer omp - consider TBB


found 1 potential runtime problems - see https://boi.st/lkpy-perf


  b = blocks[bi]


fold_t             2023-06-27 00:00:00
time_train                    5.559299
time_rec                      0.230423
open_proposals                      57
min_recs                            41
avg_recs                     55.295082
precision@1                   0.065574
precision@3                   0.081967
precision@5                   0.078689
precision@10                  0.065574
precision@15                  0.069945
precision@100                 0.021475
ndcg@1                        0.017167
ndcg@3                        0.027233
ndcg@5                        0.034142
ndcg@10                       0.047928
ndcg@15                       0.062564
ndcg@100                      0.096521
map@1                         0.065574
map@3                          0.07969
map@5                          0.09337
map@10                        0.112872
map@15                        0.131656
map@100                       0.170585
recall@1                      0.030328
recall@3                 

## Exploring hparams

In [8]:
results = explore_hparams(
    testHParamsItemKNN, 
    ParameterGrid({
    'fold': last_folds_idx,
    'k': ITEMKNN_Ks, # ,20,25,30,35,40,45,50,60,70,80,90,100],
    'window_size': WINDOW_SIZES,
    }),
    paths.hparams_progress('itemknn', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE),
)
mdfi = pd.DataFrame(results)
mdfi

Restored checkpoint from ../.cache/PancakeSwap/hparams-itemknn_3d_normalize.pkl with 770 results


  0%|          | 0/770 [00:00<?, ?it/s]

Unnamed: 0,fold,k,window_size,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,...,recall@10,recall@15,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@15,r-precision@100,time_eval
0,2023-05-31,1,7d,2023-05-31,0.003848,0.113371,64,52,62.376344,0.182796,...,0.609568,0.716093,1.0,0.788261,0.732067,0.687938,0.652905,0.662521,0.670213,0.226509
1,2023-05-31,1,14d,2023-05-31,0.007699,0.237422,64,44,61.920635,0.261905,...,0.520910,0.537652,1.0,0.911124,0.892134,0.872908,0.884031,0.887734,0.899772,0.249919
2,2023-05-31,1,21d,2023-05-31,0.012448,0.174414,64,34,61.323741,0.251799,...,0.473561,0.520233,1.0,0.966667,0.943243,0.921691,0.887291,0.895715,0.909809,0.260138
3,2023-05-31,1,30d,2023-05-31,0.019237,0.219134,64,28,61.258824,0.264706,...,0.454209,0.502496,1.0,0.937778,0.888250,0.888930,0.878121,0.871702,0.882955,0.284591
4,2023-05-31,1,60d,2023-05-31,0.153663,0.425726,64,24,61.551020,0.306122,...,0.449752,0.476842,1.0,0.950444,0.902678,0.894407,0.901292,0.899120,0.906207,0.346990
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
765,2023-06-27,15,21d,2023-06-27,0.032851,0.111060,57,32,54.690141,0.042254,...,0.243897,0.358451,1.0,0.533333,0.385185,0.363333,0.373333,0.373333,0.373333,0.194992
766,2023-06-27,15,30d,2023-06-27,0.048242,0.140513,57,32,54.536585,0.012195,...,0.166057,0.245325,1.0,1.000000,0.409524,0.379167,0.379167,0.379167,0.379167,0.203562
767,2023-06-27,15,60d,2023-06-27,0.131632,0.213016,57,28,55.008850,0.026549,...,0.179499,0.227434,1.0,0.555556,0.392593,0.370000,0.370000,0.370000,0.370000,0.231238
768,2023-06-27,15,90d,2023-06-27,0.424113,0.326313,57,28,55.138211,0.024390,...,0.161401,0.214699,1.0,0.555556,0.393750,0.368519,0.360238,0.360238,0.360238,0.240739


### Best overall hparams

In [9]:
display_columns = ['time_train', 'avg_recs'] + [ c for c in mdfi.columns if c.endswith('@5') or c.endswith('@10') or c.endswith('@100') ]
overall_hparams = mdfi[mdfi['fold'] > last_folds_idx[0]].groupby(['window_size', 'k']).mean().sort_values(OPTIM_METRIC, ascending=False)
overall_hparams[display_columns]

Unnamed: 0_level_0,Unnamed: 1_level_0,time_train,avg_recs,precision@5,precision@10,precision@100,ndcg@5,ndcg@10,ndcg@100,map@5,map@10,map@100,recall@5,recall@10,recall@100,r-precision@5,r-precision@10,r-precision@100
window_size,k,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
7d,15,0.006876,66.931428,0.114580,0.096881,0.029252,0.041294,0.048874,0.084202,0.194482,0.209584,0.257663,0.226517,0.328740,0.990079,0.475990,0.525359,0.554182
7d,10,0.007712,66.931428,0.113705,0.096253,0.029252,0.041074,0.048552,0.083965,0.193064,0.208683,0.256512,0.226000,0.327097,0.990079,0.472353,0.523124,0.550748
7d,9,0.006623,66.931428,0.114395,0.096220,0.029252,0.041095,0.048554,0.083914,0.193519,0.208635,0.256389,0.227153,0.327242,0.990079,0.474543,0.525317,0.552418
7d,8,0.007287,66.931428,0.114799,0.096220,0.029252,0.041216,0.048557,0.083898,0.193759,0.208634,0.256282,0.227557,0.327242,0.990079,0.472705,0.523348,0.550405
7d,7,0.007486,66.931428,0.114168,0.095590,0.029252,0.040842,0.048085,0.083500,0.190603,0.205453,0.253204,0.227185,0.326553,0.990079,0.464119,0.512683,0.540150
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10YE,4,1.622365,68.337975,0.058078,0.046200,0.022612,0.086278,0.105491,0.231620,0.103438,0.112791,0.152693,0.154300,0.233125,0.977576,0.449212,0.467567,0.490463
10YE,5,1.622270,68.337975,0.058905,0.046068,0.022617,0.086301,0.104764,0.231512,0.103502,0.112652,0.152725,0.153208,0.229210,0.977609,0.451094,0.467381,0.488224
10YE,3,1.615691,68.337975,0.057407,0.045885,0.022603,0.085343,0.104980,0.231479,0.102592,0.111963,0.152163,0.150183,0.232212,0.977271,0.452745,0.468529,0.492873
10YE,2,1.612575,68.337975,0.056288,0.045990,0.022612,0.084622,0.104255,0.231328,0.101293,0.110903,0.151205,0.148068,0.227779,0.977636,0.450949,0.466762,0.488395


Now let's see the behaviour in each fold

In [10]:
best_avg_hparams = mdfi.set_index(['window_size', 'k']).loc[overall_hparams.iloc[0].name].reset_index().set_index(['fold', 'window_size', 'k'])
paths.save_model_results(best_avg_hparams, 'itemknn-best-avg', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE, K_RECOMMENDATIONS)
best_avg_hparams

Saved dataframe into /home/daviddavo/recsys4daos/data/output/PancakeSwap/models/itemknn-best-avg_3d_normalize.parquet


  best_avg_hparams = mdfi.set_index(['window_size', 'k']).loc[overall_hparams.iloc[0].name].reset_index().set_index(['fold', 'window_size', 'k'])


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,precision@3,precision@5,precision@10,...,recall@10,recall@15,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@15,r-precision@100,time_eval
fold,window_size,k,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
2023-05-31,7d,15,2023-05-31,0.003716,0.110213,64,52,62.376344,0.172043,0.103943,0.15914,0.163441,...,0.624946,0.772805,1.0,0.816694,0.667483,0.596036,0.646421,0.658766,0.666174,0.297304
2023-06-03,7d,15,2023-06-03,0.003854,0.070936,42,28,40.413043,0.152174,0.130435,0.121739,0.136957,...,0.37293,0.545963,1.0,0.177551,0.226905,0.306429,0.438571,0.438571,0.438571,0.164054
2023-06-06,7d,15,2023-06-06,0.012336,0.075304,50,34,46.072727,0.109091,0.078788,0.061818,0.052727,...,0.212266,0.282814,1.0,0.546296,0.45086,0.415023,0.430329,0.435431,0.435431,0.172092
2023-06-09,7d,15,2023-06-09,0.004129,0.035556,60,37,52.423077,0.192308,0.192308,0.2,0.2,...,0.35145,0.383745,1.0,0.182143,0.325728,0.478836,0.620862,0.677324,0.728345,0.156908
2023-06-12,7d,15,2023-06-12,0.003206,0.043981,48,29,42.851852,0.222222,0.148148,0.133333,0.118519,...,0.245191,0.381753,1.0,0.319963,0.36456,0.385592,0.46044,0.477534,0.477534,0.151997
2023-06-15,7d,15,2023-06-15,0.003933,0.077367,91,50,88.396825,0.650794,0.243386,0.161905,0.101587,...,0.612458,0.6403,1.0,0.914332,0.89888,0.905526,0.897322,0.882357,0.905356,0.216995
2023-06-18,7d,15,2023-06-18,0.006364,0.072906,86,47,80.833333,0.041667,0.027778,0.025,0.022917,...,0.024306,0.082093,1.0,0.14881,0.196429,0.202381,0.25,0.313492,0.361111,0.190523
2023-06-21,7d,15,2023-06-21,0.01329,0.084414,126,54,121.672414,0.034483,0.034483,0.02069,0.02069,...,0.064696,0.072496,0.910714,0.183333,0.251905,0.251905,0.251905,0.251905,0.251905,0.243691
2023-06-24,7d,15,2023-06-24,0.010015,0.106409,75,58,73.430108,0.344086,0.265233,0.169892,0.105376,...,0.490715,0.559951,1.0,0.52691,0.784897,0.766553,0.760472,0.765763,0.771054,0.245281
2023-06-27,7d,15,2023-06-27,0.004753,0.051368,57,51,56.289474,0.184211,0.140351,0.136842,0.113158,...,0.584649,0.716228,1.0,0.57381,0.531667,0.571667,0.618333,0.618333,0.618333,0.168257


### Best hparams by fold

This are the parameters used to check which model is the best (validation)

In [11]:
print("Best hyperparams by fold", OPTIM_METRIC)
best_hparams = mdfi.sort_values(OPTIM_METRIC,ascending=False).drop_duplicates(['fold'], keep='first').sort_values('fold').set_index(['fold', 'k', 'window_size'])
paths.save_model_results(best_hparams, 'itemknn-best-val', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE, K_RECOMMENDATIONS)
best_hparams[display_columns]

Best hyperparams by fold map@10
Saved dataframe into /home/daviddavo/recsys4daos/data/output/PancakeSwap/models/itemknn-best-val_3d_normalize.parquet


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,time_train,avg_recs,precision@5,precision@10,precision@100,ndcg@5,ndcg@10,ndcg@100,map@5,map@10,map@100,recall@5,recall@10,recall@100,r-precision@5,r-precision@10,r-precision@100
fold,k,window_size,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2023-05-31,15,90d,0.349556,61.612403,0.09845,0.075581,0.027481,0.233062,0.258035,0.346941,0.343086,0.3678,0.401272,0.388891,0.477245,1.0,0.911237,0.882669,0.895464
2023-06-03,15,7d,0.003854,40.413043,0.121739,0.136957,0.028696,0.043386,0.067616,0.119517,0.09471,0.140921,0.211108,0.155383,0.37293,1.0,0.306429,0.438571,0.438571
2023-06-06,2,14d,0.006915,46.447761,0.059701,0.064179,0.027612,0.049846,0.066028,0.147572,0.100945,0.113032,0.173546,0.128956,0.235223,1.0,0.472314,0.432242,0.441171
2023-06-09,15,7d,0.004129,52.423077,0.2,0.2,0.042692,0.032365,0.040898,0.06376,0.195,0.215811,0.261412,0.161783,0.35145,1.0,0.478836,0.620862,0.728345
2023-06-12,10,7d,0.005076,42.851852,0.133333,0.118519,0.031481,0.031155,0.036699,0.071039,0.144938,0.153941,0.219093,0.147049,0.245191,1.0,0.385592,0.444567,0.461661
2023-06-15,9,7d,0.003889,88.396825,0.161905,0.104762,0.036508,0.102471,0.102954,0.118231,0.628995,0.627388,0.652528,0.603684,0.618337,1.0,0.905526,0.898184,0.906197
2023-06-18,1,21d,0.023141,81.081081,0.040541,0.02973,0.021081,0.021496,0.023755,0.0869,0.051464,0.051048,0.084187,0.068533,0.095238,1.0,0.480952,0.509524,0.585714
2023-06-21,15,60d,0.148891,122.345912,0.01761,0.020755,0.023585,0.014867,0.024111,0.094226,0.022773,0.030483,0.056216,0.044819,0.09998,0.788385,0.312619,0.283664,0.283664
2023-06-24,1,14d,0.02399,73.359756,0.176829,0.10061,0.021524,0.126754,0.131432,0.169706,0.375315,0.381232,0.410666,0.465084,0.50859,1.0,0.875411,0.884442,0.886905
2023-06-27,15,7d,0.004753,56.289474,0.136842,0.113158,0.021053,0.045304,0.057661,0.077119,0.21364,0.251714,0.293632,0.380702,0.584649,1.0,0.571667,0.618333,0.618333


In [12]:
best_hparams.tail(len(best_hparams)-1).describe()

Unnamed: 0,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,precision@3,precision@5,precision@10,...,recall@10,recall@15,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@15,r-precision@100,time_eval
count,9,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,...,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0
mean,2023-06-15 00:00:00,0.02496,0.11499,70.555556,38.555556,67.067642,0.22197,0.139185,0.1165,0.098741,...,0.345732,0.419003,0.976487,0.447762,0.503272,0.53215,0.570043,0.580905,0.594507,0.219772
min,2023-06-03 00:00:00,0.003854,0.035556,42.0,28.0,40.413043,0.037736,0.018868,0.01761,0.020755,...,0.095238,0.113642,0.788385,0.177551,0.226905,0.306429,0.283664,0.283664,0.283664,0.150987
25%,2023-06-09 00:00:00,0.004129,0.051368,50.0,33.0,46.447761,0.149254,0.064677,0.059701,0.064179,...,0.235223,0.295185,1.0,0.319963,0.334524,0.385592,0.438571,0.441171,0.441171,0.164054
50%,2023-06-15 00:00:00,0.005076,0.077566,60.0,37.0,56.289474,0.184211,0.140351,0.133333,0.104762,...,0.35145,0.383745,1.0,0.392857,0.411905,0.478836,0.509524,0.538095,0.585714,0.179236
75%,2023-06-21 00:00:00,0.023141,0.12669,86.0,48.0,81.081081,0.222222,0.192308,0.161905,0.118519,...,0.50859,0.569691,1.0,0.57381,0.561508,0.571667,0.620862,0.677324,0.728345,0.216975
max,2023-06-27 00:00:00,0.148891,0.335033,126.0,51.0,122.345912,0.650794,0.278455,0.2,0.2,...,0.618337,0.716228,1.0,0.914332,0.89888,0.905526,0.898184,0.884883,0.906197,0.418521
std,,0.047191,0.095691,26.907764,8.903807,26.924135,0.183172,0.089374,0.063416,0.055217,...,0.194985,0.216333,0.070538,0.22968,0.240296,0.220359,0.208905,0.20598,0.213103,0.088825


### Results of using best hparams in next fold

Kind of like the cvtt from [the LightGCN notebook](./11_microsoft_tuning.ipynb).

In [13]:
next_hparams_idx = best_hparams.index.to_frame(False)
next_hparams_idx['fold'] = next_hparams_idx['fold'].shift(-1)
next_hparams_idx = next_hparams_idx.dropna()
assert len(next_hparams_idx) == len(best_hparams)-1
next_hparams = mdfi.set_index(['fold', 'k', 'window_size']).loc[pd.MultiIndex.from_frame(next_hparams_idx)]
paths.save_model_results(next_hparams, 'itemknn-best-test', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE, K_RECOMMENDATIONS)
next_hparams[display_columns]

Saved dataframe into /home/daviddavo/recsys4daos/data/output/PancakeSwap/models/itemknn-best-test_3d_normalize.parquet


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,time_train,avg_recs,precision@5,precision@10,precision@100,ndcg@5,ndcg@10,ndcg@100,map@5,map@10,map@100,recall@5,recall@10,recall@100,r-precision@5,r-precision@10,r-precision@100
fold,k,window_size,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2023-06-03,15,90d,0.345764,39.663043,0.045652,0.06087,0.029783,0.040482,0.067773,0.19202,0.04442,0.061308,0.129088,0.079438,0.209717,1.0,0.226039,0.236201,0.323273
2023-06-06,15,7d,0.012336,46.072727,0.061818,0.052727,0.027818,0.039861,0.049745,0.119894,0.092273,0.104163,0.165437,0.129885,0.212266,1.0,0.415023,0.430329,0.435431
2023-06-09,2,14d,0.007181,55.62963,0.096296,0.090741,0.028333,0.033339,0.041491,0.10017,0.089712,0.098483,0.145483,0.084223,0.180431,1.0,0.360169,0.521032,0.614484
2023-06-12,15,7d,0.003206,42.851852,0.133333,0.118519,0.031481,0.031063,0.036674,0.071027,0.143827,0.153334,0.218787,0.147049,0.245191,1.0,0.385592,0.46044,0.477534
2023-06-15,10,7d,0.003882,88.396825,0.161905,0.103175,0.036508,0.102471,0.102594,0.118201,0.628995,0.626213,0.6522,0.603684,0.613046,1.0,0.905526,0.898184,0.906197
2023-06-18,9,7d,0.006359,80.833333,0.029167,0.022917,0.022917,0.006709,0.006678,0.053627,0.027083,0.021944,0.064565,0.013641,0.024306,1.0,0.218254,0.25,0.345238
2023-06-21,1,21d,0.034887,121.278846,0.021154,0.016346,0.025096,0.009115,0.011592,0.057839,0.021947,0.024817,0.050124,0.034066,0.059226,0.740298,0.380357,0.380357,0.343943
2023-06-24,15,60d,0.152985,73.506803,0.160544,0.087075,0.021667,0.209832,0.216972,0.284356,0.340182,0.344533,0.369072,0.452076,0.490586,1.0,0.844436,0.837915,0.842258
2023-06-27,1,14d,0.022218,55.295082,0.068852,0.062295,0.021475,0.034653,0.046243,0.097849,0.101876,0.119941,0.178893,0.171858,0.293716,1.0,0.498148,0.542593,0.542593


In [14]:
next_hparams.describe()

Unnamed: 0,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,precision@3,precision@5,precision@10,...,recall@10,recall@15,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@15,r-precision@100,time_eval
count,9,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,...,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0
mean,2023-06-15 00:00:00,0.065424,0.15071,70.555556,35.0,67.058682,0.179057,0.109315,0.086525,0.068296,...,0.25872,0.344768,0.971144,0.429854,0.459011,0.470394,0.506339,0.523739,0.536772,0.229039
min,2023-06-03 00:00:00,0.003206,0.043981,42.0,13.0,39.663043,0.028846,0.022436,0.021154,0.016346,...,0.024306,0.06126,0.740298,0.14881,0.187348,0.218254,0.236201,0.264894,0.323273,0.151997
25%,2023-06-09 00:00:00,0.006359,0.075304,50.0,30.0,46.072727,0.065217,0.057971,0.045652,0.052727,...,0.180431,0.282814,1.0,0.183405,0.264782,0.360169,0.380357,0.380357,0.345238,0.177781
50%,2023-06-15 00:00:00,0.012336,0.085394,60.0,34.0,55.62963,0.098361,0.078788,0.068852,0.062295,...,0.212266,0.348725,1.0,0.444444,0.408333,0.385592,0.46044,0.477534,0.477534,0.189919
75%,2023-06-21 00:00:00,0.034887,0.158381,86.0,41.0,80.833333,0.222222,0.148148,0.133333,0.090741,...,0.293716,0.513881,1.0,0.546296,0.502083,0.498148,0.542593,0.574306,0.614484,0.216178
max,2023-06-27 00:00:00,0.345764,0.443528,126.0,50.0,121.278846,0.650794,0.247166,0.161905,0.118519,...,0.613046,0.64062,1.0,0.914332,0.89888,0.905526,0.898184,0.884883,0.906197,0.46974
std,,0.115333,0.135319,26.907764,10.931606,26.627921,0.197792,0.085472,0.054319,0.034797,...,0.189127,0.196527,0.086567,0.249701,0.261349,0.245921,0.231153,0.216552,0.214796,0.100435


# User-based KNN

In [15]:
def testHParamsUserKNN(fold, k: int, window_size=None):
    # Create algorithm
    algo = user_knn.UserUser(
        feedback='implicit', # VERY IMPORTANT
        min_sim=0,
        # min_nbrs=0,
        nnbrs=k,  # the maximum number of neighbors for scoring each item (None for unlimited)
    )
    return test_with_hparams_lenskit(algo, all_folds[fold], K_RECOMMENDATIONS, window_size)

pd.Series(testHParamsUserKNN(last_folds_idx[-1],5,'14d'))

fold_t             2023-06-27 00:00:00
time_train                    0.596136
time_rec                      0.915451
open_proposals                      57
min_recs                            41
avg_recs                     55.295082
precision@1                   0.163934
precision@3                   0.114754
precision@5                   0.114754
precision@10                  0.116393
precision@15                  0.100546
precision@100                 0.021475
ndcg@1                        0.042918
ndcg@3                        0.046455
ndcg@5                        0.059745
ndcg@10                       0.087444
ndcg@15                       0.101191
ndcg@100                      0.119263
map@1                         0.163934
map@3                         0.154827
map@5                         0.181516
map@10                        0.225738
map@15                        0.247205
map@100                       0.270672
recall@1                      0.088251
recall@3                 

## Exploring hparams

In [16]:
results = explore_hparams(
    testHParamsUserKNN, 
    ParameterGrid({
        'fold': last_folds_idx,
        'k': ITEMKNN_Ks, # ,20,25,30,35,40,45,50,60,70,80,90,100],
        'window_size': WINDOW_SIZES,
    }),
    paths.hparams_progress('userknn', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE),
)
mdfu = pd.DataFrame(results)
mdfu

Restored checkpoint from ../.cache/PancakeSwap/hparams-userknn_3d_normalize.pkl with 770 results


  0%|          | 0/770 [00:00<?, ?it/s]

Unnamed: 0,fold,k,window_size,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,...,recall@10,recall@15,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@15,r-precision@100,time_eval
0,2023-05-31,1,7d,2023-05-31,0.002344,0.089452,64,52,62.376344,0.215054,...,0.585273,0.801821,1.0,0.749188,0.619884,0.616710,0.606910,0.616230,0.623301,0.229134
1,2023-05-31,1,14d,2023-05-31,0.003382,0.120444,64,44,61.920635,0.309524,...,0.431775,0.638483,1.0,0.849817,0.780141,0.764140,0.780256,0.784966,0.792937,0.251343
2,2023-05-31,1,21d,2023-05-31,0.004757,0.138926,64,34,61.323741,0.323741,...,0.468232,0.563564,1.0,0.874074,0.838889,0.799753,0.803865,0.810540,0.822859,0.260013
3,2023-05-31,1,30d,2023-05-31,0.007254,0.176134,64,28,61.258824,0.323529,...,0.445094,0.587145,1.0,0.891515,0.822326,0.818549,0.808157,0.813441,0.822784,0.283484
4,2023-05-31,1,60d,2023-05-31,0.026225,0.357759,64,24,61.551020,0.355102,...,0.463693,0.567313,1.0,0.946169,0.871336,0.852210,0.855346,0.861472,0.867419,0.342470
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
765,2023-06-27,15,21d,2023-06-27,0.005408,0.089637,57,32,54.690141,0.070423,...,0.358685,0.556338,1.0,0.666667,0.473611,0.452381,0.459524,0.459524,0.459524,0.194519
766,2023-06-27,15,30d,2023-06-27,0.008131,0.115798,57,32,54.536585,0.036585,...,0.216057,0.332114,1.0,0.611111,0.472222,0.400000,0.366667,0.366667,0.366667,0.203868
767,2023-06-27,15,60d,2023-06-27,0.015700,0.179438,57,28,55.008850,0.017699,...,0.202532,0.303810,1.0,0.666667,0.500000,0.409524,0.372222,0.372222,0.372222,0.228069
768,2023-06-27,15,90d,2023-06-27,0.036136,0.287198,57,28,55.138211,0.024390,...,0.173019,0.268570,1.0,0.361111,0.345238,0.329082,0.301835,0.301835,0.301835,0.237735


### Best overall hparams

In [17]:
display_columns = ['time_train', 'avg_recs'] + [ c for c in mdfu.columns if c.endswith('@5') or c.endswith('@10') or c.endswith('@100') ]
overall_hparams = mdfu[mdfu['fold'] > last_folds_idx[0]].groupby(['window_size', 'k']).mean().sort_values(OPTIM_METRIC, ascending=False)
overall_hparams[display_columns]

Unnamed: 0_level_0,Unnamed: 1_level_0,time_train,avg_recs,precision@5,precision@10,precision@100,ndcg@5,ndcg@10,ndcg@100,map@5,map@10,map@100,recall@5,recall@10,recall@100,r-precision@5,r-precision@10,r-precision@100
window_size,k,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
7d,2,0.002104,66.931428,0.141783,0.117636,0.029252,0.048642,0.058328,0.089588,0.220883,0.239995,0.284820,0.265922,0.406401,0.990079,0.501901,0.546902,0.571227
7d,7,0.002098,66.931428,0.140172,0.114535,0.029252,0.048657,0.056935,0.089474,0.221432,0.237558,0.284908,0.268172,0.386838,0.990079,0.506976,0.555313,0.582653
7d,6,0.002118,66.931428,0.139233,0.113931,0.029252,0.048421,0.056895,0.089406,0.222033,0.237365,0.284508,0.263466,0.386398,0.990079,0.511332,0.557243,0.586656
7d,5,0.002106,66.931428,0.138441,0.114639,0.029252,0.048200,0.057050,0.089252,0.220292,0.236734,0.283220,0.264259,0.393202,0.990079,0.503809,0.552595,0.579401
7d,8,0.002091,66.931428,0.139768,0.114033,0.029252,0.048173,0.056756,0.089280,0.219919,0.236430,0.284041,0.266152,0.388816,0.990079,0.508054,0.549413,0.577045
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10YE,7,0.373152,68.337975,0.062664,0.050433,0.022627,0.097618,0.118713,0.243028,0.117679,0.128710,0.170430,0.176682,0.258338,0.977258,0.439746,0.448840,0.477664
10YE,8,0.374141,68.337975,0.062613,0.049926,0.022627,0.097472,0.117216,0.242196,0.117219,0.127345,0.169228,0.178356,0.253791,0.977258,0.446537,0.448621,0.475682
10YE,9,0.373262,68.337975,0.061622,0.050230,0.022627,0.096555,0.116835,0.241882,0.116535,0.127107,0.168939,0.176566,0.252143,0.977258,0.448401,0.443758,0.473248
10YE,10,0.374293,68.337975,0.059633,0.049518,0.022627,0.094437,0.115579,0.240970,0.114579,0.125708,0.167560,0.171419,0.250455,0.977258,0.446769,0.450091,0.479723


In [18]:
best_avg_hparams = mdfu.set_index(['window_size', 'k']).loc[overall_hparams.iloc[0].name].reset_index().set_index(['fold', 'window_size', 'k'])
paths.save_model_results(best_avg_hparams, 'userknn-best-avg', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE, K_RECOMMENDATIONS)
best_avg_hparams

Saved dataframe into /home/daviddavo/recsys4daos/data/output/PancakeSwap/models/userknn-best-avg_3d_normalize.parquet


  best_avg_hparams = mdfu.set_index(['window_size', 'k']).loc[overall_hparams.iloc[0].name].reset_index().set_index(['fold', 'window_size', 'k'])


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,precision@3,precision@5,precision@10,...,recall@10,recall@15,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@15,r-precision@100,time_eval
fold,window_size,k,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
2023-05-31,7d,2,2023-05-31,0.002284,0.087472,64,52,62.376344,0.204301,0.139785,0.109677,0.172043,...,0.642283,0.801445,1.0,0.800399,0.641934,0.623739,0.625602,0.634648,0.641511,0.227445
2023-06-03,7d,2,2023-06-03,0.002602,0.057687,42,28,40.413043,0.173913,0.173913,0.186957,0.173913,...,0.458799,0.634058,1.0,0.390774,0.391905,0.509821,0.590327,0.590327,0.590327,0.163327
2023-06-06,7d,2,2023-06-06,0.002253,0.05937,50,34,46.072727,0.145455,0.133333,0.12,0.101818,...,0.299048,0.368139,1.0,0.314484,0.37877,0.374127,0.44873,0.44873,0.44873,0.171341
2023-06-09,7d,2,2023-06-09,0.001166,0.025381,60,37,52.423077,0.230769,0.24359,0.238462,0.219231,...,0.434356,0.533654,1.0,0.098082,0.30172,0.417681,0.524868,0.564815,0.608466,0.156368
2023-06-12,7d,2,2023-06-12,0.001609,0.032424,48,29,42.851852,0.222222,0.148148,0.148148,0.133333,...,0.292484,0.406444,1.0,0.194963,0.280926,0.344505,0.445467,0.445467,0.445467,0.152097
2023-06-15,7d,2,2023-06-15,0.001945,0.061724,91,50,88.396825,0.619048,0.232804,0.149206,0.096825,...,0.687283,0.705554,1.0,0.975034,0.905115,0.887452,0.877012,0.883398,0.907778,0.214309
2023-06-18,7d,2,2023-06-18,0.002326,0.053738,86,47,80.833333,0.0625,0.048611,0.033333,0.0375,...,0.11875,0.288641,1.0,0.415873,0.377381,0.389286,0.425,0.484524,0.520238,0.188537
2023-06-21,7d,2,2023-06-21,0.002624,0.063945,126,54,121.672414,0.034483,0.028736,0.048276,0.044828,...,0.161029,0.280036,0.910714,0.375,0.240476,0.285034,0.263294,0.263294,0.263294,0.235532
2023-06-24,7d,2,2023-06-24,0.002451,0.084703,75,58,73.430108,0.301075,0.293907,0.204301,0.135484,...,0.594897,0.684997,1.0,0.556689,0.708748,0.71087,0.729089,0.73323,0.738406,0.241705
2023-06-27,7d,2,2023-06-27,0.001955,0.040747,57,51,56.289474,0.210526,0.175439,0.147368,0.115789,...,0.610965,0.71886,1.0,0.44375,0.541667,0.598333,0.618333,0.618333,0.618333,0.167579


### Best hparams by fold

In [19]:
print("Best hyperparams by fold", OPTIM_METRIC)
best_hparams = mdfu.sort_values(OPTIM_METRIC,ascending=False).drop_duplicates(['fold'], keep='first').sort_values('fold').set_index(['fold', 'k', 'window_size'])
paths.save_model_results(best_hparams, 'userknn-best-val', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE, K_RECOMMENDATIONS)
best_hparams[display_columns]

Best hyperparams by fold map@10
Saved dataframe into /home/daviddavo/recsys4daos/data/output/PancakeSwap/models/userknn-best-val_3d_normalize.parquet


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,time_train,avg_recs,precision@5,precision@10,precision@100,ndcg@5,ndcg@10,ndcg@100,map@5,map@10,map@100,recall@5,recall@10,recall@100,r-precision@5,r-precision@10,r-precision@100
fold,k,window_size,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2023-05-31,2,90d,0.039269,61.612403,0.099225,0.083333,0.027481,0.25405,0.284683,0.369276,0.382306,0.40674,0.440411,0.406741,0.519981,1.0,0.872174,0.863884,0.873881
2023-06-03,2,7d,0.002602,40.413043,0.186957,0.173913,0.028696,0.066946,0.091514,0.134403,0.180471,0.233598,0.288482,0.233489,0.458799,1.0,0.509821,0.590327,0.590327
2023-06-06,5,7d,0.002228,46.072727,0.127273,0.098182,0.027818,0.062319,0.071698,0.132628,0.155879,0.165419,0.216512,0.194127,0.284401,1.0,0.479812,0.52495,0.538343
2023-06-09,1,7d,0.001142,52.423077,0.261538,0.219231,0.042692,0.043755,0.046146,0.069626,0.262179,0.25506,0.30542,0.210607,0.331578,1.0,0.413977,0.552028,0.647002
2023-06-12,1,7d,0.001621,42.851852,0.148148,0.125926,0.031481,0.035555,0.04049,0.07284,0.16642,0.17248,0.231259,0.194051,0.285077,1.0,0.425755,0.514217,0.514217
2023-06-15,4,7d,0.00195,88.396825,0.152381,0.098413,0.036508,0.104547,0.106699,0.121318,0.631111,0.638122,0.6712,0.652997,0.684835,1.0,0.889518,0.904801,0.916495
2023-06-18,1,60d,0.024918,82.429825,0.04386,0.034211,0.020439,0.048023,0.057618,0.151465,0.079503,0.084269,0.119952,0.104887,0.166931,1.0,0.530476,0.510218,0.528075
2023-06-21,15,7d,0.002584,121.672414,0.07931,0.067241,0.027586,0.01471,0.020244,0.047299,0.052399,0.066647,0.108161,0.138834,0.221128,0.910714,0.308631,0.272302,0.272302
2023-06-24,15,14d,0.004185,73.359756,0.191463,0.114024,0.021524,0.139277,0.147279,0.182359,0.413985,0.426229,0.457618,0.496792,0.567136,1.0,0.834127,0.823338,0.827842
2023-06-27,3,7d,0.001973,56.289474,0.152632,0.115789,0.021053,0.050571,0.062324,0.08044,0.257368,0.283749,0.321317,0.394298,0.610965,1.0,0.634848,0.65303,0.65303


In [20]:
best_hparams.tail(len(best_hparams)-1).describe()[display_columns]

Unnamed: 0,time_train,avg_recs,precision@5,precision@10,precision@100,ndcg@5,ndcg@10,ndcg@100,map@5,map@10,map@100,recall@5,recall@10,recall@100,r-precision@5,r-precision@10,r-precision@100
count,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0
mean,0.0048,67.100999,0.149285,0.116326,0.028644,0.062856,0.071557,0.110264,0.244368,0.258397,0.302213,0.29112,0.401206,0.990079,0.558552,0.593912,0.609737
min,0.001142,40.413043,0.04386,0.034211,0.020439,0.01471,0.020244,0.047299,0.052399,0.066647,0.108161,0.104887,0.166931,0.910714,0.308631,0.272302,0.272302
25%,0.00195,46.072727,0.127273,0.098182,0.021524,0.043755,0.046146,0.07284,0.155879,0.165419,0.216512,0.194051,0.284401,1.0,0.425755,0.514217,0.528075
50%,0.002228,56.289474,0.152381,0.114024,0.027818,0.050571,0.062324,0.121318,0.180471,0.233598,0.288482,0.210607,0.331578,1.0,0.509821,0.552028,0.590327
75%,0.002602,82.429825,0.186957,0.125926,0.031481,0.066946,0.091514,0.134403,0.262179,0.283749,0.321317,0.394298,0.567136,1.0,0.634848,0.65303,0.65303
max,0.024918,121.672414,0.261538,0.219231,0.042692,0.139277,0.147279,0.182359,0.631111,0.638122,0.6712,0.652997,0.684835,1.0,0.889518,0.904801,0.916495
std,0.007592,26.880484,0.063558,0.05455,0.007456,0.03774,0.038553,0.04473,0.180571,0.179145,0.174685,0.183851,0.185265,0.029762,0.194185,0.185774,0.187104


### Results of using best hparams in next fold

Kind of like the cvtt from [the LightGCN notebook](./11_microsoft_tuning.ipynb)

In [21]:
next_hparams_idx = best_hparams.index.to_frame(False)
next_hparams_idx['fold'] = next_hparams_idx['fold'].shift(-1)
next_hparams_idx = next_hparams_idx.dropna()
assert len(next_hparams_idx) == len(best_hparams)-1
next_hparams = mdfu.set_index(['fold', 'k', 'window_size']).loc[pd.MultiIndex.from_frame(next_hparams_idx)]
paths.save_model_results(next_hparams, 'userknn-best-test', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE, K_RECOMMENDATIONS)
next_hparams

Saved dataframe into /home/daviddavo/recsys4daos/data/output/PancakeSwap/models/userknn-best-test_3d_normalize.parquet


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,precision@3,precision@5,precision@10,...,recall@10,recall@15,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@15,r-precision@100,time_eval
fold,k,window_size,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
2023-06-03,2,90d,2023-06-03,0.037928,0.263251,42,13,39.663043,0.130435,0.108696,0.093478,0.081522,...,0.316891,0.446291,1.0,0.334163,0.361337,0.416157,0.436661,0.458043,0.497115,0.191489
2023-06-06,2,7d,2023-06-06,0.002253,0.05937,50,34,46.072727,0.145455,0.133333,0.12,0.101818,...,0.299048,0.368139,1.0,0.314484,0.37877,0.374127,0.44873,0.44873,0.44873,0.171341
2023-06-09,5,7d,2023-06-09,0.001208,0.025509,60,37,52.423077,0.153846,0.205128,0.223077,0.215385,...,0.431151,0.515156,1.0,0.102679,0.273243,0.377877,0.541865,0.599603,0.644246,0.156706
2023-06-12,1,7d,2023-06-12,0.001621,0.032629,48,29,42.851852,0.259259,0.17284,0.148148,0.125926,...,0.285077,0.394336,1.0,0.309969,0.37081,0.425755,0.514217,0.514217,0.514217,0.152054
2023-06-15,1,7d,2023-06-15,0.001995,0.061906,91,50,88.396825,0.460317,0.227513,0.142857,0.092063,...,0.696915,0.723635,1.0,0.966425,0.906869,0.88137,0.860778,0.849765,0.88418,0.214813
2023-06-18,4,7d,2023-06-18,0.002346,0.054077,86,47,80.833333,0.041667,0.041667,0.0375,0.0375,...,0.069643,0.145536,1.0,0.52381,0.389881,0.370952,0.418571,0.447143,0.475714,0.189651
2023-06-21,1,60d,2023-06-21,0.021888,0.273931,126,37,122.345912,0.044025,0.035639,0.030189,0.022013,...,0.112639,0.141489,0.813868,0.714286,0.543981,0.478148,0.455208,0.455208,0.455208,0.383705
2023-06-24,15,7d,2023-06-24,0.00244,0.084465,75,58,73.430108,0.354839,0.304659,0.210753,0.131183,...,0.575124,0.661623,1.0,0.527898,0.75179,0.727174,0.739355,0.743496,0.748671,0.243466
2023-06-27,15,14d,2023-06-27,0.004049,0.07387,57,41,55.295082,0.163934,0.120219,0.111475,0.119672,...,0.617486,0.79235,1.0,0.618333,0.566667,0.601111,0.621111,0.621111,0.621111,0.186546


In [22]:
next_hparams.describe()

Unnamed: 0,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,precision@3,precision@5,precision@10,...,recall@10,recall@15,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@15,r-precision@100,time_eval
count,9,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,...,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0
mean,2023-06-15 00:00:00,0.008414,0.103223,70.555556,38.444444,66.81244,0.194864,0.149966,0.124164,0.103009,...,0.378219,0.465395,0.979319,0.490227,0.504816,0.516964,0.559611,0.570813,0.587688,0.209975
min,2023-06-03 00:00:00,0.001208,0.025509,42.0,13.0,39.663043,0.041667,0.035639,0.030189,0.022013,...,0.069643,0.141489,0.813868,0.102679,0.273243,0.370952,0.418571,0.447143,0.44873,0.152054
25%,2023-06-09 00:00:00,0.001995,0.054077,50.0,34.0,46.072727,0.130435,0.108696,0.093478,0.081522,...,0.285077,0.368139,1.0,0.314484,0.37081,0.377877,0.44873,0.455208,0.475714,0.171341
50%,2023-06-15 00:00:00,0.002346,0.061906,60.0,37.0,55.295082,0.153846,0.133333,0.12,0.101818,...,0.316891,0.446291,1.0,0.52381,0.389881,0.425755,0.514217,0.514217,0.514217,0.189651
75%,2023-06-21 00:00:00,0.004049,0.084465,86.0,47.0,80.833333,0.259259,0.205128,0.148148,0.125926,...,0.575124,0.661623,1.0,0.618333,0.566667,0.601111,0.621111,0.621111,0.644246,0.214813
max,2023-06-27 00:00:00,0.037928,0.273931,126.0,58.0,122.345912,0.460317,0.304659,0.223077,0.215385,...,0.696915,0.79235,1.0,0.966425,0.906869,0.88137,0.860778,0.849765,0.88418,0.383705
std,,0.012852,0.095546,26.907764,13.020283,27.09075,0.139489,0.087446,0.066772,0.056634,...,0.21933,0.233631,0.062044,0.258325,0.208947,0.182042,0.15304,0.145977,0.150474,0.070989
