In [1]:
import datetime as dt

import pandas as pd
import numpy as np
from lenskit.algorithms import item_knn, user_knn
from sklearn.model_selection import ParameterGrid

from recsys4daos.datasets import to_lenskit
from recsys4daos.model_selection import cvtt_open, explore_hparams
from recsys4daos.evaluation import test_with_hparams_lenskit

import paths

  from tqdm.autonotebook import tqdm


# Parameters

In [2]:
# Dataset config
ORG_NAME = 'Decentraland'
SPLITS_FREQ = 'W-THU'  # Split weekly
LAST_FOLDS = 20  # Use just last 10 splits
SPLITS_NORMALIZE = True

# Evaluation
K_RECOMMENDATIONS: list[int] = [1,3,5,10,15,100]
SPLITS_FREQ: str = 'W-THU' # split weekly
SPLITS_NORMALIZE = True # Wether or not to move everything to 00:00
LAST_FOLDS = 10 # Use just last 10 splits
LAST_FOLD_DATE_STR: str = None

# Search space config
WINDOW_SIZES = ['7d', '14d', '21d', '30d', '60d', '90d', '10YE']
ITEMKNN_Ks = [1,2,3,4,5,6,7,8,9,10,15]

OPTIM_METRIC = 'map@10'

In [3]:
# Parameters
EXECUTION_ID = "2024-09-04T10:00"
ORG_NAME = "Index Coop"
SPLITS_FREQ = "W-THU"
LAST_FOLDS = 10
SPLITS_NORMALIZE = True
LAST_FOLD_DATE_STR = "2023-07-13"


# Load the dataset

In [4]:
dfp = paths.load_proposals(ORG_NAME)
dfv = paths.load_votes(ORG_NAME)

print(dfp.info())
print(dfv.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1052 entries, 0 to 1051
Data columns (total 6 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   id                 1052 non-null   object        
 1   author             1052 non-null   object        
 2   date               1052 non-null   datetime64[us]
 3   start              1052 non-null   datetime64[us]
 4   end                1052 non-null   datetime64[us]
 5   platform_proposal  1052 non-null   object        
dtypes: datetime64[us](3), object(3)
memory usage: 49.4+ KB
None
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 24031 entries, 0 to 24030
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   id        24031 non-null  object        
 1   proposal  24031 non-null  object        
 2   voter     24031 non-null  object        
 3   date      24031 non-null  datetime64[us]
dtype

In [5]:
df = to_lenskit(dfv)
df

Unnamed: 0,user,item,timestamp,rating
0,0xf3862af14cbb4d9b781e41a3d4d74e7c2cdb73e2,e03585d8-6ab1-52d0-8d05-6b20a2f457b9,2020-11-18 20:56:32,1
1,0xba71dfe1801d125ba201e8bebf7f0b9764d4b19b,e03585d8-6ab1-52d0-8d05-6b20a2f457b9,2020-11-18 21:23:13,1
2,0x042a135bd342910ad7f67bbda74e3fd4125d1272,e03585d8-6ab1-52d0-8d05-6b20a2f457b9,2020-11-18 21:23:55,1
3,0x21f3ef56cd8bafd9448515e4508077c16ecc1f35,e03585d8-6ab1-52d0-8d05-6b20a2f457b9,2020-11-18 21:42:37,1
4,0x506f92308f4d9a2f764d80c68ebc50830a2f7a11,e03585d8-6ab1-52d0-8d05-6b20a2f457b9,2020-11-18 21:45:10,1
...,...,...,...,...
24026,0x9ec14a04cb7822b56bef9d46357604b6f3a673cc,a3350420-69f0-5ca3-98a5-ebe60952bed1,2023-07-18 05:34:29,1
24027,0x53c40473dcdfd927c4201ccfe24e314a7d7c3584,a3350420-69f0-5ca3-98a5-ebe60952bed1,2023-07-18 18:42:04,1
24028,0x197008a1d3e26a97a19f46c121482969cef95b7d,a3350420-69f0-5ca3-98a5-ebe60952bed1,2023-07-18 23:58:06,1
24029,0x24749aedf18208ab74a8110e07e820286bb5acf8,a3350420-69f0-5ca3-98a5-ebe60952bed1,2023-07-19 00:31:54,1


## Split in folds

In [6]:
all_folds = { f.end:f for f in cvtt_open(
    df, SPLITS_FREQ, dfp.reset_index(), remove_not_in_train_col='item', col_item='item', last_fold=LAST_FOLD_DATE_STR,
)}
last_folds_idx = list(all_folds.keys())[-LAST_FOLDS:]
last_folds_idx

[Timestamp('2023-05-11 00:00:00'),
 Timestamp('2023-05-18 00:00:00'),
 Timestamp('2023-05-25 00:00:00'),
 Timestamp('2023-06-01 00:00:00'),
 Timestamp('2023-06-08 00:00:00'),
 Timestamp('2023-06-15 00:00:00'),
 Timestamp('2023-06-22 00:00:00'),
 Timestamp('2023-06-29 00:00:00'),
 Timestamp('2023-07-06 00:00:00'),
 Timestamp('2023-07-13 00:00:00')]

# Item-based KNN

In [7]:
def testHParamsItemKNN(fold, k: int, window_size=None):
    # Create algorithm
    algo = item_knn.ItemItem(
        feedback='implicit', # VERY IMPORTANT
        min_sim=0,
        # min_nbrs=0,
        nnbrs=k,  # the maximum number of neighbors for scoring each item (None for unlimited)
    )
    return test_with_hparams_lenskit(algo, all_folds[fold], K_RECOMMENDATIONS, window_size)

pd.Series(testHParamsItemKNN(last_folds_idx[-1],5,'14d'))

Numba is using threading layer omp - consider TBB


found 1 potential runtime problems - see https://boi.st/lkpy-perf


  b = blocks[bi]


fold_t             2023-07-13 00:00:00
time_train                    5.597387
time_rec                      0.141363
open_proposals                       7
min_recs                             6
avg_recs                           6.5
precision@1                       0.25
precision@3                   0.333333
precision@5                       0.35
precision@10                      0.25
precision@15                  0.166667
precision@100                    0.025
ndcg@1                            0.25
ndcg@3                        0.407732
ndcg@5                        0.504446
ndcg@10                       0.593497
ndcg@15                       0.593497
ndcg@100                      0.593497
map@1                             0.25
map@3                            0.375
map@5                            0.425
map@10                        0.466667
map@15                        0.466667
map@100                       0.466667
recall@1                      0.035714
recall@3                 

## Exploring hparams

In [8]:
results = explore_hparams(
    testHParamsItemKNN, 
    ParameterGrid({
    'fold': last_folds_idx,
    'k': ITEMKNN_Ks, # ,20,25,30,35,40,45,50,60,70,80,90,100],
    'window_size': WINDOW_SIZES,
    }),
    paths.hparams_progress('itemknn', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE),
)
mdfi = pd.DataFrame(results)
mdfi

Restored checkpoint from ../.cache/Index Coop/hparams-itemknn_W-THU_normalize.pkl with 770 results


  0%|          | 0/770 [00:00<?, ?it/s]

Unnamed: 0,fold,k,window_size,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,...,recall@10,recall@15,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@15,r-precision@100,time_eval
0,2023-05-11,1,7d,2023-05-11,0.001847,0.011309,10,5,7.000000,0.250000,...,1.0,1.0,1.0,0.375000,0.395238,0.569048,0.626190,0.626190,0.626190,0.129517
1,2023-05-11,1,14d,2023-05-11,0.001276,0.012130,10,5,7.333333,0.222222,...,1.0,1.0,1.0,0.375000,0.369048,0.586310,0.699405,0.699405,0.699405,0.127983
2,2023-05-11,1,21d,2023-05-11,0.001268,0.012512,10,5,7.333333,0.222222,...,1.0,1.0,1.0,0.375000,0.369048,0.586310,0.699405,0.699405,0.699405,0.126863
3,2023-05-11,1,30d,2023-05-11,0.001513,0.013596,10,5,7.333333,0.222222,...,1.0,1.0,1.0,0.375000,0.369048,0.586310,0.699405,0.699405,0.699405,0.127368
4,2023-05-11,1,60d,2023-05-11,0.005409,0.018285,10,5,7.333333,0.222222,...,1.0,1.0,1.0,0.375000,0.369048,0.586310,0.657738,0.657738,0.657738,0.127374
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
765,2023-07-13,15,21d,2023-07-13,0.001594,0.005956,7,6,6.500000,0.250000,...,1.0,1.0,1.0,0.142857,0.428571,0.714286,1.000000,1.000000,1.000000,0.126289
766,2023-07-13,15,30d,2023-07-13,0.005763,0.006473,7,6,6.500000,0.250000,...,1.0,1.0,1.0,0.142857,0.428571,0.714286,1.000000,1.000000,1.000000,0.124576
767,2023-07-13,15,60d,2023-07-13,0.003779,0.008441,7,6,6.500000,0.250000,...,1.0,1.0,1.0,0.142857,0.428571,0.714286,1.000000,1.000000,1.000000,0.123539
768,2023-07-13,15,90d,2023-07-13,0.009007,0.010984,7,6,6.500000,0.250000,...,1.0,1.0,1.0,0.142857,0.428571,0.714286,1.000000,1.000000,1.000000,0.122775


### Best overall hparams

In [9]:
display_columns = ['time_train', 'avg_recs'] + [ c for c in mdfi.columns if c.endswith('@5') or c.endswith('@10') or c.endswith('@100') ]
overall_hparams = mdfi[mdfi['fold'] > last_folds_idx[0]].groupby(['window_size', 'k']).mean().sort_values(OPTIM_METRIC, ascending=False)
overall_hparams[display_columns]

Unnamed: 0_level_0,Unnamed: 1_level_0,time_train,avg_recs,precision@5,precision@10,precision@100,ndcg@5,ndcg@10,ndcg@100,map@5,map@10,map@100,recall@5,recall@10,recall@100,r-precision@5,r-precision@10,r-precision@100
window_size,k,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
7d,1,0.001599,4.832275,0.268677,0.151005,0.015841,0.552885,0.570927,0.580239,0.645855,0.657532,0.662087,0.882937,0.958333,1.0,0.872768,0.890625,0.890625
7d,6,0.003303,4.832275,0.261270,0.145450,0.015841,0.547924,0.565228,0.579816,0.642152,0.652337,0.660409,0.864418,0.939815,1.0,0.872768,0.890625,0.890625
7d,7,0.001874,4.832275,0.261270,0.145450,0.015841,0.547924,0.565228,0.579816,0.642152,0.652337,0.660409,0.864418,0.939815,1.0,0.872768,0.890625,0.890625
7d,8,0.004528,4.832275,0.261270,0.145450,0.015841,0.547924,0.565228,0.579816,0.642152,0.652337,0.660409,0.864418,0.939815,1.0,0.872768,0.890625,0.890625
7d,2,0.001400,4.832275,0.268677,0.147302,0.015841,0.547542,0.562384,0.574508,0.636982,0.646241,0.652501,0.882937,0.949074,1.0,0.841518,0.859375,0.859375
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60d,6,0.004967,4.865432,0.276173,0.148272,0.016309,0.583843,0.598685,0.615185,0.532030,0.539746,0.548064,0.876323,0.930556,1.0,0.722098,0.757812,0.757812
60d,5,0.004813,4.865432,0.276173,0.148272,0.016309,0.583843,0.598685,0.615185,0.532030,0.539746,0.548064,0.876323,0.930556,1.0,0.722098,0.757812,0.757812
90d,5,0.014460,4.962654,0.276173,0.154753,0.017142,0.579234,0.604623,0.627019,0.526097,0.538367,0.548064,0.847388,0.929398,1.0,0.727307,0.783854,0.783854
90d,10,0.013941,4.962654,0.276173,0.154753,0.017142,0.578804,0.604029,0.626336,0.525703,0.537582,0.547183,0.847388,0.929398,1.0,0.727307,0.783854,0.783854


Now let's see the behaviour in each fold

In [10]:
best_avg_hparams = mdfi.set_index(['window_size', 'k']).loc[overall_hparams.iloc[0].name].reset_index().set_index(['fold', 'window_size', 'k'])
paths.save_model_results(best_avg_hparams, 'itemknn-best-avg', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE, K_RECOMMENDATIONS)
best_avg_hparams

Saved dataframe into /home/daviddavo/recsys4daos/data/output/Index Coop/models/itemknn-best-avg_W-THU_normalize.parquet


  best_avg_hparams = mdfi.set_index(['window_size', 'k']).loc[overall_hparams.iloc[0].name].reset_index().set_index(['fold', 'window_size', 'k'])


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,precision@3,precision@5,precision@10,...,recall@10,recall@15,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@15,r-precision@100,time_eval
fold,window_size,k,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
2023-05-11,7d,1,2023-05-11,0.001847,0.011309,10,5,7.0,0.25,0.333333,0.425,0.325,...,1.0,1.0,1.0,0.375,0.395238,0.569048,0.62619,0.62619,0.62619,0.129517
2023-05-18,7d,1,2023-05-18,0.001066,0.008088,16,8,12.5,0.166667,0.166667,0.166667,0.133333,...,0.625,1.0,1.0,0.5,0.625,0.625,0.625,0.625,0.625,0.127826
2023-05-25,7d,1,2023-05-25,0.001093,0.006569,4,3,3.8,0.2,0.266667,0.28,0.14,...,1.0,1.0,1.0,1.0,0.666667,0.666667,0.666667,0.666667,0.666667,0.127051
2023-06-01,7d,1,2023-06-01,0.0011,0.005253,3,3,3.0,0.75,0.333333,0.2,0.1,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.125376
2023-06-08,7d,1,2023-06-08,0.00445,0.005362,2,2,2.0,0.25,0.333333,0.2,0.1,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.125515
2023-06-15,7d,1,2023-06-15,0.001523,0.003096,5,5,5.0,0.0,0.166667,0.2,0.1,...,1.0,1.0,1.0,0.0,,,,,,0.11865
2023-06-22,7d,1,2023-06-22,0.001532,0.00416,1,1,1.0,1.0,0.333333,0.2,0.1,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.117408
2023-06-29,7d,1,2023-06-29,0.001056,0.003166,8,5,6.0,0.5,0.166667,0.2,0.1,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.12433
2023-07-06,7d,1,2023-07-06,0.001484,0.008748,4,3,3.857143,0.857143,0.761905,0.571429,0.285714,...,1.0,1.0,1.0,0.444444,0.761905,0.833333,0.833333,0.833333,0.833333,0.125064
2023-07-13,7d,1,2023-07-13,0.001085,0.004254,7,6,6.333333,0.666667,0.444444,0.4,0.3,...,1.0,1.0,1.0,0.571429,0.714286,0.857143,1.0,1.0,1.0,0.126601


### Best hparams by fold

This are the parameters used to check which model is the best (validation)

In [11]:
print("Best hyperparams by fold", OPTIM_METRIC)
best_hparams = mdfi.sort_values(OPTIM_METRIC,ascending=False).drop_duplicates(['fold'], keep='first').sort_values('fold').set_index(['fold', 'k', 'window_size'])
paths.save_model_results(best_hparams, 'itemknn-best-val', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE, K_RECOMMENDATIONS)
best_hparams[display_columns]

Best hyperparams by fold map@10
Saved dataframe into /home/daviddavo/recsys4daos/data/output/Index Coop/models/itemknn-best-val_W-THU_normalize.parquet


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,time_train,avg_recs,precision@5,precision@10,precision@100,ndcg@5,ndcg@10,ndcg@100,map@5,map@10,map@100,recall@5,recall@10,recall@100,r-precision@5,r-precision@10,r-precision@100
fold,k,window_size,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2023-05-11,1,7d,0.001847,7.0,0.425,0.325,0.0325,0.45913,0.549153,0.549153,0.443854,0.54899,0.54899,0.741071,1.0,1.0,0.569048,0.62619,0.62619
2023-05-18,1,7d,0.001066,12.5,0.166667,0.133333,0.02,0.218446,0.291767,0.375576,0.241667,0.291204,0.332199,0.375,0.625,1.0,0.625,0.625,0.625
2023-05-25,6,7d,0.001489,3.8,0.28,0.14,0.014,0.323252,0.323252,0.323252,0.516667,0.516667,0.516667,1.0,1.0,1.0,0.666667,0.666667,0.666667
2023-06-01,6,14d,0.001128,3.0,0.2,0.1,0.01,0.771822,0.771822,0.771822,0.9,0.9,0.9,1.0,1.0,1.0,1.0,1.0,1.0
2023-06-08,3,21d,0.001979,2.0,0.2,0.1,0.01,0.723197,0.723197,0.723197,0.625,0.625,0.625,1.0,1.0,1.0,1.0,1.0,1.0
2023-06-15,9,10YE,0.660049,5.0,0.333333,0.166667,0.016667,0.726604,0.726604,0.726604,0.626852,0.626852,0.626852,1.0,1.0,1.0,0.666667,0.666667,0.666667
2023-06-22,9,90d,0.012838,1.0,0.2,0.1,0.01,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2023-06-29,2,60d,0.004759,6.0,0.2,0.1,0.01,0.693426,0.693426,0.693426,0.6,0.6,0.6,1.0,1.0,1.0,1.0,1.0,1.0
2023-07-06,6,7d,0.001467,3.857143,0.571429,0.285714,0.028571,0.735557,0.735557,0.735557,0.912698,0.912698,0.912698,1.0,1.0,1.0,0.833333,0.833333,0.833333
2023-07-13,9,7d,0.001126,6.333333,0.4,0.3,0.03,0.5,0.589052,0.589052,0.666667,0.722222,0.722222,0.571429,1.0,1.0,0.857143,1.0,1.0


In [12]:
best_hparams.tail(len(best_hparams)-1).describe()

Unnamed: 0,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,precision@3,precision@5,precision@10,...,recall@10,recall@15,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@15,r-precision@100,time_eval
count,9,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,...,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0
mean,2023-06-15 00:00:00,0.076211,0.01222,5.555556,4.0,4.832275,0.530423,0.342681,0.283492,0.158413,...,0.958333,1.0,1.0,0.835097,0.826058,0.849868,0.865741,0.865741,0.865741,0.123969
min,2023-05-18 00:00:00,0.001066,0.004224,1.0,1.0,1.0,0.166667,0.166667,0.166667,0.1,...,0.625,1.0,1.0,0.444444,0.625,0.625,0.625,0.625,0.625,0.116689
25%,2023-06-01 00:00:00,0.001128,0.00632,3.0,3.0,3.0,0.25,0.266667,0.2,0.1,...,1.0,1.0,1.0,0.571429,0.666667,0.666667,0.666667,0.666667,0.666667,0.123197
50%,2023-06-15 00:00:00,0.001489,0.006961,4.0,3.0,3.857143,0.5,0.333333,0.2,0.133333,...,1.0,1.0,1.0,1.0,0.761905,0.857143,1.0,1.0,1.0,0.124401
75%,2023-06-29 00:00:00,0.004759,0.008711,7.0,5.0,6.0,0.8,0.333333,0.333333,0.166667,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.126045
max,2023-07-13 00:00:00,0.660049,0.051481,16.0,8.0,12.5,1.0,0.761905,0.571429,0.3,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.127826
std,,0.218972,0.014872,4.503085,2.179449,3.363791,0.311927,0.179694,0.132528,0.079794,...,0.125,0.0,0.0,0.249394,0.16914,0.161313,0.168966,0.168966,0.168966,0.003226


### Results of using best hparams in next fold

Kind of like the cvtt from [the LightGCN notebook](./11_microsoft_tuning.ipynb).

In [13]:
next_hparams_idx = best_hparams.index.to_frame(False)
next_hparams_idx['fold'] = next_hparams_idx['fold'].shift(-1)
next_hparams_idx = next_hparams_idx.dropna()
assert len(next_hparams_idx) == len(best_hparams)-1
next_hparams = mdfi.set_index(['fold', 'k', 'window_size']).loc[pd.MultiIndex.from_frame(next_hparams_idx)]
paths.save_model_results(next_hparams, 'itemknn-best-test', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE, K_RECOMMENDATIONS)
next_hparams[display_columns]

Saved dataframe into /home/daviddavo/recsys4daos/data/output/Index Coop/models/itemknn-best-test_W-THU_normalize.parquet


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,time_train,avg_recs,precision@5,precision@10,precision@100,ndcg@5,ndcg@10,ndcg@100,map@5,map@10,map@100,recall@5,recall@10,recall@100,r-precision@5,r-precision@10,r-precision@100
fold,k,window_size,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2023-05-18,1,7d,0.001066,12.5,0.166667,0.133333,0.02,0.218446,0.291767,0.375576,0.241667,0.291204,0.332199,0.375,0.625,1.0,0.625,0.625,0.625
2023-05-25,1,7d,0.001093,3.8,0.28,0.14,0.014,0.323252,0.323252,0.323252,0.516667,0.516667,0.516667,1.0,1.0,1.0,0.666667,0.666667,0.666667
2023-06-01,6,7d,0.005961,3.0,0.2,0.1,0.01,0.605155,0.605155,0.605155,0.875,0.875,0.875,1.0,1.0,1.0,1.0,1.0,1.0
2023-06-08,6,14d,0.001209,2.0,0.2,0.1,0.01,0.723197,0.723197,0.723197,0.625,0.625,0.625,1.0,1.0,1.0,1.0,1.0,1.0
2023-06-15,3,21d,0.003908,5.0,0.4,0.2,0.02,0.371449,0.371449,0.371449,0.398611,0.398611,0.398611,1.0,1.0,1.0,0.333333,0.333333,0.333333
2023-06-22,9,10YE,0.663973,1.0,0.2,0.1,0.01,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2023-06-29,9,90d,0.010918,6.0,0.2,0.1,0.01,0.408765,0.408765,0.408765,0.225,0.225,0.225,1.0,1.0,1.0,,,
2023-07-06,2,60d,0.004225,3.888889,0.555556,0.277778,0.027778,0.880388,0.880388,0.880388,0.82716,0.82716,0.82716,1.0,1.0,1.0,0.8125,0.8125,0.8125
2023-07-13,6,7d,0.005923,6.333333,0.4,0.3,0.03,0.5,0.589052,0.589052,0.666667,0.722222,0.722222,0.571429,1.0,1.0,0.857143,1.0,1.0


In [14]:
next_hparams.describe()

Unnamed: 0,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,precision@3,precision@5,precision@10,...,recall@10,recall@15,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@15,r-precision@100,time_eval
count,9,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,...,9.0,9.0,9.0,9.0,9.0,8.0,8.0,8.0,8.0,9.0
mean,2023-06-15 00:00:00,0.077586,0.012283,5.555556,4.0,4.835802,0.411111,0.314609,0.289136,0.161235,...,0.958333,1.0,1.0,0.612875,0.676587,0.78683,0.804688,0.804688,0.804688,0.121663
min,2023-05-18 00:00:00,0.001066,0.004241,1.0,1.0,1.0,0.0,0.0,0.166667,0.1,...,0.625,1.0,1.0,0.0,0.0,0.333333,0.333333,0.333333,0.333333,0.108739
25%,2023-06-01 00:00:00,0.001209,0.005608,3.0,3.0,3.0,0.166667,0.25,0.2,0.1,...,1.0,1.0,1.0,0.444444,0.625,0.65625,0.65625,0.65625,0.65625,0.118614
50%,2023-06-15 00:00:00,0.004225,0.006569,4.0,3.0,3.888889,0.25,0.333333,0.2,0.133333,...,1.0,1.0,1.0,0.571429,0.714286,0.834821,0.90625,0.90625,0.90625,0.122927
75%,2023-06-29 00:00:00,0.005961,0.008657,7.0,5.0,6.0,0.666667,0.333333,0.4,0.2,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.126474
max,2023-07-13 00:00:00,0.663973,0.049529,16.0,8.0,12.5,1.0,0.703704,0.555556,0.3,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.127826
std,,0.219918,0.014409,4.503085,2.179449,3.362657,0.364101,0.192562,0.133428,0.079506,...,0.125,0.0,0.0,0.417361,0.335215,0.235498,0.246739,0.246739,0.246739,0.006182


# User-based KNN

In [15]:
def testHParamsUserKNN(fold, k: int, window_size=None):
    # Create algorithm
    algo = user_knn.UserUser(
        feedback='implicit', # VERY IMPORTANT
        min_sim=0,
        # min_nbrs=0,
        nnbrs=k,  # the maximum number of neighbors for scoring each item (None for unlimited)
    )
    return test_with_hparams_lenskit(algo, all_folds[fold], K_RECOMMENDATIONS, window_size)

pd.Series(testHParamsUserKNN(last_folds_idx[-1],5,'14d'))

fold_t             2023-07-13 00:00:00
time_train                    0.596582
time_rec                      0.831525
open_proposals                       7
min_recs                             6
avg_recs                           6.5
precision@1                       0.25
precision@3                   0.333333
precision@5                       0.35
precision@10                      0.25
precision@15                  0.166667
precision@100                    0.025
ndcg@1                            0.25
ndcg@3                        0.407732
ndcg@5                        0.504446
ndcg@10                       0.593497
ndcg@15                       0.593497
ndcg@100                      0.593497
map@1                             0.25
map@3                            0.375
map@5                            0.425
map@10                        0.466667
map@15                        0.466667
map@100                       0.466667
recall@1                      0.035714
recall@3                 

## Exploring hparams

In [16]:
results = explore_hparams(
    testHParamsUserKNN, 
    ParameterGrid({
        'fold': last_folds_idx,
        'k': ITEMKNN_Ks, # ,20,25,30,35,40,45,50,60,70,80,90,100],
        'window_size': WINDOW_SIZES,
    }),
    paths.hparams_progress('userknn', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE),
)
mdfu = pd.DataFrame(results)
mdfu

Restored checkpoint from ../.cache/Index Coop/hparams-userknn_W-THU_normalize.pkl with 770 results


  0%|          | 0/770 [00:00<?, ?it/s]

Unnamed: 0,fold,k,window_size,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,...,recall@10,recall@15,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@15,r-precision@100,time_eval
0,2023-05-11,1,7d,2023-05-11,0.000391,0.007888,10,5,7.000000,0.000000,...,1.0,1.0,1.0,0.000000,0.327381,0.544643,0.657738,0.657738,0.657738,0.121915
1,2023-05-11,1,14d,2023-05-11,0.000393,0.006763,10,5,7.333333,0.000000,...,1.0,1.0,1.0,0.000000,0.373016,0.662698,0.765873,0.765873,0.765873,0.118877
2,2023-05-11,1,21d,2023-05-11,0.000459,0.006931,10,5,7.333333,0.111111,...,1.0,1.0,1.0,0.166667,0.373016,0.662698,0.765873,0.765873,0.765873,0.126114
3,2023-05-11,1,30d,2023-05-11,0.000473,0.007261,10,5,7.333333,0.111111,...,1.0,1.0,1.0,0.166667,0.325397,0.615079,0.765873,0.765873,0.765873,0.126813
4,2023-05-11,1,60d,2023-05-11,0.000901,0.008200,10,5,7.333333,0.111111,...,1.0,1.0,1.0,0.166667,0.325397,0.559524,0.710317,0.710317,0.710317,0.127068
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
765,2023-07-13,15,21d,2023-07-13,0.000326,0.003396,7,6,6.500000,0.250000,...,1.0,1.0,1.0,0.142857,0.428571,0.714286,1.000000,1.000000,1.000000,0.124187
766,2023-07-13,15,30d,2023-07-13,0.000372,0.003646,7,6,6.500000,0.250000,...,1.0,1.0,1.0,0.142857,0.428571,0.714286,1.000000,1.000000,1.000000,0.124124
767,2023-07-13,15,60d,2023-07-13,0.000520,0.003958,7,6,6.500000,0.250000,...,1.0,1.0,1.0,0.142857,0.428571,0.714286,1.000000,1.000000,1.000000,0.124299
768,2023-07-13,15,90d,2023-07-13,0.000701,0.004358,7,6,6.500000,0.250000,...,1.0,1.0,1.0,0.142857,0.428571,0.714286,1.000000,1.000000,1.000000,0.123999


### Best overall hparams

In [17]:
display_columns = ['time_train', 'avg_recs'] + [ c for c in mdfu.columns if c.endswith('@5') or c.endswith('@10') or c.endswith('@100') ]
overall_hparams = mdfu[mdfu['fold'] > last_folds_idx[0]].groupby(['window_size', 'k']).mean().sort_values(OPTIM_METRIC, ascending=False)
overall_hparams[display_columns]

Unnamed: 0_level_0,Unnamed: 1_level_0,time_train,avg_recs,precision@5,precision@10,precision@100,ndcg@5,ndcg@10,ndcg@100,map@5,map@10,map@100,recall@5,recall@10,recall@100,r-precision@5,r-precision@10,r-precision@100
window_size,k,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
7d,3,0.000314,4.832275,0.257566,0.145450,0.015841,0.503266,0.523603,0.538191,0.581041,0.594312,0.602384,0.855159,0.939815,1.0,0.801020,0.821429,0.821429
7d,4,0.000316,4.832275,0.257566,0.145450,0.015841,0.501265,0.521602,0.536190,0.578726,0.591997,0.600070,0.855159,0.939815,1.0,0.801020,0.821429,0.821429
7d,5,0.000319,4.832275,0.257566,0.145450,0.015841,0.501265,0.521602,0.536190,0.578726,0.591997,0.600070,0.855159,0.939815,1.0,0.801020,0.821429,0.821429
7d,6,0.000338,4.832275,0.257566,0.145450,0.015841,0.501265,0.521602,0.536190,0.578726,0.591997,0.600070,0.855159,0.939815,1.0,0.801020,0.821429,0.821429
7d,7,0.000330,4.832275,0.257566,0.145450,0.015841,0.501265,0.521602,0.536190,0.578726,0.591997,0.600070,0.855159,0.939815,1.0,0.801020,0.821429,0.821429
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21d,7,0.000376,4.861111,0.269444,0.150463,0.016528,0.531951,0.553684,0.570341,0.495216,0.506570,0.515053,0.853175,0.930556,1.0,0.670493,0.829861,0.829861
21d,6,0.000384,4.861111,0.269444,0.150463,0.016528,0.531951,0.553684,0.570341,0.495216,0.506570,0.515053,0.853175,0.930556,1.0,0.670493,0.829861,0.829861
21d,5,0.000371,4.861111,0.269444,0.150463,0.016528,0.531951,0.553684,0.570341,0.495216,0.506570,0.515053,0.853175,0.930556,1.0,0.670493,0.829861,0.829861
21d,15,0.000377,4.861111,0.269444,0.150463,0.016528,0.531951,0.553684,0.570341,0.495216,0.506570,0.515053,0.853175,0.930556,1.0,0.670493,0.829861,0.829861


In [18]:
best_avg_hparams = mdfu.set_index(['window_size', 'k']).loc[overall_hparams.iloc[0].name].reset_index().set_index(['fold', 'window_size', 'k'])
paths.save_model_results(best_avg_hparams, 'userknn-best-avg', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE, K_RECOMMENDATIONS)
best_avg_hparams

Saved dataframe into /home/daviddavo/recsys4daos/data/output/Index Coop/models/userknn-best-avg_W-THU_normalize.parquet


  best_avg_hparams = mdfu.set_index(['window_size', 'k']).loc[overall_hparams.iloc[0].name].reset_index().set_index(['fold', 'window_size', 'k'])


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,precision@3,precision@5,precision@10,...,recall@10,recall@15,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@15,r-precision@100,time_eval
fold,window_size,k,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
2023-05-11,7d,3,2023-05-11,0.000381,0.006081,10,5,7.0,0.125,0.291667,0.45,0.325,...,1.0,1.0,1.0,0.25,0.40873,0.615079,0.765873,0.765873,0.765873,0.126749
2023-05-18,7d,3,2023-05-18,0.000364,0.004693,16,8,12.5,0.166667,0.055556,0.066667,0.083333,...,0.458333,1.0,1.0,0.25,0.25,0.25,0.25,0.25,0.25,0.12793
2023-05-25,7d,3,2023-05-25,0.000304,0.003865,4,3,3.8,0.2,0.266667,0.28,0.14,...,1.0,1.0,1.0,1.0,0.666667,0.666667,0.666667,0.666667,0.666667,0.124829
2023-06-01,7d,3,2023-06-01,0.000308,0.003183,3,3,3.0,0.75,0.333333,0.2,0.1,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.123799
2023-06-08,7d,3,2023-06-08,0.000352,0.003273,2,2,2.0,0.25,0.333333,0.2,0.1,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.120989
2023-06-15,7d,3,2023-06-15,0.00031,0.002081,5,5,5.0,0.0,0.166667,0.2,0.1,...,1.0,1.0,1.0,0.0,,,,,,0.11751
2023-06-22,7d,3,2023-06-22,0.000296,0.002568,1,1,1.0,1.0,0.333333,0.2,0.1,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.11628
2023-06-29,7d,3,2023-06-29,0.00029,0.0019,8,5,6.0,0.0,0.0,0.2,0.1,...,1.0,1.0,1.0,0.0,0.0,,,,,0.107982
2023-07-06,7d,3,2023-07-06,0.000284,0.005159,4,3,3.857143,0.857143,0.761905,0.571429,0.285714,...,1.0,1.0,1.0,0.444444,0.761905,0.833333,0.833333,0.833333,0.833333,0.124607
2023-07-13,7d,3,2023-07-13,0.000318,0.002759,7,6,6.333333,0.666667,0.444444,0.4,0.3,...,1.0,1.0,1.0,0.571429,0.714286,0.857143,1.0,1.0,1.0,0.124782


### Best hparams by fold

In [19]:
print("Best hyperparams by fold", OPTIM_METRIC)
best_hparams = mdfu.sort_values(OPTIM_METRIC,ascending=False).drop_duplicates(['fold'], keep='first').sort_values('fold').set_index(['fold', 'k', 'window_size'])
paths.save_model_results(best_hparams, 'userknn-best-val', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE, K_RECOMMENDATIONS)
best_hparams[display_columns]

Best hyperparams by fold map@10
Saved dataframe into /home/daviddavo/recsys4daos/data/output/Index Coop/models/userknn-best-val_W-THU_normalize.parquet


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,time_train,avg_recs,precision@5,precision@10,precision@100,ndcg@5,ndcg@10,ndcg@100,map@5,map@10,map@100,recall@5,recall@10,recall@100,r-precision@5,r-precision@10,r-precision@100
fold,k,window_size,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2023-05-11,15,7d,0.000351,7.0,0.45,0.325,0.0325,0.462338,0.536337,0.536337,0.443333,0.537061,0.537061,0.782738,1.0,1.0,0.58631,0.699405,0.699405
2023-05-18,1,7d,0.000306,12.5,0.1,0.1,0.02,0.112153,0.196216,0.313894,0.104167,0.154167,0.219242,0.208333,0.5,1.0,0.375,0.375,0.375
2023-05-25,1,14d,0.000409,3.857143,0.314286,0.157143,0.015714,0.484861,0.484861,0.484861,0.56746,0.56746,0.56746,1.0,1.0,1.0,0.666667,0.666667,0.666667
2023-06-01,1,14d,0.000396,3.0,0.2,0.1,0.01,0.771822,0.771822,0.771822,0.9,0.9,0.9,1.0,1.0,1.0,1.0,1.0,1.0
2023-06-08,8,14d,0.000335,2.0,0.2,0.1,0.01,0.723197,0.723197,0.723197,0.625,0.625,0.625,1.0,1.0,1.0,1.0,1.0,1.0
2023-06-15,8,10YE,0.012268,5.0,0.333333,0.166667,0.016667,0.816982,0.816982,0.816982,0.737963,0.737963,0.737963,1.0,1.0,1.0,0.733333,0.733333,0.733333
2023-06-22,2,21d,0.000339,1.0,0.2,0.1,0.01,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2023-06-29,1,30d,0.000395,6.0,0.2,0.1,0.01,0.408765,0.408765,0.408765,0.225,0.225,0.225,1.0,1.0,1.0,,,
2023-07-06,8,7d,0.000338,3.857143,0.571429,0.285714,0.028571,0.735557,0.735557,0.735557,0.912698,0.912698,0.912698,1.0,1.0,1.0,0.833333,0.833333,0.833333
2023-07-13,10,7d,0.000288,6.333333,0.4,0.3,0.03,0.5,0.589052,0.589052,0.666667,0.722222,0.722222,0.571429,1.0,1.0,0.857143,1.0,1.0


In [20]:
best_hparams.tail(len(best_hparams)-1).describe()[display_columns]

Unnamed: 0,time_train,avg_recs,precision@5,precision@10,precision@100,ndcg@5,ndcg@10,ndcg@100,map@5,map@10,map@100,recall@5,recall@10,recall@100,r-precision@5,r-precision@10,r-precision@100
count,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,8.0,8.0,8.0
mean,0.001675,4.838624,0.279894,0.156614,0.016772,0.617037,0.636272,0.649348,0.637662,0.64939,0.656621,0.864418,0.944444,1.0,0.808185,0.826042,0.826042
min,0.000288,1.0,0.1,0.1,0.01,0.112153,0.196216,0.313894,0.104167,0.154167,0.219242,0.208333,0.5,1.0,0.375,0.375,0.375
25%,0.000335,3.0,0.2,0.1,0.01,0.484861,0.484861,0.484861,0.56746,0.56746,0.56746,1.0,1.0,1.0,0.716667,0.716667,0.716667
50%,0.000339,3.857143,0.2,0.1,0.015714,0.723197,0.723197,0.723197,0.666667,0.722222,0.722222,1.0,1.0,1.0,0.845238,0.916667,0.916667
75%,0.000396,6.0,0.333333,0.166667,0.02,0.771822,0.771822,0.771822,0.9,0.9,0.9,1.0,1.0,1.0,1.0,1.0,1.0
max,0.012268,12.5,0.571429,0.3,0.03,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
std,0.003973,3.361652,0.142017,0.081665,0.007978,0.26584,0.24272,0.217958,0.305502,0.295989,0.282884,0.283938,0.166667,0.0,0.216032,0.226316,0.226316


### Results of using best hparams in next fold

Kind of like the cvtt from [the LightGCN notebook](./11_microsoft_tuning.ipynb)

In [21]:
next_hparams_idx = best_hparams.index.to_frame(False)
next_hparams_idx['fold'] = next_hparams_idx['fold'].shift(-1)
next_hparams_idx = next_hparams_idx.dropna()
assert len(next_hparams_idx) == len(best_hparams)-1
next_hparams = mdfu.set_index(['fold', 'k', 'window_size']).loc[pd.MultiIndex.from_frame(next_hparams_idx)]
paths.save_model_results(next_hparams, 'userknn-best-test', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE, K_RECOMMENDATIONS)
next_hparams

Saved dataframe into /home/daviddavo/recsys4daos/data/output/Index Coop/models/userknn-best-test_W-THU_normalize.parquet


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,precision@3,precision@5,precision@10,...,recall@10,recall@15,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@15,r-precision@100,time_eval
fold,k,window_size,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
2023-05-18,15,7d,2023-05-18,0.000299,0.004586,16,8,12.5,0.0,0.055556,0.066667,0.083333,...,0.458333,1.0,1.0,0.0,0.25,0.25,0.25,0.25,0.25,0.119759
2023-05-25,1,7d,2023-05-25,0.000353,0.00396,4,3,3.8,0.2,0.333333,0.28,0.14,...,1.0,1.0,1.0,1.0,0.666667,0.666667,0.666667,0.666667,0.666667,0.12549
2023-06-01,1,14d,2023-06-01,0.000396,0.004065,3,3,3.0,0.8,0.333333,0.2,0.1,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.124058
2023-06-08,1,14d,2023-06-08,0.000345,0.003378,2,2,2.0,0.25,0.333333,0.2,0.1,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.121692
2023-06-15,8,14d,2023-06-15,0.000384,0.002187,5,5,5.0,0.0,0.166667,0.2,0.1,...,1.0,1.0,1.0,0.0,,,,,,0.117518
2023-06-22,8,10YE,2023-06-22,0.012245,0.038744,1,1,1.0,1.0,0.333333,0.2,0.1,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.116443
2023-06-29,2,21d,2023-06-29,0.000325,0.002135,8,5,6.0,0.0,0.0,0.2,0.1,...,1.0,1.0,1.0,0.0,0.0,,,,,0.107734
2023-07-06,1,30d,2023-07-06,0.000351,0.006262,4,3,3.875,0.75,0.75,0.575,0.2875,...,1.0,1.0,1.0,0.444444,0.75,0.8125,0.8125,0.8125,0.8125,0.124756
2023-07-13,8,7d,2023-07-13,0.00029,0.002573,7,6,6.333333,0.666667,0.444444,0.4,0.3,...,1.0,1.0,1.0,0.571429,0.714286,0.857143,1.0,1.0,1.0,0.124662


In [22]:
next_hparams.describe()

Unnamed: 0,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,precision@3,precision@5,precision@10,...,recall@10,recall@15,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@15,r-precision@100,time_eval
count,9,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,...,9.0,9.0,9.0,9.0,8.0,7.0,7.0,7.0,7.0,9.0
mean,2023-06-15 00:00:00,0.001665,0.007543,5.555556,4.0,4.834259,0.407407,0.305556,0.257963,0.145648,...,0.939815,1.0,1.0,0.557319,0.672619,0.798044,0.818452,0.818452,0.818452,0.120234
min,2023-05-18 00:00:00,0.00029,0.002135,1.0,1.0,1.0,0.0,0.0,0.066667,0.083333,...,0.458333,1.0,1.0,0.0,0.0,0.25,0.25,0.25,0.25,0.107734
25%,2023-06-01 00:00:00,0.000325,0.002573,3.0,3.0,3.0,0.0,0.166667,0.2,0.1,...,1.0,1.0,1.0,0.0,0.5625,0.739583,0.739583,0.739583,0.739583,0.117518
50%,2023-06-15 00:00:00,0.000351,0.00396,4.0,3.0,3.875,0.25,0.333333,0.2,0.1,...,1.0,1.0,1.0,0.571429,0.732143,0.857143,1.0,1.0,1.0,0.121692
75%,2023-06-29 00:00:00,0.000384,0.004586,7.0,5.0,6.0,0.75,0.333333,0.28,0.14,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.124662
max,2023-07-13 00:00:00,0.012245,0.038744,16.0,8.0,12.5,1.0,0.75,0.575,0.3,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.12549
std,,0.003967,0.011773,4.503085,2.179449,3.363149,0.396172,0.222222,0.147718,0.085352,...,0.180556,0.0,0.0,0.464841,0.370389,0.271929,0.282268,0.282268,0.282268,0.00573
