In [1]:
import datetime as dt

import pandas as pd
import numpy as np
from lenskit.algorithms import item_knn, user_knn
from sklearn.model_selection import ParameterGrid

from recsys4daos.datasets import to_lenskit
from recsys4daos.model_selection import cvtt_open, explore_hparams
from recsys4daos.evaluation import test_with_hparams_lenskit

import paths

  from tqdm.autonotebook import tqdm


# Parameters

In [2]:
# Dataset config
ORG_NAME = 'Decentraland'
SPLITS_FREQ = 'W-THU'  # Split weekly
LAST_FOLDS = 20  # Use just last 10 splits
SPLITS_NORMALIZE = True

# Evaluation
K_RECOMMENDATIONS: list[int] = [1,3,5,10,15,100]
SPLITS_FREQ: str = 'W-THU' # split weekly
SPLITS_NORMALIZE = True # Wether or not to move everything to 00:00
LAST_FOLDS = 10 # Use just last 10 splits
LAST_FOLD_DATE_STR: str = None

# Search space config
WINDOW_SIZES = ['7d', '14d', '21d', '30d', '60d', '90d', '10YE']
ITEMKNN_Ks = [1,2,3,4,5,6,7,8,9,10,15]

OPTIM_METRIC = 'map@10'

In [3]:
# Parameters
EXECUTION_ID = "2024-09-04T10:00"
ORG_NAME = "SharkDAO"
SPLITS_FREQ = "3d"
LAST_FOLDS = 10
SPLITS_NORMALIZE = True
LAST_FOLD_DATE_STR = "2022-04-27"


# Load the dataset

In [4]:
dfp = paths.load_proposals(ORG_NAME)
dfv = paths.load_votes(ORG_NAME)

print(dfp.info())
print(dfv.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 471 entries, 0 to 470
Data columns (total 6 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   id                 471 non-null    object        
 1   author             471 non-null    object        
 2   date               471 non-null    datetime64[us]
 3   start              471 non-null    datetime64[us]
 4   end                471 non-null    datetime64[us]
 5   platform_proposal  471 non-null    object        
dtypes: datetime64[us](3), object(3)
memory usage: 22.2+ KB
None
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7577 entries, 0 to 7576
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   id        7577 non-null   object        
 1   proposal  7577 non-null   object        
 2   voter     7577 non-null   object        
 3   date      7577 non-null   datetime64[us]
dtypes: d

In [5]:
df = to_lenskit(dfv)
df

Unnamed: 0,user,item,timestamp,rating
0,0xde30040413b26d7aa2b6fc4761d80eb35dcf97ad,71fd3375-6164-5810-a77c-2279e0a7a93d,2021-08-09 23:44:27,1
1,0x823b92d6a4b2aed4b15675c7917c9f922ea8adad,71fd3375-6164-5810-a77c-2279e0a7a93d,2021-08-09 23:44:52,1
2,0x397fbd1e27352175cc5b6baf171a30911268823c,71fd3375-6164-5810-a77c-2279e0a7a93d,2021-08-09 23:48:54,1
3,0x40b6df5f6217fb1a1beaf8fdbb9572304aa78c4d,71fd3375-6164-5810-a77c-2279e0a7a93d,2021-08-09 23:49:58,1
4,0xf33364a83b6002d5070ec5fa31f5d9eb93572732,86ab69e0-1e59-5d8a-90d0-33c00a5934b2,2021-08-10 00:09:04,1
...,...,...,...,...
7572,0xe3f27deff96fe178e87559f36cbf868b9e75967d,51c2826f-fced-5e49-8746-5bbed3d189f5,2023-07-19 15:21:54,1
7573,0xbd7dbab9aeb52d6c8d0e80fcebde3af4cc86204a,be6c0ebe-be38-52d6-b766-c55b7489f245,2023-07-20 18:25:20,1
7574,0xbd7dbab9aeb52d6c8d0e80fcebde3af4cc86204a,ac915362-a23d-5498-9f08-38bbbf0a8bbc,2023-07-20 18:26:23,1
7575,0xbd7dbab9aeb52d6c8d0e80fcebde3af4cc86204a,dfa74f32-39e0-5609-8e00-a821f32f8ef2,2023-07-20 18:27:16,1


## Split in folds

In [6]:
all_folds = { f.end:f for f in cvtt_open(
    df, SPLITS_FREQ, dfp.reset_index(), remove_not_in_train_col='item', col_item='item', last_fold=LAST_FOLD_DATE_STR,
)}
last_folds_idx = list(all_folds.keys())[-LAST_FOLDS:]
last_folds_idx

[Timestamp('2022-03-31 00:00:00'),
 Timestamp('2022-04-03 00:00:00'),
 Timestamp('2022-04-06 00:00:00'),
 Timestamp('2022-04-09 00:00:00'),
 Timestamp('2022-04-12 00:00:00'),
 Timestamp('2022-04-15 00:00:00'),
 Timestamp('2022-04-18 00:00:00'),
 Timestamp('2022-04-21 00:00:00'),
 Timestamp('2022-04-24 00:00:00'),
 Timestamp('2022-04-27 00:00:00')]

# Item-based KNN

In [7]:
def testHParamsItemKNN(fold, k: int, window_size=None):
    # Create algorithm
    algo = item_knn.ItemItem(
        feedback='implicit', # VERY IMPORTANT
        min_sim=0,
        # min_nbrs=0,
        nnbrs=k,  # the maximum number of neighbors for scoring each item (None for unlimited)
    )
    return test_with_hparams_lenskit(algo, all_folds[fold], K_RECOMMENDATIONS, window_size)

pd.Series(testHParamsItemKNN(last_folds_idx[-1],5,'14d'))

Numba is using threading layer omp - consider TBB


found 1 potential runtime problems - see https://boi.st/lkpy-perf


  b = blocks[bi]


fold_t             2022-04-27 00:00:00
time_train                    5.564041
time_rec                      0.147546
open_proposals                       5
min_recs                             4
avg_recs                      4.428571
precision@1                   0.428571
precision@3                    0.47619
precision@5                   0.428571
precision@10                  0.214286
precision@15                  0.142857
precision@100                 0.021429
ndcg@1                            0.25
ndcg@3                        0.368918
ndcg@5                        0.436778
ndcg@10                       0.436778
ndcg@15                       0.436778
ndcg@100                      0.436778
map@1                         0.428571
map@3                         0.571429
map@5                         0.659524
map@10                        0.659524
map@15                        0.659524
map@100                       0.659524
recall@1                      0.214286
recall@3                 

## Exploring hparams

In [8]:
results = explore_hparams(
    testHParamsItemKNN, 
    ParameterGrid({
    'fold': last_folds_idx,
    'k': ITEMKNN_Ks, # ,20,25,30,35,40,45,50,60,70,80,90,100],
    'window_size': WINDOW_SIZES,
    }),
    paths.hparams_progress('itemknn', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE),
)
mdfi = pd.DataFrame(results)
mdfi

Restored checkpoint from ../.cache/SharkDAO/hparams-itemknn_3d_normalize.pkl with 1540 results


  0%|          | 0/770 [00:00<?, ?it/s]























Unnamed: 0,fold,k,window_size,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,...,recall@10,recall@15,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@15,r-precision@100,time_eval
0,2022-04-03,1,7d,2022-04-03,0.004139,0.024563,3,1.0,2.375000,0.312500,...,1.0,1.0,1.0,0.866667,1.000000,1.000000,1.000000,1.000000,1.000000,0.126682
1,2022-04-03,1,14d,2022-04-03,0.001033,0.020841,3,1.0,2.500000,0.250000,...,1.0,1.0,1.0,0.866667,1.000000,1.000000,1.000000,1.000000,1.000000,0.125199
2,2022-04-03,1,21d,2022-04-03,0.001019,0.021199,3,1.0,2.523810,0.285714,...,1.0,1.0,1.0,0.777778,1.000000,1.000000,1.000000,1.000000,1.000000,0.124885
3,2022-04-03,1,30d,2022-04-03,0.000984,0.024109,3,1.0,2.583333,0.291667,...,1.0,1.0,1.0,0.714286,1.000000,1.000000,1.000000,1.000000,1.000000,0.125213
4,2022-04-03,1,60d,2022-04-03,0.001178,0.026592,3,1.0,2.615385,0.307692,...,1.0,1.0,1.0,0.750000,0.944444,0.944444,0.944444,0.944444,0.944444,0.125296
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
765,2022-03-31,15,21d,2022-03-31,0.001307,0.006926,1,1.0,1.000000,1.000000,...,1.0,1.0,1.0,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,0.124046
766,2022-03-31,15,30d,2022-03-31,0.001093,0.008493,1,1.0,1.000000,1.000000,...,1.0,1.0,1.0,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,0.123198
767,2022-03-31,15,60d,2022-03-31,0.001246,0.011241,1,1.0,1.000000,1.000000,...,1.0,1.0,1.0,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,0.123539
768,2022-03-31,15,90d,2022-03-31,0.001121,0.013848,1,1.0,1.000000,1.000000,...,1.0,1.0,1.0,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,0.119887


### Best overall hparams

In [9]:
display_columns = ['time_train', 'avg_recs'] + [ c for c in mdfi.columns if c.endswith('@5') or c.endswith('@10') or c.endswith('@100') ]
overall_hparams = mdfi[mdfi['fold'] > last_folds_idx[0]].groupby(['window_size', 'k']).mean().sort_values(OPTIM_METRIC, ascending=False)
overall_hparams[display_columns]

Unnamed: 0_level_0,Unnamed: 1_level_0,time_train,avg_recs,precision@5,precision@10,precision@100,ndcg@5,ndcg@10,ndcg@100,map@5,map@10,map@100,recall@5,recall@10,recall@100,r-precision@5,r-precision@10,r-precision@100
window_size,k,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
7d,8,0.003240,3.297884,0.336984,0.171314,0.017131,0.421318,0.423273,0.423273,0.697206,0.699852,0.699852,0.992651,1.0,1.0,0.863492,0.866138,0.866138
7d,7,0.002972,3.297884,0.336984,0.171314,0.017131,0.421318,0.423273,0.423273,0.697206,0.699852,0.699852,0.992651,1.0,1.0,0.863492,0.866138,0.866138
7d,6,0.002128,3.297884,0.336984,0.171314,0.017131,0.420749,0.422705,0.422705,0.696177,0.698823,0.698823,0.992651,1.0,1.0,0.863492,0.866138,0.866138
7d,15,0.009558,3.297884,0.336984,0.171314,0.017131,0.420106,0.422061,0.422061,0.694120,0.696765,0.696765,0.992651,1.0,1.0,0.863492,0.866138,0.866138
7d,10,0.003299,3.297884,0.336984,0.171314,0.017131,0.420106,0.422061,0.422061,0.694120,0.696765,0.696765,0.992651,1.0,1.0,0.863492,0.866138,0.866138
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60d,10,0.005646,3.503098,0.344131,0.177158,0.017716,0.601850,0.606444,0.606444,0.655417,0.659275,0.659275,0.981481,1.0,1.0,0.804475,0.811420,0.811420
90d,5,0.001973,3.529936,0.341121,0.175368,0.017537,0.625428,0.630021,0.630021,0.653859,0.657599,0.657599,0.982550,1.0,1.0,0.813580,0.819753,0.819753
90d,6,0.003331,3.529936,0.341121,0.175368,0.017537,0.625428,0.630021,0.630021,0.653859,0.657599,0.657599,0.982550,1.0,1.0,0.813580,0.819753,0.819753
90d,3,0.004021,3.529936,0.341121,0.175368,0.017537,0.624488,0.629082,0.629082,0.651776,0.655515,0.655515,0.982550,1.0,1.0,0.813580,0.819753,0.819753


Now let's see the behaviour in each fold

In [10]:
best_avg_hparams = mdfi.set_index(['window_size', 'k']).loc[overall_hparams.iloc[0].name].reset_index().set_index(['fold', 'window_size', 'k'])
paths.save_model_results(best_avg_hparams, 'itemknn-best-avg', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE, K_RECOMMENDATIONS)
best_avg_hparams

Saved dataframe into /home/daviddavo/recsys4daos/data/output/SharkDAO/models/itemknn-best-avg_3d_normalize.parquet


  best_avg_hparams = mdfi.set_index(['window_size', 'k']).loc[overall_hparams.iloc[0].name].reset_index().set_index(['fold', 'window_size', 'k'])


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,precision@3,precision@5,precision@10,...,recall@10,recall@15,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@15,r-precision@100,time_eval
fold,window_size,k,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
2022-04-03,7d,8,2022-04-03,0.001734,0.016637,3,1.0,2.375,0.25,0.375,0.225,0.1125,...,1.0,1.0,1.0,0.833333,1.0,1.0,1.0,1.0,1.0,0.124809
2022-04-06,7d,8,2022-04-06,0.001253,0.006707,2,1.0,1.8,0.4,0.4,0.24,0.12,...,1.0,1.0,1.0,0.75,1.0,1.0,1.0,1.0,1.0,0.124857
2022-04-09,7d,8,2022-04-09,0.000937,0.009776,2,1.0,1.625,0.875,0.458333,0.275,0.1375,...,1.0,1.0,1.0,0.785714,1.0,1.0,1.0,1.0,1.0,0.125671
2022-04-12,7d,8,2022-04-12,0.001225,0.011754,5,2.0,4.0,0.5,0.566667,0.4,0.2,...,1.0,1.0,1.0,0.433333,0.761905,0.761905,0.761905,0.761905,0.761905,0.124518
2022-04-15,7d,8,2022-04-15,0.001122,0.011121,4,1.0,3.0,0.333333,0.296296,0.266667,0.133333,...,1.0,1.0,1.0,0.611111,1.0,1.0,1.0,1.0,1.0,0.124128
2022-04-18,7d,8,2022-04-18,0.005674,0.005656,3,3.0,3.0,0.0,0.583333,0.35,0.175,...,1.0,1.0,1.0,0.0,0.5,0.5,0.5,0.5,0.5,0.117611
2022-04-21,7d,8,2022-04-21,0.001014,0.011123,6,1.0,4.333333,0.777778,0.481481,0.466667,0.244444,...,1.0,1.0,1.0,0.652381,0.814286,0.97619,1.0,1.0,1.0,0.124864
2022-04-24,7d,8,2022-04-24,0.015134,0.01603,6,4.0,5.214286,0.571429,0.428571,0.342857,0.185714,...,1.0,1.0,1.0,0.604167,0.7,0.7,0.7,0.7,0.7,0.124979
2022-04-27,7d,8,2022-04-27,0.001063,0.008552,5,4.0,4.333333,0.5,0.555556,0.466667,0.233333,...,1.0,1.0,1.0,0.5,0.708333,0.833333,0.833333,0.833333,0.833333,0.12504
2022-03-31,7d,8,2022-03-31,0.005616,0.001144,1,,,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.07055


### Best hparams by fold

This are the parameters used to check which model is the best (validation)

In [11]:
print("Best hyperparams by fold", OPTIM_METRIC)
best_hparams = mdfi.sort_values(OPTIM_METRIC,ascending=False).drop_duplicates(['fold'], keep='first').sort_values('fold').set_index(['fold', 'k', 'window_size'])
paths.save_model_results(best_hparams, 'itemknn-best-val', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE, K_RECOMMENDATIONS)
best_hparams[display_columns]

Best hyperparams by fold map@10
Saved dataframe into /home/daviddavo/recsys4daos/data/output/SharkDAO/models/itemknn-best-val_3d_normalize.parquet


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,time_train,avg_recs,precision@5,precision@10,precision@100,ndcg@5,ndcg@10,ndcg@100,map@5,map@10,map@100,recall@5,recall@10,recall@100,r-precision@5,r-precision@10,r-precision@100
fold,k,window_size,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2022-03-31,15,10YE,0.004194,1.0,0.2,0.1,0.01,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2022-04-03,1,7d,0.004139,2.375,0.225,0.1125,0.01125,0.328078,0.328078,0.328078,0.59375,0.59375,0.59375,1.0,1.0,1.0,1.0,1.0,1.0
2022-04-06,8,90d,0.001265,1.875,0.25,0.125,0.0125,0.689279,0.689279,0.689279,0.8125,0.8125,0.8125,1.0,1.0,1.0,1.0,1.0,1.0
2022-04-09,4,7d,0.001163,1.625,0.275,0.1375,0.01375,0.476933,0.476933,0.476933,0.9375,0.9375,0.9375,1.0,1.0,1.0,1.0,1.0,1.0
2022-04-12,4,7d,0.001269,4.0,0.4,0.2,0.02,0.347053,0.347053,0.347053,0.715,0.715,0.715,1.0,1.0,1.0,0.761905,0.761905,0.761905
2022-04-15,9,60d,0.001267,3.25,0.266667,0.133333,0.013333,0.685904,0.685904,0.685904,0.576389,0.576389,0.576389,1.0,1.0,1.0,0.9,0.9,0.9
2022-04-18,2,14d,0.001819,3.0,0.325,0.1625,0.01625,0.722629,0.722629,0.722629,0.854167,0.854167,0.854167,1.0,1.0,1.0,0.75,0.75,0.75
2022-04-21,1,7d,0.00109,4.333333,0.466667,0.244444,0.024444,0.521169,0.521169,0.521169,0.827778,0.827778,0.827778,0.981481,1.0,1.0,0.97619,1.0,1.0
2022-04-24,5,7d,0.001123,5.214286,0.342857,0.185714,0.018571,0.572407,0.590003,0.590003,0.68254,0.706349,0.706349,0.952381,1.0,1.0,0.7,0.7,0.7
2022-04-27,7,7d,0.004137,4.333333,0.466667,0.233333,0.023333,0.411799,0.411799,0.411799,0.755556,0.755556,0.755556,1.0,1.0,1.0,0.833333,0.833333,0.833333


In [12]:
best_hparams.tail(len(best_hparams)-1).describe()

Unnamed: 0,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,precision@3,precision@5,precision@10,...,recall@10,recall@15,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@15,r-precision@100,time_eval
count,9,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,...,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0
mean,2022-04-15 00:00:00,0.001919,0.013298,4.0,2.0,3.333995,0.592041,0.458833,0.335317,0.170481,...,1.0,1.0,1.0,0.673214,0.84828,0.880159,0.882804,0.882804,0.882804,0.124633
min,2022-04-03 00:00:00,0.00109,0.008573,2.0,1.0,1.625,0.3125,0.305556,0.225,0.1125,...,1.0,1.0,1.0,0.433333,0.7,0.7,0.7,0.7,0.7,0.122857
25%,2022-04-09 00:00:00,0.001163,0.010274,3.0,1.0,2.375,0.5,0.416667,0.266667,0.133333,...,1.0,1.0,1.0,0.604167,0.75,0.761905,0.761905,0.761905,0.761905,0.123902
50%,2022-04-15 00:00:00,0.001267,0.011831,4.0,1.0,3.25,0.571429,0.458333,0.325,0.1625,...,1.0,1.0,1.0,0.666667,0.814286,0.9,0.9,0.9,0.9,0.124429
75%,2022-04-21 00:00:00,0.001819,0.015026,5.0,3.0,4.333333,0.75,0.541667,0.4,0.2,...,1.0,1.0,1.0,0.785714,1.0,1.0,1.0,1.0,1.0,0.125411
max,2022-04-27 00:00:00,0.004139,0.024563,6.0,4.0,5.214286,0.875,0.566667,0.466667,0.244444,...,1.0,1.0,1.0,0.866667,1.0,1.0,1.0,1.0,1.0,0.126682
std,,0.001276,0.004868,1.581139,1.322876,1.228008,0.182643,0.087731,0.091277,0.04807,...,0.0,0.0,0.0,0.143545,0.128247,0.121592,0.124174,0.124174,0.124174,0.00116


### Results of using best hparams in next fold

Kind of like the cvtt from [the LightGCN notebook](./11_microsoft_tuning.ipynb).

In [13]:
next_hparams_idx = best_hparams.index.to_frame(False)
next_hparams_idx['fold'] = next_hparams_idx['fold'].shift(-1)
next_hparams_idx = next_hparams_idx.dropna()
assert len(next_hparams_idx) == len(best_hparams)-1
next_hparams = mdfi.set_index(['fold', 'k', 'window_size']).loc[pd.MultiIndex.from_frame(next_hparams_idx)]
paths.save_model_results(next_hparams, 'itemknn-best-test', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE, K_RECOMMENDATIONS)
next_hparams[display_columns]

Saved dataframe into /home/daviddavo/recsys4daos/data/output/SharkDAO/models/itemknn-best-test_3d_normalize.parquet


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,time_train,avg_recs,precision@5,precision@10,precision@100,ndcg@5,ndcg@10,ndcg@100,map@5,map@10,map@100,recall@5,recall@10,recall@100,r-precision@5,r-precision@10,r-precision@100
fold,k,window_size,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2022-04-03,15,10YE,0.004111,2.666667,0.253333,0.126667,0.012667,0.556891,0.556891,0.556891,0.505556,0.505556,0.505556,1.0,1.0,1.0,0.857143,0.857143,0.857143
2022-04-06,1,7d,0.00569,1.8,0.24,0.12,0.012,0.389279,0.389279,0.389279,0.7,0.7,0.7,1.0,1.0,1.0,1.0,1.0,1.0
2022-04-09,8,90d,0.003532,1.769231,0.292308,0.146154,0.014615,0.743299,0.743299,0.743299,0.884615,0.884615,0.884615,1.0,1.0,1.0,1.0,1.0,1.0
2022-04-12,4,7d,0.001269,4.0,0.4,0.2,0.02,0.347053,0.347053,0.347053,0.715,0.715,0.715,1.0,1.0,1.0,0.761905,0.761905,0.761905
2022-04-15,4,7d,0.003554,3.0,0.266667,0.133333,0.013333,0.487803,0.487803,0.487803,0.537037,0.537037,0.537037,1.0,1.0,1.0,1.0,1.0,1.0
2022-04-18,9,60d,0.005803,3.0,0.325,0.1625,0.01625,0.572629,0.572629,0.572629,0.604167,0.604167,0.604167,1.0,1.0,1.0,0.6,0.6,0.6
2022-04-21,2,14d,0.019543,4.636364,0.563636,0.309091,0.030909,0.626976,0.626976,0.626976,0.809091,0.809091,0.809091,0.954545,1.0,1.0,0.9375,1.0,1.0
2022-04-24,1,7d,0.001228,5.214286,0.342857,0.185714,0.018571,0.544919,0.562515,0.562515,0.640873,0.664683,0.664683,0.952381,1.0,1.0,0.722222,0.722222,0.722222
2022-04-27,5,7d,0.005777,4.333333,0.466667,0.233333,0.023333,0.395768,0.395768,0.395768,0.718519,0.718519,0.718519,1.0,1.0,1.0,0.833333,0.833333,0.833333


In [14]:
next_hparams.describe()

Unnamed: 0,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,precision@3,precision@5,precision@10,...,recall@10,recall@15,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@15,r-precision@100,time_eval
count,9,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,...,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0
mean,2022-04-15 00:00:00,0.005612,0.015536,4.0,2.0,3.379987,0.466675,0.472234,0.350052,0.179644,...,1.0,1.0,1.0,0.583179,0.818011,0.8569,0.863845,0.863845,0.863845,0.125086
min,2022-04-03 00:00:00,0.001228,0.006715,2.0,1.0,1.769231,0.166667,0.296296,0.24,0.12,...,1.0,1.0,1.0,0.433333,0.6,0.6,0.6,0.6,0.6,0.123787
25%,2022-04-09 00:00:00,0.003532,0.01102,3.0,1.0,2.666667,0.375,0.404762,0.266667,0.133333,...,1.0,1.0,1.0,0.5,0.7125,0.761905,0.761905,0.761905,0.761905,0.124268
50%,2022-04-15 00:00:00,0.004111,0.011831,4.0,1.0,3.0,0.428571,0.487179,0.325,0.1625,...,1.0,1.0,1.0,0.6,0.761905,0.857143,0.857143,0.857143,0.857143,0.124736
75%,2022-04-21 00:00:00,0.005777,0.016167,5.0,3.0,4.333333,0.5,0.555556,0.4,0.2,...,1.0,1.0,1.0,0.666667,1.0,1.0,1.0,1.0,1.0,0.12579
max,2022-04-27 00:00:00,0.019543,0.042967,6.0,4.0,5.214286,0.769231,0.575758,0.563636,0.309091,...,1.0,1.0,1.0,0.75,1.0,1.0,1.0,1.0,1.0,0.127043
std,,0.005512,0.010788,1.581139,1.322876,1.231103,0.188334,0.096726,0.108481,0.061252,...,0.0,0.0,0.0,0.108947,0.151618,0.142146,0.147983,0.147983,0.147983,0.001238


# User-based KNN

In [15]:
def testHParamsUserKNN(fold, k: int, window_size=None):
    # Create algorithm
    algo = user_knn.UserUser(
        feedback='implicit', # VERY IMPORTANT
        min_sim=0,
        # min_nbrs=0,
        nnbrs=k,  # the maximum number of neighbors for scoring each item (None for unlimited)
    )
    return test_with_hparams_lenskit(algo, all_folds[fold], K_RECOMMENDATIONS, window_size)

pd.Series(testHParamsUserKNN(last_folds_idx[-1],5,'14d'))

fold_t             2022-04-27 00:00:00
time_train                    0.596554
time_rec                      0.838941
open_proposals                       5
min_recs                             4
avg_recs                      4.428571
precision@1                   0.428571
precision@3                   0.428571
precision@5                   0.428571
precision@10                  0.214286
precision@15                  0.142857
precision@100                 0.021429
ndcg@1                            0.25
ndcg@3                        0.294227
ndcg@5                        0.418738
ndcg@10                       0.418738
ndcg@15                       0.418738
ndcg@100                      0.418738
map@1                         0.428571
map@3                         0.484127
map@5                         0.626984
map@10                        0.626984
map@15                        0.626984
map@100                       0.626984
recall@1                      0.214286
recall@3                 

## Exploring hparams

In [16]:
results = explore_hparams(
    testHParamsUserKNN, 
    ParameterGrid({
        'fold': last_folds_idx,
        'k': ITEMKNN_Ks, # ,20,25,30,35,40,45,50,60,70,80,90,100],
        'window_size': WINDOW_SIZES,
    }),
    paths.hparams_progress('userknn', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE),
)
mdfu = pd.DataFrame(results)
mdfu

Restored checkpoint from ../.cache/SharkDAO/hparams-userknn_3d_normalize.pkl with 1540 results


  0%|          | 0/770 [00:00<?, ?it/s]























Unnamed: 0,fold,k,window_size,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,...,recall@10,recall@15,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@15,r-precision@100,time_eval
0,2022-04-03,1,7d,2022-04-03,0.000434,0.012839,3,1.0,2.375000,0.187500,...,1.0,1.0,1.0,0.777778,1.000000,1.000000,1.000000,1.000000,1.000000,0.129494
1,2022-04-03,1,14d,2022-04-03,0.000397,0.015548,3,1.0,2.500000,0.200000,...,1.0,1.0,1.0,0.833333,1.000000,1.000000,1.000000,1.000000,1.000000,0.128100
2,2022-04-03,1,21d,2022-04-03,0.000398,0.015339,3,1.0,2.523810,0.190476,...,1.0,1.0,1.0,0.666667,1.000000,1.000000,1.000000,1.000000,1.000000,0.127951
3,2022-04-03,1,30d,2022-04-03,0.000421,0.017465,3,1.0,2.583333,0.208333,...,1.0,1.0,1.0,0.600000,1.000000,1.000000,1.000000,1.000000,1.000000,0.128374
4,2022-04-03,1,60d,2022-04-03,0.000462,0.019070,3,1.0,2.615385,0.230769,...,1.0,1.0,1.0,0.666667,0.928571,0.928571,0.928571,0.928571,0.928571,0.128188
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
765,2022-03-31,15,21d,2022-03-31,0.000394,0.004552,1,1.0,1.000000,1.000000,...,1.0,1.0,1.0,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,0.124298
766,2022-03-31,15,30d,2022-03-31,0.000431,0.005406,1,1.0,1.000000,1.000000,...,1.0,1.0,1.0,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,0.123519
767,2022-03-31,15,60d,2022-03-31,0.000457,0.006983,1,1.0,1.000000,1.000000,...,1.0,1.0,1.0,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,0.124594
768,2022-03-31,15,90d,2022-03-31,0.000470,0.008666,1,1.0,1.000000,1.000000,...,1.0,1.0,1.0,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,0.120235


### Best overall hparams

In [17]:
display_columns = ['time_train', 'avg_recs'] + [ c for c in mdfu.columns if c.endswith('@5') or c.endswith('@10') or c.endswith('@100') ]
overall_hparams = mdfu[mdfu['fold'] > last_folds_idx[0]].groupby(['window_size', 'k']).mean().sort_values(OPTIM_METRIC, ascending=False)
overall_hparams[display_columns]

Unnamed: 0_level_0,Unnamed: 1_level_0,time_train,avg_recs,precision@5,precision@10,precision@100,ndcg@5,ndcg@10,ndcg@100,map@5,map@10,map@100,recall@5,recall@10,recall@100,r-precision@5,r-precision@10,r-precision@100
window_size,k,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
14d,1,0.000416,3.405078,0.342425,0.174243,0.017424,0.535622,0.535622,0.535622,0.695196,0.695196,0.695196,0.994949,1.0,1.0,0.892262,0.899206,0.899206
14d,7,0.000414,3.405078,0.339462,0.174243,0.017424,0.528526,0.530481,0.530481,0.682912,0.685381,0.685381,0.990011,1.0,1.0,0.893368,0.900313,0.900313
10YE,2,0.002228,3.566539,0.336549,0.172458,0.017246,0.710379,0.713311,0.713311,0.681918,0.685186,0.685186,0.989760,1.0,1.0,0.863301,0.868351,0.868351
14d,4,0.000412,3.405078,0.339462,0.174243,0.017424,0.526622,0.528577,0.528577,0.680378,0.682847,0.682847,0.990011,1.0,1.0,0.885432,0.892376,0.892376
14d,3,0.000429,3.405078,0.337442,0.174243,0.017424,0.523757,0.528350,0.528350,0.678358,0.682510,0.682510,0.979910,1.0,1.0,0.885432,0.892376,0.892376
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
90d,10,0.000571,3.529936,0.339732,0.175368,0.017537,0.614898,0.620470,0.620470,0.638873,0.643769,0.643769,0.980235,1.0,1.0,0.824846,0.831790,0.831790
60d,10,0.000502,3.503098,0.342742,0.177158,0.017716,0.588483,0.594054,0.594054,0.637570,0.642585,0.642585,0.979167,1.0,1.0,0.822531,0.830467,0.830467
60d,9,0.000503,3.503098,0.342742,0.177158,0.017716,0.588483,0.594054,0.594054,0.637570,0.642585,0.642585,0.979167,1.0,1.0,0.822531,0.830467,0.830467
60d,6,0.000534,3.503098,0.342742,0.177158,0.017716,0.586886,0.592457,0.592457,0.637493,0.642508,0.642508,0.979167,1.0,1.0,0.844753,0.852690,0.852690


In [18]:
best_avg_hparams = mdfu.set_index(['window_size', 'k']).loc[overall_hparams.iloc[0].name].reset_index().set_index(['fold', 'window_size', 'k'])
paths.save_model_results(best_avg_hparams, 'userknn-best-avg', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE, K_RECOMMENDATIONS)
best_avg_hparams

Saved dataframe into /home/daviddavo/recsys4daos/data/output/SharkDAO/models/userknn-best-avg_3d_normalize.parquet


  best_avg_hparams = mdfu.set_index(['window_size', 'k']).loc[overall_hparams.iloc[0].name].reset_index().set_index(['fold', 'window_size', 'k'])


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,precision@3,precision@5,precision@10,...,recall@10,recall@15,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@15,r-precision@100,time_eval
fold,window_size,k,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
2022-04-03,14d,1,2022-04-03,0.000397,0.015548,3,1.0,2.5,0.2,0.366667,0.22,0.11,...,1.0,1.0,1.0,0.833333,1.0,1.0,1.0,1.0,1.0,0.1281
2022-04-06,14d,1,2022-04-06,0.000398,0.005187,2,1.0,1.833333,0.5,0.388889,0.233333,0.116667,...,1.0,1.0,1.0,0.833333,1.0,1.0,1.0,1.0,1.0,0.125565
2022-04-09,14d,1,2022-04-09,0.000332,0.008925,2,1.0,1.75,0.833333,0.5,0.3,0.15,...,1.0,1.0,1.0,0.7,1.0,1.0,1.0,1.0,1.0,0.124653
2022-04-12,14d,1,2022-04-12,0.000395,0.010004,5,2.0,4.230769,0.153846,0.384615,0.384615,0.192308,...,1.0,1.0,1.0,0.5,0.619048,0.619048,0.619048,0.619048,0.619048,0.127377
2022-04-15,14d,1,2022-04-15,0.000403,0.007213,4,1.0,3.0,0.444444,0.296296,0.266667,0.133333,...,1.0,1.0,1.0,0.708333,1.0,1.0,1.0,1.0,1.0,0.124905
2022-04-18,14d,1,2022-04-18,0.000439,0.00654,3,3.0,3.0,0.625,0.541667,0.325,0.1625,...,1.0,1.0,1.0,0.7,0.857143,0.857143,0.857143,0.857143,0.857143,0.125068
2022-04-21,14d,1,2022-04-21,0.000441,0.008514,6,1.0,4.636364,0.727273,0.545455,0.563636,0.309091,...,1.0,1.0,1.0,0.4875,0.7125,0.9375,1.0,1.0,1.0,0.126382
2022-04-24,14d,1,2022-04-24,0.000478,0.011296,6,4.0,5.266667,0.6,0.377778,0.36,0.18,...,1.0,1.0,1.0,0.574074,0.7,0.7,0.7,0.7,0.7,0.125711
2022-04-27,14d,1,2022-04-27,0.000465,0.006058,5,4.0,4.428571,0.428571,0.52381,0.428571,0.214286,...,1.0,1.0,1.0,0.5,0.791667,0.916667,0.916667,0.916667,0.916667,0.127505
2022-03-31,14d,1,2022-03-31,0.000318,0.003918,1,1.0,1.0,1.0,0.333333,0.2,0.1,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.125643


### Best hparams by fold

In [19]:
print("Best hyperparams by fold", OPTIM_METRIC)
best_hparams = mdfu.sort_values(OPTIM_METRIC,ascending=False).drop_duplicates(['fold'], keep='first').sort_values('fold').set_index(['fold', 'k', 'window_size'])
paths.save_model_results(best_hparams, 'userknn-best-val', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE, K_RECOMMENDATIONS)
best_hparams[display_columns]

Best hyperparams by fold map@10
Saved dataframe into /home/daviddavo/recsys4daos/data/output/SharkDAO/models/userknn-best-val_3d_normalize.parquet


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,time_train,avg_recs,precision@5,precision@10,precision@100,ndcg@5,ndcg@10,ndcg@100,map@5,map@10,map@100,recall@5,recall@10,recall@100,r-precision@5,r-precision@10,r-precision@100
fold,k,window_size,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2022-03-31,15,10YE,0.002119,1.0,0.2,0.1,0.01,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2022-04-03,2,60d,0.000438,2.615385,0.253846,0.126923,0.012692,0.503233,0.503233,0.503233,0.541667,0.541667,0.541667,1.0,1.0,1.0,0.928571,0.928571,0.928571
2022-04-06,15,60d,0.00041,1.875,0.25,0.125,0.0125,0.689279,0.689279,0.689279,0.8125,0.8125,0.8125,1.0,1.0,1.0,1.0,1.0,1.0
2022-04-09,15,7d,0.000372,1.625,0.275,0.1375,0.01375,0.476933,0.476933,0.476933,0.9375,0.9375,0.9375,1.0,1.0,1.0,1.0,1.0,1.0
2022-04-12,2,7d,0.000365,4.0,0.4,0.2,0.02,0.302137,0.302137,0.302137,0.580833,0.580833,0.580833,1.0,1.0,1.0,0.611111,0.611111,0.611111
2022-04-15,2,14d,0.000435,3.0,0.266667,0.133333,0.013333,0.518559,0.518559,0.518559,0.592593,0.592593,0.592593,1.0,1.0,1.0,1.0,1.0,1.0
2022-04-18,1,10YE,0.002275,3.0,0.333333,0.166667,0.016667,0.824408,0.824408,0.824408,0.87963,0.87963,0.87963,1.0,1.0,1.0,0.875,0.875,0.875
2022-04-21,8,7d,0.000397,4.333333,0.466667,0.244444,0.024444,0.53744,0.53744,0.53744,0.861111,0.861111,0.861111,0.981481,1.0,1.0,0.97619,1.0,1.0
2022-04-24,2,7d,0.000424,5.214286,0.342857,0.185714,0.018571,0.596851,0.614447,0.614447,0.724206,0.748016,0.748016,0.952381,1.0,1.0,0.727273,0.727273,0.727273
2022-04-27,1,7d,0.000412,4.333333,0.466667,0.233333,0.023333,0.392689,0.392689,0.392689,0.709259,0.709259,0.709259,1.0,1.0,1.0,0.916667,0.916667,0.916667


In [20]:
best_hparams.tail(len(best_hparams)-1).describe()[display_columns]

Unnamed: 0,time_train,avg_recs,precision@5,precision@10,precision@100,ndcg@5,ndcg@10,ndcg@100,map@5,map@10,map@100,recall@5,recall@10,recall@100,r-precision@5,r-precision@10,r-precision@100
count,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0
mean,0.000614,3.332926,0.339449,0.172546,0.017255,0.537948,0.539903,0.539903,0.7377,0.740345,0.740345,0.992651,1.0,1.0,0.892757,0.895402,0.895402
min,0.000365,1.625,0.25,0.125,0.0125,0.302137,0.302137,0.302137,0.541667,0.541667,0.541667,0.952381,1.0,1.0,0.611111,0.611111,0.611111
25%,0.000397,2.615385,0.266667,0.133333,0.013333,0.476933,0.476933,0.476933,0.592593,0.592593,0.592593,1.0,1.0,1.0,0.875,0.875,0.875
50%,0.000412,3.0,0.333333,0.166667,0.016667,0.518559,0.518559,0.518559,0.724206,0.748016,0.748016,1.0,1.0,1.0,0.928571,0.928571,0.928571
75%,0.000435,4.333333,0.4,0.2,0.02,0.596851,0.614447,0.614447,0.861111,0.861111,0.861111,1.0,1.0,1.0,1.0,1.0,1.0
max,0.002275,5.214286,0.466667,0.244444,0.024444,0.824408,0.824408,0.824408,0.9375,0.9375,0.9375,1.0,1.0,1.0,1.0,1.0,1.0
std,0.000623,1.212705,0.087224,0.046008,0.004601,0.154289,0.155237,0.155237,0.144037,0.143977,0.143977,0.016296,0.0,0.0,0.137087,0.139113,0.139113


### Results of using best hparams in next fold

Kind of like the cvtt from [the LightGCN notebook](./11_microsoft_tuning.ipynb)

In [21]:
next_hparams_idx = best_hparams.index.to_frame(False)
next_hparams_idx['fold'] = next_hparams_idx['fold'].shift(-1)
next_hparams_idx = next_hparams_idx.dropna()
assert len(next_hparams_idx) == len(best_hparams)-1
next_hparams = mdfu.set_index(['fold', 'k', 'window_size']).loc[pd.MultiIndex.from_frame(next_hparams_idx)]
paths.save_model_results(next_hparams, 'userknn-best-test', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE, K_RECOMMENDATIONS)
next_hparams

Saved dataframe into /home/daviddavo/recsys4daos/data/output/SharkDAO/models/userknn-best-test_3d_normalize.parquet


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,precision@3,precision@5,precision@10,...,recall@10,recall@15,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@15,r-precision@100,time_eval
fold,k,window_size,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
2022-04-03,15,10YE,2022-04-03,0.002192,0.027884,3,1.0,2.666667,0.166667,0.422222,0.253333,0.126667,...,1.0,1.0,1.0,0.6,0.857143,0.857143,0.857143,0.857143,0.857143,0.128241
2022-04-06,2,60d,2022-04-06,0.000464,0.007116,2,1.0,1.875,0.625,0.416667,0.25,0.125,...,1.0,1.0,1.0,0.8,1.0,1.0,1.0,1.0,1.0,0.125466
2022-04-09,15,60d,2022-04-09,0.000397,0.009556,2,1.0,1.75,0.833333,0.5,0.3,0.15,...,1.0,1.0,1.0,0.7,1.0,1.0,1.0,1.0,1.0,0.125306
2022-04-12,15,7d,2022-04-12,0.000334,0.007664,5,2.0,4.0,0.2,0.433333,0.4,0.2,...,1.0,1.0,1.0,0.5,0.611111,0.611111,0.611111,0.611111,0.611111,0.125837
2022-04-15,2,7d,2022-04-15,0.000383,0.006898,4,1.0,3.0,0.444444,0.296296,0.266667,0.133333,...,1.0,1.0,1.0,0.708333,1.0,1.0,1.0,1.0,1.0,0.125584
2022-04-18,2,14d,2022-04-18,0.000363,0.006394,3,3.0,3.0,0.5,0.541667,0.325,0.1625,...,1.0,1.0,1.0,0.75,0.785714,0.785714,0.785714,0.785714,0.785714,0.123872
2022-04-21,1,10YE,2022-04-21,0.002395,0.018226,6,1.0,5.0,0.666667,0.577778,0.493333,0.266667,...,1.0,1.0,1.0,0.49,0.67,0.85,0.9,0.9,0.9,0.125424
2022-04-24,8,7d,2022-04-24,0.00043,0.097054,6,4.0,5.214286,0.642857,0.428571,0.342857,0.185714,...,1.0,1.0,1.0,0.648148,0.727273,0.727273,0.727273,0.727273,0.727273,0.12747
2022-04-27,2,7d,2022-04-27,0.000486,0.005276,5,4.0,4.333333,0.5,0.5,0.466667,0.233333,...,1.0,1.0,1.0,0.5,0.791667,0.916667,0.916667,0.916667,0.916667,0.125044


In [22]:
next_hparams.describe()

Unnamed: 0,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,precision@3,precision@5,precision@10,...,recall@10,recall@15,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@15,r-precision@100,time_eval
count,9,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,...,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0
mean,2022-04-15 00:00:00,0.000827,0.020674,4.0,2.0,3.426587,0.508774,0.457393,0.344206,0.175913,...,1.0,1.0,1.0,0.632942,0.82699,0.860879,0.866434,0.866434,0.866434,0.125805
min,2022-04-03 00:00:00,0.000334,0.005276,2.0,1.0,1.75,0.166667,0.296296,0.25,0.125,...,1.0,1.0,1.0,0.49,0.611111,0.611111,0.611111,0.611111,0.611111,0.123872
25%,2022-04-09 00:00:00,0.000383,0.006898,3.0,1.0,2.666667,0.444444,0.422222,0.266667,0.133333,...,1.0,1.0,1.0,0.5,0.727273,0.785714,0.785714,0.785714,0.785714,0.125306
50%,2022-04-15 00:00:00,0.00043,0.007664,4.0,1.0,3.0,0.5,0.433333,0.325,0.1625,...,1.0,1.0,1.0,0.648148,0.791667,0.857143,0.9,0.9,0.9,0.125466
75%,2022-04-21 00:00:00,0.000486,0.018226,5.0,3.0,4.333333,0.642857,0.5,0.4,0.2,...,1.0,1.0,1.0,0.708333,1.0,1.0,1.0,1.0,1.0,0.125837
max,2022-04-27 00:00:00,0.002395,0.097054,6.0,4.0,5.214286,0.833333,0.577778,0.493333,0.266667,...,1.0,1.0,1.0,0.8,1.0,1.0,1.0,1.0,1.0,0.128241
std,,0.000834,0.029579,1.581139,1.322876,1.273886,0.217493,0.083279,0.090704,0.049864,...,0.0,0.0,0.0,0.116666,0.147839,0.135643,0.136165,0.136165,0.136165,0.001303
