In [1]:
import datetime as dt

import pandas as pd
import numpy as np
from lenskit.algorithms import item_knn, user_knn
from sklearn.model_selection import ParameterGrid

from recsys4daos.datasets import to_lenskit
from recsys4daos.model_selection import cvtt_open, explore_hparams
from recsys4daos.evaluation import test_with_hparams_lenskit

import paths

  from tqdm.autonotebook import tqdm


# Parameters

In [2]:
# Default notebook parameters. Each name is assigned exactly once so the
# effective default is unambiguous (the earlier draft assigned SPLITS_FREQ,
# SPLITS_NORMALIZE and LAST_FOLDS twice, with LAST_FOLDS = 20 being silently
# overwritten by 10).

# Dataset config
ORG_NAME = 'Decentraland'

# Fold generation / evaluation
SPLITS_FREQ: str = 'W-THU'  # split weekly (pandas weekly frequency anchored on Thursday)
SPLITS_NORMALIZE = True  # whether or not to move everything to 00:00
LAST_FOLDS = 10  # use just the last 10 splits
LAST_FOLD_DATE_STR: str = None  # passed to cvtt_open as `last_fold`
K_RECOMMENDATIONS: list[int] = [1, 3, 5, 10, 15, 100]

# Search space config
WINDOW_SIZES = ['7d', '14d', '21d', '30d', '60d', '90d', '10YE']
ITEMKNN_Ks = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15]

# Metric used to rank hyper-parameter combinations
OPTIM_METRIC = 'map@10'

In [3]:
# Parameters
# Run-specific values: these reassign the defaults set in the configuration
# cell earlier in the notebook.
EXECUTION_ID = '2024-09-04T10:00'
ORG_NAME = 'Aura Finance'
SPLITS_FREQ = '7d'
LAST_FOLDS = 10
SPLITS_NORMALIZE = True
LAST_FOLD_DATE_STR = None


# Load the dataset

In [4]:
# Load the proposals and votes tables of the configured organization.
dfp = paths.load_proposals(ORG_NAME)
dfv = paths.load_votes(ORG_NAME)

# DataFrame.info() writes its summary to stdout and returns None, so it is
# called directly: wrapping it in print() only adds a stray "None" line.
dfp.info()
dfv.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 415 entries, 0 to 414
Data columns (total 6 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   id                 415 non-null    object        
 1   author             415 non-null    object        
 2   date               415 non-null    datetime64[us]
 3   start              415 non-null    datetime64[us]
 4   end                415 non-null    datetime64[us]
 5   platform_proposal  415 non-null    object        
dtypes: datetime64[us](3), object(3)
memory usage: 19.6+ KB
None
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7677 entries, 0 to 7676
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   id        7677 non-null   object        
 1   proposal  7677 non-null   object        
 2   voter     7677 non-null   object        
 3   date      7677 non-null   datetime64[us]
dtypes: d

In [5]:
# Convert the votes table with the project's to_lenskit helper; the displayed
# result has user / item / timestamp / rating columns.
df = to_lenskit(dfv)
df

Unnamed: 0,user,item,timestamp,rating
0,0xb1f881f47bab744e7283851bc090baa626df931d,08cf9283-73d7-518a-808f-000984e42821,2022-06-17 09:57:28,1
1,0xb957dccaa1ccfb1eb78b495b499801d591d8a403,08cf9283-73d7-518a-808f-000984e42821,2022-06-17 11:49:24,1
2,0xd423b5598f2ed21a91cb79ea945c3c10836bc659,08cf9283-73d7-518a-808f-000984e42821,2022-06-17 13:33:38,1
3,0x3eedb85da27cac4ec83ab5a8a38f18b0edbc7042,08cf9283-73d7-518a-808f-000984e42821,2022-06-17 16:24:02,1
4,0x8dc4310f20d59ba458b76a62141697717f93fa41,08cf9283-73d7-518a-808f-000984e42821,2022-06-17 20:34:02,1
...,...,...,...,...
7672,0xbabb038737a7ae0dca02075e79ed5b7704c29827,416c6597-8fa2-5674-a75d-391e321554da,2023-07-16 22:22:45,1
7673,0xbabb038737a7ae0dca02075e79ed5b7704c29827,344bb922-98c2-5ef2-ab09-ed114cb7573b,2023-07-16 22:23:02,1
7674,0xbabb038737a7ae0dca02075e79ed5b7704c29827,3a773b95-59e5-55a2-bc4c-0bf8c4ff19cd,2023-07-16 22:23:22,1
7675,0xbabb038737a7ae0dca02075e79ed5b7704c29827,d2940d14-4d66-5150-acdf-037894d5f6a0,2023-07-16 22:23:43,1


## Split in folds

In [6]:
# Index every fold produced by cvtt_open by its `end` attribute.
# NOTE(review): SPLITS_NORMALIZE is not forwarded to cvtt_open here — confirm
# that its default behaviour matches the configured value.
all_folds = {
    fold.end: fold
    for fold in cvtt_open(
        df,
        SPLITS_FREQ,
        dfp.reset_index(),
        remove_not_in_train_col='item',
        col_item='item',
        last_fold=LAST_FOLD_DATE_STR,
    )
}
# Keys of the final LAST_FOLDS folds (dicts keep insertion order).
last_folds_idx = list(all_folds)[-LAST_FOLDS:]
last_folds_idx

[Timestamp('2023-05-12 00:00:00'),
 Timestamp('2023-05-19 00:00:00'),
 Timestamp('2023-05-26 00:00:00'),
 Timestamp('2023-06-02 00:00:00'),
 Timestamp('2023-06-09 00:00:00'),
 Timestamp('2023-06-16 00:00:00'),
 Timestamp('2023-06-23 00:00:00'),
 Timestamp('2023-06-30 00:00:00'),
 Timestamp('2023-07-07 00:00:00'),
 Timestamp('2023-07-14 00:00:00')]

# Item-based KNN

In [7]:
def testHParamsItemKNN(fold, k: int, window_size=None):
    # Create algorithm
    algo = item_knn.ItemItem(
        feedback='implicit', # VERY IMPORTANT
        min_sim=0,
        # min_nbrs=0,
        nnbrs=k,  # the maximum number of neighbors for scoring each item (None for unlimited)
    )
    return test_with_hparams_lenskit(algo, all_folds[fold], K_RECOMMENDATIONS, window_size)

# Try a single configuration on the most recent fold before running the full grid.
pd.Series(testHParamsItemKNN(fold=last_folds_idx[-1], k=5, window_size='14d'))

Numba is using threading layer omp - consider TBB


found 1 potential runtime problems - see https://boi.st/lkpy-perf


  b = blocks[bi]




fold_t             2023-07-14 00:00:00
time_train                     5.52973
time_rec                      0.001977
open_proposals                       0
min_recs                           NaN
avg_recs                           NaN
precision@1                        0.0
precision@3                        0.0
precision@5                        0.0
precision@10                       0.0
precision@15                       0.0
precision@100                      0.0
ndcg@1                             0.0
ndcg@3                             0.0
ndcg@5                             0.0
ndcg@10                            0.0
ndcg@15                            0.0
ndcg@100                           0.0
map@1                              0.0
map@3                              0.0
map@5                              0.0
map@10                             0.0
map@15                             0.0
map@100                            0.0
recall@1                           0.0
recall@3                 

## Exploring hparams

In [8]:
# Evaluate every (fold, k, window_size) combination of the search space.
# The third argument is the location returned by paths.hparams_progress for
# this model / organization / split configuration.
results = explore_hparams(
    testHParamsItemKNN,
    ParameterGrid(dict(
        fold=last_folds_idx,
        k=ITEMKNN_Ks,
        window_size=WINDOW_SIZES,
    )),
    paths.hparams_progress('itemknn', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE),
)
mdfi = pd.DataFrame(results)
mdfi

Restored checkpoint from ../.cache/Aura Finance/hparams-itemknn_7d_normalize.pkl with 770 results


  0%|          | 0/770 [00:00<?, ?it/s]

Unnamed: 0,fold,k,window_size,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,...,recall@10,recall@15,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@15,r-precision@100,time_eval
0,2023-05-12,1,7d,2023-05-12,0.007597,0.006988,1,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.128402
1,2023-05-12,1,14d,2023-05-12,0.002988,0.006591,1,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.131330
2,2023-05-12,1,21d,2023-05-12,0.015516,0.007610,1,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.132373
3,2023-05-12,1,30d,2023-05-12,0.015669,0.009785,1,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.134706
4,2023-05-12,1,60d,2023-05-12,0.015943,0.012488,1,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.131678
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
765,2023-07-14,15,21d,2023-07-14,0.003375,0.001112,0,,,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.070612
766,2023-07-14,15,30d,2023-07-14,0.001042,0.001259,0,,,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.068151
767,2023-07-14,15,60d,2023-07-14,0.001973,0.001694,0,,,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.067862
768,2023-07-14,15,90d,2023-07-14,0.005185,0.002189,0,,,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.068734


### Best overall hparams

In [9]:
# Columns shown in the summary tables: training time, average number of
# recommendations, and every metric at cut-offs 5, 10 and 100.
display_columns = ['time_train', 'avg_recs']
display_columns += [col for col in mdfi.columns if col.endswith(('@5', '@10', '@100'))]

# Average each (window_size, k) pair over the folds after the first selected
# one, then rank the pairs by the optimisation metric (best first).
overall_hparams = (
    mdfi[mdfi['fold'] > last_folds_idx[0]]
    .groupby(['window_size', 'k'])
    .mean()
    .sort_values(OPTIM_METRIC, ascending=False)
)
overall_hparams[display_columns]

Unnamed: 0_level_0,Unnamed: 1_level_0,time_train,avg_recs,precision@5,precision@10,precision@100,ndcg@5,ndcg@10,ndcg@100,map@5,map@10,map@100,recall@5,recall@10,recall@100,r-precision@5,r-precision@10,r-precision@100
window_size,k,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
7d,15,0.003026,4.133333,0.272593,0.183704,0.019259,0.232002,0.242155,0.244584,0.581738,0.607287,0.613348,0.574784,0.652742,0.666667,0.574784,0.605784,0.607804
7d,10,0.001474,4.133333,0.272593,0.183704,0.019259,0.232002,0.242155,0.244584,0.581738,0.607287,0.613348,0.574784,0.652742,0.666667,0.574784,0.605784,0.607804
7d,9,0.003308,4.133333,0.272593,0.183704,0.019259,0.232002,0.242155,0.244584,0.581738,0.607287,0.613348,0.574784,0.652742,0.666667,0.574784,0.605784,0.607804
7d,8,0.003423,4.133333,0.272593,0.183704,0.019259,0.232002,0.242155,0.244584,0.581738,0.607287,0.613348,0.574784,0.652742,0.666667,0.574784,0.605784,0.607804
7d,7,0.001195,4.133333,0.272593,0.183704,0.019259,0.232002,0.242155,0.244584,0.581738,0.607287,0.613348,0.574784,0.652742,0.666667,0.574784,0.605784,0.607804
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10YE,1,0.034409,4.250000,0.217028,0.149041,0.015460,0.494231,0.524135,0.527551,0.485754,0.512329,0.516537,0.568051,0.659579,0.666667,0.560734,0.598855,0.600298
10YE,4,0.034209,4.250000,0.215176,0.149041,0.015460,0.491897,0.522564,0.525979,0.482914,0.508907,0.513115,0.565443,0.659579,0.666667,0.560230,0.598855,0.600298
10YE,5,0.034619,4.250000,0.215176,0.149041,0.015460,0.491897,0.522564,0.525979,0.482914,0.508907,0.513115,0.565443,0.659579,0.666667,0.560230,0.598855,0.600298
10YE,3,0.034739,4.250000,0.215176,0.149041,0.015460,0.490600,0.521739,0.525155,0.481247,0.508054,0.512263,0.565443,0.659579,0.666667,0.560230,0.598855,0.600298


Now let's see the behaviour in each fold

In [10]:
# Per-fold results of the (window_size, k) pair ranked first in overall_hparams.
# Rows are selected with a boolean mask rather than
# `.set_index(['window_size', 'k']).loc[...]`: a label lookup on that unsorted,
# non-unique MultiIndex made pandas emit a warning on every run (presumably
# the "indexing past lexsort depth" PerformanceWarning). The selected rows,
# their order and the resulting index are the same.
best_window_size, best_k = overall_hparams.index[0]
is_best_pair = (mdfi['window_size'] == best_window_size) & (mdfi['k'] == best_k)
best_avg_hparams = mdfi[is_best_pair].set_index(['fold', 'window_size', 'k'])
paths.save_model_results(best_avg_hparams, 'itemknn-best-avg', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE, K_RECOMMENDATIONS)
best_avg_hparams

Saved dataframe into /home/daviddavo/recsys4daos/data/output/Aura Finance/models/itemknn-best-avg_7d_normalize.parquet


  best_avg_hparams = mdfi.set_index(['window_size', 'k']).loc[overall_hparams.iloc[0].name].reset_index().set_index(['fold', 'window_size', 'k'])


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,precision@3,precision@5,precision@10,...,recall@10,recall@15,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@15,r-precision@100,time_eval
fold,window_size,k,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
2023-05-12,7d,15,2023-05-12,0.002855,0.005775,1,1.0,1.0,1.0,0.333333,0.2,0.1,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.124036
2023-05-19,7d,15,2023-05-19,0.000932,0.006892,11,5.0,9.8,0.6,0.466667,0.52,0.52,...,0.874675,1.0,1.0,0.411255,0.361688,0.512338,0.66039,0.678571,0.678571,0.1289
2023-05-26,7d,15,2023-05-26,0.001008,0.004516,4,4.0,4.0,1.0,0.777778,0.533333,0.266667,...,1.0,1.0,1.0,0.416667,0.916667,1.0,1.0,1.0,1.0,0.127062
2023-06-02,7d,15,2023-06-02,0.005699,0.007787,7,7.0,7.0,0.833333,0.722222,0.7,0.516667,...,1.0,1.0,1.0,0.228571,0.43254,0.660714,0.791667,0.791667,0.791667,0.127733
2023-06-09,7d,15,2023-06-09,0.001488,0.005407,1,1.0,1.0,1.0,0.333333,0.2,0.1,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.124793
2023-06-16,7d,15,2023-06-16,0.004799,0.003032,2,2.0,2.0,1.0,0.5,0.3,0.15,...,1.0,1.0,1.0,0.75,1.0,1.0,1.0,1.0,1.0,0.12662
2023-06-23,7d,15,2023-06-23,0.001802,0.000855,0,,,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.069211
2023-06-30,7d,15,2023-06-30,0.000937,0.006803,1,1.0,1.0,1.0,0.333333,0.2,0.1,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.124099
2023-07-07,7d,15,2023-07-07,0.009525,0.000828,0,,,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.069781
2023-07-14,7d,15,2023-07-14,0.001046,0.000884,0,,,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.068532


### Best hparams by fold

These are the parameters used to check which model is the best (validation)

In [11]:
print("Best hyperparams by fold", OPTIM_METRIC)
# Rank every run by the optimisation metric, keep the top-ranked row of each
# fold, then order the survivors by fold.
best_hparams = (
    mdfi.sort_values(OPTIM_METRIC, ascending=False)
    .drop_duplicates(subset='fold')
    .sort_values('fold')
    .set_index(['fold', 'k', 'window_size'])
)
paths.save_model_results(best_hparams, 'itemknn-best-val', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE, K_RECOMMENDATIONS)
best_hparams[display_columns]

Best hyperparams by fold map@10
Saved dataframe into /home/daviddavo/recsys4daos/data/output/Aura Finance/models/itemknn-best-val_7d_normalize.parquet


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,time_train,avg_recs,precision@5,precision@10,precision@100,ndcg@5,ndcg@10,ndcg@100,map@5,map@10,map@100,recall@5,recall@10,recall@100,r-precision@5,r-precision@10,r-precision@100
fold,k,window_size,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2023-05-12,1,7d,0.007597,1.0,0.2,0.1,0.01,0.444444,0.444444,0.444444,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2023-05-19,7,7d,0.000921,9.8,0.52,0.52,0.06,0.27154,0.317902,0.339756,0.558,0.649478,0.704024,0.512338,0.874675,1.0,0.512338,0.66039,0.678571
2023-05-26,6,7d,0.015302,4.0,0.533333,0.266667,0.026667,0.333333,0.333333,0.333333,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2023-06-02,3,7d,0.001285,7.0,0.7,0.516667,0.051667,0.291474,0.336496,0.336496,0.677639,0.816104,0.816104,0.660714,1.0,1.0,0.660714,0.791667,0.791667
2023-06-09,6,90d,0.00546,1.0,0.2,0.1,0.01,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2023-06-16,15,7d,0.004799,2.0,0.3,0.15,0.015,0.166667,0.166667,0.166667,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2023-06-23,3,14d,0.000958,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2023-06-30,8,10YE,0.039698,1.0,0.2,0.1,0.01,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2023-07-07,7,7d,0.000896,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2023-07-14,2,14d,0.000929,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [12]:
# Summary statistics over every fold except the first one.
best_hparams.iloc[1:].describe()

Unnamed: 0,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,precision@3,precision@5,precision@10,...,recall@10,recall@15,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@15,r-precision@100,time_eval
count,9,9.0,9.0,9.0,6.0,6.0,9.0,9.0,9.0,9.0,...,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0
mean,2023-06-16 00:00:00,0.007805,0.007774,2.888889,3.333333,4.133333,0.603704,0.348148,0.272593,0.183704,...,0.652742,0.666667,0.666667,0.422944,0.523433,0.574784,0.605784,0.607804,0.607804,0.109429
min,2023-05-19 00:00:00,0.000896,0.000794,0.0,1.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.068282
25%,2023-06-02 00:00:00,0.000929,0.001617,0.0,1.25,1.25,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.092661
50%,2023-06-16 00:00:00,0.001285,0.004732,1.0,3.0,3.0,0.833333,0.333333,0.2,0.1,...,1.0,1.0,1.0,0.411255,0.43254,0.660714,0.791667,0.791667,0.791667,0.121601
75%,2023-06-30 00:00:00,0.00546,0.007715,4.0,4.75,6.25,1.0,0.5,0.52,0.266667,...,1.0,1.0,1.0,0.75,1.0,1.0,1.0,1.0,1.0,0.127317
max,2023-07-14 00:00:00,0.039698,0.026012,11.0,7.0,9.8,1.0,0.777778,0.7,0.52,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.128297
std,,0.012844,0.008695,3.822448,2.42212,3.592585,0.470946,0.301181,0.261267,0.208336,...,0.491224,0.5,0.5,0.41042,0.459939,0.464442,0.469052,0.469356,0.469356,0.025262


### Results of using best hparams in next fold

Kind of like the cvtt from [the LightGCN notebook](./11_microsoft_tuning.ipynb).

In [13]:
# Pair each fold's best (k, window_size) with the *following* fold: shifting
# the fold column up by one leaves the last row without a fold, so it is dropped.
next_hparams_idx = (
    best_hparams.index.to_frame(index=False)
    .assign(fold=lambda frame: frame['fold'].shift(-1))
    .dropna()
)
assert len(next_hparams_idx) == len(best_hparams) - 1
# Look up the results those hyper-parameters obtained on the following fold.
next_hparams = mdfi.set_index(['fold', 'k', 'window_size']).loc[
    pd.MultiIndex.from_frame(next_hparams_idx)
]
paths.save_model_results(next_hparams, 'itemknn-best-test', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE, K_RECOMMENDATIONS)
next_hparams[display_columns]

Saved dataframe into /home/daviddavo/recsys4daos/data/output/Aura Finance/models/itemknn-best-test_7d_normalize.parquet


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,time_train,avg_recs,precision@5,precision@10,precision@100,ndcg@5,ndcg@10,ndcg@100,map@5,map@10,map@100,recall@5,recall@10,recall@100,r-precision@5,r-precision@10,r-precision@100
fold,k,window_size,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2023-05-19,1,7d,0.001137,9.8,0.52,0.52,0.06,0.224096,0.270459,0.292312,0.408,0.499478,0.554024,0.512338,0.874675,1.0,0.390422,0.575487,0.598214
2023-05-26,7,7d,0.00092,4.0,0.533333,0.266667,0.026667,0.333333,0.333333,0.333333,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2023-06-02,6,7d,0.00131,7.0,0.7,0.516667,0.051667,0.291474,0.336496,0.336496,0.677639,0.816104,0.816104,0.660714,1.0,1.0,0.660714,0.791667,0.791667
2023-06-09,3,7d,0.001536,1.0,0.2,0.1,0.01,0.4,0.4,0.4,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2023-06-16,6,90d,0.005269,2.0,0.222222,0.111111,0.011111,0.565465,0.565465,0.565465,0.666667,0.666667,0.666667,1.0,1.0,1.0,1.0,1.0,1.0
2023-06-23,15,7d,0.001802,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2023-06-30,3,14d,0.00092,1.0,0.2,0.1,0.01,0.625,0.625,0.625,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2023-07-07,8,10YE,0.042845,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2023-07-14,7,7d,0.000926,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [14]:
# Summary statistics of the results obtained when reusing the previous fold's best hparams.
next_hparams.describe()

Unnamed: 0,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,precision@3,precision@5,precision@10,...,recall@10,recall@15,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@15,r-precision@100,time_eval
count,9,9.0,9.0,9.0,6.0,6.0,9.0,9.0,9.0,9.0,...,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0
mean,2023-06-16 00:00:00,0.006296,0.031644,2.888889,3.333333,4.133333,0.507407,0.326337,0.263951,0.179383,...,0.652742,0.666667,0.666667,0.399495,0.505702,0.561237,0.59635,0.598876,0.598876,0.107249
min,2023-05-19 00:00:00,0.00092,0.000855,0.0,1.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.069211
25%,2023-06-02 00:00:00,0.000926,0.004518,0.0,1.25,1.25,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.069958
50%,2023-06-16 00:00:00,0.00131,0.007697,1.0,3.0,3.0,0.4,0.333333,0.2,0.1,...,1.0,1.0,1.0,0.228571,0.43254,0.660714,0.791667,0.791667,0.791667,0.124769
75%,2023-06-30 00:00:00,0.001802,0.012648,4.0,4.75,6.25,1.0,0.4,0.52,0.266667,...,1.0,1.0,1.0,0.833333,1.0,1.0,1.0,1.0,1.0,0.126662
max,2023-07-14 00:00:00,0.042845,0.228649,11.0,7.0,9.8,1.0,0.777778,0.7,0.52,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.127348
std,,0.013774,0.074051,3.822448,2.42212,3.592585,0.453927,0.293146,0.261533,0.209523,...,0.491224,0.5,0.5,0.432666,0.469921,0.468253,0.468671,0.468605,0.468605,0.028263


# User-based KNN

In [15]:
def testHParamsUserKNN(fold, k: int, window_size=None):
    # Create algorithm
    algo = user_knn.UserUser(
        feedback='implicit', # VERY IMPORTANT
        min_sim=0,
        # min_nbrs=0,
        nnbrs=k,  # the maximum number of neighbors for scoring each item (None for unlimited)
    )
    return test_with_hparams_lenskit(algo, all_folds[fold], K_RECOMMENDATIONS, window_size)

# Try a single configuration on the most recent fold before running the full grid.
pd.Series(testHParamsUserKNN(fold=last_folds_idx[-1], k=5, window_size='14d'))



fold_t             2023-07-14 00:00:00
time_train                    0.605405
time_rec                       0.00138
open_proposals                       0
min_recs                           NaN
avg_recs                           NaN
precision@1                        0.0
precision@3                        0.0
precision@5                        0.0
precision@10                       0.0
precision@15                       0.0
precision@100                      0.0
ndcg@1                             0.0
ndcg@3                             0.0
ndcg@5                             0.0
ndcg@10                            0.0
ndcg@15                            0.0
ndcg@100                           0.0
map@1                              0.0
map@3                              0.0
map@5                              0.0
map@10                             0.0
map@15                             0.0
map@100                            0.0
recall@1                           0.0
recall@3                 

## Exploring hparams

In [16]:
# Evaluate every (fold, k, window_size) combination of the search space.
# The neighbour counts reuse the ITEMKNN_Ks list defined in the config cell.
results = explore_hparams(
    testHParamsUserKNN,
    ParameterGrid(dict(
        fold=last_folds_idx,
        k=ITEMKNN_Ks,
        window_size=WINDOW_SIZES,
    )),
    paths.hparams_progress('userknn', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE),
)
mdfu = pd.DataFrame(results)
mdfu

Restored checkpoint from ../.cache/Aura Finance/hparams-userknn_7d_normalize.pkl with 770 results


  0%|          | 0/770 [00:00<?, ?it/s]

Unnamed: 0,fold,k,window_size,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,...,recall@10,recall@15,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@15,r-precision@100,time_eval
0,2023-05-12,1,7d,2023-05-12,0.000429,0.812252,1,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.126939
1,2023-05-12,1,14d,2023-05-12,0.000470,0.003842,1,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.125733
2,2023-05-12,1,21d,2023-05-12,0.000373,0.003668,1,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.123949
3,2023-05-12,1,30d,2023-05-12,0.000675,0.004705,1,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.124350
4,2023-05-12,1,60d,2023-05-12,0.000694,0.006088,1,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.124231
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
765,2023-07-14,15,21d,2023-07-14,0.000312,0.000848,0,,,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.067262
766,2023-07-14,15,30d,2023-07-14,0.000362,0.000967,0,,,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.067453
767,2023-07-14,15,60d,2023-07-14,0.000438,0.001252,0,,,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.067052
768,2023-07-14,15,90d,2023-07-14,0.000643,0.001680,0,,,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.067463


### Best overall hparams

In [17]:
# Columns shown in the summary tables: training time, average number of
# recommendations, and every metric at cut-offs 5, 10 and 100.
display_columns = ['time_train', 'avg_recs']
display_columns += [col for col in mdfu.columns if col.endswith(('@5', '@10', '@100'))]

# Average each (window_size, k) pair over the folds after the first selected
# one, then rank the pairs by the optimisation metric (best first).
overall_hparams = (
    mdfu[mdfu['fold'] > last_folds_idx[0]]
    .groupby(['window_size', 'k'])
    .mean()
    .sort_values(OPTIM_METRIC, ascending=False)
)
overall_hparams[display_columns]

Unnamed: 0_level_0,Unnamed: 1_level_0,time_train,avg_recs,precision@5,precision@10,precision@100,ndcg@5,ndcg@10,ndcg@100,map@5,map@10,map@100,recall@5,recall@10,recall@100,r-precision@5,r-precision@10,r-precision@100
window_size,k,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
7d,1,0.000302,4.133333,0.272593,0.183704,0.019259,0.228584,0.238738,0.241166,0.570627,0.596176,0.602236,0.574784,0.652742,0.666667,0.561237,0.596350,0.598876
7d,15,0.000314,4.133333,0.272593,0.183704,0.019259,0.226730,0.236884,0.239312,0.565071,0.590620,0.596681,0.574784,0.652742,0.666667,0.561237,0.596350,0.598876
7d,2,0.000308,4.133333,0.272593,0.183704,0.019259,0.226730,0.236884,0.239312,0.565071,0.590620,0.596681,0.574784,0.652742,0.666667,0.561237,0.596350,0.598876
7d,3,0.000311,4.133333,0.272593,0.183704,0.019259,0.226730,0.236884,0.239312,0.565071,0.590620,0.596681,0.574784,0.652742,0.666667,0.561237,0.596350,0.598876
7d,4,0.000309,4.133333,0.272593,0.183704,0.019259,0.226730,0.236884,0.239312,0.565071,0.590620,0.596681,0.574784,0.652742,0.666667,0.561237,0.596350,0.598876
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10YE,8,0.003620,4.250000,0.220731,0.151819,0.015460,0.511338,0.543947,0.545161,0.513333,0.540284,0.541967,0.554971,0.663767,0.666667,0.564765,0.601123,0.602566
10YE,9,0.003657,4.250000,0.220731,0.151819,0.015460,0.511338,0.543947,0.545161,0.513333,0.540284,0.541967,0.554971,0.663767,0.666667,0.564765,0.601123,0.602566
10YE,10,0.003606,4.250000,0.220731,0.151819,0.015460,0.511338,0.543947,0.545161,0.513333,0.540284,0.541967,0.554971,0.663767,0.666667,0.564765,0.601123,0.602566
10YE,2,0.003609,4.250000,0.220731,0.151819,0.015460,0.505411,0.539204,0.540418,0.507531,0.536211,0.537895,0.554971,0.663767,0.666667,0.564765,0.601123,0.602566


In [18]:
# Per-fold results of the (window_size, k) pair ranked first in overall_hparams.
# Rows are selected with a boolean mask rather than
# `.set_index(['window_size', 'k']).loc[...]`: a label lookup on that unsorted,
# non-unique MultiIndex made pandas emit a warning on every run (presumably
# the "indexing past lexsort depth" PerformanceWarning). The selected rows,
# their order and the resulting index are the same.
best_window_size, best_k = overall_hparams.index[0]
is_best_pair = (mdfu['window_size'] == best_window_size) & (mdfu['k'] == best_k)
best_avg_hparams = mdfu[is_best_pair].set_index(['fold', 'window_size', 'k'])
paths.save_model_results(best_avg_hparams, 'userknn-best-avg', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE, K_RECOMMENDATIONS)
best_avg_hparams

Saved dataframe into /home/daviddavo/recsys4daos/data/output/Aura Finance/models/userknn-best-avg_7d_normalize.parquet


  best_avg_hparams = mdfu.set_index(['window_size', 'k']).loc[overall_hparams.iloc[0].name].reset_index().set_index(['fold', 'window_size', 'k'])


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,precision@3,precision@5,precision@10,...,recall@10,recall@15,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@15,r-precision@100,time_eval
fold,window_size,k,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
2023-05-12,7d,1,2023-05-12,0.000429,0.812252,1,1.0,1.0,1.0,0.333333,0.2,0.1,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.126939
2023-05-19,7d,1,2023-05-19,0.000354,0.004228,11,5.0,9.8,0.4,0.466667,0.52,0.52,...,0.874675,1.0,1.0,0.116883,0.20211,0.390422,0.575487,0.598214,0.598214,0.126942
2023-05-26,7d,1,2023-05-26,0.000339,0.002716,4,4.0,4.0,1.0,0.777778,0.533333,0.266667,...,1.0,1.0,1.0,0.416667,0.916667,1.0,1.0,1.0,1.0,0.125674
2023-06-02,7d,1,2023-06-02,0.000301,0.004744,7,7.0,7.0,0.833333,0.722222,0.7,0.516667,...,1.0,1.0,1.0,0.228571,0.43254,0.660714,0.791667,0.791667,0.791667,0.127559
2023-06-09,7d,1,2023-06-09,0.00029,0.00331,1,1.0,1.0,1.0,0.333333,0.2,0.1,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.124227
2023-06-16,7d,1,2023-06-16,0.000296,0.001997,2,2.0,2.0,1.0,0.5,0.3,0.15,...,1.0,1.0,1.0,0.75,1.0,1.0,1.0,1.0,1.0,0.12413
2023-06-23,7d,1,2023-06-23,0.000314,0.000772,0,,,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.069008
2023-06-30,7d,1,2023-06-30,0.000268,0.003909,1,1.0,1.0,1.0,0.333333,0.2,0.1,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.123333
2023-07-07,7d,1,2023-07-07,0.000289,0.000672,0,,,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.069492
2023-07-14,7d,1,2023-07-14,0.000266,0.000701,0,,,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.067182


### Best hparams by fold

In [19]:
print("Best hyperparams by fold", OPTIM_METRIC)
# Rank every run by the optimisation metric, keep the top-ranked row of each
# fold, then order the survivors by fold.
best_hparams = (
    mdfu.sort_values(OPTIM_METRIC, ascending=False)
    .drop_duplicates(subset='fold')
    .sort_values('fold')
    .set_index(['fold', 'k', 'window_size'])
)
paths.save_model_results(best_hparams, 'userknn-best-val', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE, K_RECOMMENDATIONS)
best_hparams[display_columns]

Best hyperparams by fold map@10
Saved dataframe into /home/daviddavo/recsys4daos/data/output/Aura Finance/models/userknn-best-val_7d_normalize.parquet


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,time_train,avg_recs,precision@5,precision@10,precision@100,ndcg@5,ndcg@10,ndcg@100,map@5,map@10,map@100,recall@5,recall@10,recall@100,r-precision@5,r-precision@10,r-precision@100
fold,k,window_size,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2023-05-12,1,7d,0.000429,1.0,0.2,0.1,0.01,0.444444,0.444444,0.444444,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2023-05-19,5,60d,0.000653,10.4,0.5,0.53,0.056,0.481786,0.617773,0.6287,0.482,0.60305,0.621232,0.420058,0.968687,1.0,0.457225,0.678623,0.69161
2023-05-26,6,7d,0.0003,4.0,0.533333,0.266667,0.026667,0.333333,0.333333,0.333333,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2023-06-02,1,7d,0.000301,7.0,0.7,0.516667,0.051667,0.291474,0.336496,0.336496,0.677639,0.816104,0.816104,0.660714,1.0,1.0,0.660714,0.791667,0.791667
2023-06-09,6,90d,0.000797,1.0,0.2,0.1,0.01,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2023-06-16,15,7d,0.000295,2.0,0.3,0.15,0.015,0.166667,0.166667,0.166667,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2023-06-23,3,14d,0.000294,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2023-06-30,8,10YE,0.00361,1.0,0.2,0.1,0.01,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2023-07-07,7,14d,0.000348,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2023-07-14,5,14d,0.000359,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [20]:
# Summary statistics (display columns only) over every fold except the first one.
best_hparams.iloc[1:].describe()[display_columns]

Unnamed: 0,time_train,avg_recs,precision@5,precision@10,precision@100,ndcg@5,ndcg@10,ndcg@100,map@5,map@10,map@100,recall@5,recall@10,recall@100,r-precision@5,r-precision@10,r-precision@100
count,9.0,6.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0
mean,0.000773,4.233333,0.27037,0.184815,0.018815,0.363696,0.383808,0.385022,0.573293,0.602128,0.604149,0.56453,0.663187,0.666667,0.56866,0.60781,0.609253
min,0.000294,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0003,1.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.000348,3.0,0.2,0.1,0.01,0.291474,0.333333,0.333333,0.677639,0.816104,0.816104,0.660714,1.0,1.0,0.660714,0.791667,0.791667
75%,0.000653,6.25,0.5,0.266667,0.026667,0.481786,0.617773,0.6287,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
max,0.00361,10.4,0.7,0.53,0.056,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
std,0.001079,3.785058,0.258975,0.210371,0.021667,0.397727,0.404349,0.405155,0.465501,0.469929,0.469973,0.467004,0.497493,0.5,0.465729,0.469357,0.469622


### Results of using best hparams in next fold

Kind of like the cvtt from [the LightGCN notebook](./11_microsoft_tuning.ipynb)

In [21]:
# Pair each fold's best (k, window_size) with the *following* fold: shifting
# the fold column up by one leaves the last row without a fold, so it is dropped.
next_hparams_idx = (
    best_hparams.index.to_frame(index=False)
    .assign(fold=lambda frame: frame['fold'].shift(-1))
    .dropna()
)
assert len(next_hparams_idx) == len(best_hparams) - 1
# Look up the results those hyper-parameters obtained on the following fold.
next_hparams = mdfu.set_index(['fold', 'k', 'window_size']).loc[
    pd.MultiIndex.from_frame(next_hparams_idx)
]
paths.save_model_results(next_hparams, 'userknn-best-test', ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE, K_RECOMMENDATIONS)
next_hparams

Saved dataframe into /home/daviddavo/recsys4daos/data/output/Aura Finance/models/userknn-best-test_7d_normalize.parquet


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,precision@3,precision@5,precision@10,...,recall@10,recall@15,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@15,r-precision@100,time_eval
fold,k,window_size,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
2023-05-19,1,7d,2023-05-19,0.000354,0.004228,11,5.0,9.8,0.4,0.466667,0.52,0.52,...,0.874675,1.0,1.0,0.116883,0.20211,0.390422,0.575487,0.598214,0.598214,0.126942
2023-05-26,5,60d,2023-05-26,0.000684,0.008757,4,4.0,4.0,0.777778,0.62963,0.422222,0.211111,...,1.0,1.0,1.0,0.5,0.928571,1.0,1.0,1.0,1.0,0.124496
2023-06-02,6,7d,2023-06-02,0.000364,0.00488,7,7.0,7.0,0.833333,0.722222,0.7,0.516667,...,1.0,1.0,1.0,0.228571,0.43254,0.660714,0.791667,0.791667,0.791667,0.12849
2023-06-09,1,7d,2023-06-09,0.00029,0.00331,1,1.0,1.0,1.0,0.333333,0.2,0.1,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.124227
2023-06-16,6,90d,2023-06-16,0.000789,0.008672,2,2.0,2.0,0.888889,0.37037,0.222222,0.111111,...,1.0,1.0,1.0,0.9375,1.0,1.0,1.0,1.0,1.0,0.125469
2023-06-23,15,7d,2023-06-23,0.000362,0.000705,0,,,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06725
2023-06-30,3,14d,2023-06-30,0.000399,0.004101,1,1.0,1.0,1.0,0.333333,0.2,0.1,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.123841
2023-07-07,8,10YE,2023-07-07,0.003656,0.009429,0,,,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066658
2023-07-14,7,14d,2023-07-14,0.000383,0.000839,0,,,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.067276


In [22]:
# Summary statistics of the results obtained when reusing the previous fold's best hparams.
next_hparams.describe()

Unnamed: 0,fold_t,time_train,time_rec,open_proposals,min_recs,avg_recs,precision@1,precision@3,precision@5,precision@10,...,recall@10,recall@15,recall@100,r-precision@1,r-precision@3,r-precision@5,r-precision@10,r-precision@15,r-precision@100,time_eval
count,9,9.0,9.0,9.0,6.0,6.0,9.0,9.0,9.0,9.0,...,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0
mean,2023-06-16 00:00:00,0.000809,0.004991,2.888889,3.333333,4.133333,0.544444,0.317284,0.251605,0.17321,...,0.652742,0.666667,0.666667,0.420328,0.507025,0.561237,0.59635,0.598876,0.598876,0.106072
min,2023-05-19 00:00:00,0.00029,0.000705,0.0,1.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066658
25%,2023-06-02 00:00:00,0.000362,0.00331,0.0,1.25,1.25,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.067276
50%,2023-06-16 00:00:00,0.000383,0.004228,1.0,3.0,3.0,0.777778,0.333333,0.2,0.1,...,1.0,1.0,1.0,0.228571,0.43254,0.660714,0.791667,0.791667,0.791667,0.124227
75%,2023-06-30 00:00:00,0.000684,0.008672,4.0,4.75,6.25,0.888889,0.466667,0.422222,0.211111,...,1.0,1.0,1.0,0.9375,1.0,1.0,1.0,1.0,1.0,0.125469
max,2023-07-14 00:00:00,0.003656,0.009429,11.0,7.0,9.8,1.0,0.722222,0.7,0.52,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.12849
std,,0.001081,0.003303,3.822448,2.42212,3.592585,0.444653,0.271383,0.249577,0.207438,...,0.491224,0.5,0.5,0.447874,0.471238,0.468253,0.468671,0.468605,0.468605,0.029293
