In [34]:
import pickle

import numpy as np
import pandas as pd
from sklearn.metrics import ndcg_score
from tensorflow.keras.models import load_model
from tqdm import tqdm_notebook as tqdm
from tqdm import tqdm
tqdm.pandas()


In [35]:
# --- Configuration: selects which pre-computed artefacts are loaded below ---
size = 'demo'
type_ = 'train'
fillna_value = '0'

# NOTE(review): pd.read_pickle can execute arbitrary code while unpickling;
# only load pickle files that were produced by this project.

# Read interaction matrix pickle file
interaction_matrix_file_path = f'./files/pickle/interaction_matrix_{type_}_{size}_{fillna_value}.pkl'
interaction_matrix_df = pd.read_pickle(interaction_matrix_file_path)
print('Interaction matrix df shape:                      ',interaction_matrix_df.shape)

# Read the user and article matrices that are concatenated into model inputs later on
user_matrix_df_file_path = f'./files/pickle/user_matrix_{type_}_{size}_{fillna_value}.pkl'
article_matrix_df_file_path = f'./files/pickle/article_matrix_{type_}_{size}_{fillna_value}.pkl'

user_matrix_df = pd.read_pickle(user_matrix_df_file_path)
article_matrix_df = pd.read_pickle(article_matrix_df_file_path)

# Read the behaviors file
# NOTE(review): the frame is called `behaviors_val_df`, but the path is built from
# `type_`, so with type_ = 'train' this is the *train* split. The label printed below
# now reports the split that was actually loaded; confirm whether evaluating on the
# split the matrices were built from is intended.
behaviors_val_path = f'./files/parquet/ebnerd_{size}/{type_}/behaviors.parquet'
behaviors_val_df = pd.read_parquet(behaviors_val_path)
print(f'Raw {size} {type_} behaviors df shape:          ',behaviors_val_df.shape)

# Load model (`load_model` is imported in the notebook's import cell)
model = load_model('./files/models/model_20240702-215442.h5py')

# Load the recommendation list (currently disabled)
# NOTE(review): `val_size` is not defined anywhere in the visible notebook; define it
# before re-enabling this block.
# factorized_size = 'small'
# fillnan_value = 'mean_column'
# recommendations_full_lst_file_path = f'./files/pickle/recommendations_behaviors_{val_size}_factorized_{factorized_size}_fillnan_{fillnan_value}_new.pkl'
# with open(recommendations_full_lst_file_path, 'rb') as f:
#    recommendations_full_lst = pickle.load(f)

Interaction matrix df shape:                       (1590, 1114)
Raw demo validation behaviors df shape:           (24724, 17)


In [36]:
behaviors_val_df.head(2)

Unnamed: 0,impression_id,article_id,impression_time,read_time,scroll_percentage,device_type,article_ids_inview,article_ids_clicked,user_id,is_sso_user,gender,postcode,age,is_subscriber,session_id,next_read_time,next_scroll_percentage
0,48401,,2023-05-21 21:06:50,21.0,,2,"[9774516, 9771051, 9770028, 9775402, 9774461, ...",[9759966],22779,False,,,,False,21,16.0,27.0
1,152513,9778745.0,2023-05-24 07:31:26,30.0,100.0,1,"[9778669, 9778736, 9778623, 9089120, 9778661, ...",[9778661],150224,False,,,,False,298,2.0,48.0


In [37]:
# user_vectors = user_matrix_df.values
# article_vectors = article_matrix_df.values
# interaction_matrix = interaction_matrix_df.values

In [38]:
# # Get the indices of the non-zero entries in the interaction matrix
# user_idx, article_idx = np.where(interaction_matrix != 0) # interaction_matrix -> interaction_matrix_normalized
# read_times = interaction_matrix[user_idx, article_idx]  # interaction_matrix -> interaction_matrix_normalized

In [39]:
user_matrix_df.head()

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,290,291,292,293,294,295,296,297,298,299
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
11313,-0.070924,-0.217349,0.236625,-0.100848,-0.019888,0.14884,0.232078,0.353403,-0.040379,0.044742,...,-0.000936,0.00936,-0.000401,0.006503,0.006575,-0.001493,0.024334,0.002544,-0.013796,0.002299
13538,0.068525,-0.240215,0.019002,-0.03533,0.124994,-0.148824,0.032523,-0.021147,-0.130807,0.177744,...,0.072049,-0.012922,0.015489,-0.029724,0.025154,-0.029744,0.118709,-0.008912,-0.042732,0.013632
15430,0.433257,-0.075038,-0.129888,0.062601,0.640641,-0.235913,0.131881,0.015557,-0.326189,-0.075566,...,0.000717,0.027255,-0.055797,0.068723,0.050216,-0.003963,0.125391,-0.009215,-0.037573,0.008591
19181,-0.247978,-2.387947,-0.858417,0.51563,-0.183754,-0.35578,-1.576106,-1.469467,-0.106678,1.032562,...,0.037641,-0.008951,-0.01729,-0.005728,-0.029735,-0.001142,0.276328,-0.025773,-0.079617,0.114347
19568,0.006248,-0.016308,-0.027725,-0.008185,0.024533,0.021312,0.013478,-0.000567,-0.004255,0.014872,...,-0.000827,0.001498,-0.002634,0.001637,0.000933,-0.000821,0.002896,-0.000237,-0.000967,0.001088


In [40]:
article_matrix_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,290,291,292,293,294,295,296,297,298,299
9775489,0.366531,0.146713,-0.164704,0.04036,-0.17325,0.126776,-0.270314,0.193243,-0.373554,-0.752814,...,-0.047949,0.048509,0.022732,0.01772,0.017944,0.265457,0.341005,0.011712,-0.146772,0.0847
9775567,-0.158633,-0.051683,0.110599,-0.19692,0.024415,0.170163,0.101929,0.386819,0.136305,0.025901,...,-0.042049,0.064314,-0.021875,0.041001,0.03735,-0.010075,0.119786,-0.004473,-0.042979,0.01661
9776715,-0.115771,-0.818774,-0.444051,0.905837,-0.434201,-0.083792,0.373425,-0.626112,-1.02166,-0.913861,...,0.160496,-0.057085,0.082577,-0.0578,0.128208,-0.034508,0.174711,0.011084,-0.086246,0.058854
9776855,-0.20205,0.584429,0.700055,-0.925909,-0.606299,0.467376,0.74432,0.802365,-0.959991,-0.374876,...,-0.777989,-0.595453,0.185943,0.082115,0.164,-0.01243,0.328688,0.064689,-0.261553,0.050757
9776897,1.031081,-0.996853,0.86772,0.093824,0.472688,0.47118,-0.667084,0.832021,-0.39977,1.084673,...,-0.045409,0.756019,-0.101719,0.840621,0.407071,-0.446002,1.284542,0.158753,-0.911557,0.117081


In [41]:
interaction_matrix_df

merge_article_ids,9775489,9775567,9776715,9776855,9776897,9778627,9778701,9779860,9754160,9768308,...,9761531,9432425,9436758,9733858,9709983,9712694,9725584,9767554,9695851,9750696
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
11313,13.0,108.0,12.0,4.0,11.0,51.0,9.0,21.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
13538,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,11.0,6.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
15430,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
19181,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
19568,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2581162,0.0,0.0,0.0,0.0,0.0,0.0,49.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,18.0,0.0
2583035,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2583477,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2585449,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,89.0


In [42]:
X = np.hstack((user_matrix_df.loc[11313], article_matrix_df.loc[9775489]))

In [43]:
X= X.reshape((-1, 600))

In [44]:
model.predict(X)

array([[11.51887]], dtype=float32)

In [45]:
interaction_matrix_df.head(2)

merge_article_ids,9775489,9775567,9776715,9776855,9776897,9778627,9778701,9779860,9754160,9768308,...,9761531,9432425,9436758,9733858,9709983,9712694,9725584,9767554,9695851,9750696
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
11313,13.0,108.0,12.0,4.0,11.0,51.0,9.0,21.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
13538,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,11.0,6.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [46]:
model.predict(X)[0][0]


11.51887

In [47]:
# Score every candidate article of one impression with the read-time model.
def predict_time(model, article_id_list, user_id, user_matrix=None, article_matrix=None, default=0.0):
    """Predict a read time for each article in ``article_id_list`` for ``user_id``.

    The model input for one (user, article) pair is the user's row of
    ``user_matrix`` followed by the article's row of ``article_matrix``.
    All pairs of the impression are scored with a single ``model.predict`` call.

    The returned list is always position-aligned with ``article_id_list``:
    entries for which no prediction can be made (unknown user, unknown
    article, or a failing ``model.predict``) are set to ``default`` instead of
    being dropped, so callers can safely ``zip`` ids with scores.

    Args:
        model: Object exposing ``predict(features)`` that returns one value
            per input row (e.g. an array of shape ``(n, 1)``).
        article_id_list: Iterable of article ids to score.
        user_id: Index label of the user in ``user_matrix``.
        user_matrix: DataFrame indexed by user id. Defaults to the
            notebook-level ``user_matrix_df``.
        article_matrix: DataFrame indexed by article id. Defaults to the
            notebook-level ``article_matrix_df``.
        default: Value used where no prediction is available.

    Returns:
        list: One predicted read time per entry of ``article_id_list``.
    """
    import warnings  # local import: keeps the cell self-contained

    if user_matrix is None:
        user_matrix = user_matrix_df
    if article_matrix is None:
        article_matrix = article_matrix_df

    article_ids = list(article_id_list)
    expected_read_times = [default] * len(article_ids)

    # Nothing to score, or no embedding for this user: every slot keeps the default.
    if not article_ids or user_id not in user_matrix.index:
        return expected_read_times

    # Remember the positions of the articles we can score so results can be
    # written back to the right slots.
    known_positions = [
        position for position, article_id in enumerate(article_ids)
        if article_id in article_matrix.index
    ]
    if not known_positions:
        return expected_read_times

    user_vector = np.asarray(user_matrix.loc[user_id], dtype=float).ravel()
    known_ids = [article_ids[position] for position in known_positions]
    article_vectors = article_matrix.loc[known_ids].to_numpy(dtype=float)

    # One row per article: [user features | article features].
    features = np.hstack((
        np.tile(user_vector, (len(known_positions), 1)),
        article_vectors,
    ))

    try:
        predictions = np.asarray(model.predict(features)).reshape(-1)
        if predictions.shape[0] != len(known_positions):
            raise ValueError(
                f'expected {len(known_positions)} predictions, got {predictions.shape[0]}'
            )
    except Exception as error:  # best effort: keep the notebook running, but say so
        warnings.warn(f'predict_time: prediction failed for user {user_id}: {error}')
        return expected_read_times

    for position, predicted in zip(known_positions, predictions):
        expected_read_times[position] = predicted
    return expected_read_times

In [48]:
behaviors_val_df.head(2)

Unnamed: 0,impression_id,article_id,impression_time,read_time,scroll_percentage,device_type,article_ids_inview,article_ids_clicked,user_id,is_sso_user,gender,postcode,age,is_subscriber,session_id,next_read_time,next_scroll_percentage
0,48401,,2023-05-21 21:06:50,21.0,,2,"[9774516, 9771051, 9770028, 9775402, 9774461, ...",[9759966],22779,False,,,,False,21,16.0,27.0
1,152513,9778745.0,2023-05-24 07:31:26,30.0,100.0,1,"[9778669, 9778736, 9778623, 9089120, 9778661, ...",[9778661],150224,False,,,,False,298,2.0,48.0


In [49]:
# Take an independent copy of the first 100 rows; the following cells add columns
# to this copy rather than to behaviors_val_df.
behaviors_val_df_dummy = behaviors_val_df.head(100).copy()

In [50]:
# For every row, call predict_time on the row's in-view article ids and user id and
# store the returned list. progress_apply is the progress-bar variant of apply that
# tqdm.pandas() registers in the import cell.
behaviors_val_df_dummy['Predicted_read_times']= behaviors_val_df_dummy.progress_apply(lambda row: predict_time(model,row['article_ids_inview'],row['user_id']),axis=1)

100%|██████████| 100/100 [00:24<00:00,  4.08it/s]


In [51]:
behaviors_val_df_dummy

Unnamed: 0,impression_id,article_id,impression_time,read_time,scroll_percentage,device_type,article_ids_inview,article_ids_clicked,user_id,is_sso_user,gender,postcode,age,is_subscriber,session_id,next_read_time,next_scroll_percentage,Predicted_read_times
0,48401,,2023-05-21 21:06:50,21.0,,2,"[9774516, 9771051, 9770028, 9775402, 9774461, ...",[9759966],22779,False,,,,False,21,16.0,27.0,"[12.889562, 15.58995, 64.6143, 93.03537, 43.13..."
1,152513,9778745.0,2023-05-24 07:31:26,30.0,100.0,1,"[9778669, 9778736, 9778623, 9089120, 9778661, ...",[9778661],150224,False,,,,False,298,2.0,48.0,"[75.37953, 74.26837, 106.60074, 81.59031, 240...."
2,155390,,2023-05-24 07:30:33,45.0,,1,"[9778369, 9777856, 9778500, 9778021, 9778627, ...",[9777856],160892,False,,,,False,401,215.0,100.0,"[38.01451, 35.36744, 60.81607, 30.366291, 17.7..."
3,214679,,2023-05-23 05:25:40,33.0,,2,"[9776715, 9776406, 9776566, 9776071, 9776808, ...",[9776566],1001055,False,,,,False,1357,40.0,47.0,"[17.094727, 6.8501525, 166.09761, 77.24983, 14..."
4,214681,,2023-05-23 05:31:54,21.0,,2,"[9775202, 9776855, 9776688, 9771995, 9776583, ...",[9776553],1001055,False,,,,False,1358,5.0,49.0,"[15.218635, 82.53052, 204.86954, 70.66979, 25...."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,2501575,,2023-05-25 06:58:46,35.0,,1,"[9766434, 9779205, 9483850, 9780193, 9432542, ...",[9780181],2091506,True,0.0,,30.0,True,19811,25.0,,"[53.918793, 46.004215, 45.79716, 57.915337, 10..."
96,2510969,,2023-05-25 06:43:11,22.0,,1,"[7213923, 9766434, 9779713, 9779564, 9780193, ...",[9780193],2326725,True,,,,False,19992,6.0,59.0,"[47.61934, 181.52727, 186.09651, 78.87868, 363..."
97,2510974,,2023-05-25 06:43:53,85.0,,1,"[9779747, 9737023, 9774297, 9776985, 9776916, ...",[9779705],2326725,True,,,,False,19992,159.0,100.0,"[261.03445, 62.849846, 355.30496, 117.6164, 22..."
98,2514375,,2023-05-25 06:13:52,23.0,,2,"[9779577, 9779263, 9779968, 9780195, 9780193, ...",[9766434],2430325,True,1.0,,,True,20333,10.0,19.0,"[260.945, 77.45724, 81.99356, 7.530157, 13.788..."


In [52]:
behaviors_val_df_dummy['Predicted_read_times'][0]

[12.889562,
 15.58995,
 64.6143,
 93.03537,
 43.13626,
 109.53412,
 66.95216,
 115.48274,
 10.560135,
 184.25256,
 219.01572]

In [53]:
def sort_zip_lists(row, top_k=9):
    """Rank a row's in-view articles by predicted read time, highest first.

    Args:
        row: Mapping / Series with ``'article_ids_inview'`` and
            ``'Predicted_read_times'`` entries. The two sequences are paired
            positionally; pairing stops at the shorter one.
        top_k: Maximum number of pairs to return (non-negative). Defaults to
            9, the cut-off that was previously hard-coded. ``None`` returns
            the full ranking.

    Returns:
        list[tuple]: ``(article_id, predicted_read_time)`` pairs sorted by
        descending prediction; ties keep their original relative order.
    """
    scored_articles = zip(row['article_ids_inview'], row['Predicted_read_times'])
    ranked = sorted(scored_articles, key=lambda pair: pair[1], reverse=True)
    return ranked if top_k is None else ranked[:top_k]

In [54]:
# Apply sort_zip_lists to every row and store its result (a list of
# (article_id, predicted_read_time) tuples) in a new column.
behaviors_val_df_dummy['sorted_tuples'] = behaviors_val_df_dummy.apply(sort_zip_lists, axis=1)


In [55]:
behaviors_val_df_dummy['sorted_tuples'][1]

[(9778718, 4112.627),
 (9778661, 240.42564),
 (9778657, 126.89184),
 (9778623, 106.60074),
 (9482970, 92.15602),
 (9089120, 81.59031),
 (9777492, 75.744286),
 (9778669, 75.37953),
 (9778736, 74.26837)]

In [56]:
def get_first_items(tuples_list):
    """Return the element at position 0 of every entry in ``tuples_list``, in order."""
    first_items = []
    for entry in tuples_list:
        first_items.append(entry[0])
    return first_items

In [57]:
# Keep only the article ids (first tuple element) of the ranked pairs, preserving rank order.
behaviors_val_df_dummy['Predicted_article_ids'] = behaviors_val_df_dummy['sorted_tuples'].apply(get_first_items)


In [58]:
def reciprocal_rank(row):
    """Reciprocal rank of the best-ranked clicked article for one row.

    Args:
        row: Mapping / Series with ``'Predicted_article_ids'`` (ranked article
            ids, best first) and ``'article_ids_clicked'`` (ids the user
            clicked; may hold several ids, be empty, or be ``None``).

    Returns:
        float: ``1 / rank`` (1-based) of the first predicted article that was
        clicked, or ``0.0`` when nothing was clicked or no clicked article
        appears among the predictions.
    """
    clicked_articles = row['article_ids_clicked']
    # No click recorded: there is no relevant item to find.
    if clicked_articles is None or len(clicked_articles) == 0:
        return 0.0

    relevant = set(clicked_articles)
    # enumerate works for lists and arrays alike, unlike list.index.
    for rank, article_id in enumerate(row['Predicted_article_ids'], start=1):
        if article_id in relevant:
            return 1 / rank
    # None of the clicked articles made it into the predicted list.
    return 0.0

In [60]:
behaviors_val_df_dummy.head()

Unnamed: 0,impression_id,article_id,impression_time,read_time,scroll_percentage,device_type,article_ids_inview,article_ids_clicked,user_id,is_sso_user,gender,postcode,age,is_subscriber,session_id,next_read_time,next_scroll_percentage,Predicted_read_times,sorted_tuples,Predicted_article_ids
0,48401,,2023-05-21 21:06:50,21.0,,2,"[9774516, 9771051, 9770028, 9775402, 9774461, ...",[9759966],22779,False,,,,False,21,16.0,27.0,"[12.889562, 15.58995, 64.6143, 93.03537, 43.13...","[(9759966, 219.01572), (9775371, 184.25256), (...","[9759966, 9775371, 9142581, 9759544, 9775402, ..."
1,152513,9778745.0,2023-05-24 07:31:26,30.0,100.0,1,"[9778669, 9778736, 9778623, 9089120, 9778661, ...",[9778661],150224,False,,,,False,298,2.0,48.0,"[75.37953, 74.26837, 106.60074, 81.59031, 240....","[(9778718, 4112.627), (9778661, 240.42564), (9...","[9778718, 9778661, 9778657, 9778623, 9482970, ..."
2,155390,,2023-05-24 07:30:33,45.0,,1,"[9778369, 9777856, 9778500, 9778021, 9778627, ...",[9777856],160892,False,,,,False,401,215.0,100.0,"[38.01451, 35.36744, 60.81607, 30.366291, 17.7...","[(9778351, 60.8507), (9778500, 60.81607), (977...","[9778351, 9778500, 9778628, 9778369, 9777856, ..."
3,214679,,2023-05-23 05:25:40,33.0,,2,"[9776715, 9776406, 9776566, 9776071, 9776808, ...",[9776566],1001055,False,,,,False,1357,40.0,47.0,"[17.094727, 6.8501525, 166.09761, 77.24983, 14...","[(9776566, 166.09761), (9776046, 113.9991), (9...","[9776566, 9776046, 9776855, 9776071, 9776715, ..."
4,214681,,2023-05-23 05:31:54,21.0,,2,"[9775202, 9776855, 9776688, 9771995, 9776583, ...",[9776553],1001055,False,,,,False,1358,5.0,49.0,"[15.218635, 82.53052, 204.86954, 70.66979, 25....","[(9776688, 204.86954), (9695098, 172.0707), (9...","[9776688, 9695098, 9776553, 9776855, 9776071, ..."


In [61]:
# Store the per-row result of reciprocal_rank (progress_apply comes from tqdm.pandas()).
# NOTE(review): the column name 'MMR_rank' looks like a typo for "MRR" (mean reciprocal
# rank); renaming it requires updating the cell that averages this column as well.
behaviors_val_df_dummy['MMR_rank'] = behaviors_val_df_dummy.progress_apply(reciprocal_rank,axis=1)

100%|██████████| 100/100 [00:00<00:00, 39900.15it/s]


In [62]:
behaviors_val_df_dummy

Unnamed: 0,impression_id,article_id,impression_time,read_time,scroll_percentage,device_type,article_ids_inview,article_ids_clicked,user_id,is_sso_user,...,postcode,age,is_subscriber,session_id,next_read_time,next_scroll_percentage,Predicted_read_times,sorted_tuples,Predicted_article_ids,MMR_rank
0,48401,,2023-05-21 21:06:50,21.0,,2,"[9774516, 9771051, 9770028, 9775402, 9774461, ...",[9759966],22779,False,...,,,False,21,16.0,27.0,"[12.889562, 15.58995, 64.6143, 93.03537, 43.13...","[(9759966, 219.01572), (9775371, 184.25256), (...","[9759966, 9775371, 9142581, 9759544, 9775402, ...",1.000000
1,152513,9778745.0,2023-05-24 07:31:26,30.0,100.0,1,"[9778669, 9778736, 9778623, 9089120, 9778661, ...",[9778661],150224,False,...,,,False,298,2.0,48.0,"[75.37953, 74.26837, 106.60074, 81.59031, 240....","[(9778718, 4112.627), (9778661, 240.42564), (9...","[9778718, 9778661, 9778657, 9778623, 9482970, ...",0.500000
2,155390,,2023-05-24 07:30:33,45.0,,1,"[9778369, 9777856, 9778500, 9778021, 9778627, ...",[9777856],160892,False,...,,,False,401,215.0,100.0,"[38.01451, 35.36744, 60.81607, 30.366291, 17.7...","[(9778351, 60.8507), (9778500, 60.81607), (977...","[9778351, 9778500, 9778628, 9778369, 9777856, ...",0.200000
3,214679,,2023-05-23 05:25:40,33.0,,2,"[9776715, 9776406, 9776566, 9776071, 9776808, ...",[9776566],1001055,False,...,,,False,1357,40.0,47.0,"[17.094727, 6.8501525, 166.09761, 77.24983, 14...","[(9776566, 166.09761), (9776046, 113.9991), (9...","[9776566, 9776046, 9776855, 9776071, 9776715, ...",1.000000
4,214681,,2023-05-23 05:31:54,21.0,,2,"[9775202, 9776855, 9776688, 9771995, 9776583, ...",[9776553],1001055,False,...,,,False,1358,5.0,49.0,"[15.218635, 82.53052, 204.86954, 70.66979, 25....","[(9776688, 204.86954), (9695098, 172.0707), (9...","[9776688, 9695098, 9776553, 9776855, 9776071, ...",0.333333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,2501575,,2023-05-25 06:58:46,35.0,,1,"[9766434, 9779205, 9483850, 9780193, 9432542, ...",[9780181],2091506,True,...,,30.0,True,19811,25.0,,"[53.918793, 46.004215, 45.79716, 57.915337, 10...","[(9432542, 101.38824), (9779713, 67.86928), (9...","[9432542, 9779713, 9780193, 9766434, 9780195, ...",0.000000
96,2510969,,2023-05-25 06:43:11,22.0,,1,"[7213923, 9766434, 9779713, 9779564, 9780193, ...",[9780193],2326725,True,...,,,False,19992,6.0,59.0,"[47.61934, 181.52727, 186.09651, 78.87868, 363...","[(9780193, 363.30615), (9779713, 186.09651), (...","[9780193, 9779713, 9766434, 9448302, 9547869, ...",1.000000
97,2510974,,2023-05-25 06:43:53,85.0,,1,"[9779747, 9737023, 9774297, 9776985, 9776916, ...",[9779705],2326725,True,...,,,False,19992,159.0,100.0,"[261.03445, 62.849846, 355.30496, 117.6164, 22...","[(9779873, 749.9902), (9780019, 698.55646), (9...","[9779873, 9780019, 9778939, 9779797, 9779748, ...",0.000000
98,2514375,,2023-05-25 06:13:52,23.0,,2,"[9779577, 9779263, 9779968, 9780195, 9780193, ...",[9766434],2430325,True,...,,,True,20333,10.0,19.0,"[260.945, 77.45724, 81.99356, 7.530157, 13.788...","[(9779577, 260.945), (9779205, 85.99841), (977...","[9779577, 9779205, 9779968, 9779263, 9780181, ...",0.166667


In [64]:
# Average of the per-row reciprocal ranks: column total divided by the number of rows.
behaviors_val_df_dummy['MMR_rank'].sum() / len(behaviors_val_df_dummy)

0.3576746031746032

In [None]:
# # ----- NOT SURE WHETHER THESE TRANSFORMATIONS ARE NEEDED, NOR WHETHER WE MAKE THE RECOMMENDATIONS ON THIS DATASET --------
# behaviors_val_df = behaviors_val_df[['user_id','article_ids_inview', 'article_ids_clicked']]
# behaviors_val_df = behaviors_val_df.explode('article_ids_clicked')

# behaviors_val_grouped_clicked = df = behaviors_val_df.groupby('user_id')['article_ids_clicked'].apply(list).reset_index()

# behaviors_val_df = behaviors_val_df.explode('article_ids_inview')

# behaviors_val_grouped_inview_df = behaviors_val_df.groupby('user_id')['article_ids_inview'].apply(list).reset_index()

# behaviors_val_df = pd.merge(behaviors_val_grouped_inview_df, behaviors_val_grouped_clicked, on='user_id', how='inner')

# behaviors_val_df['article_ids_inview_setted_lst'] = behaviors_val_df['article_ids_inview'].apply(lambda lst: list(set(lst)))

In [None]:
# print(behaviors_val_df.shape)
# behaviors_val_df.head(2)

#### Other

In [90]:
# article_ids_clicked_lst = behaviors_val_df['article_ids_clicked'].tolist()
# user_ids_lst = behaviors_val_df['user_id'].tolist()

In [91]:
# counter, precisions, recalls, ndcgs, K = 0, [], [], [], 10



# for user_id, clicked_lst, recommends_lst  in tqdm(list(zip(user_ids_lst, article_ids_clicked_lst, recommendations_full_lst))):
    
#     y_true = clicked_lst
#     y_pred = recommends_lst#[:K]

#     precision = len(set(y_true).intersection(set(y_pred))) / len(y_pred) if len(y_pred) > 0 else 0
#     recall = len(set(y_true).intersection(set(y_pred))) / len(y_true) if len(y_true) > 0 else 0

#     precisions.append(precision)
#     recalls.append(recall)
#     #ndcgs.append(ndcg_score([y_true], [y_pred], k=K))

#     if precision == 0:
#         counter +=1

#     print(f"User id: {user_id}, Length: {len(recommends_lst)}, Precision: {precision}, Recall: {recall}  ")

# print({
#     'precision@K': sum(precisions) / len(precisions)
#     ,'recall@K': sum(recalls) / len(recalls)
#     #,'ndcg@K': sum(ndcgs) / len(ndcgs)
#     })