In [9]:
import pandas as pd
import numpy as np
import pickle
import os
import sys
from contextlib import contextmanager
from tqdm import tqdm
from sklearn.metrics import ndcg_score
tqdm.pandas()
# Load model
from tensorflow.keras.models import load_model
import tensorflow

import logging
tensorflow.get_logger().setLevel(logging.ERROR)


In [10]:
@contextmanager
def suppress_stdout_stderr():
    """
    A context manager to suppress stdout and stderr.
    """
    with open(os.devnull, 'w') as devnull:
        old_stdout = sys.stdout
        old_stderr = sys.stderr
        sys.stdout = devnull
        sys.stderr = devnull
        try:
            yield
        finally:
            sys.stdout = old_stdout
            sys.stderr = old_stderr

In [11]:
#Load model
model = load_model('./files/models/model_20240704-175259.h5py/')

# Read interaction matrix pickle file
size = 'demo'
type_ = 'train'
fillna_value = '0'
interaction_matrix_file_path = f'./files/pickle/interaction_matrix_{type_}_{size}_{fillna_value}.pkl'
interaction_matrix_df = pd.read_pickle(interaction_matrix_file_path)
print('Interaction matrix df shape:                      ',interaction_matrix_df.shape)

# Read User and article embeddings dataframes
user_matrix_df_file_path = f'./files/pickle/user_matrix_{type_}_{size}_{fillna_value}.pkl'
article_matrix_df_file_path = f'./files/pickle/article_matrix_{type_}_{size}_{fillna_value}.pkl'

user_matrix_df = pd.read_pickle(user_matrix_df_file_path)
article_matrix_df = pd.read_pickle(article_matrix_df_file_path)

# Read history and behavior files
behaviors_path = f'./files/parquet/ebnerd_{size}/{type_}/behaviors.parquet'
behaviors_df = pd.read_parquet(behaviors_path)
print(f'Raw {size} validation behaviors df shape:          ',behaviors_df.shape)

# Load the recoammendation list
# factorized_size = 'small'
# fillnan_value = 'mean_column'
#recommendations_full_lst_file_path = f'./files/pickle/recommendations_behaviors_{val_size}_factorized_{factorized_size}_fillnan_{fil lnan_value}_new.pkl'
# with open(recommendations_full_lst_file_path, 'rb') as f:
#    recommendations_full_lst = pickle.load(f)

Interaction matrix df shape:                       (1590, 1114)
Raw demo validation behaviors df shape:           (24724, 17)


In [12]:
behaviors_df.head(2)

Unnamed: 0,impression_id,article_id,impression_time,read_time,scroll_percentage,device_type,article_ids_inview,article_ids_clicked,user_id,is_sso_user,gender,postcode,age,is_subscriber,session_id,next_read_time,next_scroll_percentage
0,48401,,2023-05-21 21:06:50,21.0,,2,"[9774516, 9771051, 9770028, 9775402, 9774461, ...",[9759966],22779,False,,,,False,21,16.0,27.0
1,152513,9778745.0,2023-05-24 07:31:26,30.0,100.0,1,"[9778669, 9778736, 9778623, 9089120, 9778661, ...",[9778661],150224,False,,,,False,298,2.0,48.0


In [20]:
behaviors_df = behaviors_df[['user_id', 'article_ids_inview', 'article_ids_clicked']].copy()
behaviors_df.head(2)

Unnamed: 0,user_id,article_ids_inview,article_ids_clicked
0,22779,"[9774516, 9771051, 9770028, 9775402, 9774461, ...",[9759966]
1,150224,"[9778669, 9778736, 9778623, 9089120, 9778661, ...",[9778661]


In [21]:
article_matrix_df.head(2)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,290,291,292,293,294,295,296,297,298,299
9775489,0.366531,0.146713,0.164704,-0.04036,-0.17325,-0.126776,-0.270314,0.193243,-0.373554,-0.752814,...,0.047949,0.048509,-0.022732,0.01772,0.017944,-0.265457,-0.341005,0.011712,0.146772,0.0847
9775567,-0.158633,-0.051683,-0.110599,0.19692,0.024415,-0.170163,0.101929,0.386819,0.136305,0.025901,...,0.042049,0.064314,0.021875,0.041001,0.03735,0.010075,-0.119786,-0.004473,0.042979,0.01661


In [22]:
user_matrix_df.head(2)

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,290,291,292,293,294,295,296,297,298,299
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
11313,-0.070924,-0.217349,-0.236625,0.100848,-0.019888,-0.14884,0.232078,0.353403,-0.040379,0.044742,...,0.000936,0.00936,0.000401,0.006503,0.006575,0.001493,-0.024334,0.002544,0.013796,0.002299
13538,0.068525,-0.240215,-0.019002,0.03533,0.124994,0.148824,0.032523,-0.021147,-0.130807,0.177744,...,-0.072049,-0.012922,-0.015489,-0.029724,0.025154,0.029744,-0.118709,-0.008912,0.042732,0.013632


In [23]:
# Example for one row
X = np.hstack((user_matrix_df.loc[11313], article_matrix_df.loc[9775489]))
X= X.reshape((-1, 600))
model.predict(X)[0][0]



14.369877

In [52]:
# Function predict_read_time
def predict_read_time(model,article_id_list,user_id):
    with suppress_stdout_stderr():
        expected_read_times = []
        for article_id in article_id_list:
            try:
                vector = np.hstack((user_matrix_df.loc[user_id], article_matrix_df.loc[article_id]))
                vector= vector.reshape((-1, 600))
                try:
                    predict= model.predict(vector)[0][0]
                except:
                    predict =0
                expected_read_times.append(predict)
            except:
                pass
        return expected_read_times


def sort_zip_lists(row):
    # Zip the two lists
    zipped = list(zip(row['article_ids_inview'], row['Predicted_read_times']))
    # Sort by the second element of each tuple
    sorted_zipped = sorted(zipped, key=lambda x: x[1],reverse=True)[:9]
    return sorted_zipped

def get_final_predicted_article_ids(tuples_list):
    # Extract the first item from each tuple
    return [t[0] for t in tuples_list]

def get_reciprocal_rank(row):
    predicted_item_list = row['Predicted_article_ids']
    clicked_article = row['article_ids_clicked'][0]
    try:
        index = predicted_item_list.index(clicked_article)
        # Return the reciprocal rank
        return 1 / (index + 1)
    except ValueError:
        # If the clicked article is not in the predicted list, return 0
        return 0
    
def calculate_precision(target, predictions):
    tp = predictions.count(target)  # Count true positives
    fp = len(predictions) - tp  # Calculate false positives
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0  # Compute precision
    return precision

def calculate_recall(target, predictions):
    tp = predictions.count(target)  # True Positives: target in predictions
    fn = 1 if tp == 0 else 0  # False Negatives: target not in predictions
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0  # Compute recall
    return recall

In [53]:
behaviors_df_top100 = behaviors_df.head(100).copy()

In [54]:
behaviors_df_top100['Predicted_read_times']= behaviors_df_top100.progress_apply(lambda row: predict_read_time(model,row['article_ids_inview'],row['user_id']),axis=1)

100%|██████████| 100/100 [01:09<00:00,  1.44it/s]


In [55]:
behaviors_df_top100['Predicted_tuples_sorted'] = behaviors_df_top100.progress_apply(sort_zip_lists, axis=1)


100%|██████████| 100/100 [00:00<00:00, 33394.14it/s]


In [56]:
behaviors_df_top100['Predicted_article_ids'] = behaviors_df_top100['Predicted_tuples_sorted'].progress_apply(get_final_predicted_article_ids)

100%|██████████| 100/100 [00:00<?, ?it/s]


In [57]:
behaviors_df_top100['MMR_rank'] = behaviors_df_top100.progress_apply(get_reciprocal_rank,axis=1)

100%|██████████| 100/100 [00:00<00:00, 33346.35it/s]


In [58]:
behaviors_df_top100['Precision@10']= behaviors_df_top100.progress_apply(lambda row: calculate_precision(row['article_ids_clicked'][0],row['Predicted_article_ids']),axis=1)

100%|██████████| 100/100 [00:00<00:00, 33391.48it/s]


In [59]:
behaviors_df_top100['Recall@10']= behaviors_df_top100.progress_apply(lambda row: calculate_recall(row['article_ids_clicked'][0],row['Predicted_article_ids']),axis=1)

100%|██████████| 100/100 [00:00<00:00, 33303.99it/s]


In [60]:
behaviors_df_top100.head(2)

Unnamed: 0,user_id,article_ids_inview,article_ids_clicked,Predicted_read_times,Predicted_tuples_sorted,Predicted_article_ids,MMR_rank,Precision@10,Recall@10
0,22779,"[9774516, 9771051, 9770028, 9775402, 9774461, ...",[9759966],"[18.532225, 11.243594, 13.600227, 23.314312, 1...","[(9775371, 288.13516), (9775402, 23.314312), (...","[9775371, 9775402, 9759966, 9774516, 9142581, ...",0.333333,0.111111,1.0
1,150224,"[9778669, 9778736, 9778623, 9089120, 9778661, ...",[9778661],"[15.440342, 14.135092, 21.227169, 16.32058, 11...","[(9778718, 201.79462), (9778623, 21.227169), (...","[9778718, 9778623, 9778657, 9482970, 9089120, ...",0.0,0.0,0.0


In [61]:
print("MRR: ", behaviors_df_top100['MMR_rank'].sum()/behaviors_df_top100.shape[0])
print("Precision@10: ", behaviors_df_top100['Precision@10'].sum()/behaviors_df_top100.shape[0])
print("Recall@10: ", behaviors_df_top100['Recall@10'].sum()/behaviors_df_top100.shape[0])

MRR:  0.29913492063492064
Precision@10:  0.12789682539682537
Recall@10:  0.86


### Interpretation

- An MRR of approximately 0.299 means that, on average, the true positive item is found at about the third position in the recommendation list (since 1/0.299≈3.34). This is a reasonably good result, indicating that the recommender system often ranks the relevant items near the top of the recommendation list.

- A Precision@10 of approximately 0.128 means that about 12.8% of the items in the top 10 recommendations are relevant. This indicates that for every 10 items recommended, around 1.28 items are relevant. This precision value suggests that there is room for improvement in terms of recommending more relevant items in the top 10.

- A Recall@10 of 0.86 means that the recommender system successfully identifies 86% of the relevant items within the top 10 recommendations. This high recall value indicates that the system is very effective at finding relevant items, though they may not always be ranked at the very top of the list.

### Overall Interpretation
- MRR (0.299): The relevant items are generally ranked around the third position on average.
- Precision@10 (0.128): About 12.8% of the top 10 recommendations are relevant, indicating room for improvement in the quality of top recommendations.
- Recall@10 (0.86): The system successfully finds 86% of the relevant items within the top 10 recommendations, showing strong recall performance.

To improve precision and MRR for your recommender system, you can consider various strategies that involve improving your preprocessing, model, and recommendation logic. Here are some approaches:

1. Data Preprocessing Improvements
    1. Feature Engineering:

        - Add More Features: Incorporate additional features that could influence reading time predictions, such as article length, topic, author, publication date, user preferences, etc.
        - Normalization and Scaling: Ensure that your features are properly normalized and scaled to help the model learn more effectively.
        - Categorical Features: Use techniques like one-hot encoding or embeddings for categorical features (e.g., article categories or authors).

    2. Data Cleaning:

        - Remove Outliers: Identify and remove or handle outliers in reading times to ensure the model isn’t biased by extreme values.
        - Handle Missing Values: Ensure any missing values in the dataset are properly handled through imputation or removal.

    3. Data Augmentation:

        - Synthetic Data: If your dataset is small, consider generating synthetic data to improve model training.

2. Model Improvements

    1. Model Architecture:

        - Experiment with Different Architectures: Try different neural network architectures such as deeper networks, recurrent neural networks (RNNs), transformers, etc.
    - Hyperparameter Tuning: Perform hyperparameter tuning to find the optimal parameters for your model (learning rate, batch size, number of layers, units per layer, etc.).

    2. Training Techniques:

        - Regularization: Use regularization techniques like dropout, L2 regularization, and early stopping to prevent overfitting.
        - Ensemble Methods: Combine predictions from multiple models using ensemble methods (e.g., bagging, boosting) to improve accuracy and robustness.

3. Recommendation Logic Improvements

    1. Post-Processing Predictions:

        - Re-Ranking: After predicting reading times, re-rank articles using additional criteria such as user preferences, recent trends, or article popularity.
        - Hybrid Recommendations:

        - Combine Models: Use a hybrid approach that combines collaborative filtering, content-based filtering, and your read time prediction model to recommend articles.

#### Other

In [None]:
# # ----- NOT SURE IF THESE TRANSFORMATION ARE NEEDED NEITHER IF WE MAKE THE RECOMMENDATIONS IN THIS DATASET --------
# behaviors_val_df = behaviors_val_df[['user_id','article_ids_inview', 'article_ids_clicked']]
# behaviors_val_df = behaviors_val_df.explode('article_ids_clicked')

# behaviors_val_grouped_clicked = df = behaviors_val_df.groupby('user_id')['article_ids_clicked'].apply(list).reset_index()

# behaviors_val_df = behaviors_val_df.explode('article_ids_inview')

# behaviors_val_grouped_inview_df = behaviors_val_df.groupby('user_id')['article_ids_inview'].apply(list).reset_index()

# behaviors_val_df = pd.merge(behaviors_val_grouped_inview_df, behaviors_val_grouped_clicked, on='user_id', how='inner')

# behaviors_val_df['article_ids_inview_setted_lst'] = behaviors_val_df['article_ids_inview'].apply(lambda lst: list(set(lst)))

In [None]:
# print(behaviors_val_df.shape)
# behaviors_val_df.head(2)

In [90]:
# article_ids_clicked_lst = behaviors_val_df['article_ids_clicked'].tolist()
# user_ids_lst = behaviors_val_df['user_id'].tolist()

In [91]:
# counter, precisions, recalls, ndcgs, K = 0, [], [], [], 10



# for user_id, clicked_lst, recommends_lst  in tqdm(list(zip(user_ids_lst, article_ids_clicked_lst, recommendations_full_lst))):
    
#     y_true = clicked_lst
#     y_pred = recommends_lst#[:K]

#     precision = len(set(y_true).intersection(set(y_pred))) / len(y_pred) if len(y_pred) > 0 else 0
#     recall = len(set(y_true).intersection(set(y_pred))) / len(y_true) if len(y_true) > 0 else 0

#     precisions.append(precision)
#     recalls.append(recall)
#     #ndcgs.append(ndcg_score([y_true], [y_pred], k=K))

#     if precision == 0:
#         counter +=1

#     print(f"User id: {user_id}, Length: {len(recommends_lst)}, Percision: {precision}, Recall: {recall}  ")

# print({
#     'precision@K': sum(precisions) / len(precisions)
#     ,'recall@K': sum(recalls) / len(recalls)
#     #,'ndcg@K': sum(ndcgs) / len(ndcgs)
#     })