In [2]:
# Import the necessary libraries

import numpy as np
import pandas as pd

from sklearn.metrics import ndcg_score, roc_auc_score, roc_curve, auc
import matplotlib.pyplot as plt

In [3]:
# Load the pickled interaction / user / article matrices.
# NOTE: pd.read_pickle can execute arbitrary code while unpickling; only point
# these paths at files produced by a trusted pipeline.

# --- Selection of the data set variant (used to build the file names) ---
size = 'demo'
type_ = 'validation'
fillna_value = '0'

# --- File locations, all following the same naming scheme ---
interaction_matrix_file_path = f'./files/pickle/interaction_matrix_{type_}_{size}_{fillna_value}.pkl'
user_matrix_df_file_path = f'./files/pickle/user_matrix_{type_}_{size}_{fillna_value}.pkl'
article_matrix_df_file_path = f'./files/pickle/article_matrix_{type_}_{size}_{fillna_value}.pkl'

# --- Interaction matrix: read it and report its shape ---
interaction_matrix_df = pd.read_pickle(interaction_matrix_file_path)
print('Interaction matrix df shape:                      ',interaction_matrix_df.shape)

# --- User and article matrices (labelled "embedding" in the printed report) ---
user_matrix_df = pd.read_pickle(user_matrix_df_file_path)
article_matrix_df = pd.read_pickle(article_matrix_df_file_path)
print('User embedding df shape:                         ',user_matrix_df.shape)
print('Article embedding df shape:                      ',article_matrix_df.shape)

Interaction matrix df shape:                       (1562, 1144)
User embedding df shape:                          (1562, 300)
Article embedding df shape:                       (1144, 300)


In [None]:
# Recommendation System
def recommend_articles(user_id, U, Vt, user_item_matrix, article_features, top_n=5):
    """Recommend the highest-scoring articles a user has not interacted with yet.

    Users with at least one non-zero entry in their interaction row are scored
    with the latent-factor product ``U[user_id] @ Vt``. Users whose row is all
    zeros (cold start) are scored with the per-article mean of
    ``user_item_matrix`` instead, i.e. a popularity ranking.

    Parameters
    ----------
    user_id : int
        Positional row index of the user in ``U`` and ``user_item_matrix``.
    U : array-like, shape (n_users, n_factors)
        User factor matrix.
    Vt : array-like, shape (n_factors, n_articles)
        Article factor matrix, already transposed.
    user_item_matrix : array-like, shape (n_users, n_articles)
        Interaction matrix; a value of exactly 0 means "no interaction".
    article_features : any
        Currently unused. Kept so existing callers keep working.
    top_n : int, default 5
        Maximum number of articles to return.

    Returns
    -------
    list of int
        Up to ``top_n`` article column indices, best score first. Ties are
        broken by ascending article index.

    Raises
    ------
    ValueError
        If ``top_n`` is negative.
    IndexError
        If ``user_id`` is outside the rows of the given matrices.
    """
    if top_n < 0:
        raise ValueError("top_n must be non-negative")

    # Accept pandas DataFrames as well as ndarrays; rows/columns are then
    # addressed by position, not by label.
    interactions = np.asarray(user_item_matrix)
    user_interactions = interactions[user_id, :]

    # Single definition of "not interacted", shared by the cold-start check and
    # the final filter so the two can never disagree.
    not_interacted = user_interactions == 0

    if not not_interacted.all():
        # User has interactions: collaborative filtering via latent factors.
        user_vector = np.asarray(U)[user_id, :].reshape(1, -1)
        predicted_ratings = np.dot(user_vector, np.asarray(Vt)).flatten()
    else:
        # Cold start: fall back to popularity (mean interaction per article).
        predicted_ratings = np.mean(interactions, axis=0)

    # Stable sort keeps tied scores in ascending index order, which makes the
    # output reproducible.
    ranked = np.argsort(-predicted_ratings, kind="stable")

    # Boolean-mask lookup replaces the per-article membership scan of an index
    # array; tolist() yields plain Python ints.
    return ranked[not_interacted[ranked]][:top_n].tolist()

# Example usage:
# Build the recommender inputs from the matrices loaded earlier in this notebook
# so the cell also runs after Restart Kernel -> Run All (U, Vt, user_item_matrix
# and article_features were not defined anywhere before this point).
# NOTE(review): assumes user_matrix_df / article_matrix_df hold user and article
# factors in the same latent space, so that U @ Vt scores every
# (user, article) pair of interaction_matrix_df -- TODO confirm.
U = user_matrix_df.to_numpy()
Vt = article_matrix_df.to_numpy().T
user_item_matrix = interaction_matrix_df.to_numpy()
article_features = article_matrix_df.to_numpy()

user_id = 0  # Positional row index of the user you want to recommend for
recommended_articles = recommend_articles(user_id, U, Vt, user_item_matrix, article_features)
print(f"Recommended articles for user {user_id}: {recommended_articles}")
