# Recommender System

In [None]:
import pandas as pd
import numpy as np
import os

os.chdir('./enstit/RecommenderSystem')

# Recommender System

In [None]:
import pandas as pd
import numpy as np
from utils.linalg import cosine_similarity

# Build the feedbacks matrix
movies = pd.read_csv('./data/movies.csv', usecols=['movieId', 'title'])
ratings = pd.read_csv('./data/ratings.csv', usecols=['userId', 'movieId', 'rating'])

feedbacks = pd.pivot_table(ratings, values='rating', index=['userId'], columns=['movieId'], aggfunc="mean")#.fillna(0)
feedbacks.reset_index(inplace=True)
feedbacks.columns.name = None

feedbacks_matrix = feedbacks[feedbacks.columns[1:]][:].values
feedbacks_matrix = feedbacks_matrix[:500, :1000]

In [None]:
import numpy as np
from utils.linalg import weighted_wals, inverse_frequency_weights

# Number of latent factors
num_factors = 100

# Maximum number of iterations, regularization parameters, and weights for WALS
lambda_reg = 0.99
lambda_w0 = 0.1
lambda_wi = 0.9

# Tolerance for convergence
tolerance = 1e-3

# Call the Weighted WALS function to get the user and item factors
user_factors, item_factors = weighted_wals(
    matrix_M=feedbacks_matrix,
    num_factors=num_factors,
    weights=inverse_frequency_weights(feedbacks_matrix, lambda_w0=lambda_w0, lambda_wi=lambda_wi),
    lambda_reg=lambda_reg
    )

In [None]:
USER = 236

highest_similarity = -np.inf
highest_sim_row = -1
for row in range(0,user_factors.shape[0]):
    if row == USER:
        continue
    similarity = cosine_similarity(user_factors[USER,:], user_factors[row,:])
    if similarity > highest_similarity:
        highest_similarity = similarity
        highest_sim_row = row

print("User %d is most similar to User %d" % (USER, highest_sim_row))

In [None]:
import numpy as np
from utils.matfac import WeightedMatrixFactorization

model = WeightedMatrixFactorization(feedbacks_matrix)
model.fit()

---

In [1]:
import os

os.chdir('./enstit/RecommenderSystem')

In [3]:
from utils.recsys import RecommenderSystem
import pandas as pd

# Build the feedbacks matrix
movies = pd.read_csv('./data/movies.csv', usecols=['movieId', 'title'])
ratings = pd.read_csv('./data/ratings.csv', usecols=['userId', 'movieId', 'rating'])

feedbacks = pd.pivot_table(ratings, values='rating', index=['userId'], columns=['movieId'], aggfunc="mean")#.fillna(0)
feedbacks.reset_index(inplace=True)
feedbacks.columns.name = None

feedbacks_matrix = feedbacks[feedbacks.columns[1:]][:].values
feedbacks_matrix = feedbacks_matrix[:500, :1000]


user_list = [f"User {i}" for i in range(0, feedbacks_matrix.shape[0])]
movies_list = movies.title.tolist()

rs = RecommenderSystem(reviews=feedbacks_matrix, users=user_list, items=movies_list)

Iteration: 1 -> Loss: 6926.7724359503445
Iteration: 2 -> Loss: 2423.573890006697
Iteration: 3 -> Loss: 1887.0692849462737
Iteration: 4 -> Loss: 1703.913697588726
Iteration: 5 -> Loss: 1615.981799635736
Iteration: 6 -> Loss: 1566.5510466989972
Iteration: 7 -> Loss: 1535.3758747563934
Iteration: 8 -> Loss: 1514.1107987867413
Iteration: 9 -> Loss: 1498.9343012338243
Iteration: 10 -> Loss: 1487.7878516744254


In [23]:
[print(i, rs.get_user_chart(f"User {i}")) for i in range(rs.reviews.shape[0]) if len(rs.get_user_chart(f"User {i}")) < 3]

1 [(4.0, 'Tommy Boy (1995)'), (3.0, 'Shawshank Redemption, The (1994)')]
105 [(5.0, 'Shawshank Redemption, The (1994)')]
147 [(4.0, 'Forrest Gump (1994)'), (3.0, 'Star Wars: Episode V - The Empire Strikes Back (1980)')]
153 [(5.0, 'Forrest Gump (1994)'), (5.0, "Sophie's Choice (1982)")]
162 []
174 [(3.5, 'Secret Garden, The (1993)'), (0.5, 'Mystery Science Theater 3000: The Movie (1996)')]
183 []
235 [(5.0, 'Goodfellas (1990)'), (5.0, 'Cyrano de Bergerac (1990)')]
290 [(4.0, 'Toy Story (1995)')]
305 []
319 [(3.5, 'Parent Trap, The (1961)')]
357 [(4.0, "Breakfast at Tiffany's (1961)"), (3.0, 'One Fine Day (1996)')]
359 []
458 [(4.0, 'Star Wars: Episode IV - A New Hope (1977)')]
480 [(4.0, 'Hear My Song (1991)'), (3.0, 'Fly Away Home (1996)')]


[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None]

In [28]:
[rs.items[i] for i in rs.get_user_recommendations("User 147")]

['Mary Reilly (1996)',
 'Eye for an Eye (1996)',
 'Heidi Fleiss: Hollywood Madam (1995)',
 'Balto (1995)',
 'Tom and Huck (1995)']

In [51]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

class HybridFiltering:
    def __init__(self, user_embeddings, item_embeddings, reviews):
        """
        Initialize the hybrid filtering model.

        Parameters:
        - user_embeddings: Matrix of user embeddings.
        - item_embeddings: Matrix of item embeddings.
        - reviews: User-item interaction matrix (NaN for unobserved values).
        - content_features: Matrix of content-based features for items (optional).
        """
        self.user_embeddings = user_embeddings
        self.item_embeddings = item_embeddings
        self.reviews = reviews

    def recommend_top_k_items(self, user_index, top_k=10, method='collaborative'):
        """
        Recommend the top-k items for a given user using hybrid filtering.

        Parameters:
        - user_index: Index of the target user.
        - top_k: Number of top items to recommend.
        - method: Recommendation method ('collaborative' or 'content-based').

        Returns:
        - List of top-k recommended item indices.
        """
        if method == 'collaborative':
            return self.collaborative_recommendation(user_index, top_k)
        elif method == 'content-based':
            return self.content_based_recommendation(user_index, top_k)
        else:
            raise ValueError("Invalid recommendation method. Use 'collaborative' or 'content-based'.")

    def collaborative_recommendation(self, user_index, top_k=10):
        """
        Recommend top-k items for a user using collaborative filtering.

        Parameters:
        - user_index: Index of the target user.
        - top_k: Number of top items to recommend.

        Returns:
        - List of top-k recommended item indices.
        """
        target_user_embedding = self.user_embeddings[user_index]
        similarities = cosine_similarity([target_user_embedding], self.user_embeddings).flatten()
        similar_users = np.argsort(similarities)[::-1][1:top_k+1]

        aggregated_preferences = np.sum(self.user_embeddings[similar_users] * similarities[similar_users, np.newaxis], axis=0)
        user_index = min(user_index, self.reviews.shape[0] - 1)
        item_indices = np.where(np.isnan(self.reviews[user_index]))[0]

        recommended_item_indices = item_indices[np.argsort(aggregated_preferences)[::-1]][:top_k]
        return recommended_item_indices

    def content_based_recommendation(self, user_index, top_k=10):
        """
        Recommend top-k items for a user using content-based filtering.

        Parameters:
        - user_index: Index of the target user.
        - top_k: Number of top items to recommend.

        Returns:
        - List of top-k recommended item indices.
        """

        target_user_embedding = self.user_embeddings[user_index]
        item_indices = np.where(np.isnan(self.reviews[user_index]))[0]

        # Calculate cosine similarity between the target user and items based on content features
        similarities = cosine_similarity([target_user_embedding], self.item_embeddings[item_indices]).flatten()

        # Sort items according to content-based similarity in descending order
        recommended_item_indices = item_indices[np.argsort(similarities)[::-1]][:top_k]
        return recommended_item_indices

# Example usage:
# Assuming 'user_embeddings', 'item_embeddings', 'reviews', and 'content_features' are your matrices
hybrid_model = HybridFiltering(rs.user_embedding, rs.item_embedding, rs.reviews)

# Recommend top-10 items for user with index 0 using collaborative filtering
user_index_to_recommend = 458
top_k_recommendations_collaborative = hybrid_model.recommend_top_k_items(user_index_to_recommend, top_k=10, method='collaborative')

# Recommend top-10 items for user with index 0 using content-based filtering
top_k_recommendations_content_based = hybrid_model.recommend_top_k_items(user_index_to_recommend, top_k=10, method='content-based')

print(f"Top 10 collaborative recommendations for user {user_index_to_recommend}: {top_k_recommendations_collaborative}")
print(f"Top 10 content-based recommendations for user {user_index_to_recommend}: {top_k_recommendations_content_based}")


Top 10 collaborative recommendations for user 458: [54 51 67 15 37 89  6 80 66 46]
Top 10 content-based recommendations for user 458: [897 874 910 575 255 543 181  57 648 603]


In [61]:
rs.items[603]

'Ashes of Time (Dung che sai duk) (1994)'

In [63]:
np.where(~np.isnan(rs.reviews[458]))[0]

array([224])