In [1]:
from copy import deepcopy
import pandas as pd
import numpy as np

from implicit.als import AlternatingLeastSquares
from scipy.sparse import csr_matrix
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from joblib import Parallel, delayed

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# links_df = pd.read_csv('Dataset/links.csv')
movies_df = pd.read_csv('Dataset/movies.csv')
ratings_df = pd.read_csv('Dataset/ratings.csv')
# tags_df = pd.read_csv('Dataset/tags.csv')


In [3]:
ratings_df = ratings_df.sort_values(by=['userId', 'movieId', 'timestamp'], ascending=[True, True, False])

ratings_df = ratings_df.drop_duplicates(subset=['userId', 'movieId'], keep='first')

ratings_df = ratings_df.drop(columns=['timestamp'])

In [4]:
df_final = ratings_df.merge(movies_df, how="inner", on="movieId")#.merge(tags_df_altered, how="inner", on="movieId")
df_final = df_final.drop(columns=['title'])

df_final.head(4)

Unnamed: 0,userId,movieId,rating,genres
0,1,1,4.0,Adventure|Animation|Children|Comedy|Fantasy
1,1,3,4.0,Comedy|Romance
2,1,6,4.0,Action|Crime|Thriller
3,1,47,5.0,Mystery|Thriller


In [5]:
df = deepcopy(df_final)#.merge(weighted_avg, how="inner", on="movieId")

In [6]:
# import numpy as np
# import pandas as pd
# from scipy.sparse import csr_matrix
# from sklearn.feature_extraction.text import TfidfVectorizer
# from sklearn.metrics.pairwise import cosine_similarity
# from implicit.als import AlternatingLeastSquares

# class HybridMovieRecommender:
#     def __init__(self, ratings_df):
#         # Preprocess ratings data
#         self.ratings_df = ratings_df
        
#         # Prepare collaborative filtering components
#         self.user_encoder = {user: idx for idx, user in enumerate(ratings_df['userId'].unique())}
#         self.movie_encoder = {movie: idx for idx, movie in enumerate(ratings_df['movieId'].unique())}
#         self.user_decoder = {idx: user for user, idx in self.user_encoder.items()}
#         self.movie_decoder = {idx: movie for movie, idx in self.movie_encoder.items()}
        
#         # Create sparse rating matrix for ALS
#         user_ids = [self.user_encoder[uid] for uid in ratings_df['userId']]
#         movie_ids = [self.movie_encoder[mid] for mid in ratings_df['movieId']]
#         ratings = ratings_df['rating'].values
        
#         self.ratings_matrix = csr_matrix((ratings, (user_ids, movie_ids)))
        
#         # Prepare content-based filtering components
#         self.tfidf = TfidfVectorizer(token_pattern=r'\b\w+\b')
#         self.genre_matrix = self.tfidf.fit_transform(ratings_df['genres'].unique())
        
#         # Train ALS model
#         self.als_model = AlternatingLeastSquares(factors=50, regularization=0.01)
#         self.als_model.fit(self.ratings_matrix)
    
#     def get_content_based_recommendations(self, movie_id, n_recommendations=5):
#         # Find similar movies based on genre
#         movie_index = np.where(self.ratings_df['movieId'] == movie_id)[0][0]
#         movie_genres = self.tfidf.transform([self.ratings_df.loc[movie_index, 'genres']])
        
#         genre_similarities = cosine_similarity(movie_genres, self.genre_matrix)[0]
#         similar_genre_indices = genre_similarities.argsort()[::-1][1:n_recommendations+1]
        
#         return self.ratings_df.iloc[similar_genre_indices]['movieId'].tolist()
    
#     def get_collaborative_recommendations(self, user_id, n_recommendations=5):
#         # Get ALS recommendations
#         user_index = self.user_encoder[user_id]
#         recommended_indices, _ = self.als_model.recommend(user_index, self.ratings_matrix[user_index], N=n_recommendations)
        
#         return [self.movie_decoder[idx] for idx in recommended_indices]
    
#     def get_hybrid_recommendations(self, user_id, movie_id, n_recommendations=5):
#         # Combine collaborative and content-based recommendations
#         collab_recs = self.get_collaborative_recommendations(user_id, n_recommendations)
#         content_recs = self.get_content_based_recommendations(movie_id, n_recommendations)
        
#         # Merge and deduplicate recommendations
#         hybrid_recs = list(dict.fromkeys(collab_recs + content_recs))
        
#         return hybrid_recs[:n_recommendations]

# # Example usage
# def main():
#     ratings_df = deepcopy(df)
    
#     # Initialize recommender
#     recommender = HybridMovieRecommender(ratings_df)
    
#     # Get recommendations
#     user_id = 1
#     movie_id = 1
#     hybrid_recommendations = recommender.get_hybrid_recommendations(user_id, movie_id)
#     print(f"Hybrid Recommendations for User {user_id} based on Movie {movie_id}: {hybrid_recommendations}")

# if __name__ == "__main__":
#     main()

In [7]:
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from implicit.als import AlternatingLeastSquares

class ComprehensiveMovieRecommender:
    def __init__(self, ratings_df):
        # Preprocess ratings data
        self.ratings_df = ratings_df
        
        # Prepare collaborative filtering components
        self.user_encoder = {user: idx for idx, user in enumerate(ratings_df['userId'].unique())}
        self.movie_encoder = {movie: idx for idx, movie in enumerate(ratings_df['movieId'].unique())}
        self.user_decoder = {idx: user for user, idx in self.user_encoder.items()}
        self.movie_decoder = {idx: movie for movie, idx in self.movie_encoder.items()}
        
        # Create sparse rating matrix for ALS
        user_ids = [self.user_encoder[uid] for uid in ratings_df['userId']]
        movie_ids = [self.movie_encoder[mid] for mid in ratings_df['movieId']]
        ratings = ratings_df['rating'].values
        
        self.ratings_matrix = csr_matrix((ratings, (user_ids, movie_ids)))
        
        # Prepare content-based filtering components
        self.tfidf = TfidfVectorizer(token_pattern=r'\b\w+\b')
        self.genre_matrix = self.tfidf.fit_transform(ratings_df['genres'].unique())
        
        # Train ALS model
        self.als_model = AlternatingLeastSquares(factors=50, regularization=0.01)
        self.als_model.fit(self.ratings_matrix)
    
    def get_content_based_recommendations(self, n_recommendations=3):
        # Generate global content-based recommendations
        genre_similarities = cosine_similarity(self.genre_matrix)
        avg_genre_similarity = genre_similarities.mean(axis=1)
        top_genre_indices = avg_genre_similarity.argsort()[::-1][:n_recommendations]
        
        return self.ratings_df.iloc[top_genre_indices]['movieId'].tolist()
    
    def get_collaborative_recommendations(self, user_id, n_recommendations=3):
        # Get ALS recommendations for a specific user
        user_index = self.user_encoder[user_id]
        recommended_indices, _ = self.als_model.recommend(user_index, self.ratings_matrix[user_index], N=n_recommendations)
        
        return [self.movie_decoder[idx] for idx in recommended_indices]
    
    def get_global_hybrid_recommendations(self, user_id, n_recommendations=3):
        # Combine global collaborative and content-based recommendations
        collab_recs = self.get_collaborative_recommendations(user_id, n_recommendations)
        content_recs = self.get_content_based_recommendations(n_recommendations)
        
        # Merge and deduplicate recommendations
        hybrid_recs = list(dict.fromkeys(collab_recs + content_recs))
        
        return hybrid_recs[:n_recommendations]

# Example usage
def main():
    ratings_df = deepcopy(df)
    
    # Initialize recommender
    recommender = ComprehensiveMovieRecommender(ratings_df)
    
    # Get top 3 global recommendations for a user
    user_id = 1
    global_recommendations = recommender.get_global_hybrid_recommendations(user_id)
    print(f"Global Recommendations for User {user_id}: {global_recommendations}")

if __name__ == "__main__":
    main()

  check_blas_config()
100%|██████████| 15/15 [00:00<00:00, 49.90it/s]

Global Recommendations for User 1: [1036, 2804, 377]



