### 1. Data Loading and Preprocessing

In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from scipy.sparse import csr_matrix

# Load datasets
ratings = pd.read_csv('/Users/bv/Documents/WBS Data Science/Project 8 - Recommender Systems/Data/ratings.csv')
movies = pd.read_csv('/Users/bv/Documents/WBS Data Science/Project 8 - Recommender Systems/Data/movies.csv')
tags = pd.read_csv('/Users/bv/Documents/WBS Data Science/Project 8 - Recommender Systems/Data/tags.csv')

# Preprocessing
movies['genres'] = movies['genres'].str.replace('|', ' ')
movies_with_tags = pd.merge(movies, tags, on='movieId', how='left')
movies_with_tags['metadata'] = movies_with_tags[['genres', 'tag']].apply(lambda x: ' '.join(x.dropna()), axis=1)


### 2. Collaborative Filtering

In [2]:
# Creating a user-movie matrix
user_movie_matrix = ratings.pivot(index='userId', columns='movieId', values='rating').fillna(0)

# Computing cosine similarity between users
user_similarity = cosine_similarity(user_movie_matrix)
user_similarity = pd.DataFrame(user_similarity, index=user_movie_matrix.index, columns=user_movie_matrix.index)

### 3. Content-Based Filtering

In [3]:
# Creating TF-IDF matrix for movie metadata
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(movies_with_tags['metadata'])
movie_similarity = cosine_similarity(tfidf_matrix, tfidf_matrix)

### 4. Hybrid Recommender

In [4]:
def hybrid_recommender(userId, movieId):
    # Get top 10 similar users
    similar_users = user_similarity[userId].sort_values(ascending=False)[1:11]
    
    # Get movies rated by similar users
    similar_users_movies = user_movie_matrix.loc[similar_users.index]
    recommended_movies = similar_users_movies.mean(axis=0).sort_values(ascending=False).head(20).index.tolist()
    
    # Content-based recommendation
    movie_idx = movies_with_tags.index[movies_with_tags['movieId'] == movieId].tolist()[0]
    content_similar_movies = list(enumerate(movie_similarity[movie_idx]))
    content_similar_movies = sorted(content_similar_movies, key=lambda x: x[1], reverse=True)[1:11]
    
    # Combine recommendations
    hybrid_recommendations = set([movies_with_tags.iloc[i[0]]['movieId'] for i in content_similar_movies])
    hybrid_recommendations.update(recommended_movies)
    
    return movies[movies['movieId'].isin(hybrid_recommendations)]['title']

### 5. Recommendation Function Usage

In [9]:
# Example usage: Recommend movies for user with ID 10 based on their rating of movie with ID 1
recommended_movies = hybrid_recommender(10, 1)
print(recommended_movies)

0                                        Toy Story (1995)
277                      Shawshank Redemption, The (1994)
314                                   Forrest Gump (1994)
514                                   Pretty Woman (1990)
1706                                          Antz (1998)
1757                                 Bug's Life, A (1998)
2355                                   Toy Story 2 (1999)
2809       Adventures of Rocky and Bullwinkle, The (2000)
3000                     Emperor's New Groove, The (2000)
3194                                         Shrek (2001)
3287                                Legally Blonde (2001)
3568                                Monsters, Inc. (2001)
3638    Lord of the Rings: The Fellowship of the Ring,...
4356                                Bruce Almighty (2003)
4360                                  Finding Nemo (2003)
4427    Pirates of the Caribbean: The Curse of the Bla...
4644                                 Love Actually (2003)
4800    Lord o