In [1]:
#2. Model Implementation
%%writefile content_based.py
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def setup_content_model(movies):
    """Build a movie-to-movie similarity matrix from the ``genres`` column.

    Args:
        movies: Table-like object whose ``'genres'`` column holds one text
            value per movie (presumably a pandas DataFrame -- confirm with
            the caller).

    Returns:
        The result of ``cosine_similarity`` applied to the TF-IDF matrix of
        the genre texts, i.e. pairwise similarities between the rows of
        ``movies`` in their current order.
    """
    # NOTE(review): the vectorizer uses its default tokenisation, so a genre
    # label containing punctuation may be split into several tokens --
    # confirm this is intended for the genre format in use.
    vectorizer = TfidfVectorizer(stop_words='english')
    genre_vectors = vectorizer.fit_transform(movies['genres'])
    similarity = cosine_similarity(genre_vectors)
    return similarity

def get_content_recommendations(movies, cosine_sim, movie_id, top_n=10):
    """Return the ``top_n`` movies most similar to ``movie_id``.

    Args:
        movies: DataFrame with ``'movieId'``, ``'title'`` and ``'genres'``
            columns. Its row order must match the row/column order of
            ``cosine_sim``; the index labels themselves may be anything.
        cosine_sim: Square similarity matrix; ``cosine_sim[i]`` holds the
            similarity of row ``i`` of ``movies`` to every row.
        movie_id: Value to look up in ``movies['movieId']``. If it occurs
            more than once, the first occurrence is used.
        top_n: Maximum number of recommendations to return.

    Returns:
        DataFrame with columns ``['movieId', 'title', 'genres']`` ordered by
        decreasing similarity. The query movie itself is never included.

    Raises:
        IndexError: If ``movie_id`` is not present in ``movies``.
    """
    # Work with row *positions*, not index labels: cosine_sim and .iloc are
    # both positional, so labels are only correct for a default RangeIndex.
    matches = np.flatnonzero((movies['movieId'] == movie_id).to_numpy())
    if matches.size == 0:
        raise IndexError(f"movie_id {movie_id!r} not found in movies")
    idx = int(matches[0])

    scores = np.asarray(cosine_sim[idx], dtype=float).ravel()
    # Stable descending sort keeps tied movies in their original row order.
    order = np.argsort(-scores, kind='stable')
    # Drop the query movie explicitly. It is not guaranteed to sort first,
    # because other movies can tie with it at the maximum similarity.
    order = order[order != idx][:max(top_n, 0)]
    return movies.iloc[order][['movieId', 'title', 'genres']]

Writing content_based.py


In [2]:
# Create collaborative.py
%%writefile collaborative.py


from surprise import Dataset, Reader, SVD
import numpy as np

def setup_collaborative_model(ratings, random_state=None, rating_scale=(0.5, 5.0)):
    """Train a Surprise ``SVD`` model on every rating in ``ratings``.

    Args:
        ratings: DataFrame with ``'userId'``, ``'movieId'`` and ``'rating'``
            columns (selected in that order for Surprise).
        random_state: Seed forwarded to ``SVD`` so training can be made
            reproducible. The default ``None`` keeps the previous unseeded
            behaviour.
        rating_scale: ``(min, max)`` rating bounds passed to ``Reader``.
            Defaults to the previously hard-coded ``(0.5, 5.0)``.

    Returns:
        The fitted ``SVD`` instance.
    """
    reader = Reader(rating_scale=rating_scale)
    data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)
    # The model is fitted on the full dataset; no ratings are held out here.
    trainset = data.build_full_trainset()
    svd = SVD(random_state=random_state)
    svd.fit(trainset)
    return svd

def get_collab_recommendations(movies, ratings, svd, user_id, top_n=10):
    """Recommend the ``top_n`` unrated movies with the highest predicted rating.

    Args:
        movies: DataFrame with ``'movieId'``, ``'title'`` and ``'genres'``
            columns.
        ratings: DataFrame with ``'userId'`` and ``'movieId'`` columns, used
            to find the movies ``user_id`` has already rated.
        svd: Fitted model exposing ``test(testset)``, which returns
            predictions carrying ``iid`` and ``est`` attributes.
        user_id: User to produce recommendations for.
        top_n: Maximum number of recommendations to return.

    Returns:
        DataFrame with columns ``['movieId', 'title', 'genres']`` ordered from
        highest to lowest predicted rating.
    """
    all_movie_ids = movies['movieId'].unique()
    rated = ratings[ratings['userId'] == user_id]['movieId']
    to_predict = np.setdiff1d(all_movie_ids, rated)
    # The third element is a placeholder "true" rating required by the
    # testset format; only the estimate (``est``) is read below.
    testset = [[user_id, mid, 4.] for mid in to_predict]
    preds = svd.test(testset)
    top_preds = sorted(preds, key=lambda p: p.est, reverse=True)[:top_n]

    # Map each recommended id to its rank, so the rows selected from
    # ``movies`` can be returned in prediction order, not catalogue order.
    rank = {pred.iid: pos for pos, pred in enumerate(top_preds)}
    picked = movies[movies['movieId'].isin(list(rank))]
    positions = np.array(
        [rank[mid] for mid in picked['movieId'].tolist()], dtype=int
    )
    ordered = picked.iloc[np.argsort(positions, kind='stable')]
    return ordered[['movieId', 'title', 'genres']]

Writing collaborative.py
