In [None]:
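# Known issue: the `surprise` module must be installed before running this notebook.
# It is distributed on PyPI as `scikit-surprise` (e.g. `%pip install scikit-surprise`).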

In [1]:
# Dependencies
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from surprise import accuracy

ModuleNotFoundError: No module named 'surprise'

In [None]:
# Toy movie catalogue: parallel lists of titles and comma-separated genre strings.
movies_data = dict(
    title=[f'Movie {letter}' for letter in 'ABCDEFG'],
    genre=[
        'Action',
        'Comedy',
        'Action, Drama',
        'Comedy, Drama',
        'Action, Comedy',
        'Drama',
        'Comedy, Drama',
    ],
)

# Toy ratings, written as (user_id, movie_id, rating) records and then
# transposed into the column-oriented dict that pandas consumes.
ratings_data = {
    column: list(values)
    for column, values in zip(
        ('user_id', 'movie_id', 'rating'),
        zip(*[
            (1, 1, 4), (1, 2, 5),
            (2, 1, 3), (2, 3, 4),
            (3, 2, 5), (3, 3, 4),
            (4, 4, 2), (4, 5, 1),
        ]),
    )
}

# Materialise both tables as DataFrames with an explicit column order.
movies_df = pd.DataFrame(movies_data, columns=['title', 'genre'])
ratings_df = pd.DataFrame(ratings_data, columns=['user_id', 'movie_id', 'rating'])

In [None]:
# Content-based filtering using movie genres:
# TF-IDF-encode each movie's genre string, then compare every pair of movies
# by cosine similarity. Row/column i of `cosine_sim` corresponds to row i of
# `movies_df`.
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
genre_matrix = tfidf_vectorizer.fit_transform(movies_df['genre'])
cosine_sim = cosine_similarity(genre_matrix, genre_matrix)

# Collaborative filtering using the Surprise library:
# wrap the ratings table in a Surprise Dataset (ratings are on a 1-5 scale).
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(ratings_df[['user_id', 'movie_id', 'rating']], reader)

# Hold out 25% of the ratings for evaluation. The seed is fixed so that the
# split (and everything computed from it) is identical on every
# Restart Kernel -> Run All; without it the split is reshuffled on each run.
trainset, testset = train_test_split(data, test_size=0.25, random_state=42)

In [None]:
# Singular Value Decomposition (SVD) matrix-factorisation model.
# `random_state` pins the random initialisation of the factors, so repeated
# fits on the same trainset produce the same model and the same RMSE.
model = SVD(random_state=42)
model.fit(trainset)

# Evaluate on the held-out ratings; `accuracy.rmse` prints the score and
# also returns it.
predictions = model.test(testset)
rmse = accuracy.rmse(predictions)

In [None]:
# Hybrid recommendation function combining both approaches
def hybrid_recommendation(user_id, movie_title, cosine_sim, model, top_n=5):
    """Recommend movies by merging content-based and collaborative results.

    Reads the module-level ``movies_df`` (columns ``title``, ``genre``) and
    ``ratings_df`` (columns ``user_id``, ``movie_id``, ``rating``).

    Args:
        user_id: Raw user id, as it appears in ``ratings_df['user_id']``.
        movie_title: Title of the seed movie for the content-based part.
        cosine_sim: Square similarity matrix whose row/column ``i``
            corresponds to row ``i`` of ``movies_df``.
        model: Fitted object exposing ``predict(uid, iid)`` that returns a
            prediction with an ``est`` attribute (e.g. a Surprise ``SVD``).
        top_n: Maximum number of titles taken from each of the two
            approaches. Values below zero are treated as zero.

    Returns:
        A list of unique movie titles: the content-based picks first (most
        similar first), followed by any collaborative picks (highest
        predicted rating first) that are not already present.

    Raises:
        IndexError: If ``movie_title`` is not in ``movies_df['title']``.
    """
    top_n = max(top_n, 0)
    titles = movies_df['title'].tolist()

    # Locate the seed movie by *position* so it lines up with cosine_sim rows
    # even if movies_df carries a non-default index.
    if movie_title not in titles:
        raise IndexError(f"Movie title {movie_title!r} not found in movies_df")
    seed_pos = titles.index(movie_title)

    # --- Content-based: movies whose genres are most similar to the seed ---
    # Exclude the seed explicitly. Dropping "the first sorted entry" is wrong
    # when another movie has identical genres (similarity 1.0): the twin may
    # sort first and the seed would then recommend itself.
    similarity_row = cosine_sim[seed_pos]
    candidate_positions = [pos for pos in range(len(titles)) if pos != seed_pos]
    candidate_positions.sort(key=lambda pos: similarity_row[pos], reverse=True)
    content_based_recommendations = [titles[pos] for pos in candidate_positions[:top_n]]

    # --- Collaborative: highest predicted ratings among unrated movies ---
    # The model was trained on ratings_df['movie_id'], so predictions must be
    # requested by movie id, not by title.
    if 'movie_id' in movies_df.columns:
        movie_ids = movies_df['movie_id'].tolist()
    else:
        # NOTE(review): movies_df has no id column; this assumes movie_id is
        # the 1-based row position in movies_df - confirm against the data source.
        movie_ids = list(range(1, len(titles) + 1))
    id_to_title = dict(zip(movie_ids, titles))

    rated_ids = set(ratings_df.loc[ratings_df['user_id'] == user_id, 'movie_id'])
    user_predicted_ratings = [
        (id_to_title[movie_id], model.predict(user_id, movie_id).est)
        for movie_id in movie_ids
        if movie_id not in rated_ids
    ]
    user_predicted_ratings.sort(key=lambda pair: pair[1], reverse=True)
    collaborative_filtering_recommendations = [
        title for title, _ in user_predicted_ratings[:top_n]
    ]

    # --- Combine: de-duplicate while keeping a deterministic order ---
    # dict.fromkeys preserves first-seen order, unlike a set union.
    hybrid_recommendations = list(
        dict.fromkeys(content_based_recommendations + collaborative_filtering_recommendations)
    )

    return hybrid_recommendations

In [None]:
# Demo: build hybrid recommendations for one user and one seed movie.
user_id, movie_title = 1, 'Movie A'
recommendations = hybrid_recommendation(
    user_id=user_id,
    movie_title=movie_title,
    cosine_sim=cosine_sim,
    model=model,
)

# Header line first, then the recommended titles on their own line.
print(
    f"Hybrid Recommendations for User {user_id} based on '{movie_title}':",
    recommendations,
    sep="\n",
)
