# In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split

# Load the MovieLens 20M dataset.
# NOTE(review): these are absolute paths on one machine; they must be edited
# before the script can run anywhere else.
movies = pd.read_csv(r"C:\Users\Sahilsharma\Desktop\Movie Dataset\movie.csv")
ratings = pd.read_csv(r"C:\Users\Sahilsharma\Desktop\Movie Dataset\rating.csv")

# Step 1: Content-Based Filtering (CBF) using TF-IDF on genres.
# Every genre label is kept as a single token by splitting on "|" only.
# The default token pattern breaks on punctuation, so a hyphenated label such
# as "Sci-Fi" would turn into two tokens ("sci", "fi") and count twice in the
# similarity. Assumes the genres column is pipe-separated (MovieLens format)
# -- confirm against the CSV.
tfidf = TfidfVectorizer(stop_words='english', token_pattern=r'[^|]+')

# Fill NaN genres with an empty string so the vectorizer only sees text.
movies['genres'] = movies['genres'].fillna('')

# One TF-IDF row per movie, in the same row order as `movies`.
tfidf_matrix = tfidf.fit_transform(movies['genres'])

# Pairwise cosine similarity between all movies; row/column i corresponds to
# the i-th row of `movies`.
# NOTE(review): this is an n_movies x n_movies matrix; for a catalogue of
# tens of thousands of titles it needs several gigabytes of RAM.
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

# Function to get content-based movie recommendations
def content_based_recommendations(movie_title, movies_df, cosine_sim_matrix, num_recommendations=10):
    """Return the titles most similar to ``movie_title``.

    Args:
        movie_title: Exact title to look up in ``movies_df['title']``.
        movies_df: DataFrame with a ``title`` column whose row order matches
            the rows/columns of ``cosine_sim_matrix``.
        cosine_sim_matrix: Square, row-indexable similarity matrix; row ``i``
            holds the similarities of the i-th movie to every movie.
        num_recommendations: Maximum number of titles to return; values
            below zero are treated as zero.

    Returns:
        A Series of titles ordered from most to least similar, or the string
        ``"Movie not found in the database."`` when the title is unknown.
        If the title occurs more than once, the first occurrence is used.
    """
    # Work with the row *position*, not the index label: the similarity
    # matrix and ``.iloc`` are positional, so a filtered or re-indexed
    # DataFrame must not shift the lookup.
    match_positions = (movies_df['title'] == movie_title).to_numpy().nonzero()[0]
    if len(match_positions) == 0:
        return "Movie not found in the database."
    query_pos = int(match_positions[0])

    ranked = sorted(
        enumerate(cosine_sim_matrix[query_pos]),
        key=lambda pair: pair[1],
        reverse=True,
    )

    # Drop the queried movie explicitly. It is not guaranteed to sort first:
    # other movies can tie with it at the maximum similarity, and the stable
    # sort would then keep an earlier row ahead of it.
    limit = max(num_recommendations, 0)
    top_positions = [pos for pos, _ in ranked if pos != query_pos][:limit]

    return movies_df['title'].iloc[top_positions]

# Step 2: Collaborative Filtering using SVD
# The reader declares the rating range so Surprise can interpret the values.
reader = Reader(rating_scale=(0.5, 5.0))
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)

# Seed both the split and the SVD initialisation; without a seed every run
# trains a different model and prints different recommendations.
# NOTE(review): `testset` is held out but not evaluated in the code shown here.
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)
svd_model = SVD(random_state=42)
svd_model.fit(trainset)

# Function to get movie recommendations using collaborative filtering
def collaborative_filtering_recommendations(user_id, model, movies_df, num_recommendations=10,
                                            exclude_movie_ids=None):
    """Return the titles with the highest predicted rating for ``user_id``.

    Args:
        user_id: User identifier passed as the first argument to
            ``model.predict``.
        model: Object exposing ``predict(user_id, movie_id)`` whose result
            has an ``est`` attribute (the predicted rating).
        movies_df: DataFrame with ``movieId`` and ``title`` columns.
        num_recommendations: Maximum number of titles to return; values
            below zero are treated as zero.
        exclude_movie_ids: Optional iterable of movie ids to leave out, e.g.
            the movies the user has already rated. Nothing is excluded by
            default.

    Returns:
        A Series of titles (keeping the ``movies_df`` index labels) ordered
        from highest to lowest predicted rating.
    """
    excluded = set() if exclude_movie_ids is None else set(exclude_movie_ids)

    scored = [
        (movie_id, model.predict(user_id, movie_id).est)
        for movie_id in movies_df['movieId'].unique()
        if movie_id not in excluded
    ]
    scored.sort(key=lambda pair: pair[1], reverse=True)
    top_movie_ids = [movie_id for movie_id, _ in scored[:max(num_recommendations, 0)]]

    # A plain ``isin`` filter would hand the rows back in catalogue order and
    # lose the ranking, so re-order the selected rows by their rank.
    rank_of = {movie_id: rank for rank, movie_id in enumerate(top_movie_ids)}
    selected = movies_df[movies_df['movieId'].isin(top_movie_ids)]
    order = selected['movieId'].map(rank_of).to_numpy().argsort(kind='stable')

    return selected['title'].iloc[order]

# Hybrid Recommendation System (Weighted Combination)
def hybrid_recommendations(user_id, movie_title, movies_df, cf_model, cosine_sim_matrix, num_recommendations=10, alpha=0.5):
    """Blend content-based and collaborative recommendations.

    ``alpha`` is the share of the result reserved for content-based picks:
    ``round(alpha * num_recommendations)`` slots go to the content-based
    list and the remaining slots to the collaborative list. Slots one list
    cannot fill are back-filled from the other.

    Args:
        user_id: User id forwarded to the collaborative recommender.
        movie_title: Seed title forwarded to the content-based recommender.
        movies_df: Movie catalogue forwarded to both recommenders.
        cf_model: Fitted model forwarded to the collaborative recommender.
        cosine_sim_matrix: Similarity matrix forwarded to the content-based
            recommender.
        num_recommendations: Maximum number of titles to return.
        alpha: Content-based share, clamped to ``[0, 1]``. ``1`` yields
            content-based results first, ``0`` collaborative results first.

    Returns:
        A Series of unique titles, content-based picks first, never
        containing ``movie_title`` itself.
    """
    cb_recs = content_based_recommendations(movie_title, movies_df, cosine_sim_matrix, num_recommendations)
    cf_recs = collaborative_filtering_recommendations(user_id, cf_model, movies_df, num_recommendations)

    # The content-based helper reports an unknown title with a message string
    # rather than a Series; fall back to collaborative results only instead
    # of failing inside pd.concat.
    if isinstance(cb_recs, str):
        cb_recs = cf_recs.iloc[:0]

    total = max(int(num_recommendations), 0)
    cb_share = min(max(float(alpha), 0.0), 1.0)
    cb_quota = int(round(cb_share * total))

    cb_part = cb_recs.drop_duplicates().head(cb_quota)
    # Skip titles already chosen so the collaborative quota is filled with
    # new titles.
    cf_fresh = cf_recs[~cf_recs.isin(set(cb_part))].drop_duplicates()
    cf_part = cf_fresh.head(total - len(cb_part))

    # Quota picks come first; the full lists follow purely as back-fill, and
    # drop_duplicates keeps the first occurrence of each title.
    combined_recs = pd.concat([cb_part, cf_part, cb_recs, cf_recs]).drop_duplicates()
    # The seed movie is something the user already knows.
    combined_recs = combined_recs[combined_recs != movie_title]

    return combined_recs.head(total)

# Ask which user the recommendations are for and which movie seeds the
# content-based search.
user_id = int(input("Enter your user ID: "))
movie_title = input("Enter a movie title you like: ")

# Each section pairs a heading with a deferred call, so a recommender is only
# run after its heading has been printed.
report_sections = (
    (
        "\nContent-based recommendations:",
        lambda: content_based_recommendations(movie_title, movies, cosine_sim),
    ),
    (
        "\nCollaborative filtering recommendations:",
        lambda: collaborative_filtering_recommendations(user_id, svd_model, movies),
    ),
    (
        "\nHybrid recommendations:",
        lambda: hybrid_recommendations(user_id, movie_title, movies, svd_model, cosine_sim),
    ),
)

# Display recommendations
for heading, build_recommendations in report_sections:
    print(heading)
    print(build_recommendations())
