## Content-Based Movie Recommender System

In [3]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

# Load the MovieLens dataset (movie.csv and tag.csv).
# NOTE(review): these are absolute paths on one machine; adjust them to run elsewhere.
movies = pd.read_csv(r"C:\Users\Sahilsharma\Desktop\Projects\Movie Reccomendation\Movie Dataset\movie.csv")
tags = pd.read_csv(r"C:\Users\Sahilsharma\Desktop\Projects\Movie Reccomendation\Movie Dataset\tag.csv")

# Preprocessing: turn the '|' separators in 'genres' into spaces so each genre
# becomes its own token. regex=False forces a literal replacement on every
# pandas version ('|' is the alternation operator when read as a regex), and
# fillna('') keeps a missing genre from turning the combined text into NaN.
movies['genres'] = movies['genres'].fillna('').str.replace('|', ' ', regex=False)

# Group tags by movieId and join them into one space-separated string per
# movie; missing tags are dropped and the rest are cast to str.
tags_grouped = (
    tags.groupby('movieId')['tag']
    .apply(lambda x: ' '.join(x.dropna().astype(str)))
    .reset_index()
)

# Left merge keeps every movie, including those without any tag.
movies = pd.merge(movies, tags_grouped, on='movieId', how='left')

# Movies without tags get NaN from the left merge; use an empty string instead.
movies['tag'] = movies['tag'].fillna('')

# Combine the genres and tags for content-based features
movies['combined_features'] = movies['genres'] + ' ' + movies['tag']

# Apply TF-IDF Vectorizer to convert text data into vectors
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(movies['combined_features'])

# Compute the cosine similarity matrix. TfidfVectorizer L2-normalises each row
# by default, so the plain dot product from linear_kernel equals cosine
# similarity. The result is a dense (n_movies x n_movies) array, so memory
# grows quadratically with the number of movies.
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

# Function to get movie recommendations
def get_recommendations(title, cosine_sim=cosine_sim, top_n=10):
    """Return the titles of the movies most similar to the one matching *title*.

    The query movie is the first row of ``movies`` whose title contains
    *title* as a case-insensitive, literal substring.

    Args:
        title: Text to look for in the ``title`` column. Leading and trailing
            whitespace is ignored.
        cosine_sim: Square similarity matrix whose rows and columns follow the
            row order of ``movies``.
        top_n: Maximum number of recommendations to return.

    Returns:
        A pandas Series with up to ``top_n`` titles, most similar first. The
        query movie itself is never included.

    Raises:
        IndexError: If *title* is blank or no movie title contains it.
    """
    query = title.strip()

    # regex=False: real titles contain regex metacharacters, e.g. the
    # parentheses in "Toy Story (1995)", which as a pattern would form a group
    # and fail to match the literal title (or raise on unbalanced input).
    matches = movies['title'].str.contains(query, case=False, na=False, regex=False)
    if not query or not matches.any():
        raise IndexError(f"No movie title contains {title!r}")

    # Row *position* of the first match; cosine_sim and .iloc are positional,
    # so an index label would only be correct for a default RangeIndex.
    idx = int(matches.to_numpy().argmax())

    # Similarity of the query movie to every movie
    scores = cosine_sim[idx]

    # Rank every other movie by similarity. The query movie is excluded
    # explicitly rather than by skipping the first sorted entry: a movie with
    # identical features ties at the top score and may sort ahead of it.
    ranked = sorted(
        (pos for pos in range(len(scores)) if pos != idx),
        key=lambda pos: scores[pos],
        reverse=True,
    )

    # Return the titles of the most similar movies
    return movies['title'].iloc[ranked[:top_n]]

# Input-based interaction
# Strip stray whitespace so " Toy Story " still matches.
user_movie = input("Enter a movie title: ").strip()

recommendations = None
if user_movie:
    try:
        recommendations = get_recommendations(user_movie)
    except IndexError:
        # Raised when no title matches the query; reported below instead of
        # ending the cell with a traceback.
        recommendations = None

if recommendations is None:
    print(f"\nNo movie found matching '{user_movie}'.")
else:
    print(f"\nTop 10 movie recommendations for '{user_movie}':")
    for rank, movie in enumerate(recommendations, 1):
        print(f"{rank}. {movie}")


Enter a movie title:  Toy Story



Top 10 movie recommendations for 'Toy Story':
1. Toy Story 2 (1999)
2. Bug's Life, A (1998)
3. Monsters, Inc. (2001)
4. Ice Age (2002)
5. Finding Nemo (2003)
6. Toy Story 3 (2010)
7. Incredibles, The (2004)
8. Ratatouille (2007)
9. Tin Toy (1988)
10. The Legend of Mor'du (2012)
