# In [None]:
# Title: Movie Recommendation System

# Objective: Recommend movies similar to a chosen movie based on genres.

# Data Source: The CSV dataset provided in the YouTube video (https://rb.gy/xxcw7q).

# Import Library
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split

# Import Data
# Load the movie catalogue from the public CSV on GitHub (needs network access).
url_given = "https://raw.githubusercontent.com/YBI-Foundation/Dataset/main/Movies%20Recommendation.csv"
movies = pd.read_csv(url_given)

# Describe Data
print(movies.head())
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() only appends a stray "None" line to the output.
movies.info()

# Data Visualization
# Every distinct 'Movie_Genre' string is plotted as its own bar. Only the most
# frequent ones are drawn so the labels stay legible on a normal-sized figure
# (drawing all of them previously required a 100x60 inch canvas).
top_genre_count = 20
genre_order = movies['Movie_Genre'].value_counts().index[:top_genre_count]
plt.figure(figsize=(10, 8))
# Categories that are not listed in `order` are left out of the plot.
sns.countplot(y=movies['Movie_Genre'], order=genre_order)
plt.title('Distribution of the Movie Genres')
plt.xlabel('Count')
plt.ylabel('Genre')
plt.show()

# Data Processing
# Missing genres become empty strings so the vectorizer only receives str values.
movies['Movie_Genre'] = movies['Movie_Genre'].fillna('')

# Define Target Variable (y) and Feature Variables (X)
X = movies['Movie_Genre']
y = movies['Movie_Title']

# Train Test Split
# An integer test_size is an absolute number of rows: 14 movies are held out.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=14, random_state=2529)

# Modelling
# The vocabulary and IDF weights are learned from the training genres only.
vectorizer = TfidfVectorizer()
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Calculate Cosine Similarity
# The similarity matrix must cover the whole catalogue in the row order of
# `movies`, because `recommendations` looks rows up by their position in
# `movies`. Building it from the shuffled training rows would pair each score
# with the wrong title and leave the held-out movies without a row at all.
# TfidfVectorizer L2-normalises rows by default, so the plain dot product
# computed by linear_kernel is the cosine similarity.
X_all_tfidf = vectorizer.transform(X)
cosine_sim = linear_kernel(X_all_tfidf, X_all_tfidf)


# Function to get movie recommendations based on cosine similarity
def recommendations(title, cosine_sim=cosine_sim):
    """Return the titles of the ten movies most similar in genre to *title*.

    Args:
        title: Exact 'Movie_Title' value to look up. When several rows share
            the title, the first one is used.
        cosine_sim: Square similarity matrix in which row/column ``i`` refers
            to the ``i``-th row of ``movies``. Defaults to the matrix computed
            above for the full catalogue.

    Returns:
        A pandas Series holding up to ten movie titles, most similar first.
        The queried movie itself is never part of the result.

    Raises:
        IndexError: If no movie in ``movies`` is called *title*.
        ValueError: If *cosine_sim* does not have one row per row of ``movies``.
    """
    # Work with row positions (not index labels) so the lookup into the
    # similarity matrix and the final .iloc selection always agree.
    title_matches = (movies['Movie_Title'] == title).to_numpy().nonzero()[0]
    if title_matches.size == 0:
        raise IndexError(f'No movie titled {title!r} found in the dataset')
    position = int(title_matches[0])

    if len(cosine_sim) != len(movies):
        raise ValueError(
            f'cosine_sim has {len(cosine_sim)} rows but movies has '
            f'{len(movies)}; they must describe the same movies in the same order'
        )

    # Drop the queried movie by position instead of assuming it sorts first:
    # other movies with an identical genre string tie with it at the top score.
    ranked = sorted(
        (
            (other, score)
            for other, score in enumerate(cosine_sim[position])
            if other != position
        ),
        key=lambda pair: pair[1],
        reverse=True,
    )

    # Keep the ten best-scoring positions and translate them back to titles.
    top_positions = [other for other, _ in ranked[:10]]
    return movies['Movie_Title'].iloc[top_positions]

# Model Evaluation
# Content-based recommender: it is evaluated qualitatively, by inspecting the
# recommendations produced for a sample title.
print(recommendations('The Matrix'))

# Prediction
movie_selected = "The Matrix"
recommneded_movies = recommendations(movie_selected)
# A single print call with a newline separator emits the header line followed
# by the recommended titles.
print(f'Movies similar to "{movie_selected}":', recommneded_movies, sep='\n')

# Explanation

## This recommendation system uses content-based filtering. It leverages the genres of movies to recommend similar movies.
## The TF-IDF Vectorizer converts the genres into a matrix of TF-IDF features. Each movie's genre is represented as a TF-IDF vector.
## Cosine similarity is then used to calculate the similarity between these vectors, which helps in finding movies with similar genre profiles.
## The function `recommendations` returns the top 10 movies that are most similar to the input movie based on their genres.
