In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel









In [2]:
# Fetch the movie metadata CSV straight from GitHub into a DataFrame.
# The address is split across two literals purely for line length; the
# resulting string is unchanged.
url = (
    "https://raw.githubusercontent.com/YBI-Foundation/Dataset/main/"
    "Movies%20Recommendation.csv"
)
df = pd.read_csv(url)


In [4]:
# Data Preprocessing
# Build one text field per movie (title + genre) to drive the recommender.
# fillna('') guards against missing titles/genres: concatenating a string
# with NaN yields NaN, which TfidfVectorizer rejects as an invalid document.
df['content'] = df['Movie_Title'].fillna('') + ' ' + df['Movie_Genre'].fillna('')

# Convert the text content into TF-IDF vectors, dropping English stop words.
vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = vectorizer.fit_transform(df['content'])

# TfidfVectorizer L2-normalises every row by default (norm='l2'), so the plain
# dot product computed by linear_kernel is the cosine similarity between movies.
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

In [5]:
# Function to get movie recommendations based on movie title
def get_recommendations(title, cosine_sim, df, top_n=10):
    """Return the titles of the movies most similar to ``title``.

    Parameters
    ----------
    title : str
        Exact value to look up in ``df['Movie_Title']``. If several rows
        carry the same title, the first one is used as the query.
    cosine_sim : 2-D array-like
        Pairwise similarity matrix addressed by row *position*: row ``i``
        holds the similarities of the ``i``-th row of ``df``.
    df : pandas.DataFrame
        Frame with a ``'Movie_Title'`` column, in the same row order that
        was used to build ``cosine_sim``.
    top_n : int, default 10
        Maximum number of recommendations. Values <= 0 give an empty result.

    Returns
    -------
    pandas.Series
        Up to ``top_n`` titles, most similar first, carrying ``df``'s index
        labels. The queried row itself is never part of the result.

    Raises
    ------
    IndexError
        If ``title`` does not occur in ``df['Movie_Title']``.
    """
    # Locate the movie by row position rather than by index label: both
    # cosine_sim and .iloc below are positional, so a label would be wrong
    # for any frame without a default 0..n-1 index.
    hits = df['Movie_Title'].eq(title).to_numpy(dtype=bool, na_value=False).nonzero()[0]
    if len(hits) == 0:
        raise IndexError(f"movie title {title!r} not found in df['Movie_Title']")
    idx = int(hits[0])

    # Remove the query row explicitly instead of assuming it sorts first.
    # With ties (duplicate content, or an all-zero vector) another movie could
    # take the top slot and the query would end up recommending itself.
    candidates = [
        (pos, score) for pos, score in enumerate(cosine_sim[idx]) if pos != idx
    ]
    # list.sort is stable, so equally similar movies keep their row order.
    candidates.sort(key=lambda pair: pair[1], reverse=True)

    # max(..., 0) keeps a negative top_n from slicing from the end of the list.
    movie_indices = [pos for pos, _ in candidates[:max(top_n, 0)]]
    return df['Movie_Title'].iloc[movie_indices]


In [6]:
# Example: get the top-N movie recommendations for a specific movie title.
# TOP_N drives both the request and the printed header so the two cannot drift
# apart if the count is changed later.
TOP_N = 10
movie_title = 'Four Rooms'
recommendations = get_recommendations(movie_title, cosine_sim, df, top_n=TOP_N)
print(f"Top {TOP_N} movie recommendations for '{movie_title}':")
print(recommendations)


Top 10 movie recommendations for 'Four Rooms':
1300                  Go
3142            The R.M.
3464            R.I.P.D.
3775      Now You See Me
4164            The Call
2204             The Ten
4557    Now You See Me 2
1226            S.W.A.T.
1214             The Man
2607      Alone With Her
Name: Movie_Title, dtype: object
