<a href="https://colab.research.google.com/github/Zuhair0000/movie-recommendation/blob/main/movie_recommendation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Import Libraries**

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# **Load Dataset**

In [3]:
# Load the movie catalogue (movieId, title, genres) and the user-applied tags
# (userId, movieId, tag, timestamp). Both paths are relative, so the CSV files
# must be in the notebook's current working directory.
movies = pd.read_csv("movies.csv")
tags = pd.read_csv("tags.csv")

In [4]:
# Preview the first rows of the movie catalogue; genres are "|"-separated in one string column.
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [5]:
# Preview the first rows of the tags table; a movie can appear on several rows, one per tag.
tags.head()

Unnamed: 0,userId,movieId,tag,timestamp
0,22,26479,Kevin Kline,1583038886
1,22,79592,misogyny,1581476297
2,22,247150,acrophobia,1622483469
3,34,2174,music,1249808064
4,34,2174,weird,1249808102


In [7]:
# Count rows whose tag value is missing; these rows are dropped before the tags are joined per movie.
tags["tag"].isna().sum()

np.int64(1)

In [8]:
# Collapse the tags table to one row per movie: every non-missing tag for a
# movieId is concatenated into a single space-separated string.
tags_grouped = (
    tags.dropna(subset=["tag"])
    .groupby("movieId")["tag"]
    .agg(" ".join)
    .reset_index()
)

In [11]:
# Attach each movie's combined tag string; the left join keeps every movie and
# leaves "tag" as NaN for movies that have no tags.
# Any "tag" column left over from a previous run of this cell is dropped first:
# merging twice would otherwise produce "tag_x"/"tag_y" columns and the later
# movies["tag"] lookups would raise KeyError.
movies = movies.drop(columns=["tag"], errors="ignore").merge(
    tags_grouped, on="movieId", how="left"
)

In [13]:
# Movies without any tags come out of the left join as NaN; use an empty string
# instead so the column can be concatenated with the genres text.
movies = movies.assign(tag=movies["tag"].fillna(""))

In [14]:
# Build the document that represents each movie: its genres string followed by
# a single space and its joined tags.
movies = movies.assign(text=movies["genres"].str.cat(movies["tag"], sep=" "))

In [15]:
# Inspect the enriched frame: "tag" holds each movie's joined tags ("" when it has
# none) and "text" is the genres string, a space, then the tags.
movies.head()

Unnamed: 0,movieId,title,genres,tag,text
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,children Disney animation children Disney Disn...,Adventure|Animation|Children|Comedy|Fantasy ch...
1,2,Jumanji (1995),Adventure|Children|Fantasy,Robin Williams fantasy Robin Williams time tra...,Adventure|Children|Fantasy Robin Williams fant...
2,3,Grumpier Old Men (1995),Comedy|Romance,comedinha de velhinhos engraÃƒÂ§ada comedinha ...,Comedy|Romance comedinha de velhinhos engraÃƒÂ...
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance,,Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy,,Comedy


In [16]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Vectorise each movie's combined genre/tag text with TF-IDF.
# stop_words='english' removes scikit-learn's built-in English stop words and
# max_features=5000 caps the vocabulary at the 5000 most frequent terms.
tfidf = TfidfVectorizer(stop_words='english', max_features=5000)
# fit_transform returns one row per entry of movies["text"], in the same order;
# recommend() relies on that row alignment between tfidf_matrix and movies.
tfidf_matrix = tfidf.fit_transform(movies["text"])

In [17]:
from sklearn.metrics.pairwise import cosine_similarity
def recommend(movie_title, top_n=10):
    """Return the movies whose genre/tag text is most similar to ``movie_title``.

    Similarity is the cosine similarity between the TF-IDF row of the requested
    movie and every row of the notebook-level ``tfidf_matrix``. Rows of that
    matrix are expected to be in the same order as the rows of ``movies``.

    Parameters
    ----------
    movie_title : str
        Exact value to look up in ``movies["title"]`` (the comparison is
        case- and whitespace-sensitive). If several rows share the title, the
        first one is used.
    top_n : int, default 10
        Maximum number of recommendations to return. Values below 1 yield an
        empty result.

    Returns
    -------
    pandas.DataFrame or str
        The ``title`` and ``genres`` columns of up to ``top_n`` *other* movies,
        most similar first, or the string ``"Movie not found"`` when no row has
        the requested title.
    """
    # Work with the row *position*, not the index label: tfidf_matrix is
    # addressed positionally, so this stays correct even if `movies` does not
    # have a default 0..n-1 index.
    positions = np.flatnonzero((movies["title"] == movie_title).to_numpy())

    if len(positions) == 0:
        return "Movie not found"

    query_pos = int(positions[0])

    # Compute similarity ONLY for this movie (one row against the whole matrix)
    sim_scores = cosine_similarity(
        tfidf_matrix[query_pos], tfidf_matrix
    ).flatten()

    # Stable descending sort: movies with equal scores keep their catalogue
    # order, so repeated calls return the same ranking.
    ranked = np.argsort(-sim_scores, kind="stable")

    # Remove the query movie explicitly instead of assuming it is ranked first.
    # Movies with identical text tie with it at the top score, so skipping
    # "rank 0" could drop a real match and return the query movie itself.
    ranked = ranked[ranked != query_pos]

    return movies.iloc[ranked[:max(top_n, 0)]][["title", "genres"]]

In [18]:
# Example query using a title that appears in the movies preview; returns the default 10 results.
recommend("Toy Story (1995)")

Unnamed: 0,title,genres
3021,Toy Story 2 (1999),Adventure|Animation|Children|Comedy|Fantasy
2264,"Bug's Life, A (1998)",Adventure|Animation|Children|Comedy
6259,Finding Nemo (2003),Adventure|Animation|Children|Comedy
21090,La Luna (2011),Animation
10812,Cars (2006),Animation|Children|Comedy
4781,"Monsters, Inc. (2001)",Adventure|Animation|Children|Comedy|Fantasy
60708,Toy Story 4 (2019),Adventure|Animation|Children|Comedy
39850,Finding Dory (2016),Adventure|Animation|Comedy
81601,Lightyear (2022),Action|Adventure|Animation|Children|Comedy|Sci-Fi
18315,For the Birds (2000),Animation|Children|Comedy
