In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
data = "IMDB-Movie-Dataset(2023-1951).csv"
movies_df = pd.read_csv(data)

In [4]:
movies_df.head()

Unnamed: 0.1,Unnamed: 0,movie_id,movie_name,year,genre,overview,director,cast
0,0,tt15354916,Jawan,2023,"Action, Thriller",A high-octane action thriller which outlines t...,Atlee,"Shah Rukh Khan, Nayanthara, Vijay Sethupathi, ..."
1,1,tt15748830,Jaane Jaan,2023,"Crime, Drama, Mystery",A single mother and her daughter who commit a ...,Sujoy Ghosh,"Kareena Kapoor, Jaideep Ahlawat, Vijay Varma, ..."
2,2,tt11663228,Jailer,2023,"Action, Comedy, Crime",A retired jailer goes on a manhunt to find his...,Nelson Dilipkumar,"Rajinikanth, Mohanlal, Shivarajkumar, Jackie S..."
3,3,tt14993250,Rocky Aur Rani Kii Prem Kahaani,2023,"Comedy, Drama, Family",Flamboyant Punjabi Rocky and intellectual Beng...,Karan Johar,"Ranveer Singh, Alia Bhatt, Dharmendra, Shabana..."
4,4,tt15732324,OMG 2,2023,"Comedy, Drama",An unhappy civilian asks the court to mandate ...,Amit Rai,"Pankaj Tripathi, Akshay Kumar, Yami Gautam, Pa..."


In [5]:
movies_df.isna().sum()

Unnamed: 0     0
movie_id       0
movie_name     0
year          65
genre          0
overview       0
director       0
cast           0
dtype: int64

In [6]:
new_df = movies_df[['movie_id','movie_name','overview']]

In [7]:
new_df.head()

Unnamed: 0,movie_id,movie_name,overview
0,tt15354916,Jawan,A high-octane action thriller which outlines t...
1,tt15748830,Jaane Jaan,A single mother and her daughter who commit a ...
2,tt11663228,Jailer,A retired jailer goes on a manhunt to find his...
3,tt14993250,Rocky Aur Rani Kii Prem Kahaani,Flamboyant Punjabi Rocky and intellectual Beng...
4,tt15732324,OMG 2,An unhappy civilian asks the court to mandate ...


In [8]:
new_df['overview'] = new_df['overview'].apply(lambda x:x.lower())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['overview'] = new_df['overview'].apply(lambda x:x.lower())


In [9]:
new_df.head()

Unnamed: 0,movie_id,movie_name,overview
0,tt15354916,Jawan,a high-octane action thriller which outlines t...
1,tt15748830,Jaane Jaan,a single mother and her daughter who commit a ...
2,tt11663228,Jailer,a retired jailer goes on a manhunt to find his...
3,tt14993250,Rocky Aur Rani Kii Prem Kahaani,flamboyant punjabi rocky and intellectual beng...
4,tt15732324,OMG 2,an unhappy civilian asks the court to mandate ...


In [10]:
# Vectorization of movie overviews
vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = vectorizer.fit_transform(new_df['overview'])

# Compute cosine similarity matrix
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)


In [17]:
import requests


In [33]:
def fetch_poster(movie_id):
      API_KEY = "ff2de638f2d96a0291a0f19086da8839"  # Replace with your actual TMDb API key
      url = f"https://api.themoviedb.org/3/movie/{movie_id}/images?api_key={API_KEY}"
      response = requests.get(url)
      data = response.json()
      posters = data.get("posters")
      if posters:
          poster_path = posters[0].get("file_path")
          full_poster_url = f"https://image.tmdb.org/t/p/w500{poster_path}"
          return full_poster_url
      else:
          # Use a generic placeholder image
          return "https://via.placeholder.com/300x450?text=No+Image"

In [None]:
def recommend_movies(movie_name, n=5):
    idx = new_df[new_df['movie_name'].str.lower() == movie_name.lower()].index
    if len(idx) == 0:
        return "Movie not found. Please try another title."

    idx = idx[0]
    scores = list(enumerate(cosine_sim[idx]))
    scores = sorted(scores, key=lambda x: x[1], reverse=True)[1:n+1]

    recommendations = []
    for i, _ in scores:
        movie = new_df.iloc[i]
        recommendations.append((movie['movie_name'], movie['movie_id']))

    return recommendations


In [35]:
# Example usage
movie_name = "Jawan"  # Replace with any Bollywood movie title
recommended_movies = recommend_movies(movie_name)

if isinstance(recommended_movies, str):
    print(recommended_movies)
else:
    for title, imdb_id in recommended_movies:
        print(f"Title: {title}")
        print(f"Poster URL: {fetch_poster(imdb_id)}\n")


Title: Anek
Poster URL: https://image.tmdb.org/t/p/w500/fMmCbD5sUF1If2C6BzHCgT7LY32.jpg

Title: Lost
Poster URL: https://image.tmdb.org/t/p/w500/yRQkH26GZ7Qp9xkAMfmVNnkb1Ez.jpg

Title: Satya 2
Poster URL: https://image.tmdb.org/t/p/w500/eBwOhSMYvqQHvPpjvkHbXQ1ZDRs.jpg

Title: Untitled SRK-Suhana-Marflix movie
Poster URL: https://via.placeholder.com/300x450?text=No+Image

Title: Chalk N Duster
Poster URL: https://image.tmdb.org/t/p/w500/ry5Vs8Zjh2yWQ1NSODpDxLfI7ok.jpg

