## Building the Recommendation Model
We'll use collaborative filtering with the Surprise library and a simple content-based filtering approach.

Collaborative filtering recommends movies based on the preferences of similar users.

## Fetching the Data from the TMDb site

In [2]:
import requests
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.neighbors import NearestNeighbors

TMDB_API_KEY = input("Enter your API key: ")

def fetch_movies_from_tmdb():
    movies = []
    for page in range(1, 6):  # Fetching multiple pages of popular movies
        url = f'https://api.themoviedb.org/3/movie/popular?api_key={TMDB_API_KEY}&language=en-US&page={page}'
        response = requests.get(url)
        data = response.json()
        movies.extend(data['results'])
    return pd.DataFrame(movies)

def fetch_movie_details(movie_id):
    url = f'https://api.themoviedb.org/3/movie/{movie_id}?api_key={TMDB_API_KEY}&language=en-US'
    response = requests.get(url)
    return response.json()

movies_df = fetch_movies_from_tmdb()
movies_df['details'] = movies_df['id'].apply(fetch_movie_details)
movies_df.head()

Enter your API key: 306c333178bf1802b38c1f8863f606fc


Unnamed: 0,adult,backdrop_path,genre_ids,id,original_language,original_title,overview,popularity,poster_path,release_date,title,video,vote_average,vote_count,details
0,False,/zo8CIjJ2nfNOevqNajwMRO6Hwka.jpg,"[16, 12, 10751, 35]",1241982,en,Moana 2,After receiving an unexpected call from her wa...,3664.907,/aLVkiINlIeCkcZIzb7XHzPYgO6L.jpg,2024-11-21,Moana 2,False,7.235,1543,"{'adult': False, 'backdrop_path': '/zo8CIjJ2nf..."
1,False,/xljLe4TiQL1b4sT7956IGgj2vrf.jpg,"[28, 18, 12, 10752]",927342,ta,அமரன்,A heroic true story of Major Mukund Varadaraja...,2763.388,/6m435uh40N7Gzfbd69ttp6W0sdR.jpg,2024-10-31,Amaran,False,8.136,107,"{'adult': False, 'backdrop_path': '/xljLe4TiQL..."
2,False,/qfAfE5auxsuxhxPpnETRAyTP5ff.jpg,"[28, 53, 878]",822119,en,Captain America: Brave New World,After meeting with newly elected U.S. Presiden...,2666.301,/pzIddUEMWhWzfvLI3TwxUG2wGoi.jpg,2025-02-12,Captain America: Brave New World,False,6.2,493,"{'adult': False, 'backdrop_path': '/qfAfE5auxs..."
3,False,/zOpe0eHsq0A2NvNyBbtT6sj53qV.jpg,"[28, 878, 35, 10751]",939243,en,Sonic the Hedgehog 3,"Sonic, Knuckles, and Tails reunite against a p...",2651.118,/d8Ryb8AunYAuycVKDp5HpdWPKgC.jpg,2024-12-19,Sonic the Hedgehog 3,False,7.776,1848,"{'adult': False, 'backdrop_path': '/zOpe0eHsq0..."
4,False,/u7AZ5CdT2af8buRjmYCPXNyJssd.jpg,"[28, 35]",1160956,zh,熊猫计划,International action star Jackie Chan is invit...,1949.941,/xVS9XiO9upp2SnWx6KpBYb79hLR.jpg,2024-10-01,Panda Plan,False,7.25,114,"{'adult': False, 'backdrop_path': '/u7AZ5CdT2a..."


In [3]:
movies_df['genres'] = movies_df['details'].apply(lambda x: ', '.join([genre['name'] for genre in x['genres']]))  # Extract genres from details and join them into a single string
movies_df['overview'] = movies_df['overview'].fillna('')  # Fill missing overviews with an empty string
movies_df['content'] = movies_df['genres'] + ' ' + movies_df['overview']  # Combine genres and overview into a single content column
movies_df.head(2)

Unnamed: 0,adult,backdrop_path,genre_ids,id,original_language,original_title,overview,popularity,poster_path,release_date,title,video,vote_average,vote_count,details,genres,content
0,False,/zo8CIjJ2nfNOevqNajwMRO6Hwka.jpg,"[16, 12, 10751, 35]",1241982,en,Moana 2,After receiving an unexpected call from her wa...,3664.907,/aLVkiINlIeCkcZIzb7XHzPYgO6L.jpg,2024-11-21,Moana 2,False,7.235,1543,"{'adult': False, 'backdrop_path': '/zo8CIjJ2nf...","Animation, Adventure, Family, Comedy","Animation, Adventure, Family, Comedy After rec..."
1,False,/xljLe4TiQL1b4sT7956IGgj2vrf.jpg,"[28, 18, 12, 10752]",927342,ta,அமரன்,A heroic true story of Major Mukund Varadaraja...,2763.388,/6m435uh40N7Gzfbd69ttp6W0sdR.jpg,2024-10-31,Amaran,False,8.136,107,"{'adult': False, 'backdrop_path': '/xljLe4TiQL...","Action, Drama, Adventure, War","Action, Drama, Adventure, War A heroic true st..."


In [4]:
def fetch_movie_review(movie_id):
    url = f'https://api.themoviedb.org/3/movie/{movie_id}/reviews?api_key={TMDB_API_KEY}&language=en-US'
    response = requests.get(url)
    data = response.json()
    return [review["content"] for review in data.get("results", [])]

In [5]:
movies_df['review'] = movies_df['id'].apply(fetch_movie_review)
movies_df.head(3)

Unnamed: 0,adult,backdrop_path,genre_ids,id,original_language,original_title,overview,popularity,poster_path,release_date,title,video,vote_average,vote_count,details,genres,content,review
0,False,/zo8CIjJ2nfNOevqNajwMRO6Hwka.jpg,"[16, 12, 10751, 35]",1241982,en,Moana 2,After receiving an unexpected call from her wa...,3664.907,/aLVkiINlIeCkcZIzb7XHzPYgO6L.jpg,2024-11-21,Moana 2,False,7.235,1543,"{'adult': False, 'backdrop_path': '/zo8CIjJ2nf...","Animation, Adventure, Family, Comedy","Animation, Adventure, Family, Comedy After rec...","[When ""Moana"" learns that the whole of Pacific..."
1,False,/xljLe4TiQL1b4sT7956IGgj2vrf.jpg,"[28, 18, 12, 10752]",927342,ta,அமரன்,A heroic true story of Major Mukund Varadaraja...,2763.388,/6m435uh40N7Gzfbd69ttp6W0sdR.jpg,2024-10-31,Amaran,False,8.136,107,"{'adult': False, 'backdrop_path': '/xljLe4TiQL...","Action, Drama, Adventure, War","Action, Drama, Adventure, War A heroic true st...",[Amaran is a moving tribute to Major Mukund Va...
2,False,/qfAfE5auxsuxhxPpnETRAyTP5ff.jpg,"[28, 53, 878]",822119,en,Captain America: Brave New World,After meeting with newly elected U.S. Presiden...,2666.301,/pzIddUEMWhWzfvLI3TwxUG2wGoi.jpg,2025-02-12,Captain America: Brave New World,False,6.2,493,"{'adult': False, 'backdrop_path': '/qfAfE5auxs...","Action, Thriller, Science Fiction","Action, Thriller, Science Fiction After meetin...",[<em>'Captain America: Brave New World'</em> c...


In [7]:
movies_df.review.to_list()

[['When "Moana" learns that the whole of Pacific Island society is under a curse from the powerful god "Nalo", she has to recruit a few faithful crew members and go seek out "Maui" before she can find the long lost and sunken island of "Morefutu". Once there, she hopes to break the malevolent force that has, for centuries, ensured that her people never manage to make contact with any others - despite her finding evidence that they did exist - somewhere, once upon a time! Of course there\'s "Heihei" the chicken and "Pua" the pig, but she\'s gonna need more help than that so up steps pin-up "Moni", the knife-wielding engineer "Loto" and the glass-half-empty farmer "Keke" and off they go. Loads of escapades await them as they again encounter their poisoned-dart firing coconut warriors - the "Kakamora", a giant clam and, of course, the winds and the seas as the mischievous "Matangi" and her army of bats tries to make life as difficult as they can. Luckily, "Matangi" has also had a run-in w

In [8]:
movies_df['original_language'].unique()

array(['en', 'ta', 'zh', 'th', 'te', 'es', 'ja', 'no', 'lv', 'fr', 'tr',
       'ko', 'pl', 'id', 'tl'], dtype=object)

In [9]:
movies_df['details']

0     {'adult': False, 'backdrop_path': '/zo8CIjJ2nf...
1     {'adult': False, 'backdrop_path': '/xljLe4TiQL...
2     {'adult': False, 'backdrop_path': '/qfAfE5auxs...
3     {'adult': False, 'backdrop_path': '/zOpe0eHsq0...
4     {'adult': False, 'backdrop_path': '/u7AZ5CdT2a...
                            ...                        
95    {'adult': False, 'backdrop_path': '/sc1abgWNXc...
96    {'adult': False, 'backdrop_path': '/lTojXQmR0X...
97    {'adult': False, 'backdrop_path': '/evFChfYeD2...
98    {'adult': False, 'backdrop_path': '/bVUB4WI2vT...
99    {'adult': False, 'backdrop_path': '/h0BUSRZlU7...
Name: details, Length: 100, dtype: object

## Data Preparation

In [10]:
# Prepare the data for content-based filtering
movies_df['genres'] = movies_df['details'].apply(lambda x: ' '.join([genre['name'] for genre in x['genres']]))
movies_df['overview'] = movies_df['overview'].fillna('')
movies_df['content'] = movies_df['genres'] + ' ' + movies_df['overview']

# Vectorize the content
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(movies_df['content'])

# Compute cosine similarity
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

def get_content_based_recommendations(movie_id, cosine_sim=cosine_sim):
    # Handle the case where movie_id might not be present
    if movie_id not in movies_df['id'].values:
        return []  # Return an empty list if movie not found

    idx = movies_df.index[movies_df['id'] == movie_id].tolist()[0]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:11]  # Get top 10 similar movies
    movie_indices = [i[0] for i in sim_scores]
    return movies_df['id'].iloc[movie_indices].tolist()


In [11]:
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import MinMaxScaler


knn = NearestNeighbors(metric='cosine', algorithm='brute', n_neighbors=20)


def get_collaborative_recommendations(movie_id, knn_model=None):
    ratings_url = f'https://api.themoviedb.org/3/movie/{movie_id}/ratings?api_key={TMDB_API_KEY}&language=en-US'
    response = requests.get(ratings_url)
    data = response.json()

    # Check if 'results' key exists before creating DataFrame
    if 'results' in data:
        ratings_df = pd.DataFrame(data['results'])

        # Normalize ratings
        scaler = MinMaxScaler()
        ratings_matrix = pd.pivot_table(ratings_df, values='rating', index='user_id', columns='movie_id').fillna(0)
        ratings_matrix = scaler.fit_transform(ratings_matrix)

        # Build k-NN model
        if knn is None:
            knn = NearestNeighbors(metric='cosine', algorithm='brute', n_neighbors=20)
            knn.fit(ratings_matrix)

        # Check if movie_id is in the ratings matrix
        if movie_id in ratings_matrix.columns:
            movie_idx = list(ratings_matrix.columns).index(movie_id)
            distances, indices = knn.kneighbors(ratings_matrix[:, movie_idx].reshape(1, -1), n_neighbors=11)
            movie_indices = [list(ratings_matrix.columns)[i] for i in indices.flatten()][1:]
            return movie_indices
        else:
            return [] # Return empty list if movie not found in ratings
    else:
        return [] # Return empty list if no ratings found for the movie

In [12]:
def get_combined_recommendations(movie_id):
    content_based_recs = get_content_based_recommendations(movie_id)
    collaborative_recs = get_collaborative_recommendations(movie_id)
    combined_recs = list(set(content_based_recs + collaborative_recs))
    return combined_recs[:10]  # Get top 10 recommendations

# Example usage
movie_id = 939243
recommendations = get_combined_recommendations(movie_id)
print(recommendations)


[1247019, 1075789, 507086, 519182, 1114894, 1249013, 845781, 950396, 1241982]


In [13]:

# Show the movie corresponding to the movie id
movie_title = movies_df[movies_df['id'] == movie_id]['title'].values[0]
print(f"Selected Movie: {movie_title}")

# Show the movies corresponding to the recommended movies
recommended_movie_titles = movies_df[movies_df['id'].isin(recommendations)]['title'].tolist()
print("\nRecommended Movies:\n ")
for title in recommended_movie_titles:
  print(title)


Selected Movie: Sonic the Hedgehog 3

Recommended Movies:
 
Moana 2
Death Whisperer 2
The Gorge
Star Trek: Section 31
Jugaremos en el bosque
Red One
Despicable Me 4
Despicable Me 4
The Ballad of Davy Crockett
Jurassic World Dominion


In [14]:
import pickle

with open('recommendation_model.pkl', 'wb') as f:
    pickle.dump({
        'tfidf': tfidf,
        'cosine_sim': cosine_sim,
        'knn': knn,
        'movies_df': movies_df
    }, f)


In [15]:

# Import necessary libraries
import requests
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import MinMaxScaler
import pickle

# Set your TMDB API key
TMDB_API_KEY = input("Enter your API key")

# Function to fetch popular movies from TMDB
def fetch_movies_from_tmdb():
    movies = []
    for page in range(1, 6):  # Fetching multiple pages of popular movies
        url = f'https://api.themoviedb.org/3/movie/popular?api_key={TMDB_API_KEY}&language=en-US&page={page}'
        response = requests.get(url)
        data = response.json()
        movies.extend(data['results'])
    return pd.DataFrame(movies)

# Function to fetch detailed information about a movie
def fetch_movie_details(movie_id):
    url = f'https://api.themoviedb.org/3/movie/{movie_id}?api_key={TMDB_API_KEY}&language=en-US'
    response = requests.get(url)
    return response.json()

# Fetch popular movies and their details
movies_df = fetch_movies_from_tmdb()
movies_df['details'] = movies_df['id'].apply(fetch_movie_details)

# Extract genres and overview to create a 'content' field
movies_df['genres'] = movies_df['details'].apply(lambda x: ' '.join([genre['name'] for genre in x['genres']]))
movies_df['overview'] = movies_df['overview'].fillna('')
movies_df['content'] = movies_df['genres'] + ' ' + movies_df['overview']

# Content-Based Filtering

# Vectorize movie content using TF-IDF
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(movies_df['content'])

# Calculate cosine similarity between movies
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

# Function to get content-based recommendations
def get_content_based_recommendations(movie_id, cosine_sim=cosine_sim):
    # Handle the case where movie_id might not be present
    if movie_id not in movies_df['id'].values:
        return []  # Return an empty list if movie not found

    idx = movies_df.index[movies_df['id'] == movie_id].tolist()[0]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:11]  # Get top 10 similar movies
    movie_indices = [i[0] for i in sim_scores]
    return movies_df['id'].iloc[movie_indices].tolist()

# Collaborative Filtering

# Initialize k-NN model (will be fit later)
knn = NearestNeighbors(metric='cosine', algorithm='brute', n_neighbors=20)

# Function to get collaborative recommendations
def get_collaborative_recommendations(movie_id, knn_model=None):
    ratings_url = f'https://api.themoviedb.org/3/movie/{movie_id}/ratings?api_key={TMDB_API_KEY}&language=en-US'
    response = requests.get(ratings_url)
    data = response.json()

    # Check if 'results' key exists before creating DataFrame
    if 'results' in data:
        ratings_df = pd.DataFrame(data['results'])

        # Normalize ratings
        scaler = MinMaxScaler()
        ratings_matrix = pd.pivot_table(ratings_df, values='rating', index='user_id', columns='movie_id').fillna(0)
        ratings_matrix = scaler.fit_transform(ratings_matrix)

        # Build k-NN model
        if knn is None:
            knn = NearestNeighbors(metric='cosine', algorithm='brute', n_neighbors=20)
            knn.fit(ratings_matrix)

        # Check if movie_id is in the ratings matrix
        if movie_id in ratings_matrix.columns:
            movie_idx = list(ratings_matrix.columns).index(movie_id)
            distances, indices = knn.kneighbors(ratings_matrix[:, movie_idx].reshape(1, -1), n_neighbors=11)
            movie_indices = [list(ratings_matrix.columns)[i] for i in indices.flatten()][1:]
            return movie_indices
        else:
            return [] # Return empty list if movie not found in ratings
    else:
        return [] # Return empty list if no ratings found for the movie

# Combined Recommendations

# Function to combine content-based and collaborative recommendations
def get_combined_recommendations(movie_id):
    content_based_recs = get_content_based_recommendations(movie_id)
    collaborative_recs = get_collaborative_recommendations(movie_id)
    combined_recs = list(set(content_based_recs + collaborative_recs))
    return combined_recs[:10]  # Get top 10 recommendations

# Example usage
movie_id = 533535
recommendations = get_combined_recommendations(movie_id)
print(recommendations)

# Show the movie corresponding to the movie id
movie_title = movies_df[movies_df['id'] == movie_id]['title'].values[0]
print(f"Selected Movie: {movie_title}")

# Show the movies corresponding to the recommended movies
recommended_movie_titles = movies_df[movies_df['id'].isin(recommendations)]['title'].tolist()
print("\nRecommended Movies:")
for title in recommended_movie_titles:
  print(title)

# Save the recommendation model
with open('recommendation_model.pkl', 'wb') as f:
    pickle.dump({
        'tfidf': tfidf,
        'cosine_sim': cosine_sim,
        'knn': knn,
        'movies_df': movies_df
    }, f)


Enter your API key306c333178bf1802b38c1f8863f606fc
[1064486, 1035048, 945961, 939243, 1114894, 519182, 1196470]
Selected Movie: Deadpool & Wolverine

Recommended Movies:
Sonic the Hedgehog 3
Elevation
Elevation
Star Trek: Section 31
Despicable Me 4
Memoir of a Snail
Despicable Me 4
Memoir of a Snail
Alien: Romulus
Survive
