# ⚠️ Experimental / Not Used in Final Pipeline

This notebook explores a TMDB-based recommendation approach.
The final application uses a dataset-only recommendation engine
for stability, speed, and reproducibility.


In [1]:
import os
import requests
import pickle
import pandas as pd
from dotenv import load_dotenv
from sklearn.metrics.pairwise import cosine_similarity

# Read variables from a local .env file into the process environment so the
# TMDB key never has to be hardcoded in the notebook.
load_dotenv()

BASE_URL = "https://api.themoviedb.org/3"
API_KEY = os.getenv("TMDB_API_KEY")  # None when the variable is not set

# Local assets used by the recommender: the cleaned movie table, plus two
# pickled objects (presumably the fitted TF-IDF vectorizer and the matrix of
# per-movie vectors it produced -- confirm against the notebook that wrote them).
# NOTE: pickle.load executes arbitrary code; only load files you created.
df = pd.read_csv("../data/movies_cleaned.csv")

with open("../data/tfidf_vectorizer.pkl", "rb") as vectorizer_file:
    tfidf = pickle.load(vectorizer_file)

with open("../data/movie_vectors.pkl", "rb") as vectors_file:
    movie_vectors = pickle.load(vectors_file)

print("TMDB + local assets loaded")


TMDB + local assets loaded


In [2]:
def tmdb_search_movie(movie_name, timeout=10):
    """Search TMDB for a movie by name and return the first search hit.

    Args:
        movie_name: Free-text title to search for.
        timeout: Seconds to wait for TMDB before giving up. Without a
            timeout, ``requests`` can block indefinitely on a stalled
            connection.

    Returns:
        The first entry of the response's ``"results"`` list, or ``None``
        when the query is blank, TMDB answers with a non-200 status, or the
        search yields no results.

    Raises:
        requests.RequestException: On network failures or when the timeout
            expires.
    """
    # A blank query cannot match anything, so skip the network round trip.
    if movie_name is None or not str(movie_name).strip():
        return None

    url = f"{BASE_URL}/search/movie"
    params = {
        "api_key": API_KEY,
        "query": movie_name,
        "include_adult": False
    }

    response = requests.get(url, params=params, timeout=timeout)

    # Any non-OK status (bad key, rate limit, server error) is reported as
    # "no match" rather than raised.
    if response.status_code != 200:
        return None

    results = response.json().get("results", [])
    if not results:
        return None

    return results[0]  # first-listed hit, treated as the best match


In [3]:
def build_input_text(tmdb_movie):
    """Build the lowercase text used to vectorize a TMDB search result.

    Args:
        tmdb_movie: Mapping for one TMDB movie; the ``"title"`` and
            ``"overview"`` entries are read.

    Returns:
        ``title + " " + overview`` in lowercase. A missing or null field
        contributes an empty string.
    """
    # `.get(key, "")` still yields None when the key is present with a JSON
    # null value, which would make the concatenation raise TypeError.
    title = tmdb_movie.get("title") or ""
    overview = tmdb_movie.get("overview") or ""

    return (title + " " + overview).lower()


In [4]:
def recommend_any_movie(movie_name, top_n=5):
    """Recommend movies similar to ``movie_name``.

    If the title matches a row of ``df`` exactly, that row's vector is
    compared against every row of ``movie_vectors``. Otherwise the title is
    looked up on TMDB, the hit's title and overview are vectorized with
    ``tfidf``, and that vector is compared instead.

    Args:
        movie_name: Title to base the recommendations on.
        top_n: Maximum number of recommendations; values below zero are
            treated as zero.

    Returns:
        A DataFrame with the ``title`` and ``vote_average`` columns of the
        highest-scoring rows, ordered by descending cosine similarity, or
        the string ``"Movie not found"`` when the title is neither in the
        dataset nor found on TMDB.
    """
    limit = max(top_n, 0)

    # If the movie exists locally, use the dataset recommender.
    # Work with the row *position* (not the index label) because both
    # `movie_vectors[...]` and `df.iloc[...]` are positional.
    local_positions = (df["title"] == movie_name).to_numpy().nonzero()[0]
    if local_positions.size:
        pos = int(local_positions[0])
        scores = cosine_similarity(
            movie_vectors[pos],
            movie_vectors
        ).flatten()

        ranked = scores.argsort()[::-1]
        # Drop the query movie explicitly: with tied scores it is not
        # guaranteed to sort first, so skipping element 0 could discard a
        # genuine recommendation and return the query movie itself.
        ranked = ranked[ranked != pos][:limit]
        return df.iloc[ranked][["title", "vote_average"]]

    # Otherwise, search via TMDB.
    tmdb_movie = tmdb_search_movie(movie_name)
    if tmdb_movie is None:
        return "Movie not found"

    input_text = build_input_text(tmdb_movie)

    input_vector = tfidf.transform([input_text])
    scores = cosine_similarity(input_vector, movie_vectors).flatten()

    ranked = scores.argsort()[::-1][:limit]
    return df.iloc[ranked][["title", "vote_average"]]


In [5]:
recommend_any_movie("Batman")


Unnamed: 0,title,vote_average
3906,Batman Returns,6.9
2298,Batman Beyond: Return of the Joker,7.3
4761,Batman: Gotham by Gaslight,6.7
3439,The Batman vs. Dracula,7.0
847,Batman Begins,7.7


In [8]:
recommend_any_movie("Lucky the racer")

Unnamed: 0,title,vote_average
1129,2 Hearts,7.6
4662,Logan Lucky,6.7
8640,I Could Never Be Your Woman,5.9
7676,Green Card,6.1
4580,Four Brothers,6.7
