## **TMDB Movie Data Analysis using Pandas and APIs**


In [2]:
import os
import time
import requests
import pandas as pd
import numpy as np
from dotenv import load_dotenv

### 1. **Fetching the movie data from the API**

In [3]:
# Load variables from a local .env file (if present) into the process environment.
load_dotenv()

# Read the key from the environment; it is never hardcoded in the notebook.
TMDB_API_KEY = os.getenv("TMDB_API_KEY")

# Fail fast: os.getenv returns None when the variable is missing, and without
# this check the success message below would be printed regardless.
if not TMDB_API_KEY:
    raise RuntimeError(
        "TMDB_API_KEY is not set. Add it to your .env file or environment."
    )

# Confirm success without echoing the secret itself.
print("The TMDB API Key is fetched")

The TMDB API Key is fetched


In [4]:
# --- TMDB API configuration -------------------------------------------------
BASE_URL = "https://api.themoviedb.org/3"
MOVIE_ENDPOINT = BASE_URL + "/movie"
# Credits are requested as <movie root>/<id>/credits, so this shares the movie root.
CREDITS_ENDPOINT = BASE_URL + "/movie"

# --- Movies to download -----------------------------------------------------
# NOTE(review): the leading 0 fails with "Movie not found" in the recorded run;
# presumably it is included on purpose to exercise the failure path - confirm.
movie_ids = [
    0,
    299534, 19995, 140607, 299536, 597, 135397,
    420818, 24428, 168259, 99861, 284054, 12445,
    181808, 330457, 351286, 109445, 321612, 260513,
]

# Quick sanity check: how many IDs there are, and the first few.
len(movie_ids), movie_ids[:5]


(19, [0, 299534, 19995, 140607, 299536])

In [5]:
def get_tmdb_json(url, params=None, verbose=True, timeout=10):
    """Send a GET request to the TMDB API and return the decoded JSON body.

    Parameters
    ----------
    url : str
        Full endpoint URL to request.
    params : mapping, optional
        Extra query parameters. The mapping is copied, so the caller's object
        is never modified and never ends up holding the API key.
    verbose : bool, default True
        When True, print a short message if the request fails.
    timeout : float, default 10
        Seconds to wait for the server before giving up, so a stalled
        connection cannot hang the notebook indefinitely.

    Returns
    -------
    The parsed JSON payload, or None if the request failed, the server
    answered with an error status, or the body was not valid JSON.
    """
    # Work on a private copy: injecting the key into the caller's dict would
    # leak the secret into caller state and surprise anyone reusing `params`.
    query = dict(params or {})
    query["api_key"] = TMDB_API_KEY

    try:
        response = requests.get(url, params=query, timeout=timeout)
        response.raise_for_status()
        return response.json()
    except (requests.RequestException, ValueError):
        # Network/HTTP failures raise RequestException; a non-JSON body raises
        # ValueError. The exception text is deliberately NOT printed: HTTP
        # error messages from requests embed the full request URL, which
        # includes the api_key query parameter.
        if verbose:
            print(f"Failed to get data from {url}")
        return None


In [6]:
def fetch_movie_with_credits(movie_id, sleep_time=0.2):
    """Download one movie's details together with its cast and crew.

    Returns a dict holding the selected detail fields plus "cast" and "crew",
    or None when the details request yields nothing. If only the credits
    request fails, "cast" and "crew" are empty lists.
    """
    movie = get_tmdb_json(f"{MOVIE_ENDPOINT}/{movie_id}")
    if not movie:
        print(f"ID {movie_id}: Movie not found")
        return None

    # Fall back to empty credits so a failed credits call does not drop the movie.
    people = get_tmdb_json(f"{CREDITS_ENDPOINT}/{movie_id}/credits") or {
        "cast": [],
        "crew": [],
    }

    # Detail fields copied into the record; absent keys become None.
    wanted = (
        "id", "title", "tagline", "release_date", "genres",
        "belongs_to_collection", "original_language", "budget",
        "revenue", "production_companies", "production_countries",
        "vote_count", "vote_average", "popularity", "runtime",
        "overview", "spoken_languages", "poster_path", "status",
        "adult", "imdb_id", "original_title", "video", "homepage",
    )
    record = {name: movie.get(name) for name in wanted}
    record["cast"] = people.get("cast", [])
    record["crew"] = people.get("crew", [])

    # Pause between movies to space out the API calls.
    time.sleep(sleep_time)

    return record

In [7]:
# Collect one record per movie that was fetched successfully.
records = []

for m_id in movie_ids:
    print(f"Fetching movie_id = {m_id}")
    rec = fetch_movie_with_credits(m_id)
    if rec is None:
        # Failed lookups are skipped, so they do not become rows.
        continue
    records.append(rec)

# Build the frame once from the list of dicts.
df_raw = pd.DataFrame(records)

# Show the frame's dimensions and its column names.
df_raw.shape, df_raw.columns


Fetching movie_id = 0
Failed to get data from https://api.themoviedb.org/3/movie/0
ID 0: Movie not found
Fetching movie_id = 299534
Fetching movie_id = 19995
Fetching movie_id = 140607
Fetching movie_id = 299536
Fetching movie_id = 597
Fetching movie_id = 135397
Fetching movie_id = 420818
Fetching movie_id = 24428
Fetching movie_id = 168259
Fetching movie_id = 99861
Fetching movie_id = 284054
Fetching movie_id = 12445
Fetching movie_id = 181808
Fetching movie_id = 330457
Fetching movie_id = 351286
Fetching movie_id = 109445
Fetching movie_id = 321612
Fetching movie_id = 260513


((18, 26),
 Index(['id', 'title', 'tagline', 'release_date', 'genres',
        'belongs_to_collection', 'original_language', 'budget', 'revenue',
        'production_companies', 'production_countries', 'vote_count',
        'vote_average', 'popularity', 'runtime', 'overview', 'spoken_languages',
        'poster_path', 'status', 'adult', 'imdb_id', 'original_title', 'video',
        'homepage', 'cast', 'crew'],
       dtype='object'))

In [8]:
# Preview the first rows of the raw frame to sanity-check the fetched fields.
df_raw.head()

Unnamed: 0,id,title,tagline,release_date,genres,belongs_to_collection,original_language,budget,revenue,production_companies,...,spoken_languages,poster_path,status,adult,imdb_id,original_title,video,homepage,cast,crew
0,299534,Avengers: Endgame,Avenge the fallen.,2019-04-24,"[{'id': 12, 'name': 'Adventure'}, {'id': 878, ...","{'id': 86311, 'name': 'The Avengers Collection...",en,356000000,2799439100,"[{'id': 420, 'logo_path': '/hUzeosd33nzE5MCNsZ...",...,"[{'english_name': 'English', 'iso_639_1': 'en'...",/bR8ISy1O9XQxqiy0fQFw2BX72RQ.jpg,Released,False,tt4154796,Avengers: Endgame,False,https://www.marvel.com/movies/avengers-endgame,"[{'adult': False, 'gender': 2, 'id': 3223, 'kn...","[{'adult': False, 'gender': 0, 'id': 3019687, ..."
1,19995,Avatar,Enter the world of Pandora.,2009-12-15,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...","{'id': 87096, 'name': 'Avatar Collection', 'po...",en,237000000,2923706026,"[{'id': 444, 'logo_path': None, 'name': 'Dune ...",...,"[{'english_name': 'English', 'iso_639_1': 'en'...",/gKY6q7SjCkAU6FqvqWybDYgUKIF.jpg,Released,False,tt0499549,Avatar,False,https://www.avatar.com/movies/avatar,"[{'adult': False, 'gender': 2, 'id': 65731, 'k...","[{'adult': False, 'gender': 2, 'id': 58871, 'k..."
2,140607,Star Wars: The Force Awakens,Every generation has a story.,2015-12-15,"[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...","{'id': 10, 'name': 'Star Wars Collection', 'po...",en,245000000,2068223624,"[{'id': 1, 'logo_path': '/tlVSws0RvvtPBwViUyOF...",...,"[{'english_name': 'English', 'iso_639_1': 'en'...",/wqnLdwVXoBjKibFRR5U3y0aDUhs.jpg,Released,False,tt2488496,Star Wars: The Force Awakens,False,http://www.starwars.com/films/star-wars-episod...,"[{'adult': False, 'gender': 2, 'id': 3, 'known...","[{'adult': False, 'gender': 2, 'id': 491, 'kno..."
3,299536,Avengers: Infinity War,Destiny arrives all the same.,2018-04-25,"[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...","{'id': 86311, 'name': 'The Avengers Collection...",en,300000000,2052415039,"[{'id': 420, 'logo_path': '/hUzeosd33nzE5MCNsZ...",...,"[{'english_name': 'English', 'iso_639_1': 'en'...",/7WsyChQLEftFiDOVTGkv3hFpyyt.jpg,Released,False,tt4154756,Avengers: Infinity War,False,https://www.marvel.com/movies/avengers-infinit...,"[{'adult': False, 'gender': 2, 'id': 3223, 'kn...","[{'adult': False, 'gender': 0, 'id': 3019687, ..."
4,597,Titanic,Nothing on earth could come between them.,1997-11-18,"[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...",,en,200000000,2264162353,"[{'id': 4, 'logo_path': '/jay6WcMgagAklUt7i9Eu...",...,"[{'english_name': 'English', 'iso_639_1': 'en'...",/9xjZS2rlVxm8SFx8kPC3aIGCOYQ.jpg,Released,False,tt0120338,Titanic,False,https://www.paramountmovies.com/movies/titanic,"[{'adult': False, 'gender': 2, 'id': 6193, 'kn...","[{'adult': False, 'gender': 2, 'id': 2710, 'kn..."


In [9]:
# Inspect the dtype pandas inferred for each column.
df_raw.dtypes

id                         int64
title                     object
tagline                   object
release_date              object
genres                    object
belongs_to_collection     object
original_language         object
budget                     int64
revenue                    int64
production_companies      object
production_countries      object
vote_count                 int64
vote_average             float64
popularity               float64
runtime                    int64
overview                  object
spoken_languages          object
poster_path               object
status                    object
adult                       bool
imdb_id                   object
original_title            object
video                       bool
homepage                  object
cast                      object
crew                      object
dtype: object