In [8]:
import json
import pandas as pd

# Results look like:
# Array<{
#   results: Array<{
#     adult: boolean,
#     backdrop_path: string,
#     genre_ids: number[],
#     id: number,
#     original_language: string,
#     original_title: string,
#     overview: string,
#     popularity: number,
#     poster_path: string,
#     release_date: string,
#     title: string,
#     video: boolean,
#     vote_average: number,
#     vote_count: number
#   }>,
#   total_pages: number,
#   total_results: number,
#   id: number,
#   page: number
# }>

# Load the saved TMDB list responses (an array of paged responses, see the
# schema comment above). JSON is UTF-8 by specification, so pin the encoding
# instead of relying on the platform default; movie titles and overviews can
# contain non-ASCII text.
with open("../../data/tmdb_data.json", "r", encoding="utf-8") as f:
    data = json.load(f)

# Flatten every page's "results" array into a single list of movie records.
all_results = [movie for response in data for movie in response["results"]]
print(f"Total movies: {len(all_results)}")

# One row per movie record; columns come from the record keys.
df = pd.DataFrame(all_results)

Total movies: 6646


In [13]:
# For every movie id in df, fetch the full movie record from the TMDB movie-details endpoint.

import asyncio
import aiohttp
import os
import dotenv

# Pull variables from a local .env file (if any) into the process environment.
dotenv.load_dotenv()

# Bearer token sent in the Authorization header of every TMDB request.
TMDB_READ_ACCESS_TOKEN = os.getenv("TMDB_READ_ACCESS_TOKEN")
if not TMDB_READ_ACCESS_TOKEN:
    # Without this check the header would literally be "Bearer None" and every
    # request would come back as an auth error payload instead of movie data.
    raise RuntimeError(
        "TMDB_READ_ACCESS_TOKEN is not set; add it to the environment or a .env file"
    )


async def get_movie_details(session: aiohttp.ClientSession, movie_id: int, index: int, total: int):
    """Fetch the TMDB details record for a single movie.

    Args:
        session: Open aiohttp session used to issue the request.
        movie_id: TMDB movie id placed in the request path.
        index: Zero-based position of this movie in the overall run; only used
            for the progress line.
        total: Total number of movies in the run; only used for the progress line.

    Returns:
        The decoded JSON body of the response. Non-429 error responses (and a
        429 that persists after the final attempt) are returned as-is, so the
        caller still gets exactly one entry per requested id.
    """
    url = f"https://api.themoviedb.org/3/movie/{movie_id}?language=en-US"

    headers = {
        "accept": "application/json",
        "Authorization": f"Bearer {TMDB_READ_ACCESS_TOKEN}"
    }

    max_attempts = 5
    for attempt in range(max_attempts):
        async with session.get(url, headers=headers) as response:
            rate_limited = response.status == 429
            if not rate_limited or attempt == max_attempts - 1:
                data = await response.json()
                print(f"{index+1}/{total}: TMDB ID {movie_id}")
                return data

            # Rate limited: prefer the server's Retry-After hint (in seconds),
            # otherwise back off exponentially. Capped so one bad header
            # cannot stall the whole run.
            try:
                delay = float(response.headers.get("Retry-After"))
            except (TypeError, ValueError):
                delay = float(2 ** attempt)
            delay = min(max(delay, 0.0), 30.0)

        # Sleep only after the connection has been released back to the pool.
        await asyncio.sleep(delay)

async def fetch_all_details(batch_size: int = 1000):
    """Fetch TMDB details for every id in ``df['id']`` and save them as JSON.

    Requests are issued in consecutive batches; each batch runs concurrently
    and must finish before the next one starts. Results keep the order of
    ``df['id']``.

    Args:
        batch_size: Maximum number of requests in flight at once. Lower it if
            the API starts rate limiting.

    Returns:
        List with one decoded JSON response per id, in input order. The same
        list is written to ``../../data/tmdb_movie_details.json``.

    Raises:
        ValueError: If ``batch_size`` is less than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # NOTE(review): ids are fetched exactly as listed, so any id repeated in
    # df['id'] is requested once per occurrence.
    movie_ids = df['id'].tolist()
    total = len(movie_ids)
    all_details = []

    async with aiohttp.ClientSession() as session:
        # Process in batches of `batch_size` to bound concurrency. Coroutines
        # are created per batch so that a failure part-way through does not
        # leave thousands of never-awaited coroutines behind.
        for start in range(0, total, batch_size):
            batch = [
                get_movie_details(session, movie_id, start + offset, total)
                for offset, movie_id in enumerate(movie_ids[start:start + batch_size])
            ]
            all_details.extend(await asyncio.gather(*batch))

    with open("../../data/tmdb_movie_details.json", "w") as f:
        json.dump(all_details, f, indent=2)
    print(f"Saved details for {len(all_details)} movies")

    return all_details

# Run the fetch and wait for it to finish. Top-level `await` relies on the
# notebook kernel's running event loop; it is a SyntaxError in a plain script.
movie_details = await fetch_all_details()
# One row per fetched response; columns are the union of keys across responses.
details_df = pd.DataFrame(movie_details)

  for key, value in headers.items():


73/6646: TMDB ID 10830
17/6646: TMDB ID 808
13/6646: TMDB ID 122
67/6646: TMDB ID 818647
69/6646: TMDB ID 10363
70/6646: TMDB ID 61979
15/6646: TMDB ID 118
72/6646: TMDB ID 694
71/6646: TMDB ID 10137
12/6646: TMDB ID 1156593
16/6646: TMDB ID 14836
14/6646: TMDB ID 693134
68/6646: TMDB ID 101299
11/6646: TMDB ID 120
18/6646: TMDB ID 843527
89/6646: TMDB ID 769
32/6646: TMDB ID 101299
29/6646: TMDB ID 748230
28/6646: TMDB ID 12155
38/6646: TMDB ID 615656
27/6646: TMDB ID 799583
33/6646: TMDB ID 8966
23/6646: TMDB ID 10625
87/6646: TMDB ID 497
80/6646: TMDB ID 613504
31/6646: TMDB ID 438631
82/6646: TMDB ID 744275
77/6646: TMDB ID 1020896
79/6646: TMDB ID 11324
88/6646: TMDB ID 466420
78/6646: TMDB ID 1134754
83/6646: TMDB ID 1016346
93/6646: TMDB ID 281957
75/6646: TMDB ID 286217
19/6646: TMDB ID 122917
30/6646: TMDB ID 674
25/6646: TMDB ID 872585
86/6646: TMDB ID 116149
85/6646: TMDB ID 739547
21/6646: TMDB ID 121
76/6646: TMDB ID 695721
43/6646: TMDB ID 11036
49/6646: TMDB ID 1086747
2

In [14]:
# Preview the fetched details; the notebook shows a truncated view of the frame.
details_df

Unnamed: 0,adult,backdrop_path,belongs_to_collection,budget,genres,homepage,id,imdb_id,origin_country,original_language,...,spoken_languages,status,tagline,title,video,vote_average,vote_count,status_code,status_message,success
0,False,/llIXQAndg5kB6SWlp6ouUdO7Zxd.jpg,,46700000.0,"[{'id': 12, 'name': 'Adventure'}, {'id': 36, '...",,1084736.0,tt26446278,"[BE, FR]",fr,...,"[{'english_name': 'Italian', 'iso_639_1': 'it'...",Released,"This is not vengeance, this is justice.",The Count of Monte-Cristo,False,8.326,746.0,,,
1,False,/2fxnTXr8NwyTFkunkimJkGkhqfy.jpg,,0.0,"[{'id': 18, 'name': 'Drama'}, {'id': 28, 'name...",https://nostromopictures.com/en/movies/coming-...,1118031.0,tt27599851,[ES],es,...,"[{'english_name': 'Arabic', 'iso_639_1': 'ar',...",Released,,Apocalypse Z: The Beginning of the End,False,6.700,215.0,,,
2,False,/zAqBIeO71BFL7bAtP5TFzVjVamy.jpg,,25000000.0,"[{'id': 10749, 'name': 'Romance'}, {'id': 18, ...",https://www.itendswithus.movie,1079091.0,tt10655524,[US],en,...,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,We break the pattern or the pattern breaks us.,It Ends with Us,False,7.196,979.0,,,
3,False,/vGZHjvyAcmD1mHg5kLwITCgh74U.jpg,,0.0,"[{'id': 16, 'name': 'Animation'}, {'id': 18, '...",https://tunneltosummerfilm.co.uk,916192.0,tt17382524,[JP],ja,...,"[{'english_name': 'Japanese', 'iso_639_1': 'ja...",Released,A mysterious tunnel can grant your fondest wis...,"The Tunnel to Summer, the Exit of Goodbyes",False,7.700,165.0,,,
4,False,/zfbjgQE1uSd9wiPTX4VzsLi0rGG.jpg,,25000000.0,"[{'id': 18, 'name': 'Drama'}, {'id': 80, 'name...",,278.0,tt0111161,[US],en,...,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Fear can hold you prisoner. Hope can set you f...,The Shawshank Redemption,False,8.707,27061.0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6641,False,,,0.0,"[{'id': 16, 'name': 'Animation'}, {'id': 14, '...",,894540.0,,[JP],ja,...,"[{'english_name': 'Japanese', 'iso_639_1': 'ja...",In Production,,運命戦線 リデルライト,False,0.000,0.0,,,
6642,False,,,0.0,"[{'id': 18, 'name': 'Drama'}]",,768226.0,tt1235191,[XC],cs,...,"[{'english_name': 'No Language', 'iso_639_1': ...",Released,,Zpovědnice,False,0.000,0.0,,,
6643,False,,,0.0,"[{'id': 18, 'name': 'Drama'}]",,766405.0,tt1082817,[XC],cs,...,"[{'english_name': 'No Language', 'iso_639_1': ...",Released,,Dar svatební noci,False,0.000,0.0,,,
6644,False,,,0.0,"[{'id': 35, 'name': 'Comedy'}]",,765027.0,,[XC],cs,...,"[{'english_name': 'No Language', 'iso_639_1': ...",Released,,Výlet pana Broučka na Mars,False,0.000,0.0,,,


In [15]:
# Persist the details frame as CSV, without the row index.
# NOTE(review): columns that hold lists/dicts (e.g. genres, spoken_languages)
# are presumably written as their string form and need parsing on reload.
details_df.to_csv("../../data/tmdb_movie_details_df.csv", index=False)
# Result: revenue data for 6K+ adaptations is now saved.