In [1]:
import time
import requests
from dotenv import load_dotenv
import os
import pandas as pd
from tqdm import tqdm  # <- optional progress bar

# Pull the API key for api.themoviedb.org out of the local secrets file.
load_dotenv('secrets.env')
key = os.environ.get('API_KEY')
parameters = dict(api_key=key)

# Load the catalogue, then keep a working copy of every row that has at least
# one missing value. Index labels are left untouched so the rows can be
# written back to `dataframe` by label later on.
dataframe = pd.read_csv('netflix_titles.csv')
df = dataframe.loc[dataframe.isna().any(axis=1)].copy()

# In-memory caches: searched title -> resolved id.
movie_id_cache = dict()
series_id_cache = dict()

def get_movie_id(movie_name):
    """Look up the TMDB id of the first search match for a movie title.

    Completed lookups (including "no match") are stored in
    ``movie_id_cache`` so a repeated title costs no further request. Failed
    requests are not cached, so a later call with the same title retries.

    Args:
        movie_name: Title sent as the ``query`` of the movie search endpoint.

    Returns:
        The ``id`` of the first search result, ``'Not Available'`` when the
        search has no results (or the first result carries no id), or
        ``'Error'`` when the HTTP request fails.
    """
    # Check cache first
    if movie_name in movie_id_cache:
        return movie_id_cache[movie_name]

    url_for_movies = "https://api.themoviedb.org/3/search/movie"
    params = {'api_key': key, 'query': movie_name}
    try:
        # Without a timeout, requests waits indefinitely on a stalled
        # connection, which would freeze the whole enrichment run.
        response = requests.get(url_for_movies, params=params, timeout=10)
        response.raise_for_status()
        results = response.json().get('results', [])
        result_id = results[0].get('id', 'Not Available') if results else 'Not Available'
        movie_id_cache[movie_name] = result_id
        return result_id
    except requests.RequestException as e:
        # The exception text embeds the request URL, which carries api_key;
        # strip the key so it never ends up in printed output.
        detail = str(e).replace(key, '<redacted>') if key else str(e)
        print(f"Request failed for '{movie_name}': {detail}")
        return 'Error'
    finally:
        # Throttle: pause after every network attempt (cache hits return
        # before reaching this block).
        time.sleep(0.25)

def get_movie_director(movie_id):
    """Return the director name(s) credited on a TMDB movie.

    Args:
        movie_id: TMDB movie id, or one of the sentinel values
            (``'Not Available'``, ``'Error'``, ``None``) meaning no id is known.

    Returns:
        Comma-separated names of the crew members whose job is exactly
        "Director", ``'Not Available'`` when the id is unusable or no
        director is credited, or ``'Error'`` when the HTTP request fails.
    """
    if movie_id in ('Not Available', 'Error', None):
        return 'Not Available'
    try:
        movie_id = int(movie_id)
    except (ValueError, TypeError, OverflowError):
        # OverflowError covers infinite floats, which int() also rejects.
        return 'Not Available'

    url_for_movies_credits = f"https://api.themoviedb.org/3/movie/{movie_id}/credits"
    try:
        # Without a timeout, requests waits indefinitely on a stalled connection.
        response = requests.get(url_for_movies_credits, params={'api_key': key}, timeout=10)
        response.raise_for_status()
        crew = response.json().get('crew') or []
        # Match the job exactly: a substring test would also pick up jobs such
        # as "Director of Photography" or "Assistant Director". Entries with a
        # null job or name are skipped instead of raising.
        directors = [
            member['name']
            for member in crew
            if (member.get('job') or '').strip().lower() == 'director'
            and member.get('name')
        ]
        return ', '.join(directors) if directors else 'Not Available'
    except requests.RequestException as e:
        # The exception text embeds the request URL, which carries api_key;
        # strip the key so it never ends up in printed output.
        detail = str(e).replace(key, '<redacted>') if key else str(e)
        print(f"Request failed for movie_id '{movie_id}': {detail}")
        return 'Error'
    finally:
        # Throttle: pause after every network attempt.
        time.sleep(0.25)

def get_movie_cast(movie_id):
    """Return the cast names credited on a TMDB movie.

    Args:
        movie_id: TMDB movie id, or one of the sentinel values
            (``'Not Available'``, ``'Error'``, ``None``) meaning no id is known.

    Returns:
        Comma-separated cast names in the order the API lists them,
        ``'Not Available'`` when the id is unusable or no named cast member
        is returned, or ``'Error'`` when the HTTP request fails.
    """
    if movie_id in ('Not Available', 'Error', None):
        return 'Not Available'
    try:
        movie_id = int(movie_id)
    except (ValueError, TypeError, OverflowError):
        # OverflowError covers infinite floats, which int() also rejects.
        return 'Not Available'

    url_for_movies_credits = f"https://api.themoviedb.org/3/movie/{movie_id}/credits"
    try:
        # Without a timeout, requests waits indefinitely on a stalled connection.
        response = requests.get(url_for_movies_credits, params={'api_key': key}, timeout=10)
        response.raise_for_status()
        # `or []` also covers a payload where "cast" is present but null.
        cast = response.json().get('cast') or []
        cast_names = [member.get('name') for member in cast if member.get('name')]
        return ', '.join(cast_names) if cast_names else 'Not Available'
    except requests.RequestException as e:
        # The exception text embeds the request URL, which carries api_key;
        # strip the key so it never ends up in printed output.
        detail = str(e).replace(key, '<redacted>') if key else str(e)
        print(f"Request failed for movie_id '{movie_id}': {detail}")
        return 'Error'
    finally:
        # Throttle: pause after every network attempt.
        time.sleep(0.25)

def get_series_id(series_name):
    """Look up the TMDB id of the first search match for a TV series title.

    Completed lookups (including "no match") are stored in
    ``series_id_cache`` so a repeated title costs no further request. Failed
    requests are not cached, so a later call with the same title retries.

    Args:
        series_name: Title sent as the ``query`` of the TV search endpoint.

    Returns:
        The ``id`` of the first search result, ``'Not Available'`` when the
        search has no results (or the first result carries no id), or
        ``'Error'`` when the HTTP request fails.
    """
    if series_name in series_id_cache:
        return series_id_cache[series_name]

    url_for_shows = 'https://api.themoviedb.org/3/search/tv'
    params = {'api_key': key, 'query': series_name}
    try:
        # Without a timeout, requests waits indefinitely on a stalled
        # connection, which would freeze the whole enrichment run.
        response = requests.get(url_for_shows, params=params, timeout=10)
        response.raise_for_status()
        results = response.json().get('results', [])
        result_id = results[0].get('id', 'Not Available') if results else 'Not Available'
        series_id_cache[series_name] = result_id
        return result_id
    except requests.RequestException as e:
        # The exception text embeds the request URL, which carries api_key;
        # strip the key so it never ends up in printed output.
        detail = str(e).replace(key, '<redacted>') if key else str(e)
        print(f"Request failed for series '{series_name}': {detail}")
        return 'Error'
    finally:
        # Throttle: pause after every network attempt (cache hits return
        # before reaching this block).
        time.sleep(0.25)

def get_series_director(series_id):
    """Return the director name(s) from a TV series' season 1 credits.

    Only the ``season/1/credits`` endpoint is queried, so a series for which
    that endpoint answers 404 is reported as ``'Error'``.

    Args:
        series_id: TMDB series id, or one of the sentinel values
            (``'Not Available'``, ``'Error'``, ``None``) meaning no id is known.

    Returns:
        Comma-separated names of the crew members whose job is exactly
        "Director", ``'Not Available'`` when the id is unusable or no
        director is credited, or ``'Error'`` when the HTTP request fails.
    """
    if series_id in ('Not Available', 'Error', None):
        return 'Not Available'
    try:
        series_id = int(series_id)
    except (ValueError, TypeError, OverflowError):
        # OverflowError covers infinite floats, which int() also rejects.
        return 'Not Available'

    url_for_series_credits = f'https://api.themoviedb.org/3/tv/{series_id}/season/1/credits'
    try:
        # Without a timeout, requests waits indefinitely on a stalled connection.
        response = requests.get(url_for_series_credits, params={'api_key': key}, timeout=10)
        response.raise_for_status()
        crew = response.json().get('crew') or []
        # Match the job exactly: a substring test would also pick up jobs such
        # as "Director of Photography" or "Assistant Director". Entries with a
        # null job or name are skipped instead of raising.
        directors = [
            member['name']
            for member in crew
            if (member.get('job') or '').strip().lower() == 'director'
            and member.get('name')
        ]
        return ', '.join(directors) if directors else 'Not Available'
    except requests.RequestException as e:
        # The exception text embeds the request URL, which carries api_key;
        # strip the key so it never ends up in printed output.
        detail = str(e).replace(key, '<redacted>') if key else str(e)
        print(f"Request failed for series_id '{series_id}': {detail}")
        return 'Error'
    finally:
        # Throttle: pause after every network attempt.
        time.sleep(0.25)

def get_series_cast(series_id):
    """Return the cast names from a TV series' season 1 credits.

    Only the ``season/1/credits`` endpoint is queried, so a series for which
    that endpoint answers 404 is reported as ``'Error'``.

    Args:
        series_id: TMDB series id, or one of the sentinel values
            (``'Not Available'``, ``'Error'``, ``None``) meaning no id is known.

    Returns:
        Comma-separated cast names in the order the API lists them,
        ``'Not Available'`` when the id is unusable or no named cast member
        is returned, or ``'Error'`` when the HTTP request fails.
    """
    if series_id in ('Not Available', 'Error', None):
        return 'Not Available'
    try:
        series_id = int(series_id)
    except (ValueError, TypeError, OverflowError):
        # OverflowError covers infinite floats, which int() also rejects.
        return 'Not Available'

    url_for_series_credits = f'https://api.themoviedb.org/3/tv/{series_id}/season/1/credits'
    try:
        # Without a timeout, requests waits indefinitely on a stalled connection.
        response = requests.get(url_for_series_credits, params={'api_key': key}, timeout=10)
        response.raise_for_status()
        # `or []` also covers a payload where "cast" is present but null.
        cast = response.json().get('cast') or []
        cast_names = [member.get('name') for member in cast if member.get('name')]
        return ', '.join(cast_names) if cast_names else 'Not Available'
    except requests.RequestException as e:
        # The exception text embeds the request URL, which carries api_key;
        # strip the key so it never ends up in printed output.
        detail = str(e).replace(key, '<redacted>') if key else str(e)
        print(f"Request failed for series_id '{series_id}': {detail}")
        return 'Error'
    finally:
        # Throttle: pause after every network attempt.
        time.sleep(0.25)

# Restrict the API pass to rows where cast or director is still empty.
mask_missing = df[['cast', 'director']].isna().any(axis=1)
rows_to_process = df[mask_missing]

# One lookup bundle per content type:
# (label used in the error message, id lookup, cast lookup, director lookup).
movie_lookups = ('movie', get_movie_id, get_movie_cast, get_movie_director)
series_lookups = ('series', get_series_id, get_series_cast, get_series_director)
unusable = ('Not Available', 'Error')

for index, row in tqdm(rows_to_process.iterrows(), total=len(rows_to_process)):
    title = row.get('title')
    content_type = row.get('type')

    # Nothing to search for without both a title and a type.
    if pd.isna(title) or pd.isna(content_type):
        continue

    # Every type other than 'Movie' is routed through the series lookups.
    label, find_id, find_cast, find_director = (
        movie_lookups if content_type == 'Movie' else series_lookups
    )

    try:
        tmdb_id = find_id(title)
        if tmdb_id in unusable:
            continue

        # Only fill cells that were empty in the row as originally read;
        # sentinel results never overwrite the existing NaN.
        if pd.isna(row.get('cast')):
            cast = find_cast(tmdb_id)
            if cast not in unusable:
                df.at[index, 'cast'] = cast

        if pd.isna(row.get('director')):
            director = find_director(tmdb_id)
            if director not in unusable:
                df.at[index, 'director'] = director
    except Exception as e:
        # Best effort: report the failing row and carry on with the next one.
        print(f"Error processing {label} row {index}: {e}")

# Copy the filled-in columns back into the full dataframe by index label,
# then write the result out.
filled_columns = ['cast', 'director']
dataframe.loc[df.index, filled_columns] = df[filled_columns]
dataframe.to_csv('netflix_titles_updated.csv', index=False)


 21%|████████▎                               | 645/3107 [17:22<50:51,  1.24s/it]

Request failed for series_id '215910': 404 Client Error: Not Found for url: https://api.themoviedb.org/3/tv/215910/season/1/credits?api_key=32dbe6d07ffdac1942e8ac6462168e45


 24%|████████▉                             | 734/3107 [20:21<1:15:24,  1.91s/it]

Request failed for series_id '33265': 404 Client Error: Not Found for url: https://api.themoviedb.org/3/tv/33265/season/1/credits?api_key=32dbe6d07ffdac1942e8ac6462168e45


 29%|███████████▍                            | 890/3107 [24:54<45:52,  1.24s/it]

Request failed for series_id '13924': 404 Client Error: Not Found for url: https://api.themoviedb.org/3/tv/13924/season/1/credits?api_key=32dbe6d07ffdac1942e8ac6462168e45


 44%|█████████████████▎                     | 1377/3107 [34:59<34:50,  1.21s/it]

Request failed for series_id '261645': 404 Client Error: Not Found for url: https://api.themoviedb.org/3/tv/261645/season/1/credits?api_key=32dbe6d07ffdac1942e8ac6462168e45


 59%|██████████████████████▉                | 1832/3107 [44:00<24:36,  1.16s/it]

Request failed for series_id '2986': 404 Client Error: Not Found for url: https://api.themoviedb.org/3/tv/2986/season/1/credits?api_key=32dbe6d07ffdac1942e8ac6462168e45
Request failed for series_id '2986': 404 Client Error: Not Found for url: https://api.themoviedb.org/3/tv/2986/season/1/credits?api_key=32dbe6d07ffdac1942e8ac6462168e45


 62%|████████████████████████▏              | 1928/3107 [45:53<20:56,  1.07s/it]

Request failed for series_id '17368': 404 Client Error: Not Found for url: https://api.themoviedb.org/3/tv/17368/season/1/credits?api_key=32dbe6d07ffdac1942e8ac6462168e45


 66%|█████████████████████████▊             | 2055/3107 [48:19<20:30,  1.17s/it]

Request failed for series_id '17451': 404 Client Error: Not Found for url: https://api.themoviedb.org/3/tv/17451/season/1/credits?api_key=32dbe6d07ffdac1942e8ac6462168e45


 67%|██████████████████████████▏            | 2091/3107 [49:02<19:31,  1.15s/it]

Request failed for series_id '40529': 404 Client Error: Not Found for url: https://api.themoviedb.org/3/tv/40529/season/1/credits?api_key=32dbe6d07ffdac1942e8ac6462168e45


 68%|██████████████████████████▌            | 2115/3107 [49:30<18:04,  1.09s/it]

Request failed for series_id '28894': 404 Client Error: Not Found for url: https://api.themoviedb.org/3/tv/28894/season/1/credits?api_key=32dbe6d07ffdac1942e8ac6462168e45


 76%|█████████████████████████████▌         | 2351/3107 [54:05<12:20,  1.02it/s]

Request failed for series_id '31964': 404 Client Error: Not Found for url: https://api.themoviedb.org/3/tv/31964/season/1/credits?api_key=32dbe6d07ffdac1942e8ac6462168e45


 82%|████████████████████████████████▏      | 2561/3107 [58:03<10:16,  1.13s/it]

Request failed for series_id '32227': 404 Client Error: Not Found for url: https://api.themoviedb.org/3/tv/32227/season/1/credits?api_key=32dbe6d07ffdac1942e8ac6462168e45


 85%|█████████████████████████████████▎     | 2654/3107 [59:49<07:00,  1.08it/s]

Request failed for series_id '35353': 404 Client Error: Not Found for url: https://api.themoviedb.org/3/tv/35353/season/1/credits?api_key=32dbe6d07ffdac1942e8ac6462168e45


 88%|████████████████████████████████▍    | 2720/3107 [1:01:02<08:14,  1.28s/it]

Request failed for series_id '56584': 404 Client Error: Not Found for url: https://api.themoviedb.org/3/tv/56584/season/1/credits?api_key=32dbe6d07ffdac1942e8ac6462168e45
Request failed for series_id '56584': 404 Client Error: Not Found for url: https://api.themoviedb.org/3/tv/56584/season/1/credits?api_key=32dbe6d07ffdac1942e8ac6462168e45


 90%|█████████████████████████████████▏   | 2785/3107 [1:02:13<06:21,  1.19s/it]

Request failed for series_id '58094': 404 Client Error: Not Found for url: https://api.themoviedb.org/3/tv/58094/season/1/credits?api_key=32dbe6d07ffdac1942e8ac6462168e45


 96%|███████████████████████████████████▋ | 2998/3107 [1:06:15<02:13,  1.23s/it]

Request failed for series_id '255449': 404 Client Error: Not Found for url: https://api.themoviedb.org/3/tv/255449/season/1/credits?api_key=32dbe6d07ffdac1942e8ac6462168e45


100%|█████████████████████████████████████| 3107/3107 [1:08:29<00:00,  1.32s/it]
