In [6]:
import os
import requests
import json
import time
from dotenv import load_dotenv
import pandas as pd

In [None]:
# Step 1: Fetch Movie Data from API

# Read the .env file so the credentials below are available in the environment
load_dotenv()
API_KEY = os.environ.get("API_KEY")
BASE_URL = os.environ.get("BASE_URL")

# IDs of the movies to request, one API call per ID.
# NOTE(review): 0 looks like a deliberately invalid ID used to exercise the
# not-found path — confirm before removing it.
movie_ids = [
    0,
    299534,
    19995,
    140607,
    299536,
    597,
    135397,
    420818,
    24428,
    168259,
    99861,
    284054,
    12445,
    181808,
    330457,
    351286,
    109445,
    321612,
    260513,
]

# Column order shared by success and failure rows so the frame schema is stable,
# even when no rows were collected.
_MOVIE_COLUMNS = ["movie_id", "title", "release_date", "budget", "revenue", "status"]


def _failure_record(movie_id, status):
    """Build the placeholder row stored for an ID whose fetch did not succeed."""
    return {
        "movie_id": movie_id,
        "title": None,
        "release_date": None,
        "budget": 0,
        "revenue": 0,
        "status": status,
    }


def fetch_movie_batch(id_list, timeout=10, pause=0.2):
    """Fetch one record per movie ID from the API and return them as a DataFrame.

    Each ID is requested from ``f"{BASE_URL}{movie_id}"`` with the module-level
    ``API_KEY``. Every requested ID yields exactly one row, so failures stay
    visible in the dataset instead of silently disappearing:

    * HTTP 200          -> row with the payload fields, ``status == "Success"``
    * HTTP 404          -> placeholder row, ``status == "Not Found"``
    * other HTTP status -> placeholder row, ``status == "HTTP <code>"``
    * raised exception  -> placeholder row, ``status == "Request Error"``

    Parameters
    ----------
    id_list : sequence of int
        Movie IDs to request. An empty sequence returns an empty frame that
        still carries the expected columns.
    timeout : float, default 10
        Seconds to wait on each request before giving up. Without a timeout,
        ``requests.get`` can block indefinitely on a stalled connection.
    pause : float, default 0.2
        Seconds to sleep after each request as simple rate limiting.

    Returns
    -------
    pandas.DataFrame
        Columns: movie_id, title, release_date, budget, revenue, status.
    """
    print(f"Starting extraction for {len(id_list)} records...")

    # Nothing to fetch: return the empty schema so downstream column access works.
    if len(id_list) == 0:
        return pd.DataFrame(columns=_MOVIE_COLUMNS)

    # Query parameters are identical for every request, so build them once.
    params = {
        "api_key": API_KEY,
        "language": "en-US",
    }

    # Rows are accumulated as dicts and converted to a DataFrame once at the end.
    data_buffer = []

    for movie_id in id_list:
        # Construct the specific URL for this resource
        # (assumes BASE_URL already ends with a path separator — TODO confirm).
        target_url = f"{BASE_URL}{movie_id}"

        try:
            response = requests.get(target_url, params=params, timeout=timeout)

            if response.status_code == 200:
                payload = response.json()

                # Select the specific columns we care about
                record = {
                    "movie_id": payload.get("id"),
                    "title": payload.get("title"),
                    "release_date": pd.to_datetime(payload.get("release_date"), errors='coerce'),
                    "budget": payload.get("budget"),
                    "revenue": payload.get("revenue"),
                    "status": "Success"
                }
                data_buffer.append(record)
                print(f"movie_ids: {payload.get('id')}")

            elif response.status_code == 404:
                print(f"Failed: ID {movie_id} not found.")
                # We log the failure in the dataset for auditability
                data_buffer.append(_failure_record(movie_id, "Not Found"))

            else:
                print(f"Error: Status {response.status_code} for ID {movie_id}")
                # Keep an audit row for unexpected statuses as well
                data_buffer.append(_failure_record(movie_id, f"HTTP {response.status_code}"))

        except Exception as e:
            # Best-effort batch: one bad ID must not abort the rest.
            # requests/urllib3 error text can contain the full request URL,
            # query string included, so mask the key before it reaches the
            # notebook output.
            message = str(e)
            if API_KEY:
                message = message.replace(API_KEY, "***")
            print(f"Exception for ID {movie_id}: {message}")
            data_buffer.append(_failure_record(movie_id, "Request Error"))

        # Rate limiting: pause between requests to avoid overwhelming the API
        time.sleep(pause)

    # Convert the list of dictionaries into a Pandas DataFrame
    return pd.DataFrame(data_buffer, columns=_MOVIE_COLUMNS)

if __name__ == "__main__":
    # Only hit the API when both settings were actually loaded from .env
    if API_KEY and BASE_URL:
        # Run the extraction for the configured IDs
        movie_df = fetch_movie_batch(movie_ids)
    else:
        print("System Error: API_KEY or BASE_URL is missing from .env configuration.")

       

Starting extraction for 19 records...
Failed: ID 0 not found.
movie_ids: 299534
movie_ids: 19995
movie_ids: 140607
movie_ids: 299536
movie_ids: 597
movie_ids: 135397
movie_ids: 420818
movie_ids: 24428
movie_ids: 168259
movie_ids: 99861
movie_ids: 284054
movie_ids: 12445
movie_ids: 181808
movie_ids: 330457
movie_ids: 351286
movie_ids: 109445
movie_ids: 321612
movie_ids: 260513

--- DataFrame Summary ---
   movie_id                         title release_date     budget     revenue  \
0         0                          None          NaT          0           0   
1    299534             Avengers: Endgame   2019-04-24  356000000  2799439100   
2     19995                        Avatar   2009-12-15  237000000  2923706026   
3    140607  Star Wars: The Force Awakens   2015-12-15  245000000  2068223624   
4    299536        Avengers: Infinity War   2018-04-25  300000000  2052415039   
5       597                       Titanic   1997-11-18  200000000  2264162353   
6    135397                