In [4]:
#open api 
#import API key 
import requests
import json
import csv
with open('../api_key.json') as f:
    keys = json.load(f)
TMDB_KEY = keys['TMDB_key']

In [None]:
# Input and output file names
input_file = "../data/input_data/file_1.csv"         # File containing the list of movies
output_file = "../data/output_data/OMDB/movies_T.csv"  # New file with movie details

# Start index
START_INDEX = 0  # Process from movie #1001 onward
BATCH_SIZE = 5000   # Number of movies to process in each run

# Read all movie titles from the input CSV file
with open(input_file, mode="r", encoding="utf-8") as infile:
    reader = csv.reader(infile)
    movie_titles = [row[0] for row in reader]  # Assuming titles are in the first column

# Get the subset of movies to process in this batch
movies_to_process = movie_titles[START_INDEX:START_INDEX + BATCH_SIZE]

# Open the output file in append mode to continue adding data
with open(output_file, mode="a", newline="", encoding="utf-8") as outfile:
    writer = csv.writer(outfile)

    # If the file is new, add headers
    if START_INDEX == 0:
        writer.writerow([
            "Title", "Tagline", "Overview", "Budget", "Revenue", "TMDB Rating", "Vote Count", "Production Companies"
        ])

    for movie_title in movies_to_process:
        # Search for the movie by title
        search_url = "https://api.themoviedb.org/3/search/movie"
        params = {"api_key": TMDB_KEY, "query": movie_title}
        search_response = requests.get(search_url, params=params)

        if search_response.status_code == 200 and search_response.json()["results"]:
            movie_id = search_response.json()["results"][0]["id"]

            # Get detailed info
            details_url = f"https://api.themoviedb.org/3/movie/{movie_id}"
            details_params = {"api_key": TMDB_KEY}
            details_response = requests.get(details_url, params=details_params)

            if details_response.status_code == 200:
                data = details_response.json()

                title = data.get("title", "N/A")
                tagline = data.get("tagline", "N/A")
                overview = data.get("overview", "N/A")
                budget = data.get("budget", "N/A")
                revenue = data.get("revenue", "N/A")
                vote_average = data.get("vote_average", "N/A")
                vote_count = data.get("vote_count", "N/A")
                production_companies = ", ".join([c["name"] for c in data.get("production_companies", [])])

                writer.writerow([
                    title,
                    tagline,
                    overview,
                    budget,
                    revenue,
                    vote_average,
                    vote_count,
                    production_companies
                ])

                print(f"✓ Processed: {movie_title}")
            else:
                print(f"✗ Failed to get details for: {movie_title}")
        else:
            print(f"✗ Movie not found: {movie_title}")

print(f"✅ Done. Processed {len(movies_to_process)} movies.")

✓ Processed: Title
✓ Processed: Four Brothers
✓ Processed: The Adventures of Tintin
✓ Processed: Green Lantern
✓ Processed: Top Secret!
✓ Processed: The Beastmaster
✓ Processed: Kong: Skull Island
✓ Processed: Tag
✓ Processed: Big Fat Liar
✓ Processed: Upgraded
✅ Done. Processed 10 movies.
