# In [None]:
import csv
import os
import time

import requests

# API key sent as the "api_key" query parameter on every request.
# Prefer the TMDB_API_KEY environment variable so the credential does not have
# to live in source; the literal is kept only as a backward-compatible fallback.
# NOTE(review): this key is committed in plain text -- rotate it and remove the
# fallback once the environment variable is set everywhere this script runs.
api_key = os.environ.get("TMDB_API_KEY", "eb9da568dab30f94af57caf684eb3f33")

# Output files written by save_movies_to_csv / save_reviews_to_csv.
# NOTE(review): the file name suggests 10,000 movies, but only `total_pages`
# pages are requested -- presumably far fewer rows; confirm the intended size.
movies_csv = "movies_10000.csv"
reviews_csv = "movie_reviews.csv"

# Number of discover pages fetch_movies() walks through (pages 1..total_pages).
total_pages = 10

# One shared HTTP session for all requests; asks the server for JSON responses.
session = requests.Session()
session.headers.update({"Accept": "application/json"})

def fetch_movies():
    """Fetch movies from the discover endpoint, pages 1..``total_pages``.

    Each page is requested with the shared ``session``, sorted by descending
    popularity. A page that fails (network error, HTTP error status, or a body
    that is not valid JSON) is reported on stdout and skipped; the remaining
    pages are still fetched.

    Returns:
        A ``(movies, movie_ids)`` tuple. ``movies`` is a list of dicts with the
        keys ``id``, ``title``, ``release_date``, ``popularity``,
        ``vote_average``, ``genre_ids`` and ``overview``. ``movie_ids`` lists
        the IDs of those movies, omitting any result that carried no ``id``.
    """
    movies = []
    movie_ids = []

    for page in range(1, total_pages + 1):
        params = {
            "api_key": api_key,
            "language": "en-US",
            "sort_by": "popularity.desc",
            "page": page,
        }

        # Keep the try body to the calls that can fail for I/O reasons, so that
        # programming errors in the row-building code below are not swallowed.
        try:
            res = session.get(
                "https://api.themoviedb.org/3/discover/movie",
                params=params,
                timeout=10,
            )
            res.raise_for_status()
            data = res.json()
        except (requests.RequestException, ValueError) as e:
            # ValueError covers a response body that cannot be decoded as JSON.
            print(f"Error on page {page}: {e}")
        else:
            for movie in data.get("results", []):
                movie_id = movie.get("id")
                movies.append({
                    "id": movie_id,
                    "title": movie.get("title"),
                    "release_date": movie.get("release_date"),
                    "popularity": movie.get("popularity"),
                    "vote_average": movie.get("vote_average"),
                    "genre_ids": movie.get("genre_ids"),
                    "overview": movie.get("overview"),
                })
                # A missing id would later turn into a request for
                # ".../movie/None/reviews", so keep it out of the ID list.
                if movie_id is not None:
                    movie_ids.append(movie_id)

            if page % 10 == 0:
                print(f"Page {page} done ({len(movies)} movies)")

        if page % 10 == 0:
            # Pause every 10 pages to respect rate limit -- also after a failed
            # page, so an error does not cancel the throttle.
            time.sleep(1)

    return movies, movie_ids

def save_movies_to_csv(movies):
    """Write the movie rows to the file named by ``movies_csv``.

    Args:
        movies: List of dicts, one per movie. The keys of the first dict are
            used as the CSV header.

    When ``movies`` is empty (for example because every page fetch failed)
    nothing is written and a message is printed instead, mirroring
    ``save_reviews_to_csv``.
    """
    if not movies:
        # Without this guard movies[0] raises IndexError on an empty list.
        print("No movies to save.")
        return

    with open(movies_csv, "w", newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=list(movies[0].keys()))
        writer.writeheader()
        writer.writerows(movies)
    print(f"Saved {len(movies)} movies to {movies_csv}")

def fetch_reviews(movie_ids, limit_per_movie=5):
    """Fetch up to ``limit_per_movie`` reviews for each movie ID.

    Only page 1 of each movie's reviews endpoint is requested. A movie whose
    request fails (network error, HTTP error status, or a body that is not
    valid JSON) is reported on stdout and skipped.

    Args:
        movie_ids: Sequence of movie IDs to look up.
        limit_per_movie: Maximum number of reviews kept per movie.

    Returns:
        A list of dicts with the keys ``movie_id``, ``author``, ``content``
        and ``created_at``.
    """
    reviews = []
    # Count from 1 so the progress line reports how many movies have been
    # processed, and so the first pause comes after 20 movies, not after 1.
    for i, movie_id in enumerate(movie_ids, start=1):
        params = {
            "api_key": api_key,
            "language": "en-US",
            "page": 1,
        }
        url = f"https://api.themoviedb.org/3/movie/{movie_id}/reviews"

        # Keep the try body to the calls that can fail for I/O reasons, so that
        # programming errors in the row-building code below are not swallowed.
        try:
            res = session.get(url, params=params, timeout=10)
            res.raise_for_status()
            data = res.json()
        except (requests.RequestException, ValueError) as e:
            # ValueError covers a response body that cannot be decoded as JSON.
            print(f"Review fetch error for movie {movie_id}: {e}")
        else:
            for review in data.get("results", [])[:limit_per_movie]:
                reviews.append({
                    "movie_id": movie_id,
                    "author": review.get("author"),
                    "content": review.get("content"),
                    "created_at": review.get("created_at"),
                })

        if i % 20 == 0:
            # Report progress and throttle every 20 movies, whether or not the
            # latest request succeeded.
            print(f"{i}/{len(movie_ids)} reviews fetched...")
            time.sleep(1)

    return reviews

def save_reviews_to_csv(reviews):
    """Write the review rows to the file named by ``reviews_csv``.

    The keys of the first review dict become the CSV header. When ``reviews``
    is empty no file is written and a message is printed instead.
    """
    # Guard clause: nothing to write.
    if not reviews:
        print("No reviews to save.")
        return

    with open(reviews_csv, "w", newline='', encoding='utf-8') as out_file:
        csv_writer = csv.DictWriter(out_file, fieldnames=reviews[0].keys())
        csv_writer.writeheader()
        for row in reviews:
            csv_writer.writerow(row)

    print(f" Saved {len(reviews)} reviews to {reviews_csv}")

# Script entry point: fetch the movie list, save it, then fetch and save the
# reviews for those movies. Runs only when the file is executed directly.
if __name__ == "__main__":
    print("Fetching movies...")
    # fetch_movies() returns the movie rows together with the list of their IDs.
    movies, movie_ids = fetch_movies()
    save_movies_to_csv(movies)

    print("\nFetching reviews...")
    # No limit is passed, so the default limit_per_movie (5) applies.
    reviews = fetch_reviews(movie_ids)
    save_reviews_to_csv(reviews)

    print("\nDone.")
