In [1]:
import pandas as pd
import numpy as np
import requests
import time
import re

## Getting our API key

In [2]:
# Load the movie API key from a local text file instead of hardcoding it here.
# Surrounding whitespace is stripped because text editors usually end a file
# with a newline, which would otherwise become part of the key sent to the API.
with open('movie-api.txt', 'r') as file:
    apikey = file.read().strip()

In [None]:
base_url = "https://api.themoviedb.org/3/movie/"
# NOTE: `pop_reviews` and `url` are not used in this cell; they are kept only
# in case other cells of the notebook reference them.
pop_reviews = "movie/reviews"
url = f"{base_url}?api_key={apikey}"

# Tunable settings for the download loop.
REQUEST_TIMEOUT_SECONDS = 10   # give up on a stalled request instead of hanging forever
REQUEST_DELAY_SECONDS = 1      # pause after each movie id to avoid overwhelming the API
NO_REVIEWS_PLACEHOLDER = 'No reviews available'

# The key is sent as a query parameter on every request. It is stripped here as
# well so a trailing newline read from the key file cannot corrupt the request.
auth_params = {'api_key': apikey.strip()}

movie_data = []
# (endpoint, exception type) for every request that failed at the network or
# JSON-decoding level. Endpoints are recorded without the API key.
request_errors = []


def _get_json(session, endpoint):
    """Fetch `endpoint` and return its decoded JSON body.

    Parameters
    ----------
    session : requests.Session
        Session used to issue the GET request (reuses the connection).
    endpoint : str
        Full URL to fetch, without the API key (added via `auth_params`).

    Returns
    -------
    The decoded JSON payload, or None when the response status is not OK
    (e.g. an id with no movie), the request fails (connection error,
    timeout), or the body is not valid JSON. The last two cases are also
    recorded in `request_errors` so they are not silently lost.
    """
    try:
        response = session.get(
            endpoint, params=auth_params, timeout=REQUEST_TIMEOUT_SECONDS
        )
        return response.json() if response.ok else None
    except (requests.RequestException, ValueError) as exc:
        # Store only the exception type: the full message may contain the
        # request URL, which includes the API key.
        request_errors.append((endpoint, type(exc).__name__))
        return None


def _join_names(items):
    """Return the non-empty 'name' values of a list of dicts, comma-separated."""
    return ', '.join(item['name'] for item in items or [] if item.get('name'))


with requests.Session() as session:
    for movie_id in range(1, 5000):  # Adjust this range as needed
        # Get movie details; ids without a movie simply yield None and are skipped
        movie = _get_json(session, f"{base_url}{movie_id}")

        if movie is not None:
            # Prepare movie details
            movie_details = {
                'id': movie.get('id'),
                'title': movie.get('title'),
                'release_date': movie.get('release_date'),
                'revenue': movie.get('revenue'),
                'budget': movie.get('budget'),
                'production_companies': _join_names(movie.get('production_companies')),
                'genres': _join_names(movie.get('genres')),
                'popularity': movie.get('popularity'),
                'vote_average': movie.get('vote_average'),
                'vote_count': movie.get('vote_count'),
                'overview': movie.get('overview')
            }

            # Get movie reviews; a failed reviews request must not discard the
            # movie itself, so it falls back to an empty payload.
            reviews_data = _get_json(session, f"{base_url}{movie_id}/reviews") or {}

            # Combine all returned reviews into one long review
            review_texts = [
                review['content']
                for review in reviews_data.get('results') or []
                if review.get('content')
            ]
            movie_details['reviews'] = (
                ' '.join(review_texts) if review_texts else NO_REVIEWS_PLACEHOLDER
            )

            movie_data.append(movie_details)

        # Add a delay to prevent overwhelming the API
        time.sleep(REQUEST_DELAY_SECONDS)

if request_errors:
    print(f"{len(request_errors)} request(s) failed and were skipped; "
          "inspect `request_errors` for details.")

# Convert to data frame
movies_df = pd.DataFrame(movie_data)
movies_df.head(20)

In [None]:
# Persist the collected movies to disk; the positional row index is left out
# so the CSV holds only the movie columns.
movies_df.to_csv(path_or_buf='movies.csv', index=False)