In [1]:
import pandas as pd
import numpy as np
import requests
import time
import re

## Loading our API key

In [2]:
# Read the API key from a local text file so the secret is never hard-coded in
# the notebook. Surrounding whitespace is stripped because text files usually
# end with a newline, and the key is interpolated directly into request URLs
# further down -- a stray "\n" would end up inside the query string.
with open('movie-api.txt', 'r') as file:
    apikey = file.read().strip()

In [3]:
base_url = "https://api.themoviedb.org/3/movie/"
# NOTE(review): `pop_reviews` and `url` are never read in this cell; they are
# left in place only in case another cell depends on them.
pop_reviews = "movie/reviews"
url = f"{base_url}?api_key={apikey}"

# Query parameters sent with every request. The key is stripped defensively
# (a key read from a file often carries a trailing newline) and handed to
# requests via `params` so the query string is built and encoded for us.
API_PARAMS = {'api_key': apikey.strip()}

# Seconds to wait for a response. requests has no default timeout, so without
# this a single stalled connection would hang the whole loop indefinitely.
REQUEST_TIMEOUT = 10


def _get_json(session, endpoint):
    """Fetch ``endpoint`` and return its decoded JSON body.

    Parameters:
        session: the ``requests.Session`` used to issue the GET.
        endpoint: full URL to request (without the query string).

    Returns:
        The decoded JSON payload, or ``None`` when the response status is
        not OK.

    Raises:
        requests.RequestException: on connection errors or timeouts.
        ValueError: if an OK response does not contain valid JSON.
    """
    response = session.get(endpoint, params=API_PARAMS, timeout=REQUEST_TIMEOUT)
    return response.json() if response.ok else None


def _fetch_movie_details(session, movie_id):
    """Build one flat record (details plus combined reviews) for ``movie_id``.

    Parameters:
        session: the ``requests.Session`` used for both requests.
        movie_id: numeric id appended to ``base_url``.

    Returns:
        A dict with one entry per output column, or ``None`` when the details
        request does not return an OK status (the id is then skipped).

    Raises:
        requests.RequestException, ValueError: propagated from ``_get_json``.
    """
    movie = _get_json(session, f"{base_url}{movie_id}")
    if movie is None:
        return None

    # `or []` also covers a key that is present but null.
    companies = movie.get('production_companies') or []
    genres = movie.get('genres') or []

    movie_details = {
        'id': movie.get('id'),
        'title': movie.get('title'),
        'release_date': movie.get('release_date'),
        'revenue': movie.get('revenue'),
        'budget': movie.get('budget'),
        'production_companies': ', '.join(company['name'] for company in companies),
        'genres': ', '.join(genre['name'] for genre in genres),
        'popularity': movie.get('popularity'),
        'vote_average': movie.get('vote_average'),
        'vote_count': movie.get('vote_count'),
        'overview': movie.get('overview')
    }

    # NOTE(review): only a single reviews response is read per movie; if the
    # endpoint paginates, later pages are ignored -- confirm this is intended.
    reviews_data = _get_json(session, f"{base_url}{movie_id}/reviews")
    reviews = (reviews_data or {}).get('results') or []

    # Combine all review bodies into one long string. Reviews with a missing
    # or empty 'content' are skipped instead of raising KeyError/TypeError.
    review_texts = [review['content'] for review in reviews if review.get('content')]
    movie_details['reviews'] = ' '.join(review_texts) if review_texts else 'No reviews available'
    return movie_details


movie_data = []
# Ids whose requests failed at the network/decoding level; kept so they can be
# inspected or retried instead of aborting the whole (long) run.
failed_ids = []

# A single Session reuses the underlying connection across the many requests.
with requests.Session() as session:
    for movie_id in range(1, 6000):  # Adjust this range as needed
        try:
            movie_details = _fetch_movie_details(session, movie_id)
        except (requests.RequestException, ValueError):
            failed_ids.append(movie_id)
        else:
            if movie_details is not None:
                movie_data.append(movie_details)

        # Add a delay to prevent overwhelming the API
        time.sleep(.1)

if failed_ids:
    print(f"{len(failed_ids)} movie ids could not be fetched; see `failed_ids`")

# Convert to data frame
movies_df = pd.DataFrame(movie_data)
movies_df.head(20)

Unnamed: 0,id,title,release_date,revenue,budget,production_companies,genres,popularity,vote_average,vote_count,overview,reviews
0,2,Ariel,1988-10-21,0,0,Villealfa Filmproductions,"Comedy, Drama, Romance, Crime",26.263,7.118,331,After the coal mine he works at closes and his...,No reviews available
1,3,Shadows in Paradise,1986-10-17,0,0,Villealfa Filmproductions,"Comedy, Drama, Romance",6.004,7.301,392,"Nikander, a rubbish collector and would-be ent...","Released in 1986, Aki Kaurismaki's <i>Varjoja ..."
2,5,Four Rooms,1995-12-09,4257354,4000000,"Miramax, A Band Apart",Comedy,15.3,5.846,2618,It's Ted the Bellhop's first night on the job....,No reviews available
3,6,Judgment Night,1993-10-15,12136938,21000000,"Largo Entertainment, JVC, Universal Pictures","Action, Crime, Thriller",22.862,6.5,331,"Four young friends, while taking a shortcut en...",No reviews available
4,8,Life in Loops (A Megacities RMX),2006-01-01,0,42000,inLoops,Documentary,2.925,7.3,28,Timo Novotny labels his new project an experim...,No reviews available
5,9,Sunday in August,2004-09-02,0,0,,Drama,3.174,7.1,27,,No reviews available
6,11,Star Wars,1977-05-25,775398007,11000000,"Lucasfilm Ltd., 20th Century Fox","Adventure, Action, Science Fiction",89.46,8.204,20446,Princess Leia is captured and held hostage by ...,"(As I'm writing this review, Darth Vader's the..."
7,12,Finding Nemo,2003-05-30,940335536,94000000,Pixar,"Animation, Family",108.046,7.819,19086,"Nemo, an adventurous young clownfish, is unexp...",One of the best animated films I have ever see...
8,13,Forrest Gump,1994-06-23,677387716,55000000,"Paramount Pictures, The Steve Tisch Company, W...","Comedy, Drama, Romance",83.259,8.473,27230,A man with a low IQ has accomplished great thi...,Best movie ever. This is the best movie ever! ...
9,14,American Beauty,1999-09-15,356296601,15000000,"DreamWorks Pictures, Jinks/Cohen Company",Drama,37.161,8.018,11916,"Lester Burnham, a depressed suburban father in...",The film American Beauty to me is a film about...


In [4]:
# Write the collected movie table to 'movies.csv', leaving the row index out
# of the file. Needs `movies_df` to already exist in the kernel, i.e. the cell
# that builds it must have been run first.
movies_df.to_csv(path_or_buf='movies.csv', index=False)

NameError: name 'movies_df' is not defined