In [1]:
# dependencies
import requests
import json
import pandas as pd
from pprint import pprint
from api_keys import api_key

In [2]:
# parameters
base_url = "https://api.themoviedb.org/3/discover/movie"
region = 'US'
years = range(2000,2024,1)
pages = range(1,100,1)

# a movie is kept only if it is strictly above both of these thresholds
min_popularity = 20
min_vote_count = 100
# stop scanning a page once this many results miss the thresholds (half a page)
max_bad_results = 10
# skip the rest of a year after this many consecutive pages with nothing kept
max_bad_pages = 2
# seconds to wait for the API before giving up on a single request
request_timeout = 10

# empty list for movie ID, name, popularity, avg rating, # of votes, release year
# (parallel lists: index i in each list describes the same movie)
movie_ids = []
movie_names = []
movie_pops = []
movie_vote_avgs = []
movie_vote_counts = []
movie_release_yrs = []

for year in years:
    # number of consecutive pages for this year that yielded no kept movie
    bad_page = 0
    for page in pages:
        # query parameters for this (year, page) request
        params = {
            'api_key':api_key,
            'include_adult':False,
            'region':region,
            'primary_release_year':year,
            'sort_by':'popularity.desc',
            'page':page
        }

        # becomes True once at least one movie on this page passes the filter
        movie_found = False

        try:
            # the request lives inside the try so that network, HTTP and JSON
            # failures are reported and counted as a bad page instead of
            # aborting the whole scrape
            http_response = requests.get(base_url, params=params, timeout=request_timeout)
            http_response.raise_for_status()
            response = http_response.json()

            # counter for movies that don't fit the requirements so it can break earlier
            bad_result = 0
            for result in response['results']:
                popularity = result['popularity']
                vote_count = result['vote_count']
                if popularity > min_popularity and vote_count > min_vote_count:
                    # read every remaining field BEFORE appending: if a key is
                    # missing, nothing is appended and the six parallel lists
                    # stay the same length
                    movie_id = result['id']
                    title = result['title']
                    vote_avg = result['vote_average']

                    movie_found = True
                    movie_ids.append(movie_id)
                    movie_names.append(title)
                    movie_pops.append(popularity)
                    movie_vote_avgs.append(vote_avg)
                    movie_vote_counts.append(vote_count)
                    movie_release_yrs.append(year)
                else:
                    # too many misses on this page: move on to the next page
                    bad_result += 1
                    if bad_result >= max_bad_results:
                        break
        except (requests.RequestException, ValueError, KeyError, TypeError):
            # RequestException: connection/timeout/HTTP error; ValueError: body
            # was not valid JSON; KeyError/TypeError: payload missing expected
            # fields. KeyboardInterrupt is deliberately NOT caught so the run
            # can still be interrupted.
            print(f'Had an issue getting data from {year}, page: {page}.')

        # report the page outcome and track consecutive pages without data
        if movie_found:
            bad_page = 0
            print(f'Got data from year: {year}, page: {page}.')
        else:
            bad_page += 1
            print(f'Not enough useful data on year: {year}, page: {page}.')
        # skip to next year if multiple pages in a row with NO useful info
        if bad_page >= max_bad_pages:
            print(f'Multiple pages w/o useful data, skipping the rest of {year}.')
            break

Got data from year: 2000, page: 1.
Got data from year: 2000, page: 2.
Got data from year: 2000, page: 3.
Got data from year: 2000, page: 4.
Not enough useful data on year: 2000, page: 5.
Not enough useful data on year: 2000, page: 6.
Multiple pages w/o useful data, skipping the rest of 2000.
Got data from year: 2001, page: 1.
Got data from year: 2001, page: 2.
Got data from year: 2001, page: 3.
Got data from year: 2001, page: 4.
Not enough useful data on year: 2001, page: 5.
Not enough useful data on year: 2001, page: 6.
Multiple pages w/o useful data, skipping the rest of 2001.
Got data from year: 2002, page: 1.
Got data from year: 2002, page: 2.
Got data from year: 2002, page: 3.
Got data from year: 2002, page: 4.
Not enough useful data on year: 2002, page: 5.
Not enough useful data on year: 2002, page: 6.
Multiple pages w/o useful data, skipping the rest of 2002.
Got data from year: 2003, page: 1.
Got data from year: 2003, page: 2.
Got data from year: 2003, page: 3.
Got data from ye

Got data from year: 2021, page: 6.
Got data from year: 2021, page: 7.
Got data from year: 2021, page: 8.
Got data from year: 2021, page: 9.
Got data from year: 2021, page: 10.
Got data from year: 2021, page: 11.
Got data from year: 2021, page: 12.
Got data from year: 2021, page: 13.
Got data from year: 2021, page: 14.
Not enough useful data on year: 2021, page: 15.
Not enough useful data on year: 2021, page: 16.
Multiple pages w/o useful data, skipping the rest of 2021.
Got data from year: 2022, page: 1.
Got data from year: 2022, page: 2.
Got data from year: 2022, page: 3.
Got data from year: 2022, page: 4.
Got data from year: 2022, page: 5.
Got data from year: 2022, page: 6.
Got data from year: 2022, page: 7.
Got data from year: 2022, page: 8.
Got data from year: 2022, page: 9.
Got data from year: 2022, page: 10.
Got data from year: 2022, page: 11.
Got data from year: 2022, page: 12.
Got data from year: 2022, page: 13.
Got data from year: 2022, page: 14.
Got data from year: 2022, page

In [3]:
# build the table from the parallel lists gathered above; dict order fixes the column order
columns_by_label = {
    'Movie ID': movie_ids,
    'Title': movie_names,
    'Release Year': movie_release_yrs,
    'Popularity Score': movie_pops,
    'Average Rating': movie_vote_avgs,
    'Number of Ratings': movie_vote_counts
}

# index the frame by movie ID
movie_df = pd.DataFrame(columns_by_label).set_index('Movie ID')

# write the result to disk (the index is written as the first CSV column)
output_csv = '../Raw Data/2000s_US_IDs.csv'
movie_df.to_csv(output_csv)

# last expression of the cell: show the frame
movie_df

Unnamed: 0_level_0,Title,Release Year,Popularity Score,Average Rating,Number of Ratings
Movie ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
9600,Big Momma's House,2000,95.366,5.8,2095
11688,The Emperor's New Groove,2000,95.121,7.5,6003
4247,Scary Movie,2000,90.579,6.3,6240
98,Gladiator,2000,74.966,8.2,16701
10867,Malena,2000,74.228,7.4,1961
...,...,...,...,...,...
881164,Boston Strangler,2023,27.471,6.7,434
722149,Luther: The Fallen Sun,2023,27.272,6.8,672
844417,Marlowe,2023,23.636,6.2,160
1068141,Mighty Morphin Power Rangers: Once & Always,2023,21.831,6.6,196


In [None]:
# code by Peter Solis