In [28]:
# dependencies
import requests
import json
import pandas as pd
from pprint import pprint
from api_keys import api_key

In [36]:
# parameters
base_url = "https://api.themoviedb.org/3/discover/movie"
region = 'US'
years = range(2000,2024,1)
pages = range(1,100,1)

# filtering / early-exit thresholds (kept next to the other parameters so they are easy to tune)
min_popularity = 20    # a movie is kept only if its popularity is strictly above this
min_vote_count = 100   # ...and its vote count is strictly above this
max_bad_results = 10   # stop reading a page once this many results were rejected
max_bad_pages = 2      # stop reading a year after this many consecutive pages with no kept movie
request_timeout = 10   # seconds to wait on the API before treating the page as failed

# empty list for movie ID, name, popularity, avg rating, # of votes
# (parallel lists: index i of every list describes the same movie)
movie_ids = []
movie_names = []
movie_pops = []
movie_vote_avgs = []
movie_vote_counts = []

for year in years:
    # set variable to keep track of consecutive pages without results
    bad_page = 0
    for page in pages:
        # finish setting up parameters
        params = {
            'api_key':api_key,
            'include_adult':False,
            'region':region,
            'primary_release_year':year,
            'sort_by':'popularity.desc',
            'page':page
        }

        # set variable to see if there's useful data on the page
        movie_found = False

        try:
            # get data for given year; the request lives inside the try so a
            # network error, timeout, HTTP error status or undecodable body is
            # handled like any other unusable page instead of aborting the crawl
            http_response = requests.get(base_url, params=params, timeout=request_timeout)
            http_response.raise_for_status()
            response = http_response.json()

            # counter for movies that don't fit the requirements so it can break earlier
            bad_result = 0
            # loop through data
            for result in response['results']:
                popularity = result['popularity']
                vote_count = result['vote_count']
                if popularity > min_popularity and vote_count > min_vote_count:
                    # read every remaining field BEFORE appending anything: if a
                    # key is missing, no list is touched and the parallel lists
                    # stay the same length
                    movie_id = result['id']
                    title = result['title']
                    vote_average = result['vote_average']

                    movie_found = True
                    movie_ids.append(movie_id)
                    movie_names.append(title)
                    movie_pops.append(popularity)
                    movie_vote_avgs.append(vote_average)
                    movie_vote_counts.append(vote_count)
                else:
                    # if too many results on the page don't fit requirements, move on
                    bad_result += 1
                    if bad_result >= max_bad_results:
                        break
        except (requests.exceptions.RequestException, ValueError, KeyError, TypeError) as err:
            # Only the exception type is printed: the text of requests' errors
            # can contain the full request URL, which includes the API key.
            # KeyboardInterrupt is deliberately not caught so the cell can be stopped.
            print(f'Had an issue getting data from {year}, page: {page}. ({type(err).__name__})')

        # print if no data was found on the page
        if movie_found:
            bad_page = 0
            print(f'Got data from year: {year}, page: {page}.')
        else:
            bad_page += 1
            print(f'Not enough useful data on year: {year}, page: {page}.')
        # skip to next year if multiple pages in a row with NO useful info
        if bad_page >= max_bad_pages:
            print(f'Multiple pages w/o useful data, skipping the rest of {year}.')
            break

Got data from year: 2000, page: 1.
Got data from year: 2000, page: 2.
Got data from year: 2000, page: 3.
Got data from year: 2000, page: 4.
Got data from year: 2000, page: 5.
Not enough useful data on year: 2000, page: 6.
Not enough useful data on year: 2000, page: 7.
Multiple pages w/o useful data, skipping the rest of 2000.
Got data from year: 2001, page: 1.
Got data from year: 2001, page: 2.
Got data from year: 2001, page: 3.
Got data from year: 2001, page: 4.
Got data from year: 2001, page: 5.
Not enough useful data on year: 2001, page: 6.
Not enough useful data on year: 2001, page: 7.
Multiple pages w/o useful data, skipping the rest of 2001.
Got data from year: 2002, page: 1.
Got data from year: 2002, page: 2.
Got data from year: 2002, page: 3.
Got data from year: 2002, page: 4.
Got data from year: 2002, page: 5.
Not enough useful data on year: 2002, page: 6.
Not enough useful data on year: 2002, page: 7.
Multiple pages w/o useful data, skipping the rest of 2002.
Got data from ye

Got data from year: 2018, page: 2.
Got data from year: 2018, page: 3.
Got data from year: 2018, page: 4.
Got data from year: 2018, page: 5.
Got data from year: 2018, page: 6.
Got data from year: 2018, page: 7.
Got data from year: 2018, page: 8.
Got data from year: 2018, page: 9.
Got data from year: 2018, page: 10.
Got data from year: 2018, page: 11.
Got data from year: 2018, page: 12.
Got data from year: 2018, page: 13.
Not enough useful data on year: 2018, page: 14.
Got data from year: 2018, page: 15.
Not enough useful data on year: 2018, page: 16.
Not enough useful data on year: 2018, page: 17.
Multiple pages w/o useful data, skipping the rest of 2018.
Got data from year: 2019, page: 1.
Got data from year: 2019, page: 2.
Got data from year: 2019, page: 3.
Got data from year: 2019, page: 4.
Got data from year: 2019, page: 5.
Got data from year: 2019, page: 6.
Got data from year: 2019, page: 7.
Got data from year: 2019, page: 8.
Got data from year: 2019, page: 9.
Got data from year: 20

In [38]:
# Build the results table: one column per collected list, in this order.
column_labels = ['ID', 'Name', 'Popularity Score', 'Average Rating', 'Number of Ratings']
column_values = [movie_ids, movie_names, movie_pops, movie_vote_avgs, movie_vote_counts]
movie_df = pd.DataFrame(data=dict(zip(column_labels, column_values)))

# Write the table to disk (to_csv called with its default options).
movie_df.to_csv('../Raw Data/2000s US')

# Last expression of the cell, so the notebook renders the frame.
movie_df

Unnamed: 0,ID,Name,Popularity Score,Average Rating,Number of Ratings
0,9600,Big Momma's House,95.366,5.8,2095
1,4247,Scary Movie,90.579,6.3,6240
2,10867,Malena,74.228,7.4,1961
3,11688,The Emperor's New Groove,95.121,7.5,6003
4,955,Mission: Impossible II,60.803,6.1,5980
...,...,...,...,...,...
3039,813726,A Tourist's Guide to Love,31.664,6.4,114
3040,869112,Somebody I Used to Know,30.695,6.1,115
3041,872954,The Old Way,21.762,6.0,110
3042,866413,You People,24.940,5.7,530


In [None]:
# code by Peter Solis