### Import Required Libraries and Set Up Environment Variables

In [1]:
# Dependencies
import requests
import time
from dotenv import load_dotenv
import os
import pandas as pd
import json

In [2]:
# Set environment variables from the .env in the local environment
load_dotenv('.env')

nyt_api_key = os.getenv("NYT_API_KEY")
tmdb_api_key = os.getenv("TMDB_API_KEY")

### Access the New York Times API

In [3]:
# Set the base URL
url = "https://api.nytimes.com/svc/search/v2/articlesearch.json?"

# Filter for movie reviews with "love" in the headline
# section_name should be "Movies"
# type_of_material should be "Review"
filter_query = 'section_name:Movies AND type_of_material:Review AND headline:love'

# Use a sort filter, sort by newest
sort = "newest"

# Select the following fields to return:
# headline, web_url, snippet, source, keywords, pub_date, byline, word_count
field_list = "headline,web_url,snippet,source,keywords,pub_date,byline,word_count"

# Search for reviews published between a begin and end date
begin_date = "20130101"
end_date = "20230531"

# Select the following fields to return:
# headline, web_url, snippet, source, keywords, pub_date, byline, word_count
field_list = "headline,web_url,snippet,source,keywords,pub_date,byline,word_count"

query_url = url+'api-key='+nyt_api_key+'&fq='+filter_query+'&sort='+sort+'&fl='+field_list+'&begin_date='+begin_date+'&end_date='+end_date

In [4]:
# Create an empty list to store the reviews
reviews_list = []

# loop through pages 0-19
for page in range(20):  

    # create query with a page number
    # API results show 10 articles at a time
    query_url2 = query_url+'&page=1'
    
    # Make a "GET" request and retrieve the JSON
    reviews = requests.get(query_url2).json()
    
    # Add a twelve second interval between queries to stay within API query limits
    time.sleep(12)
    
    # Try and save the reviews to the reviews_list
    try:
        # loop through the reviews["response"]["docs"] and append each review to the list
        for review in reviews["response"]["docs"]:
            reviews_list.append(review)
   
        # Print the page that was just retrieved
        print(f"A page with results: {page}")
    except:
        # Print the page number that had no results then break from the loop
        print(f"A page with no results: {page}")
        break

A page with results: 0
A page with results: 1
A page with results: 2
A page with results: 3
A page with results: 4
A page with results: 5
A page with results: 6
A page with results: 7
A page with results: 8
A page with results: 9
A page with results: 10
A page with results: 11
A page with results: 12
A page with results: 13
A page with results: 14
A page with results: 15
A page with results: 16
A page with results: 17
A page with results: 18
A page with results: 19


In [7]:
# Preview the first 5 results in JSON format
# Use json.dumps with argument indent=4 to format data
for item in range(5):
    print(f"Item {item}: {json.dumps(reviews_list[item], indent=4)}")

Item 0: {
    "web_url": "https://www.nytimes.com/2023/01/31/movies/pamela-a-love-story-review.html",
    "snippet": "This documentary from Ryan White rewinds, to powerful effect, on Pamela Anderson\u2019s life and fame.",
    "source": "The New York Times",
    "headline": {
        "main": "\u2018Pamela, a Love Story\u2019 Review: A Frank Look Back",
        "kicker": null,
        "content_kicker": null,
        "print_headline": "Pamela, a Love Story",
        "name": null,
        "seo": null,
        "sub": null
    },
    "keywords": [
        {
            "name": "subject",
            "value": "Documentary Films and Programs",
            "rank": 1,
            "major": "N"
        },
        {
            "name": "persons",
            "value": "Anderson, Pamela (1967- )",
            "rank": 2,
            "major": "N"
        },
        {
            "name": "persons",
            "value": "White, Ryan (Filmmaker)",
            "rank": 3,
            "major": "N"
        }

In [8]:
# Convert reviews_list to a Pandas DataFrame using json_normalize()
reviews_df = pd.json_normalize(reviews_list)

In [9]:
# Extract the title from the "headline.main" column and
# save it to a new column "title"
reviews_df['title'] = reviews_df['headline.main'].apply(lambda st: st[st.find("\u2018")+1:st.find("\u2019 Review")])

In [10]:
# Extract 'name' and 'value' from items in "keywords" column
# Fix the "keywords" column by converting cells from a list to a string
def extract_keywords(keyword_list):
    extracted_keywords = ""
    for item in keyword_list:
        # Extract 'name' and 'value'
        keyword = f"{item['name']}: {item['value']};" 
        # Append the keyword item to the extracted_keywords list
        extracted_keywords += keyword
    return extracted_keywords

reviews_df['keywords'] = reviews_df['keywords'].apply(extract_keywords)

In [13]:
# Create a list from the "title" column using to_list()
# These titles will be used in the query for The Movie Database
titles = reviews_df['title'].to_list()

### Access The Movie Database API

In [14]:
## Prepareing The Movie Database query ##
url = "https://api.themoviedb.org/3/search/movie?query="
tmdb_key_string = "&api_key=" + tmdb_api_key

## Creating  an empty list to store everything in ##
tmdb_movies_list = []

# Createing a request counter to sleep the requests after a multiple
# of 50 requests per call ##
request_counter = 1

## Looping through the titles ##
for title in titles:
    ## Checking if we need to sleep at all before making a request##
    if request_counter % 50 == 0:
        time.sleep(1)
        print(f"Sleeping at {request_counter} requests")
    
    ## Add 1 to the request counter ##
    request_counter += 1
    
    # Perform a "GET" request for The Movie Database
    response = requests.get(url + title + tmdb_key_string)
    data = response.json()
    # Include a try clause to search for the full movie details.
    # Use the except clause to print out a statement if a movie
    # is not found.
    try:
        # Get movie id
        movie_id = data["results"][0]["id"]
        
        # Make a request for a the full movie details
        query_url = f"https://api.themoviedb.org/3/movie/{movie_id}?api_key={tmdb_api_key}"
        
        # Execute "GET" request with url
        data = requests.get(query_url).json()
        
        # Extract the genre names into a list
        genres = []
        for genre in data['genres']:
            genres.append(
                genre["name"]
            )
        
        # Extract the spoken_languages' English name into a list
        spoken_languages = []
        for language in data['spoken_languages']:
            spoken_languages.append(
                language["english_name"]
            )
        
        # Extract the production_countries' name into a list
        production_countries = []
        for country in data['production_countries']:
            production_countries.append(
                country["name"]
            )
        
        # Add the relevant data to a dictionary and
        # append it to the tmdb_movies_list list
        tmdb_movies_list.append(
            {
                "title": data['title'],
                "original_title": data['original_title'],
                "budget": data['budget'],
                "genre": genres,
                "language": data['original_language'],
                "spoken_languages": spoken_languages,
                "homepage": data['homepage'],
                "overview": data['overview'],
                "popularity": data['popularity'],
                "runtime": data['runtime'],
                "revenue": data['revenue'],
                "release_date": data['release_date'],
                "vote_average": data['vote_average'],
                "vote_count": data['vote_count'],
                "production_countries": production_countries
            }
        )
        # Print out the title that was found
        print(f"Found {title}")
    except:
        print(title + " not found.")

Found Pamela, a Love Story
Found In From the Side
Found After Love
Found Alcarràs
Found Nelly & Nadine
Found Lady Chatterley’s Lover
Found The Sound of Christmas
Found The Inspection
Found Bones and All
Found My Policeman
Found Pamela, a Love Story
Found In From the Side
Found After Love
Found Alcarràs
Found Nelly & Nadine
Found Lady Chatterley’s Lover
Found The Sound of Christmas
Found The Inspection
Found Bones and All
Found My Policeman
Found Pamela, a Love Story
Found In From the Side
Found After Love
Found Alcarràs
Found Nelly & Nadine
Found Lady Chatterley’s Lover
Found The Sound of Christmas
Found The Inspection
Found Bones and All
Found My Policeman
Found Pamela, a Love Story
Found In From the Side
Found After Love
Found Alcarràs
Found Nelly & Nadine
Found Lady Chatterley’s Lover
Found The Sound of Christmas
Found The Inspection
Found Bones and All
Found My Policeman
Found Pamela, a Love Story
Found In From the Side
Found After Love
Found Alcarràs
Found Nelly & Nadine
Found Lad

In [15]:
# Preview the first 5 results in JSON format
# Use json.dumps with argument indent=4 to format data
for item in range(5):
    print(f"Item {item}: {json.dumps(tmdb_movies_list[item], indent=4)}")

Item 0: {
    "title": "Pamela, A Love Story",
    "original_title": "Pamela, A Love Story",
    "budget": 0,
    "genre": [
        "Documentary"
    ],
    "language": "en",
    "spoken_languages": [
        "English"
    ],
    "homepage": "https://www.netflix.com/title/81590934",
    "overview": "In her own words, through personal video and diaries, Pamela Anderson shares the story of her rise to fame, rocky romances and infamous sex tape scandal.",
    "popularity": 17.091,
    "runtime": 113,
    "revenue": 0,
    "release_date": "2023-01-30",
    "vote_average": 7.0,
    "vote_count": 202,
    "production_countries": [
        "United States of America"
    ]
}
Item 1: {
    "title": "In from the Side",
    "original_title": "In from the Side",
    "budget": 0,
    "genre": [
        "Drama",
        "Romance"
    ],
    "language": "en",
    "spoken_languages": [
        "English"
    ],
    "homepage": "http://www.infromthesidemovie.com",
    "overview": "Mark, a new and inexp

In [16]:
# Convert the results to a DataFrame called tmdb_df with pd.DataFrame(). 
# You don't need to use json_normalize() this time because we don't have nested objects.
tmdb_df = pd.DataFrame(tmdb_movies_list)
tmdb_df

Unnamed: 0,title,original_title,budget,genre,language,spoken_languages,homepage,overview,popularity,runtime,revenue,release_date,vote_average,vote_count,production_countries
0,"Pamela, A Love Story","Pamela, A Love Story",0,[Documentary],en,[English],https://www.netflix.com/title/81590934,"In her own words, through personal video and d...",17.091,113,0,2023-01-30,7.000,202,[United States of America]
1,In from the Side,In from the Side,0,"[Drama, Romance]",en,[English],http://www.infromthesidemovie.com,"Mark, a new and inexperienced rugby club membe...",11.710,134,0,2022-09-16,6.780,59,[United Kingdom]
2,After Love,After Love,0,[Drama],en,"[English, Arabic, French, Urdu]",,Set in the port town of Dover in the South-Eas...,15.106,89,0,2021-06-04,7.155,116,[United Kingdom]
3,Alcarràs,Alcarràs,0,[Drama],ca,[Catalan],https://www.alcarras-film.com/,"In a small village in Catalonia, the peach far...",17.212,120,0,2022-04-29,6.867,173,[Spain]
4,Nelly and Monsieur Arnaud,Nelly et Mr. Arnaud,0,"[Drama, Romance]",fr,[French],,"Nelly leaves her lazy, unemployed husband to w...",11.343,106,0,1995-08-23,6.900,116,"[Germany, France, Italy]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,Lady Chatterley's Lover,Lady Chatterley's Lover,0,"[Drama, Romance]",en,[English],https://www.netflix.com/title/81476441,Unhappily married aristocrat Lady Chatterley b...,31.252,126,0,2022-11-22,6.542,372,"[United Kingdom, United States of America]"
196,The Sound of Christmas,The Sound of Christmas,0,"[TV Movie, Drama]",en,[English],,A woman facing eviction just before Christmas ...,1.872,0,0,2022-11-24,8.000,4,[United States of America]
197,The Inspection,The Inspection,0,[Drama],en,"[Arabic, Spanish, English]",https://a24films.com/films/inspection,"Ellis French is a young, gay Black man, reject...",24.176,95,270613,2022-11-18,6.100,53,[United States of America]
198,Bones and All,Bones and All,18000000,"[Drama, Horror, Romance]",it,[English],https://www.bonesandallfilm.net/,"Abandoned by her father, a young woman embarks...",38.023,131,15100000,2022-11-18,7.076,1212,"[Italy, United States of America]"


### Merge and Clean the Data for Export

In [None]:
# Merge the New York Times reviews and TMDB DataFrames on title


In [None]:
# Remove list brackets and quotation marks on the columns containing lists
# Create a list of the columns that need fixing


# Create a list of characters to remove


# Loop through the list of columns to fix

    # Convert the column to type 'str'


    # Loop through characters to remove


# Display the fixed DataFrame


In [None]:
# Drop "byline.person" column


In [None]:
# Delete duplicate rows and reset index


In [None]:
# Export data to CSV without the index
