### Import Required Libraries and Set Up Environment Variables

In [1]:
# Dependencies
import requests
import time
from dotenv import load_dotenv
import os
import pandas as pd
import json

In [2]:
# Set environment variables from the .env in the local environment
load_dotenv()

nyt_api_key = os.getenv("NYT_API_KEY")
tmdb_api_key = os.getenv("TMDB_API_KEY")

### Access the New York Times API

In [3]:
# Set the base URL
url = "https://api.nytimes.com/svc/search/v2/articlesearch.json?"

# Filter for movie reviews with "love" in the headline
# section_name should be "Movies"
# type_of_material should be "Review"
filter_query = 'section_name:"Movies" AND type_of_material:"Review" AND headline:"love"'

# Use a sort filter, sort by newest
sort = "newest"

# Select the following fields to return:
# headline, web_url, snippet, source, keywords, pub_date, byline, word_count
field_list = "headline,web_url,snippet,source,keywords,pub_date,byline,word_count"

# Search for reviews published between a begin and end date
begin_date = "20130101"
end_date = "20230531"

# Build URL
query_url = f"{url}&api-key={nyt_api_key}&fq={filter_query}&begin_date={begin_date}&end_date={end_date}&sort={sort}&fl={field_list}"

In [4]:
# Create an empty list to store the reviews
reviews_list = []

# loop through pages 0-19
for page in range(0,20):

    # create query with a page number
    query_url = f"{query_url}&page={page+1}"
    try: 
        
        # Make a "GET" request and retrieve the JSON
        response_json = requests.get(query_url).json()

        # API results show 10 articles at a time
        for review in response_json["response"]["docs"]:
            reviews_list.append(review)

        # Print the page that was just retrieved
        print(f"Checked page {page}")
        # Print(reviews_list[0])
    except: 
        print(f"Page number {page} had no results")


    # Add a twelve second interval between queries to stay within API query limits
    time.sleep(12)
    
 


Checked page 0
Checked page 1
Checked page 2
Checked page 3
Checked page 4
Checked page 5
Checked page 6
Checked page 7
Checked page 8
Checked page 9
Checked page 10
Checked page 11
Checked page 12
Checked page 13
Checked page 14
Checked page 15
Checked page 16
Checked page 17
Checked page 18
Checked page 19


In [5]:
# Preview the first 5 results in JSON format
# Use json.dumps with argument indent=4 to format data
first_five_reviews = reviews_list[0:5]
print(json.dumps(first_five_reviews, indent=4))

[
    {
        "web_url": "https://www.nytimes.com/2023/01/31/movies/pamela-a-love-story-review.html",
        "snippet": "This documentary from Ryan White rewinds, to powerful effect, on Pamela Anderson\u2019s life and fame.",
        "source": "The New York Times",
        "headline": {
            "main": "\u2018Pamela, a Love Story\u2019 Review: A Frank Look Back",
            "kicker": null,
            "content_kicker": null,
            "print_headline": "Pamela, a Love Story",
            "name": null,
            "seo": null,
            "sub": null
        },
        "keywords": [
            {
                "name": "subject",
                "value": "Documentary Films and Programs",
                "rank": 1,
                "major": "N"
            },
            {
                "name": "persons",
                "value": "Anderson, Pamela (1967- )",
                "rank": 2,
                "major": "N"
            },
            {
                "name": "persons",

In [6]:
# Convert reviews_list to a Pandas DataFrame using json_normalize()
reviews_list_df = pd.json_normalize(reviews_list)
reviews_list_df


Unnamed: 0,web_url,snippet,source,keywords,pub_date,word_count,headline.main,headline.kicker,headline.content_kicker,headline.print_headline,headline.name,headline.seo,headline.sub,byline.original,byline.person,byline.organization
0,https://www.nytimes.com/2023/01/31/movies/pame...,"This documentary from Ryan White rewinds, to p...",The New York Times,"[{'name': 'subject', 'value': 'Documentary Fil...",2023-01-31T12:00:05+0000,295,"‘Pamela, a Love Story’ Review: A Frank Look Back",,,"Pamela, a Love Story",,,,By Glenn Kenny,"[{'firstname': 'Glenn', 'middlename': None, 'l...",
1,https://www.nytimes.com/2023/01/19/movies/in-f...,"In Matt Carter’s gay rugby film, sports and ro...",The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-01-19T17:50:16+0000,281,‘In From the Side’ Review: Love and Rugby Play...,,,In From the Side,,,,By Kyle Turner,"[{'firstname': 'Kyle', 'middlename': None, 'la...",
2,https://www.nytimes.com/2023/01/19/movies/afte...,In this intelligent melodrama by the director ...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-01-19T12:00:06+0000,359,‘After Love’ Review: The Other Woman,Critic’s Pick,,After Love,,,,By Beatrice Loayza,"[{'firstname': 'Beatrice', 'middlename': None,...",
3,https://www.nytimes.com/2023/01/05/movies/alca...,"In this naturalistic drama from Spain, a famil...",The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-01-05T12:00:03+0000,306,‘Alcarràs’ Review: Labor of Love,,,Alcarràs,,,,By Devika Girish,"[{'firstname': 'Devika', 'middlename': None, '...",
4,https://www.nytimes.com/2022/12/15/movies/nell...,A family archive provides intimate records of ...,The New York Times,"[{'name': 'subject', 'value': 'Documentary Fil...",2022-12-15T12:00:04+0000,308,"‘Nelly & Nadine’ Review: An Unlikely Love, an ...",,,Nelly &amp; Nadine,,,,By Teo Bugbee,"[{'firstname': 'Teo', 'middlename': None, 'las...",
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,https://www.nytimes.com/2022/12/01/movies/lady...,The new Netflix adaptation of D.H. Lawrence’s ...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2022-12-01T16:40:55+0000,768,‘Lady Chatterley’s Lover’ Review: When Connie ...,,,A Love Whose Name Is Often Spoken,,,,By A.O. Scott,"[{'firstname': 'A.', 'middlename': 'O.', 'last...",
196,https://www.nytimes.com/2022/11/23/movies/soun...,A young singer gets more than she bargained fo...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2022-11-23T15:00:09+0000,277,‘The Sound of Christmas’ Review: A Gospel Sing...,,,The Sound Of Christmas,,,,By Concepción de León,"[{'firstname': 'Concepción', 'middlename': Non...",
197,https://www.nytimes.com/2022/11/17/movies/the-...,Elegance Bratton’s autobiographical first feat...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2022-11-17T12:00:07+0000,836,"‘The Inspection’ Review: Boot Camp, a Love Story",critic’s pick,,"A Few Good Men, Some With Secrets",,,,By A.O. Scott,"[{'firstname': 'A.', 'middlename': 'O.', 'last...",
198,https://www.nytimes.com/2022/11/17/movies/bone...,Luca Guadagnino’s latest stars Timothée Chalam...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2022-11-17T12:00:06+0000,710,‘Bones and All’ Review: You Eat What You Are,,,"It’s Eat, Prey, Love on a Journey of Self-Disc...",,,,By A.O. Scott,"[{'firstname': 'A.', 'middlename': 'O.', 'last...",


In [7]:
# Extract the title from the "headline.main" column and
# save it to a new column "title"
# Title is between unicode characters \u2018 and \u2019. 
# End string should include " Review" to avoid cutting title early
reviews_list_df["title"] = reviews_list_df["headline.main"].apply(lambda st: st[st.find("\u2018")+1:st.find("\u2019 Review")])
reviews_list_df

Unnamed: 0,web_url,snippet,source,keywords,pub_date,word_count,headline.main,headline.kicker,headline.content_kicker,headline.print_headline,headline.name,headline.seo,headline.sub,byline.original,byline.person,byline.organization,title
0,https://www.nytimes.com/2023/01/31/movies/pame...,"This documentary from Ryan White rewinds, to p...",The New York Times,"[{'name': 'subject', 'value': 'Documentary Fil...",2023-01-31T12:00:05+0000,295,"‘Pamela, a Love Story’ Review: A Frank Look Back",,,"Pamela, a Love Story",,,,By Glenn Kenny,"[{'firstname': 'Glenn', 'middlename': None, 'l...",,"Pamela, a Love Story"
1,https://www.nytimes.com/2023/01/19/movies/in-f...,"In Matt Carter’s gay rugby film, sports and ro...",The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-01-19T17:50:16+0000,281,‘In From the Side’ Review: Love and Rugby Play...,,,In From the Side,,,,By Kyle Turner,"[{'firstname': 'Kyle', 'middlename': None, 'la...",,In From the Side
2,https://www.nytimes.com/2023/01/19/movies/afte...,In this intelligent melodrama by the director ...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-01-19T12:00:06+0000,359,‘After Love’ Review: The Other Woman,Critic’s Pick,,After Love,,,,By Beatrice Loayza,"[{'firstname': 'Beatrice', 'middlename': None,...",,After Love
3,https://www.nytimes.com/2023/01/05/movies/alca...,"In this naturalistic drama from Spain, a famil...",The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-01-05T12:00:03+0000,306,‘Alcarràs’ Review: Labor of Love,,,Alcarràs,,,,By Devika Girish,"[{'firstname': 'Devika', 'middlename': None, '...",,Alcarràs
4,https://www.nytimes.com/2022/12/15/movies/nell...,A family archive provides intimate records of ...,The New York Times,"[{'name': 'subject', 'value': 'Documentary Fil...",2022-12-15T12:00:04+0000,308,"‘Nelly & Nadine’ Review: An Unlikely Love, an ...",,,Nelly &amp; Nadine,,,,By Teo Bugbee,"[{'firstname': 'Teo', 'middlename': None, 'las...",,Nelly & Nadine
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,https://www.nytimes.com/2022/12/01/movies/lady...,The new Netflix adaptation of D.H. Lawrence’s ...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2022-12-01T16:40:55+0000,768,‘Lady Chatterley’s Lover’ Review: When Connie ...,,,A Love Whose Name Is Often Spoken,,,,By A.O. Scott,"[{'firstname': 'A.', 'middlename': 'O.', 'last...",,Lady Chatterley’s Lover
196,https://www.nytimes.com/2022/11/23/movies/soun...,A young singer gets more than she bargained fo...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2022-11-23T15:00:09+0000,277,‘The Sound of Christmas’ Review: A Gospel Sing...,,,The Sound Of Christmas,,,,By Concepción de León,"[{'firstname': 'Concepción', 'middlename': Non...",,The Sound of Christmas
197,https://www.nytimes.com/2022/11/17/movies/the-...,Elegance Bratton’s autobiographical first feat...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2022-11-17T12:00:07+0000,836,"‘The Inspection’ Review: Boot Camp, a Love Story",critic’s pick,,"A Few Good Men, Some With Secrets",,,,By A.O. Scott,"[{'firstname': 'A.', 'middlename': 'O.', 'last...",,The Inspection
198,https://www.nytimes.com/2022/11/17/movies/bone...,Luca Guadagnino’s latest stars Timothée Chalam...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2022-11-17T12:00:06+0000,710,‘Bones and All’ Review: You Eat What You Are,,,"It’s Eat, Prey, Love on a Journey of Self-Disc...",,,,By A.O. Scott,"[{'firstname': 'A.', 'middlename': 'O.', 'last...",,Bones and All


In [8]:
# Extract 'name' and 'value' from items in "keywords" column
def extract_keywords(keyword_list):
    extracted_keywords = ""
    for item in keyword_list:
        # Extract 'name' and 'value'
        keyword = f"{item['name']}: {item['value']};" 
        # Append the keyword item to the extracted_keywords list
        extracted_keywords += keyword
    return extracted_keywords

# Fix the "keywords" column by converting cells from a list to a string
reviews_list_df["keywords"] = reviews_list_df["keywords"].apply(extract_keywords)
reviews_list_df


Unnamed: 0,web_url,snippet,source,keywords,pub_date,word_count,headline.main,headline.kicker,headline.content_kicker,headline.print_headline,headline.name,headline.seo,headline.sub,byline.original,byline.person,byline.organization,title
0,https://www.nytimes.com/2023/01/31/movies/pame...,"This documentary from Ryan White rewinds, to p...",The New York Times,subject: Documentary Films and Programs;person...,2023-01-31T12:00:05+0000,295,"‘Pamela, a Love Story’ Review: A Frank Look Back",,,"Pamela, a Love Story",,,,By Glenn Kenny,"[{'firstname': 'Glenn', 'middlename': None, 'l...",,"Pamela, a Love Story"
1,https://www.nytimes.com/2023/01/19/movies/in-f...,"In Matt Carter’s gay rugby film, sports and ro...",The New York Times,subject: Movies;creative_works: In From the Si...,2023-01-19T17:50:16+0000,281,‘In From the Side’ Review: Love and Rugby Play...,,,In From the Side,,,,By Kyle Turner,"[{'firstname': 'Kyle', 'middlename': None, 'la...",,In From the Side
2,https://www.nytimes.com/2023/01/19/movies/afte...,In this intelligent melodrama by the director ...,The New York Times,subject: Movies;creative_works: After Love (20...,2023-01-19T12:00:06+0000,359,‘After Love’ Review: The Other Woman,Critic’s Pick,,After Love,,,,By Beatrice Loayza,"[{'firstname': 'Beatrice', 'middlename': None,...",,After Love
3,https://www.nytimes.com/2023/01/05/movies/alca...,"In this naturalistic drama from Spain, a famil...",The New York Times,"subject: Movies;persons: Simon, Carla;creative...",2023-01-05T12:00:03+0000,306,‘Alcarràs’ Review: Labor of Love,,,Alcarràs,,,,By Devika Girish,"[{'firstname': 'Devika', 'middlename': None, '...",,Alcarràs
4,https://www.nytimes.com/2022/12/15/movies/nell...,A family archive provides intimate records of ...,The New York Times,subject: Documentary Films and Programs;person...,2022-12-15T12:00:04+0000,308,"‘Nelly & Nadine’ Review: An Unlikely Love, an ...",,,Nelly &amp; Nadine,,,,By Teo Bugbee,"[{'firstname': 'Teo', 'middlename': None, 'las...",,Nelly & Nadine
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,https://www.nytimes.com/2022/12/01/movies/lady...,The new Netflix adaptation of D.H. Lawrence’s ...,The New York Times,subject: Movies;creative_works: Lady Chatterle...,2022-12-01T16:40:55+0000,768,‘Lady Chatterley’s Lover’ Review: When Connie ...,,,A Love Whose Name Is Often Spoken,,,,By A.O. Scott,"[{'firstname': 'A.', 'middlename': 'O.', 'last...",,Lady Chatterley’s Lover
196,https://www.nytimes.com/2022/11/23/movies/soun...,A young singer gets more than she bargained fo...,The New York Times,subject: Movies;persons: Ne-Yo;creative_works:...,2022-11-23T15:00:09+0000,277,‘The Sound of Christmas’ Review: A Gospel Sing...,,,The Sound Of Christmas,,,,By Concepción de León,"[{'firstname': 'Concepción', 'middlename': Non...",,The Sound of Christmas
197,https://www.nytimes.com/2022/11/17/movies/the-...,Elegance Bratton’s autobiographical first feat...,The New York Times,subject: Movies;creative_works: The Inspection...,2022-11-17T12:00:07+0000,836,"‘The Inspection’ Review: Boot Camp, a Love Story",critic’s pick,,"A Few Good Men, Some With Secrets",,,,By A.O. Scott,"[{'firstname': 'A.', 'middlename': 'O.', 'last...",,The Inspection
198,https://www.nytimes.com/2022/11/17/movies/bone...,Luca Guadagnino’s latest stars Timothée Chalam...,The New York Times,subject: Movies;creative_works: Bones and All ...,2022-11-17T12:00:06+0000,710,‘Bones and All’ Review: You Eat What You Are,,,"It’s Eat, Prey, Love on a Journey of Self-Disc...",,,,By A.O. Scott,"[{'firstname': 'A.', 'middlename': 'O.', 'last...",,Bones and All


In [9]:
# Create a list from the "title" column using to_list()
# These titles will be used in the query for The Movie Database
titles = reviews_list_df["title"].to_list()
titles

['Pamela, a Love Story',
 'In From the Side',
 'After Love',
 'Alcarràs',
 'Nelly & Nadine',
 'Lady Chatterley’s Lover',
 'The Sound of Christmas',
 'The Inspection',
 'Bones and All',
 'My Policeman',
 'Pamela, a Love Story',
 'In From the Side',
 'After Love',
 'Alcarràs',
 'Nelly & Nadine',
 'Lady Chatterley’s Lover',
 'The Sound of Christmas',
 'The Inspection',
 'Bones and All',
 'My Policeman',
 'Pamela, a Love Story',
 'In From the Side',
 'After Love',
 'Alcarràs',
 'Nelly & Nadine',
 'Lady Chatterley’s Lover',
 'The Sound of Christmas',
 'The Inspection',
 'Bones and All',
 'My Policeman',
 'Pamela, a Love Story',
 'In From the Side',
 'After Love',
 'Alcarràs',
 'Nelly & Nadine',
 'Lady Chatterley’s Lover',
 'The Sound of Christmas',
 'The Inspection',
 'Bones and All',
 'My Policeman',
 'Pamela, a Love Story',
 'In From the Side',
 'After Love',
 'Alcarràs',
 'Nelly & Nadine',
 'Lady Chatterley’s Lover',
 'The Sound of Christmas',
 'The Inspection',
 'Bones and All',
 'My Po

### Access The Movie Database API

In [10]:
# Prepare The Movie Database query
url = "https://api.themoviedb.org/3/search/movie?query="
tmdb_key_string = "&api_key=" + tmdb_api_key



In [11]:
# Create an empty list to store the results
tmdb_movies_list = []

# Create a request counter to sleep the requests after a multiple
# of 50 requests
request_counter = 1

# Loop through the titles
    # Check if we need to sleep before making a request
    # Add 1 to the request counter
def getMovieDetails(search_url):

    headers = {
        "accept": "application/json",
        "Authorization": "Bearer " + str(tmdb_api_key)
    }
    response = requests.get(search_url, headers=headers).json()
    return response

for title in titles:
        request_counter = request_counter + 1
        if (request_counter %50 == 0):
                print("The application is sleeping")
                time.sleep(1)
    
    # Perform a "GET" request for The Movie Database
    # Include a try clause to search for the full movie details.
    # Use the except clause to print out a statement if a movie
    # is not found.
        # Get movie id
        search_url = url + title + tmdb_key_string 
        response = getMovieDetails(search_url)
        
        try:
                movie_id = response["results"][0]["id"]
                #print(response)
                #print(movie_id)
                search_by_id = "https://api.themoviedb.org/3/movie/" + str(movie_id)
                movie_details = getMovieDetails(search_by_id)
                genres = movie_details["genres"]
                spoken_languages = movie_details["spoken_languages"]
                production_countries = movie_details["production_countries"]
                movie_dict = {}
                movie_dict["title"]= movie_details["title"] 
                movie_dict["original_title"]= movie_details["original_title"] 
                movie_dict["budget"]= movie_details["budget"] 
                movie_dict["original_language"]= movie_details["original_language"] 
                movie_dict["homepage"]= movie_details["homepage"] 
                movie_dict["overview"]= movie_details["overview"] 
                movie_dict["popularity"]= movie_details["popularity"] 
                movie_dict["runtime"]= movie_details["runtime"] 
                movie_dict["revenue"]= movie_details["revenue"] 
                movie_dict["release_date"]= movie_details["release_date"] 
                movie_dict["vote_average"]= movie_details["vote_average"] 
                movie_dict["vote_count"]= movie_details["vote_count"] 
                movie_dict["genres"] = genres 
                movie_dict["spoken_languages"]= spoken_languages 
                movie_dict["production_countries"]= production_countries
                tmdb_movies_list.append(movie_dict)
                print("Found " + title)      
        except IndexError:
                print(f"{title} not found.")

        # Make a request for a the full movie details

        # Execute "GET" request with url
        
        # Extract the genre names into a list
            
        # Extract the spoken_languages' English name into a list

        # Extract the production_countries' name into a list

        # Add the relevant data to a dictionary and

        # append it to the tmdb_movies_list list ({
       
        # Print out the title that was found



Found Pamela, a Love Story
Found In From the Side
Found After Love
Found Alcarràs
Found Nelly & Nadine
Found Lady Chatterley’s Lover
Found The Sound of Christmas
Found The Inspection
Found Bones and All
Found My Policeman
Found Pamela, a Love Story
Found In From the Side
Found After Love
Found Alcarràs
Found Nelly & Nadine
Found Lady Chatterley’s Lover
Found The Sound of Christmas
Found The Inspection
Found Bones and All
Found My Policeman
Found Pamela, a Love Story
Found In From the Side
Found After Love
Found Alcarràs
Found Nelly & Nadine
Found Lady Chatterley’s Lover
Found The Sound of Christmas
Found The Inspection
Found Bones and All
Found My Policeman
Found Pamela, a Love Story
Found In From the Side
Found After Love
Found Alcarràs
Found Nelly & Nadine
Found Lady Chatterley’s Lover
Found The Sound of Christmas
Found The Inspection
Found Bones and All
Found My Policeman
Found Pamela, a Love Story
Found In From the Side
Found After Love
Found Alcarràs
Found Nelly & Nadine
Found Lad

In [12]:
# Preview the first 5 results in JSON format
# Use json.dumps with argument indent=4 to format data
first_five_movies = tmdb_movies_list[0:5]
print(json.dumps(first_five_movies, indent=4))

[
    {
        "title": "Pamela, A Love Story",
        "original_title": "Pamela, A Love Story",
        "budget": 0,
        "original_language": "en",
        "homepage": "https://www.netflix.com/title/81590934",
        "overview": "In her own words, through personal video and diaries, Pamela Anderson shares the story of her rise to fame, rocky romances and infamous sex tape scandal.",
        "popularity": 10.614,
        "runtime": 113,
        "revenue": 0,
        "release_date": "2023-01-30",
        "vote_average": 6.978,
        "vote_count": 207,
        "genres": [
            {
                "id": 99,
                "name": "Documentary"
            }
        ],
        "spoken_languages": [
            {
                "english_name": "English",
                "iso_639_1": "en",
                "name": "English"
            }
        ],
        "production_countries": [
            {
                "iso_3166_1": "US",
                "name": "United States of Amer

In [13]:
# Convert the results to a DataFrame
tmdb_df = pd.DataFrame(tmdb_movies_list)
tmdb_df

Unnamed: 0,title,original_title,budget,original_language,homepage,overview,popularity,runtime,revenue,release_date,vote_average,vote_count,genres,spoken_languages,production_countries
0,"Pamela, A Love Story","Pamela, A Love Story",0,en,https://www.netflix.com/title/81590934,"In her own words, through personal video and d...",10.614,113,0,2023-01-30,6.978,207,"[{'id': 99, 'name': 'Documentary'}]","[{'english_name': 'English', 'iso_639_1': 'en'...","[{'iso_3166_1': 'US', 'name': 'United States o..."
1,In from the Side,In from the Side,0,en,http://www.infromthesidemovie.com,"Mark, a new and inexperienced rugby club membe...",18.786,134,52885,2022-09-16,6.800,62,"[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...","[{'english_name': 'English', 'iso_639_1': 'en'...","[{'iso_3166_1': 'GB', 'name': 'United Kingdom'}]"
2,After Love,After Love,0,en,,Set in the port town of Dover in the South-Eas...,20.945,89,0,2021-06-04,7.152,122,"[{'id': 18, 'name': 'Drama'}]","[{'english_name': 'English', 'iso_639_1': 'en'...","[{'iso_3166_1': 'GB', 'name': 'United Kingdom'}]"
3,Alcarràs,Alcarràs,0,ca,https://www.alcarras-film.com/,"In a small village in Catalonia, the peach far...",14.598,120,0,2022-04-29,6.867,180,"[{'id': 18, 'name': 'Drama'}]","[{'english_name': 'Catalan', 'iso_639_1': 'ca'...","[{'iso_3166_1': 'ES', 'name': 'Spain'}]"
4,Nelly and Monsieur Arnaud,Nelly et Mr. Arnaud,0,fr,,"Nelly leaves her lazy, unemployed husband to w...",15.338,106,0,1995-08-23,6.888,120,"[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...","[{'english_name': 'French', 'iso_639_1': 'fr',...","[{'iso_3166_1': 'DE', 'name': 'Germany'}, {'is..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,Lady Chatterley's Lover,Lady Chatterley's Lover,0,en,https://www.netflix.com/title/81476441,Unhappily married aristocrat Lady Chatterley b...,30.633,126,0,2022-11-22,6.620,392,"[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...","[{'english_name': 'English', 'iso_639_1': 'en'...","[{'iso_3166_1': 'GB', 'name': 'United Kingdom'..."
196,The Sound of Christmas,The Sound of Christmas,0,en,,A woman facing eviction just before Christmas ...,1.173,0,0,2022-11-24,8.000,4,"[{'id': 10770, 'name': 'TV Movie'}, {'id': 18,...","[{'english_name': 'English', 'iso_639_1': 'en'...","[{'iso_3166_1': 'US', 'name': 'United States o..."
197,The Inspection,The Inspection,0,en,https://a24films.com/films/inspection,"Ellis French is a young, gay Black man, reject...",26.393,95,270613,2022-11-18,6.179,56,"[{'id': 18, 'name': 'Drama'}]","[{'english_name': 'Arabic', 'iso_639_1': 'ar',...","[{'iso_3166_1': 'US', 'name': 'United States o..."
198,Bones and All,Bones and All,16000000,it,https://www.bonesandallfilm.net/,"Abandoned by her father, a young woman embarks...",30.307,131,15234907,2022-11-18,7.068,1252,"[{'id': 18, 'name': 'Drama'}, {'id': 27, 'name...","[{'english_name': 'English', 'iso_639_1': 'en'...","[{'iso_3166_1': 'IT', 'name': 'Italy'}, {'iso_..."


### Merge and Clean the Data for Export

In [15]:
# Merge the New York Times reviews and TMDB DataFrames on title
combined_title_df = pd.merge(tmdb_df, reviews_list_df, how="inner", on="title")
combined_title_df

Unnamed: 0,title,original_title,budget,original_language,homepage,overview,popularity,runtime,revenue,release_date,...,headline.main,headline.kicker,headline.content_kicker,headline.print_headline,headline.name,headline.seo,headline.sub,byline.original,byline.person,byline.organization
0,After Love,After Love,0,en,,Set in the port town of Dover in the South-Eas...,20.945,89,0,2021-06-04,...,‘After Love’ Review: The Other Woman,Critic’s Pick,,After Love,,,,By Beatrice Loayza,"[{'firstname': 'Beatrice', 'middlename': None,...",
1,After Love,After Love,0,en,,Set in the port town of Dover in the South-Eas...,20.945,89,0,2021-06-04,...,‘After Love’ Review: The Other Woman,Critic’s Pick,,After Love,,,,By Beatrice Loayza,"[{'firstname': 'Beatrice', 'middlename': None,...",
2,After Love,After Love,0,en,,Set in the port town of Dover in the South-Eas...,20.945,89,0,2021-06-04,...,‘After Love’ Review: The Other Woman,Critic’s Pick,,After Love,,,,By Beatrice Loayza,"[{'firstname': 'Beatrice', 'middlename': None,...",
3,After Love,After Love,0,en,,Set in the port town of Dover in the South-Eas...,20.945,89,0,2021-06-04,...,‘After Love’ Review: The Other Woman,Critic’s Pick,,After Love,,,,By Beatrice Loayza,"[{'firstname': 'Beatrice', 'middlename': None,...",
4,After Love,After Love,0,en,,Set in the port town of Dover in the South-Eas...,20.945,89,0,2021-06-04,...,‘After Love’ Review: The Other Woman,Critic’s Pick,,After Love,,,,By Beatrice Loayza,"[{'firstname': 'Beatrice', 'middlename': None,...",
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2395,My Policeman,My Policeman,0,en,https://www.amazon.com/dp/B09Y8SHDPD,"In the late 1990s, the arrival of elderly inva...",24.341,113,0,2022-10-20,...,"‘My Policeman’ Review: Two Love Affairs, Equal...",,,My Policeman,,,,By Teo Bugbee,"[{'firstname': 'Teo', 'middlename': None, 'las...",
2396,My Policeman,My Policeman,0,en,https://www.amazon.com/dp/B09Y8SHDPD,"In the late 1990s, the arrival of elderly inva...",24.341,113,0,2022-10-20,...,"‘My Policeman’ Review: Two Love Affairs, Equal...",,,My Policeman,,,,By Teo Bugbee,"[{'firstname': 'Teo', 'middlename': None, 'las...",
2397,My Policeman,My Policeman,0,en,https://www.amazon.com/dp/B09Y8SHDPD,"In the late 1990s, the arrival of elderly inva...",24.341,113,0,2022-10-20,...,"‘My Policeman’ Review: Two Love Affairs, Equal...",,,My Policeman,,,,By Teo Bugbee,"[{'firstname': 'Teo', 'middlename': None, 'las...",
2398,My Policeman,My Policeman,0,en,https://www.amazon.com/dp/B09Y8SHDPD,"In the late 1990s, the arrival of elderly inva...",24.341,113,0,2022-10-20,...,"‘My Policeman’ Review: Two Love Affairs, Equal...",,,My Policeman,,,,By Teo Bugbee,"[{'firstname': 'Teo', 'middlename': None, 'las...",


In [16]:
# Remove list brackets and quotation marks on the columns containing lists
# Create a list of the columns that need fixing
columns_to_fix = ["genres", "spoken_languages", "production_countries" ]

# Create a list of characters to remove
characters_to_remove = ["[", "]", "'"]

# Loop through the list of columns to fix

for column in columns_to_fix:
    combined_title_df[column] = combined_title_df[column].astype(str)

    for character in characters_to_remove:
        combined_title_df[column] = combined_title_df[column].str.replace(character, "")

    # Convert the column to type 'str'


    # Loop through characters to remove


# Display the fixed DataFrame
#combined_title_df[columns_to_fix]
combined_title_df

Unnamed: 0,title,original_title,budget,original_language,homepage,overview,popularity,runtime,revenue,release_date,...,headline.main,headline.kicker,headline.content_kicker,headline.print_headline,headline.name,headline.seo,headline.sub,byline.original,byline.person,byline.organization
0,After Love,After Love,0,en,,Set in the port town of Dover in the South-Eas...,20.945,89,0,2021-06-04,...,‘After Love’ Review: The Other Woman,Critic’s Pick,,After Love,,,,By Beatrice Loayza,"[{'firstname': 'Beatrice', 'middlename': None,...",
1,After Love,After Love,0,en,,Set in the port town of Dover in the South-Eas...,20.945,89,0,2021-06-04,...,‘After Love’ Review: The Other Woman,Critic’s Pick,,After Love,,,,By Beatrice Loayza,"[{'firstname': 'Beatrice', 'middlename': None,...",
2,After Love,After Love,0,en,,Set in the port town of Dover in the South-Eas...,20.945,89,0,2021-06-04,...,‘After Love’ Review: The Other Woman,Critic’s Pick,,After Love,,,,By Beatrice Loayza,"[{'firstname': 'Beatrice', 'middlename': None,...",
3,After Love,After Love,0,en,,Set in the port town of Dover in the South-Eas...,20.945,89,0,2021-06-04,...,‘After Love’ Review: The Other Woman,Critic’s Pick,,After Love,,,,By Beatrice Loayza,"[{'firstname': 'Beatrice', 'middlename': None,...",
4,After Love,After Love,0,en,,Set in the port town of Dover in the South-Eas...,20.945,89,0,2021-06-04,...,‘After Love’ Review: The Other Woman,Critic’s Pick,,After Love,,,,By Beatrice Loayza,"[{'firstname': 'Beatrice', 'middlename': None,...",
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2395,My Policeman,My Policeman,0,en,https://www.amazon.com/dp/B09Y8SHDPD,"In the late 1990s, the arrival of elderly inva...",24.341,113,0,2022-10-20,...,"‘My Policeman’ Review: Two Love Affairs, Equal...",,,My Policeman,,,,By Teo Bugbee,"[{'firstname': 'Teo', 'middlename': None, 'las...",
2396,My Policeman,My Policeman,0,en,https://www.amazon.com/dp/B09Y8SHDPD,"In the late 1990s, the arrival of elderly inva...",24.341,113,0,2022-10-20,...,"‘My Policeman’ Review: Two Love Affairs, Equal...",,,My Policeman,,,,By Teo Bugbee,"[{'firstname': 'Teo', 'middlename': None, 'las...",
2397,My Policeman,My Policeman,0,en,https://www.amazon.com/dp/B09Y8SHDPD,"In the late 1990s, the arrival of elderly inva...",24.341,113,0,2022-10-20,...,"‘My Policeman’ Review: Two Love Affairs, Equal...",,,My Policeman,,,,By Teo Bugbee,"[{'firstname': 'Teo', 'middlename': None, 'las...",
2398,My Policeman,My Policeman,0,en,https://www.amazon.com/dp/B09Y8SHDPD,"In the late 1990s, the arrival of elderly inva...",24.341,113,0,2022-10-20,...,"‘My Policeman’ Review: Two Love Affairs, Equal...",,,My Policeman,,,,By Teo Bugbee,"[{'firstname': 'Teo', 'middlename': None, 'las...",


In [17]:
# Drop "byline.person" column
combined_title_df.drop("byline.person",axis=1, inplace=True)
combined_title_df

Unnamed: 0,title,original_title,budget,original_language,homepage,overview,popularity,runtime,revenue,release_date,...,word_count,headline.main,headline.kicker,headline.content_kicker,headline.print_headline,headline.name,headline.seo,headline.sub,byline.original,byline.organization
0,After Love,After Love,0,en,,Set in the port town of Dover in the South-Eas...,20.945,89,0,2021-06-04,...,359,‘After Love’ Review: The Other Woman,Critic’s Pick,,After Love,,,,By Beatrice Loayza,
1,After Love,After Love,0,en,,Set in the port town of Dover in the South-Eas...,20.945,89,0,2021-06-04,...,359,‘After Love’ Review: The Other Woman,Critic’s Pick,,After Love,,,,By Beatrice Loayza,
2,After Love,After Love,0,en,,Set in the port town of Dover in the South-Eas...,20.945,89,0,2021-06-04,...,359,‘After Love’ Review: The Other Woman,Critic’s Pick,,After Love,,,,By Beatrice Loayza,
3,After Love,After Love,0,en,,Set in the port town of Dover in the South-Eas...,20.945,89,0,2021-06-04,...,359,‘After Love’ Review: The Other Woman,Critic’s Pick,,After Love,,,,By Beatrice Loayza,
4,After Love,After Love,0,en,,Set in the port town of Dover in the South-Eas...,20.945,89,0,2021-06-04,...,359,‘After Love’ Review: The Other Woman,Critic’s Pick,,After Love,,,,By Beatrice Loayza,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2395,My Policeman,My Policeman,0,en,https://www.amazon.com/dp/B09Y8SHDPD,"In the late 1990s, the arrival of elderly inva...",24.341,113,0,2022-10-20,...,303,"‘My Policeman’ Review: Two Love Affairs, Equal...",,,My Policeman,,,,By Teo Bugbee,
2396,My Policeman,My Policeman,0,en,https://www.amazon.com/dp/B09Y8SHDPD,"In the late 1990s, the arrival of elderly inva...",24.341,113,0,2022-10-20,...,303,"‘My Policeman’ Review: Two Love Affairs, Equal...",,,My Policeman,,,,By Teo Bugbee,
2397,My Policeman,My Policeman,0,en,https://www.amazon.com/dp/B09Y8SHDPD,"In the late 1990s, the arrival of elderly inva...",24.341,113,0,2022-10-20,...,303,"‘My Policeman’ Review: Two Love Affairs, Equal...",,,My Policeman,,,,By Teo Bugbee,
2398,My Policeman,My Policeman,0,en,https://www.amazon.com/dp/B09Y8SHDPD,"In the late 1990s, the arrival of elderly inva...",24.341,113,0,2022-10-20,...,303,"‘My Policeman’ Review: Two Love Affairs, Equal...",,,My Policeman,,,,By Teo Bugbee,


In [18]:
# Delete duplicate rows and reset index
combined_title_df.drop_duplicates(inplace=True)
combined_title_df.reset_index(drop=True, inplace=True)
combined_title_df

Unnamed: 0,title,original_title,budget,original_language,homepage,overview,popularity,runtime,revenue,release_date,...,word_count,headline.main,headline.kicker,headline.content_kicker,headline.print_headline,headline.name,headline.seo,headline.sub,byline.original,byline.organization
0,After Love,After Love,0,en,,Set in the port town of Dover in the South-Eas...,20.945,89,0,2021-06-04,...,359,‘After Love’ Review: The Other Woman,Critic’s Pick,,After Love,,,,By Beatrice Loayza,
1,Alcarràs,Alcarràs,0,ca,https://www.alcarras-film.com/,"In a small village in Catalonia, the peach far...",14.598,120,0,2022-04-29,...,306,‘Alcarràs’ Review: Labor of Love,,,Alcarràs,,,,By Devika Girish,
2,The Sound of Christmas,The Sound of Christmas,0,en,,A woman facing eviction just before Christmas ...,1.173,0,0,2022-11-24,...,277,‘The Sound of Christmas’ Review: A Gospel Sing...,,,The Sound Of Christmas,,,,By Concepción de León,
3,The Inspection,The Inspection,0,en,https://a24films.com/films/inspection,"Ellis French is a young, gay Black man, reject...",26.393,95,270613,2022-11-18,...,836,"‘The Inspection’ Review: Boot Camp, a Love Story",critic’s pick,,"A Few Good Men, Some With Secrets",,,,By A.O. Scott,
4,Bones and All,Bones and All,16000000,it,https://www.bonesandallfilm.net/,"Abandoned by her father, a young woman embarks...",30.307,131,15234907,2022-11-18,...,710,‘Bones and All’ Review: You Eat What You Are,,,"It’s Eat, Prey, Love on a Journey of Self-Disc...",,,,By A.O. Scott,
5,My Policeman,My Policeman,0,en,https://www.amazon.com/dp/B09Y8SHDPD,"In the late 1990s, the arrival of elderly inva...",24.341,113,0,2022-10-20,...,303,"‘My Policeman’ Review: Two Love Affairs, Equal...",,,My Policeman,,,,By Teo Bugbee,


In [19]:
# Export data to CSV without the index
combined_title_df.to_csv("collected_data_actual.csv", index=False)