### Import Required Libraries and Set Up Environment Variables

In [31]:
# Dependencies
import requests
import time
from dotenv import load_dotenv
import os
import pandas as pd
import json

In [33]:
# Set environment variables from the .env in the local environment
load_dotenv('.env')

nyt_api_key = os.getenv('NYT')
tmdb_api_key = os.getenv('TMDB')

# I really struggled to get the API's to work. 
#I used AskBCS where they were finally able to help me get this figured out. 
# print(nyt_api_key, tmdb_api_key)

### Access the New York Times API

In [36]:
# Set the base URL
url = "https://api.nytimes.com/svc/search/v2/articlesearch.json?"

# Filter for movie reviews with "love" in the headline
# section_name should be "Movies"
# type_of_material should be "Review"
filter_query = 'section_name:"Movies" AND type_of_material:"Review" AND headline:"love"'

# Use a sort filter, sort by newest
sort = "newest"

# Select the following fields to return:
# headline, web_url, snippet, source, keywords, pub_date, byline, word_count
field_list = "headline,web_url,snippet,source,keywords,pub_date,byline,word_count"

# Search for reviews published between a begin and end date
begin_date = "20130101"
end_date = "20230531"

# Build URL
query_url = (
    f"{url}api-key={nyt_api_key}&begin_date={begin_date}&end_date={end_date}"
    + f'&fq={filter_query}&sort={sort}&fl={field_list}'
)

In [38]:
# print query url
# print(query_url)

#checking on the url

In [40]:
# Print `response_data variable`
reviews_df = requests.get(query_url).json()
reviews_df

#checking to see what response I am getting

{'status': 'OK',
 'copyright': 'Copyright (c) 2024 The New York Times Company. All Rights Reserved.',
 'response': {'docs': [{'web_url': 'https://www.nytimes.com/2023/05/25/movies/the-attachment-diaries-review.html',
    'snippet': 'A gynecologist and her patient form a horrifyingly twisted connection in this batty, bloody Argentine melodrama.',
    'source': 'The New York Times',
    'headline': {'main': '‘The Attachment Diaries’ Review: Love, Sick',
     'kicker': None,
     'content_kicker': None,
     'print_headline': 'The Attachment Diaries',
     'name': None,
     'seo': None,
     'sub': None},
    'keywords': [{'name': 'subject',
      'value': 'Movies',
      'rank': 1,
      'major': 'N'},
     {'name': 'creative_works',
      'value': 'The Attachment Diaries (Movie)',
      'rank': 2,
      'major': 'N'},
     {'name': 'persons',
      'value': 'Diment, Valentin Javier',
      'rank': 3,
      'major': 'N'}],
    'pub_date': '2023-05-25T11:00:03+0000',
    'byline': {'orig

In [27]:
# Trying to see how many rows and columns the data has
#rows, columns = reviews_df.shape
#print(f"The DataFrame has{rows} rows and {columns} columns.")

In [44]:
#reviews_df.info()
#looking at column names and data types

In [46]:
# Create an empty list to store the reviews
reviews_list = []

# loop through pages 0-19
for page_number in range(20):   

    # Set up the base query URL
    query_url = f"{url}api-key={nyt_api_key}&begin_date={begin_date}&end_date={end_date}"
   
    # create query with a page number
    query_url_with_page = f'{query_url}&page={page_number}'
       
    # API results show 10 articles at a time
    print(f"Checked page: {page_number}")
    
    # Make a "GET" request and retrieve the JSON
    reviews = requests.get(query_url_with_page).json()
            
    # Add a twelve second interval between queries to stay within API query limits
    time.sleep(12)
   
    if not reviews["response"]["docs"]:
        print(f"No results on page {page_number}")
        break  # Break from the loop if no results
    try:
        # If there are results, loop through and save each review
        for review in reviews["response"]["docs"]:
            reviews_list.append(reviews)
    except: 
        # Print the reviews added from the current page
        print(f"Page {page_number} reviews added to list.")

# I used Xpert and ChatGPT and looking at homework to generate this code

Checked page: 0
Checked page: 1
Checked page: 2
Checked page: 3
Checked page: 4
Checked page: 5
Checked page: 6
Checked page: 7
Checked page: 8
Checked page: 9
Checked page: 10
Checked page: 11
Checked page: 12
Checked page: 13
Checked page: 14
Checked page: 15
Checked page: 16
Checked page: 17
Checked page: 18
Checked page: 19


In [55]:
# Preview the first 5 results in JSON format using json.dumps
# with the argument indent=4 to format the data

print(json.dumps(reviews_list[:5], indent=4))  

#print(json.dumps(reviews_list[:5], indent=4))
# I used Xpert and ChatGPT and looking at homework to generate this code

[
    {
        "status": "OK",
        "copyright": "Copyright (c) 2024 The New York Times Company. All Rights Reserved.",
        "response": {
            "docs": [
                {
                    "abstract": "The Swiss event replaced James Levine with the Finnish maestro Esa-Pekka Salonen, and the orchestra responded with a crisp performance.",
                    "web_url": "https://www.nytimes.com/2015/07/21/arts/international/review-verbier-festival-recovers-in-style-with-finnish-maestro.html",
                    "snippet": "The Swiss event replaced James Levine with the Finnish maestro Esa-Pekka Salonen, and the orchestra responded with a crisp performance.",
                    "lead_paragraph": "VERBIER, Switzerland \u2014 No one could have been shocked when James Levine announced on July 10 that because of an ear infection, he would have to cancel his trip here, high in the Alps east of Geneva, to open the 22nd season of the Verbier Festival last Friday. This was to h

In [57]:
# Convert reviews_list to a Pandas DataFrame using json_normalize()
reviews_df = pd.json_normalize(reviews_list)

# Display the DataFrame
reviews_df

Unnamed: 0,status,copyright,response.docs,response.meta.hits,response.meta.offset,response.meta.time
0,OK,Copyright (c) 2024 The New York Times Company....,[{'abstract': 'The Swiss event replaced James ...,740113,0,85
1,OK,Copyright (c) 2024 The New York Times Company....,[{'abstract': 'The Swiss event replaced James ...,740113,0,85
2,OK,Copyright (c) 2024 The New York Times Company....,[{'abstract': 'The Swiss event replaced James ...,740113,0,85
3,OK,Copyright (c) 2024 The New York Times Company....,[{'abstract': 'The Swiss event replaced James ...,740113,0,85
4,OK,Copyright (c) 2024 The New York Times Company....,[{'abstract': 'The Swiss event replaced James ...,740113,0,85
...,...,...,...,...,...,...
195,OK,Copyright (c) 2024 The New York Times Company....,[{'abstract': 'The gunman used a semiautomatic...,740113,190,84
196,OK,Copyright (c) 2024 The New York Times Company....,[{'abstract': 'The gunman used a semiautomatic...,740113,190,84
197,OK,Copyright (c) 2024 The New York Times Company....,[{'abstract': 'The gunman used a semiautomatic...,740113,190,84
198,OK,Copyright (c) 2024 The New York Times Company....,[{'abstract': 'The gunman used a semiautomatic...,740113,190,84


In [53]:
# Trying to see how many rows and columns the data has
rows, columns = reviews_df.shape
print(f"The DataFrame has{rows} rows and {columns} columns.")



The DataFrame has200 rows and 6 columns.


In [39]:
# Convert reviews_list to a Pandas DataFrame using json_normalize()

from pandas import json_normalize

# Convert results_list to a Pandas DataFrame
reviews_df = json_normalize(reviews_list)

# Display the DataFrame
reviews_df

Unnamed: 0,status,copyright,response.docs,response.meta.hits,response.meta.offset,response.meta.time
0,OK,Copyright (c) 2024 The New York Times Company....,[{'abstract': 'The Swiss event replaced James ...,740113,0,80
1,OK,Copyright (c) 2024 The New York Times Company....,[{'abstract': 'The Swiss event replaced James ...,740113,0,80
2,OK,Copyright (c) 2024 The New York Times Company....,[{'abstract': 'The Swiss event replaced James ...,740113,0,80
3,OK,Copyright (c) 2024 The New York Times Company....,[{'abstract': 'The Swiss event replaced James ...,740113,0,80
4,OK,Copyright (c) 2024 The New York Times Company....,[{'abstract': 'The Swiss event replaced James ...,740113,0,80
...,...,...,...,...,...,...
195,OK,Copyright (c) 2024 The New York Times Company....,[{'abstract': 'The gunman used a semiautomatic...,740113,190,88
196,OK,Copyright (c) 2024 The New York Times Company....,[{'abstract': 'The gunman used a semiautomatic...,740113,190,88
197,OK,Copyright (c) 2024 The New York Times Company....,[{'abstract': 'The gunman used a semiautomatic...,740113,190,88
198,OK,Copyright (c) 2024 The New York Times Company....,[{'abstract': 'The gunman used a semiautomatic...,740113,190,88


In [15]:
# Convert reviews_list to a Pandas DataFrame using json_normalize()

import pandas as pd
from pandas import json_normalize

# Convert results_list to a Pandas DataFrame
reviews_df = json_normalize(reviews_list)

# Display the DataFrame
reviews_df





Unnamed: 0,abstract,web_url,snippet,lead_paragraph,source,multimedia,keywords,pub_date,document_type,news_desk,...,headline.content_kicker,headline.print_headline,headline.name,headline.seo,headline.sub,byline.original,byline.person,byline.organization,print_section,print_page
0,The Swiss event replaced James Levine with the...,https://www.nytimes.com/2015/07/21/arts/intern...,The Swiss event replaced James Levine with the...,"VERBIER, Switzerland — No one could have been ...",International New York Times,"[{'rank': 0, 'subtype': 'watch308', 'caption':...","[{'name': 'glocations', 'value': 'Switzerland'...",2015-07-20T10:53:05+0000,article,Culture,...,,Verbier Festival Recovers in Style,,,,By James R. Oestreich,"[{'firstname': 'James', 'middlename': 'R.', 'l...",,,
1,What the journey of a Union Pacific locomotive...,https://www.nytimes.com/2018/03/30/business/ec...,What the journey of a Union Pacific locomotive...,"CHICAGO — If North America were a factory, Uni...",The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...","[{'name': 'organizations', 'value': 'Union Pac...",2018-03-30T21:31:00+0000,article,Business,...,,It Nourishes the Continent. Now Its Lifeblood ...,,,,By Ana Swanson,"[{'firstname': 'Ana', 'middlename': None, 'las...",,B,1
2,State Department officials said the move was a...,https://www.nytimes.com/2020/08/13/us/politics...,State Department officials said the move was a...,WASHINGTON — The State Department announced on...,The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...","[{'name': 'subject', 'value': 'Chinese Languag...",2020-08-13T22:14:24+0000,article,Washington,...,,,,,,By Edward Wong,"[{'firstname': 'Edward', 'middlename': None, '...",,,
3,"Raking in cash for Biden, the former president...",https://www.nytimes.com/2020/07/30/us/politics...,"Raking in cash for Biden, the former president...","Obama takes the gloves off for Biden, and Cong...",The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...","[{'name': 'persons', 'value': 'Obama, Barack',...",2020-07-30T11:26:42+0000,article,Politics,...,,,,,,By Giovanni Russonello,"[{'firstname': 'Giovanni', 'middlename': None,...",,,
4,"Banks have tightened standards, becoming more ...",https://www.nytimes.com/2020/08/04/your-money/...,"Banks have tightened standards, becoming more ...","As public school teachers, Tori Smith and her ...",The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...","[{'name': 'subject', 'value': 'Mortgages', 'ra...",2020-08-04T15:29:25+0000,article,Business,...,,Borrowing Is Cheap. The Catch? Qualifying.,,,,By Tara Siegel Bernard,"[{'firstname': 'Tara', 'middlename': 'Siegel',...",,B,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,"Iceland, the smallest nation ever to compete a...",https://www.nytimes.com/2016/06/23/sports/socc...,"Iceland, the smallest nation ever to compete a...",A late winning goal. A spot in the knockout st...,The New York Times,"[{'rank': 0, 'subtype': 'watch308', 'caption':...","[{'name': 'subject', 'value': 'Soccer', 'rank'...",2016-06-23T00:50:24+0000,article,Sports,...,,"With Late Goal, Iceland Advances",,,,,[],,B,10
196,The U.S. withdrew its forces from Afghanistan ...,https://www.nytimes.com/2022/02/10/opinion/bid...,The U.S. withdrew its forces from Afghanistan ...,U.S. troops in Iraq quietly thwarted two separ...,The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...","[{'name': 'subject', 'value': 'Iraq War (2003-...",2022-02-10T21:21:24+0000,article,OpEd,...,,U.S. Forces Should Exit Iraq,,,,By Trita Parsi and Adam N. Weinstein,"[{'firstname': 'Trita', 'middlename': None, 'l...",,A,26
197,The episode began with a report of a $20 count...,https://www.nytimes.com/2020/05/29/us/derek-ch...,The episode began with a report of a $20 count...,MINNEAPOLIS — One was a veteran of the Minneap...,The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...","[{'name': 'persons', 'value': 'Floyd, George (...",2020-05-29T23:48:27+0000,article,National,...,,Fatal Encounter Wasn’t First Time Paths Crossed,,,,"By Matt Furber, Audra D. S. Burch and Frances ...","[{'firstname': 'Matt', 'middlename': None, 'la...",,A,1
198,"Ross Lovegrove, a British product designer, te...",https://wheels.blogs.nytimes.com/2013/04/09/re...,"Ross Lovegrove, a British product designer, te...",Renault unveiled a striking blue concept car b...,The New York Times,[],"[{'name': 'subject', 'value': 'Art', 'rank': 1...",2013-04-09T18:38:32+0000,article,,...,,Renault Shows Concept At Milan Furniture Fair,,,,By Phil Patton,"[{'firstname': 'Phil', 'middlename': None, 'la...",,AU,3


In [17]:
reviews_df.columns
#I added this additional step so I could see all of the columns

Index(['abstract', 'web_url', 'snippet', 'lead_paragraph', 'source',
       'multimedia', 'keywords', 'pub_date', 'document_type', 'news_desk',
       'section_name', 'subsection_name', 'type_of_material', '_id',
       'word_count', 'uri', 'headline.main', 'headline.kicker',
       'headline.content_kicker', 'headline.print_headline', 'headline.name',
       'headline.seo', 'headline.sub', 'byline.original', 'byline.person',
       'byline.organization', 'print_section', 'print_page'],
      dtype='object')

In [19]:
print(reviews_df['headline.main'])
# I added this additional step so I could see what was in the column 'headline.main'

0      Review: Verbier Festival Recovers in Style wit...
1      ‘It’s Factory North America,’ but Trump Could ...
2      U.S. Labels Chinese Language Education Group a...
3                          Obama (Privately) Slams Trump
4      Interest Rates Are Low, but Loans Are Harder t...
                             ...                        
195             Iceland Advances at Euros With Late Goal
196               Why Are American Troops Still in Iraq?
197    What Happened in the Chaotic Moments Before Ge...
198    Renault Shows Edgy Concept at Milan Furniture ...
199               Sell Them Trees, Then Steal Them Blind
Name: headline.main, Length: 200, dtype: object


In [21]:
# Extract the title from the "headline.main" column and
# save it to a new column "title"
# Title is between unicode characters \u2018 and \u2019. 
# End string should include " Review" to avoid cutting title early
reviews_df["title"] = reviews_df["headline.main"].apply(lambda st: st[st.find("\u2018")+1:st.find("\u2019 Review")])
reviews_df


Unnamed: 0,abstract,web_url,snippet,lead_paragraph,source,multimedia,keywords,pub_date,document_type,news_desk,...,headline.print_headline,headline.name,headline.seo,headline.sub,byline.original,byline.person,byline.organization,print_section,print_page,title
0,The Swiss event replaced James Levine with the...,https://www.nytimes.com/2015/07/21/arts/intern...,The Swiss event replaced James Levine with the...,"VERBIER, Switzerland — No one could have been ...",International New York Times,"[{'rank': 0, 'subtype': 'watch308', 'caption':...","[{'name': 'glocations', 'value': 'Switzerland'...",2015-07-20T10:53:05+0000,article,Culture,...,Verbier Festival Recovers in Style,,,,By James R. Oestreich,"[{'firstname': 'James', 'middlename': 'R.', 'l...",,,,Review: Verbier Festival Recovers in Style wit...
1,What the journey of a Union Pacific locomotive...,https://www.nytimes.com/2018/03/30/business/ec...,What the journey of a Union Pacific locomotive...,"CHICAGO — If North America were a factory, Uni...",The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...","[{'name': 'organizations', 'value': 'Union Pac...",2018-03-30T21:31:00+0000,article,Business,...,It Nourishes the Continent. Now Its Lifeblood ...,,,,By Ana Swanson,"[{'firstname': 'Ana', 'middlename': None, 'las...",,B,1,"It’s Factory North America,’ but Trump Could H..."
2,State Department officials said the move was a...,https://www.nytimes.com/2020/08/13/us/politics...,State Department officials said the move was a...,WASHINGTON — The State Department announced on...,The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...","[{'name': 'subject', 'value': 'Chinese Languag...",2020-08-13T22:14:24+0000,article,Washington,...,,,,,By Edward Wong,"[{'firstname': 'Edward', 'middlename': None, '...",,,,U.S. Labels Chinese Language Education Group a...
3,"Raking in cash for Biden, the former president...",https://www.nytimes.com/2020/07/30/us/politics...,"Raking in cash for Biden, the former president...","Obama takes the gloves off for Biden, and Cong...",The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...","[{'name': 'persons', 'value': 'Obama, Barack',...",2020-07-30T11:26:42+0000,article,Politics,...,,,,,By Giovanni Russonello,"[{'firstname': 'Giovanni', 'middlename': None,...",,,,Obama (Privately) Slams Trum
4,"Banks have tightened standards, becoming more ...",https://www.nytimes.com/2020/08/04/your-money/...,"Banks have tightened standards, becoming more ...","As public school teachers, Tori Smith and her ...",The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...","[{'name': 'subject', 'value': 'Mortgages', 'ra...",2020-08-04T15:29:25+0000,article,Business,...,Borrowing Is Cheap. The Catch? Qualifying.,,,,By Tara Siegel Bernard,"[{'firstname': 'Tara', 'middlename': 'Siegel',...",,B,1,"Interest Rates Are Low, but Loans Are Harder t..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,"Iceland, the smallest nation ever to compete a...",https://www.nytimes.com/2016/06/23/sports/socc...,"Iceland, the smallest nation ever to compete a...",A late winning goal. A spot in the knockout st...,The New York Times,"[{'rank': 0, 'subtype': 'watch308', 'caption':...","[{'name': 'subject', 'value': 'Soccer', 'rank'...",2016-06-23T00:50:24+0000,article,Sports,...,"With Late Goal, Iceland Advances",,,,,[],,B,10,Iceland Advances at Euros With Late Goa
196,The U.S. withdrew its forces from Afghanistan ...,https://www.nytimes.com/2022/02/10/opinion/bid...,The U.S. withdrew its forces from Afghanistan ...,U.S. troops in Iraq quietly thwarted two separ...,The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...","[{'name': 'subject', 'value': 'Iraq War (2003-...",2022-02-10T21:21:24+0000,article,OpEd,...,U.S. Forces Should Exit Iraq,,,,By Trita Parsi and Adam N. Weinstein,"[{'firstname': 'Trita', 'middlename': None, 'l...",,A,26,Why Are American Troops Still in Iraq
197,The episode began with a report of a $20 count...,https://www.nytimes.com/2020/05/29/us/derek-ch...,The episode began with a report of a $20 count...,MINNEAPOLIS — One was a veteran of the Minneap...,The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...","[{'name': 'persons', 'value': 'Floyd, George (...",2020-05-29T23:48:27+0000,article,National,...,Fatal Encounter Wasn’t First Time Paths Crossed,,,,"By Matt Furber, Audra D. S. Burch and Frances ...","[{'firstname': 'Matt', 'middlename': None, 'la...",,A,1,What Happened in the Chaotic Moments Before Ge...
198,"Ross Lovegrove, a British product designer, te...",https://wheels.blogs.nytimes.com/2013/04/09/re...,"Ross Lovegrove, a British product designer, te...",Renault unveiled a striking blue concept car b...,The New York Times,[],"[{'name': 'subject', 'value': 'Art', 'rank': 1...",2013-04-09T18:38:32+0000,article,,...,Renault Shows Concept At Milan Furniture Fair,,,,By Phil Patton,"[{'firstname': 'Phil', 'middlename': None, 'la...",,AU,3,Renault Shows Edgy Concept at Milan Furniture Fai


In [23]:
print(reviews_df['title'])

0      Review: Verbier Festival Recovers in Style wit...
1      It’s Factory North America,’ but Trump Could H...
2      U.S. Labels Chinese Language Education Group a...
3                           Obama (Privately) Slams Trum
4      Interest Rates Are Low, but Loans Are Harder t...
                             ...                        
195              Iceland Advances at Euros With Late Goa
196                Why Are American Troops Still in Iraq
197    What Happened in the Chaotic Moments Before Ge...
198    Renault Shows Edgy Concept at Milan Furniture Fai
199                Sell Them Trees, Then Steal Them Blin
Name: title, Length: 200, dtype: object


In [25]:
reviews_df['keywords']

0      [{'name': 'glocations', 'value': 'Switzerland'...
1      [{'name': 'organizations', 'value': 'Union Pac...
2      [{'name': 'subject', 'value': 'Chinese Languag...
3      [{'name': 'persons', 'value': 'Obama, Barack',...
4      [{'name': 'subject', 'value': 'Mortgages', 'ra...
                             ...                        
195    [{'name': 'subject', 'value': 'Soccer', 'rank'...
196    [{'name': 'subject', 'value': 'Iraq War (2003-...
197    [{'name': 'persons', 'value': 'Floyd, George (...
198    [{'name': 'subject', 'value': 'Art', 'rank': 1...
199    [{'name': 'subject', 'value': 'Movies', 'rank'...
Name: keywords, Length: 200, dtype: object

In [27]:
# Extract 'name' and 'value' from items in "keywords" column
def extract_keywords(keyword_list):
    extracted_keywords = ""
    for item in keyword_list:
        # Extract 'name' and 'value'
        keyword = f"{item['name']}: {item['value']};" 
        # Append the keyword item to the extracted_keywords list
        extracted_keywords += keyword
    return extracted_keywords

# Fix the "keywords" column by converting cells from a list to a string
reviews_df['keywords'] = reviews_df['keywords'].apply(extract_keywords)

reviews_df

Unnamed: 0,abstract,web_url,snippet,lead_paragraph,source,multimedia,keywords,pub_date,document_type,news_desk,...,headline.print_headline,headline.name,headline.seo,headline.sub,byline.original,byline.person,byline.organization,print_section,print_page,title
0,The Swiss event replaced James Levine with the...,https://www.nytimes.com/2015/07/21/arts/intern...,The Swiss event replaced James Levine with the...,"VERBIER, Switzerland — No one could have been ...",International New York Times,"[{'rank': 0, 'subtype': 'watch308', 'caption':...",glocations: Switzerland;organizations: Metropo...,2015-07-20T10:53:05+0000,article,Culture,...,Verbier Festival Recovers in Style,,,,By James R. Oestreich,"[{'firstname': 'James', 'middlename': 'R.', 'l...",,,,Review: Verbier Festival Recovers in Style wit...
1,What the journey of a Union Pacific locomotive...,https://www.nytimes.com/2018/03/30/business/ec...,What the journey of a Union Pacific locomotive...,"CHICAGO — If North America were a factory, Uni...",The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...",organizations: Union Pacific Corporation;subje...,2018-03-30T21:31:00+0000,article,Business,...,It Nourishes the Continent. Now Its Lifeblood ...,,,,By Ana Swanson,"[{'firstname': 'Ana', 'middlename': None, 'las...",,B,1,"It’s Factory North America,’ but Trump Could H..."
2,State Department officials said the move was a...,https://www.nytimes.com/2020/08/13/us/politics...,State Department officials said the move was a...,WASHINGTON — The State Department announced on...,The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...",subject: Chinese Language;subject: Foreign Stu...,2020-08-13T22:14:24+0000,article,Washington,...,,,,,By Edward Wong,"[{'firstname': 'Edward', 'middlename': None, '...",,,,U.S. Labels Chinese Language Education Group a...
3,"Raking in cash for Biden, the former president...",https://www.nytimes.com/2020/07/30/us/politics...,"Raking in cash for Biden, the former president...","Obama takes the gloves off for Biden, and Cong...",The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...","persons: Obama, Barack;persons: Biden, Joseph ...",2020-07-30T11:26:42+0000,article,Politics,...,,,,,By Giovanni Russonello,"[{'firstname': 'Giovanni', 'middlename': None,...",,,,Obama (Privately) Slams Trum
4,"Banks have tightened standards, becoming more ...",https://www.nytimes.com/2020/08/04/your-money/...,"Banks have tightened standards, becoming more ...","As public school teachers, Tori Smith and her ...",The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...",subject: Mortgages;subject: Credit and Debt;su...,2020-08-04T15:29:25+0000,article,Business,...,Borrowing Is Cheap. The Catch? Qualifying.,,,,By Tara Siegel Bernard,"[{'firstname': 'Tara', 'middlename': 'Siegel',...",,B,1,"Interest Rates Are Low, but Loans Are Harder t..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,"Iceland, the smallest nation ever to compete a...",https://www.nytimes.com/2016/06/23/sports/socc...,"Iceland, the smallest nation ever to compete a...",A late winning goal. A spot in the knockout st...,The New York Times,"[{'rank': 0, 'subtype': 'watch308', 'caption':...",subject: Soccer;glocations: Iceland;glocations...,2016-06-23T00:50:24+0000,article,Sports,...,"With Late Goal, Iceland Advances",,,,,[],,B,10,Iceland Advances at Euros With Late Goa
196,The U.S. withdrew its forces from Afghanistan ...,https://www.nytimes.com/2022/02/10/opinion/bid...,The U.S. withdrew its forces from Afghanistan ...,U.S. troops in Iraq quietly thwarted two separ...,The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...",subject: Iraq War (2003-11);subject: Afghanist...,2022-02-10T21:21:24+0000,article,OpEd,...,U.S. Forces Should Exit Iraq,,,,By Trita Parsi and Adam N. Weinstein,"[{'firstname': 'Trita', 'middlename': None, 'l...",,A,26,Why Are American Troops Still in Iraq
197,The episode began with a report of a $20 count...,https://www.nytimes.com/2020/05/29/us/derek-ch...,The episode began with a report of a $20 count...,MINNEAPOLIS — One was a veteran of the Minneap...,The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...","persons: Floyd, George (d 2020);persons: Chauv...",2020-05-29T23:48:27+0000,article,National,...,Fatal Encounter Wasn’t First Time Paths Crossed,,,,"By Matt Furber, Audra D. S. Burch and Frances ...","[{'firstname': 'Matt', 'middlename': None, 'la...",,A,1,What Happened in the Chaotic Moments Before Ge...
198,"Ross Lovegrove, a British product designer, te...",https://wheels.blogs.nytimes.com/2013/04/09/re...,"Ross Lovegrove, a British product designer, te...",Renault unveiled a striking blue concept car b...,The New York Times,[],subject: Art;subject: Automobiles;subject: Des...,2013-04-09T18:38:32+0000,article,,...,Renault Shows Concept At Milan Furniture Fair,,,,By Phil Patton,"[{'firstname': 'Phil', 'middlename': None, 'la...",,AU,3,Renault Shows Edgy Concept at Milan Furniture Fai


In [40]:
# Create a list from the "title" column using to_list()
# These titles will be used in the query for The Movie Database
title_list = reviews_df['title'].to_list()
title_list

['Review: Verbier Festival Recovers in Style with Finnish Maestr',
 'It’s Factory North America,’ but Trump Could Hobble I',
 'U.S. Labels Chinese Language Education Group a Diplomatic Missio',
 'Obama (Privately) Slams Trum',
 'Interest Rates Are Low, but Loans Are Harder to Get. Here’s Why',
 'The Voice’ and ‘Gentefied',
 'This Is Not Tom Brady’s Epilogu',
 'Pop and Rock Listings for March 29-April ',
 'The Evangelical Case Against Judge Kavanaug',
 'Here’s Another Fun Thing You Can Do',
 'Suntan,’ and Refusing to Let G',
 'After F.B.I.’s Inquiry Into Omar Mateen, a Focus on What Else Could Be Don',
 'Brett Howden Stands Out in the Rangers’ Youth Movemen',
 'It’s Him, Not He',
 'Supreme Court, Republican Party, Poland: Your Wednesday Evening Briefin',
 'Betting on Default',
 'With Inflation, Workers Are Facing Return-to-Office Sticker Shoc',
 'Covid Forces Families to Rethink Nursing Home Car',
 'Killer Robots.',
 'Wonder Woman and Her Evolving Loo',
 'Giants Add a Center, the First 

### Access The Movie Database API

In [42]:
# Prepare The Movie Database query
nyt_api_key = os.getenv('NYT')
tmdb_api_key = os.getenv('TMDB')

url = "https://api.themoviedb.org/3/search/movie?query="
tmdb_key_string = "&api_key=" + tmdb_api_key


In [44]:
    # Perform a "GET" request for The Movie Database
reviews = requests.get(author_query_url + author)

    # Include a try clause to search for the full movie details.
    # Use the except clause to print out a statement if a movie
    # is not found.


# Create an empty list to store the results
tmdb_movies_list = []

# Create a request counter to sleep the requests after a multiple
# of 50 requests
request_counter = 0

# Loop through the titles
for title in title_list:
    # Check if we need to sleep before making a request
    if request_counter % 50 == 0 and request_counter != 0:
        time.sleep(1)  # Sleep for 1 second after every 50 requests

    # Increment the request counter
    request_counter += 1

    # Perform a "GET" request to The Movie Database
    results = requests.get(query_url_with_page).json()
    response = requests.get(f"{url}?api_key={tmdb_key_string}&query={title}")
    response_data = response.json()

    try:
        # Get movie ID
        movie_ID = response_data['results'][0]['id']
        
        # Make a request for the full movie details
        details_url = f"https://api.themoviedb.org/3/movie/{movie_ID}?api_key={tmdb_key_string}"
        full_response = requests.get(details_url)
        full_data = full_response.json()

        # Extract relevant details
        genres = [genre['name'] for genre in full_data.get('genres', [])]
        spoken_languages = [lang['english_name'] for lang in full_data.get('spoken_languages', [])]
        production_countries = [country['name'] for country in full_data.get('production_countries', [])]

        # Store the relevant data in a dictionary
        movie_info = {
            "title": title,
            "genres": genres,
            "spoken_languages": spoken_languages,
            "production_countries": production_countries
        }

        # Append the dictionary to the results list
        mov_results_list.append(movie_info)

        # Print out the title that was found
        print(f"Found {title}")

    except (IndexError, KeyError):
        # Handle the case where the movie is not found or there is an issue with the response
        print(f"{title} not found or there was an error retrieving the data.")

# The mov_results_list now contains the extracted movie data


Review: Verbier Festival Recovers in Style with Finnish Maestr not found or there was an error retrieving the data.
It’s Factory North America,’ but Trump Could Hobble I not found or there was an error retrieving the data.
U.S. Labels Chinese Language Education Group a Diplomatic Missio not found or there was an error retrieving the data.
Obama (Privately) Slams Trum not found or there was an error retrieving the data.
Interest Rates Are Low, but Loans Are Harder to Get. Here’s Why not found or there was an error retrieving the data.
The Voice’ and ‘Gentefied not found or there was an error retrieving the data.
This Is Not Tom Brady’s Epilogu not found or there was an error retrieving the data.
Pop and Rock Listings for March 29-April  not found or there was an error retrieving the data.
The Evangelical Case Against Judge Kavanaug not found or there was an error retrieving the data.
Here’s Another Fun Thing You Can Do not found or there was an error retrieving the data.
Suntan,’ and Re

In [None]:
# Create an empty list to store the results
mov_results_list = []

# Create a request counter to sleep the requests after a multiple
# of 50 requests

request_counter = 0

# Loop for making requests
for i in range(100):  # Assuming 100 requests need to be made
    # Make the API request here
    request_counter += 1
    
    if request_counter % 50 == 0:
        time.sleep(1)  # Sleep for 1 second after every 50 requests

# Loop through the titles; title_list
  

    # Check if we need to sleep before making a request


    # Add 1 to the request counter

    
    # Perform a "GET" request for The Movie Database


    # Include a try clause to search for the full movie details.
    # Use the except clause to print out a statement if a movie
    # is not found.
    try:
        # Get movie id
        movie_ID = response_data['results'][0]['id']

        # Make a request for a the full movie details
        response _data = requests.get(url + titles + tmdb_key_string).json()

        # Execute "GET" request with url

        
        # Extract the genre names into a list


        # Extract the spoken_languages' English name into a list


        # Extract the production_countries' name into a list


        # Add the relevant data to a dictionary and
        # append it to the tmdb_movies_list list

        
        # Print out the title that was found
        except:
            print(titles+"not found")


In [None]:
# Preview the first 5 results in JSON format
# Use json.dumps with argument indent=4 to format data


# for review in results_list[:5]:
 #   print(json.dumps(results, indent=4))  


In [None]:
# Convert the results to a DataFrame


### Merge and Clean the Data for Export

In [None]:
# Merge the New York Times reviews and TMDB DataFrames on title


In [None]:
# Remove list brackets and quotation marks on the columns containing lists
# Create a list of the columns that need fixing


# Create a list of characters to remove


# Loop through the list of columns to fix

    # Convert the column to type 'str'


    # Loop through characters to remove


# Display the fixed DataFrame


In [None]:
# Drop "byline.person" column


In [None]:
# Delete duplicate rows and reset index


In [None]:
# Export data to CSV without the index
