### Import Required Libraries and Set Up Environment Variables

In [83]:
# Dependencies
import requests
import time
from dotenv import load_dotenv
import os
import pandas as pd
import json
from pandas import json_normalize


In [84]:
# Set environment variables from the .env in the local environment
load_dotenv("myexample.env")

nyt_api_key = os.getenv("NYT_API_KEY")
tmdb_api_key = os.getenv("TMDB_API_KEY")

tmdb_api_key



'62c616a298014faf267ba26db19a9a2c'

### Access the New York Times API

In [85]:
# Set the base URL
url = "https://api.nytimes.com/svc/search/v2/articlesearch.json?"

# Filter for movie reviews with "love" in the headline
# section_name should be "Movies"
# type_of_material should be "Review"
filter_query = 'section_name:"Movies" AND type_of_material:"Review" AND headline:"love"'

# Use a sort filter, sort by newest
sort = "newest"

# Select the following fields to return:
# headline, web_url, snippet, source, keywords, pub_date, byline, word_count
field_list = "headline,web_url,snippet,source,keywords,pub_date,byline,word_count"

# Search for reviews published between a begin and end date
begin_date = "20130101"
end_date = "20230531"

# Build URL
nyt_api_url = f"{url}fq={filter_query}&fl={field_list}&begin_date={begin_date}&end_date{end_date}&sort={sort}&api-key={nyt_api_key}"



In [86]:
# Create an empty list to store the reviews
reviews_list  = []

# loop through pages 0-19

    # create query with a page number
    # API results show 10 articles at a time
 # loop through the reviews["response"]["docs"] and append each review to the list
for pageid in range(0, 20):    
        # Make a "GET" request and retrieve the JSON
        reviews = requests.get(nyt_api_url + "&page=" + str(pageid)).json()
        # Print the page that was just retrieved
        print(f"Checked page {pageid}")
        # Add a twelve second interval between queries to stay within API query limits
        time.sleep(12)
        try:
            # Try and save the reviews to the reviews_list
            for review in reviews["response"]["docs"]:
                reviews_list.append(review)               
        except:
            print(f"No data found for the page {pageid}")
      

reviews_list
  


Checked page 0
Checked page 1


[{'web_url': 'https://www.nytimes.com/2024/04/25/movies/challengers-review-zendaya-mike-faist-josh-oconnor.html',
  'snippet': 'Zendaya, Josh O’Connor and Mike Faist play friends, lovers and foes on and off the tennis court in Luca Guadagnino’s latest.',
  'source': 'The New York Times',
  'headline': {'main': '‘Challengers’ Review: Game, Set, Love Matches',
   'kicker': 'Critic’s pick',
   'content_kicker': None,
   'print_headline': 'Triple Fault: Game, Set, Love Matches',
   'name': None,
   'seo': None,
   'sub': None},
  'keywords': [{'name': 'subject', 'value': 'Movies', 'rank': 1, 'major': 'N'},
   {'name': 'creative_works',
    'value': 'Challengers (Movie)',
    'rank': 2,
    'major': 'N'},
   {'name': 'persons', 'value': 'Guadagnino, Luca', 'rank': 3, 'major': 'N'},
   {'name': 'persons', 'value': 'Coleman, Zendaya', 'rank': 4, 'major': 'N'},
   {'name': 'persons', 'value': 'Faist, Mike', 'rank': 5, 'major': 'N'},
   {'name': 'persons',
    'value': "O'Connor, Josh (Actor)",

In [81]:
# Preview the first 5 results in JSON format
# Use json.dumps with argument indent=4 to format data
print(json.dumps(reviews_list[:5],indent=4))


[
    {
        "web_url": "https://www.nytimes.com/2024/04/25/movies/challengers-review-zendaya-mike-faist-josh-oconnor.html",
        "snippet": "Zendaya, Josh O\u2019Connor and Mike Faist play friends, lovers and foes on and off the tennis court in Luca Guadagnino\u2019s latest.",
        "source": "The New York Times",
        "headline": {
            "main": "\u2018Challengers\u2019 Review: Game, Set, Love Matches",
            "kicker": "Critic\u2019s pick",
            "content_kicker": null,
            "print_headline": "Triple Fault: Game, Set, Love Matches",
            "name": null,
            "seo": null,
            "sub": null
        },
        "keywords": [
            {
                "name": "subject",
                "value": "Movies",
                "rank": 1,
                "major": "N"
            },
            {
                "name": "creative_works",
                "value": "Challengers (Movie)",
                "rank": 2,
                "major": "N"


In [82]:
# Convert reviews_list to a Pandas DataFrame using json_normalize()
reviews_df = json_normalize(reviews_list)
reviews_df 


Unnamed: 0,web_url,snippet,source,keywords,pub_date,word_count,headline.main,headline.kicker,headline.content_kicker,headline.print_headline,headline.name,headline.seo,headline.sub,byline.original,byline.person,byline.organization
0,https://www.nytimes.com/2024/04/25/movies/chal...,"Zendaya, Josh O’Connor and Mike Faist play fri...",The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2024-04-25T17:15:47+0000,1022,"‘Challengers’ Review: Game, Set, Love Matches",Critic’s pick,,"Triple Fault: Game, Set, Love Matches",,,,By Manohla Dargis,"[{'firstname': 'Manohla', 'middlename': None, ...",
1,https://www.nytimes.com/2024/04/25/movies/unsu...,"In fact, there’s a lot of singing in the clan ...",The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2024-04-25T09:04:35+0000,274,‘Unsung Hero’ Review: Music Dedicated to the O...,,,Unsung Hero,,,,By Nicolas Rapold,"[{'firstname': 'Nicolas', 'middlename': None, ...",
2,https://www.nytimes.com/2024/03/07/movies/love...,"In this neo-noir, the ever reliable, always wa...",The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2024-03-07T10:03:26+0000,986,"‘Love Lies Bleeding’ Review: Kristen Stewart, ...",Critic’s Pick,,"Bad Romance, Hard-Boiled Style",,,,By Manohla Dargis,"[{'firstname': 'Manohla', 'middlename': None, ...",
3,https://www.nytimes.com/2024/02/14/movies/blee...,Ewan McGregor plays father to his real-life da...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2024-02-14T23:21:32+0000,406,‘Bleeding Love’ Review: On the Road With Dad,,,"Riding Shotgun With Dad, Onscreen and in Reality",,,,By Manohla Dargis,"[{'firstname': 'Manohla', 'middlename': None, ...",
4,https://www.nytimes.com/2024/02/14/movies/bob-...,This patchy biopic lauds the Marley of dormito...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2024-02-14T10:02:25+0000,926,‘Bob Marley: One Love’ Review: Mostly Positive...,,,"Symbol of Unity, Man of Complexity",,,,By Amy Nicholson,"[{'firstname': 'Amy', 'middlename': None, 'las...",
5,https://www.nytimes.com/2024/02/08/movies/moll...,This rom-com brings futuristic absurdity and n...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2024-02-08T12:00:09+0000,275,"‘Molli and Max in the Future’ Review: Love, In...",Critic’s Pick,,Molli and Max In the Future,,,,By Nicolas Rapold,"[{'firstname': 'Nicolas', 'middlename': None, ...",
6,https://www.nytimes.com/2024/02/08/movies/the-...,Juliette Binoche stars in an instant culinary ...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2024-02-08T10:00:20+0000,1046,"‘The Taste of Things’ Review: Love, Loss and L...",Critic’s Pick,,"At Their Table, Passion Imbues Every Meal",,,,By Alissa Wilkinson,"[{'firstname': 'Alissa', 'middlename': None, '...",
7,https://www.nytimes.com/2024/01/25/movies/pict...,The Brazilian director Kleber Mendonça Filho d...,The New York Times,"[{'name': 'subject', 'value': 'Documentary Fil...",2024-01-25T16:40:37+0000,796,‘Pictures of Ghosts’ Review: Layers of Love an...,Critic’s Pick,,His Love for Cinema and Home Are Inseparable,,,,By Manohla Dargis,"[{'firstname': 'Manohla', 'middlename': None, ...",
8,https://www.nytimes.com/2023/12/21/movies/anyo...,Sydney Sweeney and Glen Powell romp in a rom-c...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-12-21T14:00:07+0000,542,‘Anyone but You’ Review: Baring Bums in the La...,,,Adrift on the Tides of Fake Love,,,,By Amy Nicholson,"[{'firstname': 'Amy', 'middlename': None, 'las...",
9,https://www.nytimes.com/2023/11/16/movies/fall...,In the latest from the Finnish filmmaker Aki K...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-11-16T18:04:26+0000,873,‘Fallen Leaves’ Review: Love (and Laughs) Amon...,Critic’s pick,,"Love, and Laughs, Among the Ruins",,,,By Manohla Dargis,"[{'firstname': 'Manohla', 'middlename': None, ...",


In [64]:
# Extract the title from the "headline.main" column and
# save it to a new column "title"
# Title is between unicode characters \u2018 and \u2019. 
# End string should include " Review" to avoid cutting title early
reviews_df["title"] = reviews_df["headline.main"].str.extract(r'\u2018(.*?)\u2019')
reviews_df

Unnamed: 0,web_url,snippet,source,keywords,pub_date,word_count,headline.main,headline.kicker,headline.content_kicker,headline.print_headline,headline.name,headline.seo,headline.sub,byline.original,byline.person,byline.organization,title
0,https://www.nytimes.com/2024/03/07/movies/love...,"In this neo-noir, the ever reliable, always wa...",The New York Times,"subject: Movies;persons: Stewart, Kristen;pers...",2024-03-07T10:03:26+0000,986,"‘Love Lies Bleeding’ Review: Kristen Stewart, ...",Critic’s Pick,,"Bad Romance, Hard-Boiled Style",,,,By Manohla Dargis,"[{'firstname': 'Manohla', 'middlename': None, ...",,Love Lies Bleeding
1,https://www.nytimes.com/2023/10/05/movies/my-l...,This animated musical about a young woman’s se...,The New York Times,"subject: Movies;persons: Baumane, Signe;person...",2023-10-05T11:00:31+0000,254,‘My Love Affair With Marriage’ Review: A Tale ...,Critic’s Pick,,My Love Affair With Marriage,,,,By Natalia Winkelman,"[{'firstname': 'Natalia', 'middlename': None, ...",,My Love Affair With Marriage
2,https://www.nytimes.com/2023/08/10/movies/red-...,"This film, about an American president’s son w...",The New York Times,"subject: Movies;creative_works: Red, White & R...",2023-08-10T16:00:02+0000,529,"‘Red, White & Royal Blue’ Review: Keep Calm an...",,,Young Love in High Places,,,,By Amy Nicholson,"[{'firstname': 'Amy', 'middlename': None, 'las...",,"Red, White & Royal Blue"


In [22]:
# Extract 'name' and 'value' from items in "keywords" column
def extract_keywords(keyword_list):
    extracted_keywords = ""
    #keyword_list = keyword_list_Str.tolist()
    for item in keyword_list:
        # Extract 'name' and 'value'
        keyword = f"{item['name']}: {item['value']};" 
        # Append the keyword item to the extracted_keywords list
        extracted_keywords += keyword
    return extracted_keywords


reviews_df["keywords"] = reviews_df["keywords"].apply(extract_keywords)

reviews_df

TypeError: string indices must be integers

In [24]:
# Create a list from the "title" column using to_list()
# These titles will be used in the query for The Movie Database
titles  = reviews_df["title"].to_list()
titles 

['Love Lies Bleeding',
 'My Love Affair With Marriage',
 'Red, White & Royal Blue']

### Access The Movie Database API

In [47]:
# Prepare The Movie Database query
url = "https://api.themoviedb.org/3/search/movie?query="
tmdb_key_string = "&api_key=" + tmdb_api_key
tmdb_key_string_id = "?api_key=" + tmdb_api_key

In [59]:
# Create an empty list to store the results
tmdb_movies_list  = []
request_counter  = 1
movie_byid_api =  "https://api.themoviedb.org/3/movie/"
tmdb_dec = {}

#funcation to get movie id 
def get_movie_id(title):
  try:
    movieID = 0
    movie_api_url = f"{url}{title}{tmdb_key_string}"
    movie_response = requests.get(movie_api_url).json()
    movieID = movie_response["results"][0]['id']
    return movieID
  except:
    print(f"Movie not found :  {title}")  
  finally:
    return movieID
  
#funcation to get movie details 
def get_movie_details(id):
  movie_byid_url = f"{movie_byid_api}{movie_id}{tmdb_key_string_id}"
  response = requests.get(movie_byid_url).json()
  return response
  
# Loop through the titles
for title in titles:
  # Create a request counter to sleep the requests after a multiple of 50 requests
  if request_counter == 50:
     time.sleep(10)

  # Get movie id
  movie_id =  get_movie_id(title)
  
  if(movie_id > 0):
    try:
      # Make a request for a the full movie details
      movie_detail =  get_movie_details(movie_id)
      # Extract the genre names into a list
      genres = [genre['name'] for genre in movie_detail['genres']]
      # Extract the spoken_languages' English name into a list
      spoken_languages= [production_countries['name'] for production_countries in movie_detail['production_countries']]
      # Extract the production_countries' name into a list
      production_countries = [spoken_languages['name'] for spoken_languages in movie_detail['spoken_languages']]
      # Add the relevant data to a dictionary 
      movie_info = {
        "title": movie_detail["title"],
        "original_title": movie_detail["original_title"],
        "budget": movie_detail["budget"],
        "original_language": movie_detail["original_language"],
        "homepage": movie_detail["homepage"],
        "overview": movie_detail["overview"],
        "popularity": movie_detail["popularity"],
        "runtime": movie_detail["runtime"],
        "revenue": movie_detail["revenue"],
        "release_date": movie_detail["release_date"],
        "vote_average": movie_detail["vote_average"],
        "vote_count": movie_detail["vote_count"],
        "genres": genres,
        "spoken_languages": spoken_languages,
        "production_countries": production_countries
      }
       # append it to the tmdb_movies_list list
      tmdb_movies_list.append(movie_info)
       # Print out the title that was found
      print(f"Moview found: {title}")
      request_counter += request_counter
    except:
      print("Error retriving movie details for {title}")
    # Create a request counter to sleep the requests after a multiple
    # of 50 requests
  




948549
559707
930094
[{'title': 'Love Lies Bleeding', 'original_title': 'Love Lies Bleeding', 'budget': 0, 'original_language': 'en', 'homepage': 'https://tickets.loveliesbleeding.movie', 'overview': 'Reclusive gym manager Lou falls hard for Jackie, an ambitious bodybuilder headed through town to Las Vegas in pursuit of her dream. But their love ignites violence, pulling them deep into the web of Lou’s criminal family.', 'popularity': 63.542, 'runtime': 104, 'revenue': 8000000, 'release_date': '2024-03-08', 'vote_average': 7.5, 'vote_count': 39, 'genres': ['Crime', 'Romance', 'Thriller'], 'spoken_languages': ['United Kingdom', 'United States of America'], 'production_countries': ['English']}, {'title': 'My Love Affair with Marriage', 'original_title': 'My Love Affair with Marriage', 'budget': 0, 'original_language': 'en', 'homepage': 'https://www.myloveaffairwithmarriagemovie.com/', 'overview': "From an early age, songs and fairytales convinced Zelma that Love would solve all her probl

In [62]:
# Preview the first 5 results in JSON format
# Use json.dumps with argument indent=4 to format data
json_results = json.dumps(tmdb_movies_list[:5], indent=4)
print(json_results)

[
    {
        "title": "Love Lies Bleeding",
        "original_title": "Love Lies Bleeding",
        "budget": 0,
        "original_language": "en",
        "homepage": "https://tickets.loveliesbleeding.movie",
        "overview": "Reclusive gym manager Lou falls hard for Jackie, an ambitious bodybuilder headed through town to Las Vegas in pursuit of her dream. But their love ignites violence, pulling them deep into the web of Lou\u2019s criminal family.",
        "popularity": 63.542,
        "runtime": 104,
        "revenue": 8000000,
        "release_date": "2024-03-08",
        "vote_average": 7.5,
        "vote_count": 39,
        "genres": [
            "Crime",
            "Romance",
            "Thriller"
        ],
        "spoken_languages": [
            "United Kingdom",
            "United States of America"
        ],
        "production_countries": [
            "English"
        ]
    },
    {
        "title": "My Love Affair with Marriage",
        "original_title": 

In [61]:
# Convert the results to a DataFrame
tmdb_movies_df =  json_normalize(tmdb_movies_list)
tmdb_movies_df

Unnamed: 0,title,original_title,budget,original_language,homepage,overview,popularity,runtime,revenue,release_date,vote_average,vote_count,genres,spoken_languages,production_countries
0,Love Lies Bleeding,Love Lies Bleeding,0,en,https://tickets.loveliesbleeding.movie,Reclusive gym manager Lou falls hard for Jacki...,63.542,104,8000000,2024-03-08,7.5,39,"[Crime, Romance, Thriller]","[United Kingdom, United States of America]",[English]
1,My Love Affair with Marriage,My Love Affair with Marriage,0,en,https://www.myloveaffairwithmarriagemovie.com/,"From an early age, songs and fairytales convin...",5.604,108,0,2023-06-07,6.9,8,"[Animation, Drama]","[Latvia, Luxembourg, United States of America]",[English]
2,"Red, White & Royal Blue","Red, White & Royal Blue",0,en,https://www.amazon.com/dp/B0BYST2QY1,"After an altercation between Alex, the preside...",45.381,121,0,2023-07-27,8.02,1105,"[Comedy, Romance]",[United States of America],"[Español, English]"


### Merge and Clean the Data for Export

In [67]:
# Merge the New York Times reviews and TMDB DataFrames on title
merged_df = pd.merge(reviews_df, tmdb_movies_df, on="title", how="inner",)
merged_df.head(5)
merged_df.columns

Index(['web_url', 'snippet', 'source', 'keywords', 'pub_date', 'word_count',
       'headline.main', 'headline.kicker', 'headline.content_kicker',
       'headline.print_headline', 'headline.name', 'headline.seo',
       'headline.sub', 'byline.original', 'byline.person',
       'byline.organization', 'title', 'original_title', 'budget',
       'original_language', 'homepage', 'overview', 'popularity', 'runtime',
       'revenue', 'release_date', 'vote_average', 'vote_count', 'genres',
       'spoken_languages', 'production_countries'],
      dtype='object')

In [69]:
# Remove list brackets and quotation marks on the columns containing lists
# Create a list of the columns that need fixingcolumn
columns_to_fix = ['genres','spoken_languages','production_countries']

# Create a list of characters to remove
characters_to_remove = ['[',']','\'']

# Loop through the list of columns to fix
for column in columns_to_fix:
    # Convert the column to type 'str'
    merged_df[column] = merged_df[column].astype(str)
    # Loop through characters to remove
    for char in characters_to_remove:
        merged_df[column] = merged_df[column].str.replace(char,'')
        
# Display the fixed DataFrame
merged_df

Unnamed: 0,web_url,snippet,source,keywords,pub_date,word_count,headline.main,headline.kicker,headline.content_kicker,headline.print_headline,...,overview,popularity,runtime,revenue,release_date,vote_average,vote_count,genres,spoken_languages,production_countries
0,https://www.nytimes.com/2024/03/07/movies/love...,"In this neo-noir, the ever reliable, always wa...",The New York Times,"subject: Movies;persons: Stewart, Kristen;pers...",2024-03-07T10:03:26+0000,986,"‘Love Lies Bleeding’ Review: Kristen Stewart, ...",Critic’s Pick,,"Bad Romance, Hard-Boiled Style",...,Reclusive gym manager Lou falls hard for Jacki...,63.542,104,8000000,2024-03-08,7.5,39,"Crime, Romance, Thriller","United Kingdom, United States of America",English
1,https://www.nytimes.com/2023/08/10/movies/red-...,"This film, about an American president’s son w...",The New York Times,"subject: Movies;creative_works: Red, White & R...",2023-08-10T16:00:02+0000,529,"‘Red, White & Royal Blue’ Review: Keep Calm an...",,,Young Love in High Places,...,"After an altercation between Alex, the preside...",45.381,121,0,2023-07-27,8.02,1105,"Comedy, Romance",United States of America,"Español, English"


In [73]:
# Drop "byline.person" column
merged_df = merged_df.drop(columns=['byline.person'])
merged_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2 entries, 0 to 1
Data columns (total 30 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   web_url                  2 non-null      object 
 1   snippet                  2 non-null      object 
 2   source                   2 non-null      object 
 3   keywords                 2 non-null      object 
 4   pub_date                 2 non-null      object 
 5   word_count               2 non-null      int64  
 6   headline.main            2 non-null      object 
 7   headline.kicker          1 non-null      object 
 8   headline.content_kicker  0 non-null      object 
 9   headline.print_headline  2 non-null      object 
 10  headline.name            0 non-null      object 
 11  headline.seo             0 non-null      object 
 12  headline.sub             0 non-null      object 
 13  byline.original          2 non-null      object 
 14  byline.organization      0 non

In [74]:
# Delete duplicate rows and reset index
merged_df = merged_df.drop_duplicates().reset_index(drop=True)
merged_df

Unnamed: 0,web_url,snippet,source,keywords,pub_date,word_count,headline.main,headline.kicker,headline.content_kicker,headline.print_headline,...,overview,popularity,runtime,revenue,release_date,vote_average,vote_count,genres,spoken_languages,production_countries
0,https://www.nytimes.com/2024/03/07/movies/love...,"In this neo-noir, the ever reliable, always wa...",The New York Times,"subject: Movies;persons: Stewart, Kristen;pers...",2024-03-07T10:03:26+0000,986,"‘Love Lies Bleeding’ Review: Kristen Stewart, ...",Critic’s Pick,,"Bad Romance, Hard-Boiled Style",...,Reclusive gym manager Lou falls hard for Jacki...,63.542,104,8000000,2024-03-08,7.5,39,"Crime, Romance, Thriller","United Kingdom, United States of America",English
1,https://www.nytimes.com/2023/08/10/movies/red-...,"This film, about an American president’s son w...",The New York Times,"subject: Movies;creative_works: Red, White & R...",2023-08-10T16:00:02+0000,529,"‘Red, White & Royal Blue’ Review: Keep Calm an...",,,Young Love in High Places,...,"After an altercation between Alex, the preside...",45.381,121,0,2023-07-27,8.02,1105,"Comedy, Romance",United States of America,"Español, English"


In [None]:
# Export data to CSV without the index
merged_df.to_csv('movie_mereged_data.csv',index=False)