### Import Required Libraries and Set Up Environment Variables

In [69]:
# Dependencies
import requests
import time
from dotenv import load_dotenv
import os
import pandas as pd
import json

In [70]:
# Set environment variables from the .env in the local environment
load_dotenv()

nyt_api_key = os.getenv("NYT_API_KEY")
tmdb_api_key = os.getenv("TMDB_API_KEY")

### Access the New York Times API

In [54]:
# Set the base URL
url = "https://api.nytimes.com/svc/search/v2/articlesearch.json?"

# Filter for movie reviews with "love" in the headline
# section_name should be "Movies"
# type_of_material should be "Review"
#filter_query = 'section_name:"Movies" AND type_of_material:"Review" AND headline:"love"'

#CE - the default filter_query provided above was missing parenthesis. I added below.
filter_query = 'section_name:("Movies") AND type_of_material:("Review") AND headline:("love")'

# Use a sort filter, sort by newest
sort = "newest"

# Select the following fields to return:
# headline, web_url, snippet, source, keywords, pub_date, byline, word_count
field_list = "headline,web_url,snippet,source,keywords,pub_date,byline,word_count"

# Search for reviews published between a begin and end date
begin_date = "20130101"
end_date = "20230531"

# Build URL
# CE - ignore (pasted a component of the URL from the nyt website) /articlesearch.json?q={query}&fq={filter}
# CE - ignore (pasted a component of the URL from the nyt website) &api-key=

nyt_api = url + "fq=" + filter_query + "&fl=" + field_list + "&" + sort + "&" + begin_date + "&" + end_date +"&api-key="+ nyt_api_key
# CE- tested below the built URL excluding the field list.
# nyt_api = url + "fq=" + filter_query + "&" + sort + "&" + begin_date + "&" + end_date +"&api-key="+ nyt_api_key

nyt = requests.get(nyt_api).json()

#CE - tested the url by looking at it via .dumps below.
print(json.dumps(nyt, indent=10))

{
          "status": "OK",
          "copyright": "Copyright (c) 2024 The New York Times Company. All Rights Reserved.",
          "response": {
                    "docs": [
                              {
                                        "web_url": "https://www.nytimes.com/2023/06/20/movies/sublime-review.html",
                                        "snippet": "A teenager dreams of pop songs, and his best friend, in Mariano Biasin\u2019s tender gay coming-of-age drama.",
                                        "source": "The New York Times",
                                        "headline": {
                                                  "main": "\u2018Sublime\u2019 Review: Two Boys, One in Love",
                                                  "kicker": null,
                                                  "content_kicker": null,
                                                  "print_headline": "Sublime",
                                                  "name"

In [55]:
# Create an empty list to store the reviews
reviews_list = []

# loop through pages 0-19
for page in range(0, 20):

    # create query with a page number
    # API results show 10 articles at a time
    nyt_pages_url = nyt_api + "&page=" + str(page)

    
    # Make a "GET" request and retrieve the JSON
    nyt_pages = requests.get(nyt_pages_url).json()

    
    # Add a twelve second interval between queries to stay within API query limits
    time.sleep(12)
    
    # Try and save the reviews to the reviews_list
    
    try:

        # loop through the reviews["response"]["docs"] and append each review to the list

        reviews_list.append(nyt_pages["response"]["docs"])
        
        # Print the page that was just retrieved
        print("Page "+str(page)+" retrieved and appended.")

        # Print the page number that had no results then break from the loop
        
    except Exception:
        print("Page "+ str(page) + " had no results.")
        pass


 

Page 0 retrieved and appended.
Page 1 retrieved and appended.
Page 2 retrieved and appended.
Page 3 retrieved and appended.
Page 4 had no results.
Page 5 retrieved and appended.
Page 6 retrieved and appended.
Page 7 retrieved and appended.
Page 8 retrieved and appended.
Page 9 retrieved and appended.
Page 10 retrieved and appended.
Page 11 retrieved and appended.
Page 12 retrieved and appended.
Page 13 retrieved and appended.
Page 14 retrieved and appended.
Page 15 retrieved and appended.
Page 16 retrieved and appended.
Page 17 retrieved and appended.
Page 18 retrieved and appended.
Page 19 retrieved and appended.


In [56]:
# CE Note: my results above created lists nested within a list. 
# CE Note: I created a for loop / line comprehension below to extract the nested lists from the list
#flat_list =[]
# for l in reviews_list:
#     for x in list:
#         flat_list.append(x)
# print(flat_list)
    
flat_list = [x for l in reviews_list for x in l]  
print(flat_list)

[{'web_url': 'https://www.nytimes.com/2023/06/20/movies/sublime-review.html', 'snippet': 'A teenager dreams of pop songs, and his best friend, in Mariano Biasin’s tender gay coming-of-age drama.', 'source': 'The New York Times', 'headline': {'main': '‘Sublime’ Review: Two Boys, One in Love', 'kicker': None, 'content_kicker': None, 'print_headline': 'Sublime', 'name': None, 'seo': None, 'sub': None}, 'keywords': [{'name': 'subject', 'value': 'Movies', 'rank': 1, 'major': 'N'}, {'name': 'creative_works', 'value': 'Sublime (Movie)', 'rank': 2, 'major': 'N'}, {'name': 'persons', 'value': 'Biasin, Mariano', 'rank': 3, 'major': 'N'}], 'pub_date': '2023-06-20T17:48:07+0000', 'byline': {'original': 'By Erik Piepenburg', 'person': [{'firstname': 'Erik', 'middlename': None, 'lastname': 'Piepenburg', 'qualifier': None, 'title': None, 'role': 'reported', 'organization': '', 'rank': 1}], 'organization': None}, 'word_count': 266}, {'web_url': 'https://www.nytimes.com/2018/01/18/movies/kangaroo-a-lov

In [57]:
# Preview the first 5 results in JSON format
# Use json.dumps with argument indent=4 to format data
reviews_flat = flat_list.copy()
# reviews_flat_5 = reviews_flat[:5]
print(json.dumps(reviews_flat[:5],indent=4))


[
    {
        "web_url": "https://www.nytimes.com/2023/06/20/movies/sublime-review.html",
        "snippet": "A teenager dreams of pop songs, and his best friend, in Mariano Biasin\u2019s tender gay coming-of-age drama.",
        "source": "The New York Times",
        "headline": {
            "main": "\u2018Sublime\u2019 Review: Two Boys, One in Love",
            "kicker": null,
            "content_kicker": null,
            "print_headline": "Sublime",
            "name": null,
            "seo": null,
            "sub": null
        },
        "keywords": [
            {
                "name": "subject",
                "value": "Movies",
                "rank": 1,
                "major": "N"
            },
            {
                "name": "creative_works",
                "value": "Sublime (Movie)",
                "rank": 2,
                "major": "N"
            },
            {
                "name": "persons",
                "value": "Biasin, Mariano",
         

In [58]:
# Convert reviews_list to a Pandas DataFrame using json_normalize()
reviews_df = pd.json_normalize(reviews_flat)
reviews_df

Unnamed: 0,web_url,snippet,source,keywords,pub_date,word_count,headline.main,headline.kicker,headline.content_kicker,headline.print_headline,headline.name,headline.seo,headline.sub,byline.original,byline.person,byline.organization
0,https://www.nytimes.com/2023/06/20/movies/subl...,"A teenager dreams of pop songs, and his best f...",The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-06-20T17:48:07+0000,266,"‘Sublime’ Review: Two Boys, One in Love",,,Sublime,,,,By Erik Piepenburg,"[{'firstname': 'Erik', 'middlename': None, 'la...",
1,https://www.nytimes.com/2018/01/18/movies/kang...,The documentary looks at the mass killings of ...,The New York Times,"[{'name': 'creative_works', 'value': 'Kangaroo...",2018-01-18T12:00:23+0000,263,Review: ‘Kangaroo: A Love-Hate Story’ Exposes ...,,,Kangaroo: A Love-Hate Story,,,,By Ken Jaworowski,"[{'firstname': 'Ken', 'middlename': None, 'las...",
2,https://www.nytimes.com/1991/10/11/movies/shor...,"Short-order cookery, dreams of love. Warm and ...",The New York Times,"[{'name': 'subject', 'value': 'MOTION PICTURES...",1991-10-11T05:00:00+0000,1117,Short-Order Cookery And Dreams of Love,,,Short-Order Cookery And Dreams of Love,,,,By Janet Maslin,"[{'firstname': 'Janet', 'middlename': None, 'l...",
3,https://www.nytimes.com/1989/07/12/movies/revi...,Rob Reiner’s take on romantically bruised New ...,The New York Times,"[{'name': 'subject', 'value': 'Reviews', 'rank...",1989-07-12T05:00:00+0000,942,Review/Film; It's Harry [ Loves ] Sally in a...,,,Review/Film; It's Harry [ Loves ] Sally in a...,,,,By Caryn James,"[{'firstname': 'Caryn', 'middlename': None, 'l...",
4,https://www.nytimes.com/2013/09/20/movies/jewt...,"In “Jewtopia,” a young man asks a childhood fr...",The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2013-09-19T23:33:15+0000,272,Love’s Eternal Masquerade,Movie Review,,Jewtopia,,,,By David DeWitt,"[{'firstname': 'David', 'middlename': None, 'l...",
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
185,https://www.nytimes.com/1999/10/29/movies/coun...,Ben Ratliff reviews country music performance ...,The New York Times,"[{'name': 'persons', 'value': 'Black, Clint', ...",1999-10-29T05:00:00+0000,352,"Where Hearts Don't Cheat, And Love Is the Current",COUNTRY MUSIC,,"COUNTRY MUSIC; Where Hearts Don't Cheat, And L...",,,,By Ben Ratliff,"[{'firstname': 'Ben', 'middlename': None, 'las...",
186,https://www.nytimes.com/2017/11/09/movies/revi...,In Joachim Trier’s sly psychological thriller ...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2017-11-09T20:08:31+0000,757,"Review: In ‘Thelma,’ a Woman in Love Can Burn ...",,,A Woman in Love Can Burn Down the World,,,,By Manohla Dargis,"[{'firstname': 'Manohla', 'middlename': None, ...",
187,https://www.nytimes.com/1997/08/29/movies/thef...,Irish Gypsy leader betrayed. Splendid performa...,The New York Times,"[{'name': 'persons', 'value': 'Harris, Richard...",1997-08-29T05:00:00+0000,602,"Theft, Adultery and Good Old Love",FILM REVIEW,,"Theft, Adultery and Good Old Love",,,,By Stephen Holden,"[{'firstname': 'Stephen', 'middlename': None, ...",
188,https://www.nytimes.com/1994/11/04/movies/film...,Vietnam veteran and children in 1970 Mississip...,The New York Times,"[{'name': 'persons', 'value': 'Avnet, Jon', 'r...",1994-11-04T05:00:00+0000,710,Family Values and Love In Vietnam Aftermath,FILM REVIEW,,FILM REVIEW; Family Values and Love In Vietnam...,,,,By Janet Maslin,"[{'firstname': 'Janet', 'middlename': None, 'l...",


CE Note: Pasted part of the Challenge documentation:
Extract the movie title from the "headline.main" column and save it to a new column "title". To do this, you will use the Pandas apply() method and the following lambda function:

This code takes the string in the cell and extracts the characters between the unicode quotation marks, as long as a space and the word "Review" follows the closing quotation mark.

lambda st: st[st.find("\u2018")+1:st.find("\u2019 Review")]
These titles will be used in the query for The Movie Database.


In [59]:
# Extract the title from the "headline.main" column and
# save it to a new column "title"
# Title is between unicode characters \u2018 and \u2019. 
# # End string should include " Review" to avoid cutting title early

#CE Note: Pasted from challenge instructions:
#st[st.find("\u2018")+1:st.find("\u2019 Review")]

#CE Note: My code wasn't working. Chatgpt provided guidance that advised that I shouldn't have the apply within a list.
reviews_df["title"] = reviews_df["headline.main"].apply(lambda x: x[x.find("\u2018") + 1 : x.find("\u2019 Review")])

 
#reviews_df["title"] = reviews_df["headline.main"].apply(lambda x: x[x.find("\u2018") + 1 : x.find("\u2019 Review")] if ("\u2018" in x and "\u2019 Review" in x) else x)


reviews_df

Unnamed: 0,web_url,snippet,source,keywords,pub_date,word_count,headline.main,headline.kicker,headline.content_kicker,headline.print_headline,headline.name,headline.seo,headline.sub,byline.original,byline.person,byline.organization,title
0,https://www.nytimes.com/2023/06/20/movies/subl...,"A teenager dreams of pop songs, and his best f...",The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-06-20T17:48:07+0000,266,"‘Sublime’ Review: Two Boys, One in Love",,,Sublime,,,,By Erik Piepenburg,"[{'firstname': 'Erik', 'middlename': None, 'la...",,Sublime
1,https://www.nytimes.com/2018/01/18/movies/kang...,The documentary looks at the mass killings of ...,The New York Times,"[{'name': 'creative_works', 'value': 'Kangaroo...",2018-01-18T12:00:23+0000,263,Review: ‘Kangaroo: A Love-Hate Story’ Exposes ...,,,Kangaroo: A Love-Hate Story,,,,By Ken Jaworowski,"[{'firstname': 'Ken', 'middlename': None, 'las...",,Kangaroo: A Love-Hate Story’ Exposes a Wildlif...
2,https://www.nytimes.com/1991/10/11/movies/shor...,"Short-order cookery, dreams of love. Warm and ...",The New York Times,"[{'name': 'subject', 'value': 'MOTION PICTURES...",1991-10-11T05:00:00+0000,1117,Short-Order Cookery And Dreams of Love,,,Short-Order Cookery And Dreams of Love,,,,By Janet Maslin,"[{'firstname': 'Janet', 'middlename': None, 'l...",,Short-Order Cookery And Dreams of Lov
3,https://www.nytimes.com/1989/07/12/movies/revi...,Rob Reiner’s take on romantically bruised New ...,The New York Times,"[{'name': 'subject', 'value': 'Reviews', 'rank...",1989-07-12T05:00:00+0000,942,Review/Film; It's Harry [ Loves ] Sally in a...,,,Review/Film; It's Harry [ Loves ] Sally in a...,,,,By Caryn James,"[{'firstname': 'Caryn', 'middlename': None, 'l...",,Review/Film; It's Harry [ Loves ] Sally in a...
4,https://www.nytimes.com/2013/09/20/movies/jewt...,"In “Jewtopia,” a young man asks a childhood fr...",The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2013-09-19T23:33:15+0000,272,Love’s Eternal Masquerade,Movie Review,,Jewtopia,,,,By David DeWitt,"[{'firstname': 'David', 'middlename': None, 'l...",,Love’s Eternal Masquerad
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
185,https://www.nytimes.com/1999/10/29/movies/coun...,Ben Ratliff reviews country music performance ...,The New York Times,"[{'name': 'persons', 'value': 'Black, Clint', ...",1999-10-29T05:00:00+0000,352,"Where Hearts Don't Cheat, And Love Is the Current",COUNTRY MUSIC,,"COUNTRY MUSIC; Where Hearts Don't Cheat, And L...",,,,By Ben Ratliff,"[{'firstname': 'Ben', 'middlename': None, 'las...",,"Where Hearts Don't Cheat, And Love Is the Curren"
186,https://www.nytimes.com/2017/11/09/movies/revi...,In Joachim Trier’s sly psychological thriller ...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2017-11-09T20:08:31+0000,757,"Review: In ‘Thelma,’ a Woman in Love Can Burn ...",,,A Woman in Love Can Burn Down the World,,,,By Manohla Dargis,"[{'firstname': 'Manohla', 'middlename': None, ...",,"Thelma,’ a Woman in Love Can Burn Down the Worl"
187,https://www.nytimes.com/1997/08/29/movies/thef...,Irish Gypsy leader betrayed. Splendid performa...,The New York Times,"[{'name': 'persons', 'value': 'Harris, Richard...",1997-08-29T05:00:00+0000,602,"Theft, Adultery and Good Old Love",FILM REVIEW,,"Theft, Adultery and Good Old Love",,,,By Stephen Holden,"[{'firstname': 'Stephen', 'middlename': None, ...",,"Theft, Adultery and Good Old Lov"
188,https://www.nytimes.com/1994/11/04/movies/film...,Vietnam veteran and children in 1970 Mississip...,The New York Times,"[{'name': 'persons', 'value': 'Avnet, Jon', 'r...",1994-11-04T05:00:00+0000,710,Family Values and Love In Vietnam Aftermath,FILM REVIEW,,FILM REVIEW; Family Values and Love In Vietnam...,,,,By Janet Maslin,"[{'firstname': 'Janet', 'middlename': None, 'l...",,Family Values and Love In Vietnam Aftermat


In [60]:
#CE NOTE: Ignore this cell, I did not see the function provided in the challenge notes before doing below.

#CE Note: Initially googled and chatgpt-ed to figure out how to extract from a string.
# CE first attempt:
# quotes = r'[\u2018\u2019](.*?)[\u2018\u2019]'
# quotes2 = r"['][\u2018\u2019](.*?)[\u2018\u2019][']"
# reviews_df["title"] = ""
# i = 0

#for line in reviews_df["headline.main"]:
#     if quotes or quotes2 in line:
#         reviews_df.iloc[i,-1] = line.str.extract(quotes)
#         i+= 1
#     else:
#         reviews_df.iloc[i,-1] = line
#         i+= 1

# reviews_df.head()

#CE second attempt:
# for line in reviews_df["headline.main"]:
#     if quotes or quotes2 in line:
#         extract = reviews_df["headline.main"].str.extract(quotes)
#         reviews_df.iloc[i,-1] = extract[i]
#         i+= 1
#     else:
#         reviews_df.iloc[i,-1] = line
#         i+= 1
## quotes = r'\u2018(.*?)\u2019'
## quotes2 = r"\'(.*?) \'" 




# CE third attempt:
# for line in reviews_df["headline.main"]:
#     if quotes in line or quotes2 in line:
#         extract = reviews_df["headline.main"].str.extract(quotes)
#         reviews_df.iloc[i, -1] = extract.iloc[i, 0]
#     else:
#         reviews_df.iloc[i, -1] = line
#     i += 1

# for line in reviews_df["headline.main"]:
#     if quotes in line or quotes2 in line:
#         extract = reviews_df["headline.main"].str.extract(quotes)
#         reviews_df.iloc[i, -1] = extract.iloc[0, 0] if not extract.empty else None
#     else:
#         reviews_df.iloc[i, -1] = line
#     i += 1

# CE note: Further queries on chatgpt and was provided guidance using re.findall as below: 
# import re
# quotes = r'\u2018(.*?)\u2019'
# quotes2 = r"\'(.*?) \' "
# reviews_df["title"] = ""

# for i, line in enumerate(reviews_df["headline.main"]):
#     updated = re.findall(quotes, line) or re.findall(quotes2, line)
#     if updated:
#         reviews_df.iloc[i, -1] = updated[0]
#     else:
#         reviews_df.iloc[i, -1] = line

# reviews_df.head()

In [61]:
## CE NOTE: for prompt in cell below I've extracted all the info of one of the df keywords column cell to see the full info.
reviews_df["keywords"].iloc[0]


[{'name': 'subject', 'value': 'Movies', 'rank': 1, 'major': 'N'},
 {'name': 'creative_works',
  'value': 'Sublime (Movie)',
  'rank': 2,
  'major': 'N'},
 {'name': 'persons', 'value': 'Biasin, Mariano', 'rank': 3, 'major': 'N'}]

In [62]:
# Extract 'name' and 'value' from items in "keywords" column
#CE note: pasted from the other challenge instructions: Use the supplied extract_keywords function to convert 
#the "keywords" column from a list of dictionaries to strings using the apply() method.


def extract_keywords(keyword_list):
    extracted_keywords = ""
    for item in keyword_list:
        # Extract 'name' and 'value'
        keyword = f"{item['name']}: {item['value']};" 
        # Append the keyword item to the extracted_keywords list
        extracted_keywords += keyword
    return extracted_keywords



# Fix the "keywords" column by converting cells from a list to a string


reviews_df["keywords"] = reviews_df["keywords"].apply(lambda x: extract_keywords(x))

#CE Note: created another iloc to confirm the column's data returned as required.
reviews_df["keywords"].iloc[0]


'subject: Movies;creative_works: Sublime (Movie);persons: Biasin, Mariano;'

In [64]:
# Create a list from the "title" column using to_list()
# These titles will be used in the query for The Movie Database
titles = reviews_df["title"].to_list()
titles

['Sublime',
 'Kangaroo: A Love-Hate Story’ Exposes a Wildlife Massacr',
 'Short-Order Cookery And Dreams of Lov',
 "Review/Film; It's Harry  [ Loves ]  Sally in a Romance Of New Yorkers and Neurose",
 'Love’s Eternal Masquerad',
 'The Tomorrow Man',
 'FILM REVIEW;Equal Opportunity in Games of Lov',
 'Marriage Story',
 'Bones and All',
 'Love, Antosha',
 'You Can Live Forever',
 'From Tapestried Villa to Sylvan Glade, Aristocratic Women in Lov',
 "A Love Triangle Like No Other in 'Two Drifters'",
 'Teenager Dies, Leaving Love, and a Life, Behin',
 'FILM REVIEW;Of a Circus, Church and Lesbian Lov',
 'Homage to a Poet Who Loved Polan',
 'Review/Film;   Love and Pain Among the Gentry in Irelan',
 'Prem Ratan Dhan Payo,’ a Bollywood Tale of a Prince-and-Plebe Doubl',
 'Review/Film; Mysticism and Love Inside a Hammer and Sickl',
 'A Promoter Finds a New Cause: Go',
 'Now They’ll Love Me, a Twin Sister Scheme',
 'On the Streets of Lisbon, Love, Romance and Desir',
 'They’ll Love Me When I’m D

### Access The Movie Database API

CE Note - Pasted part of the Challenge documentation:
    
Part 2: Access The Movie Database API
Consult the Search & Query [https://developer.themoviedb.org/docs/search-and-query-for-details] for Details documentationLinks to an external site. to build your query URLs. You will be making both types of requests to extract all of the details you need:

The search query is used to find the movie ID from the search by title. Most of this query is included in your starter code, as follows, but you will need to include the movie title in the query.

The movie query is made once you have the movie ID.

You will use the titles list created in Part 1 to perform your queries with The Movie Database.

In [124]:
# Prepare The Movie Database query
url = "https://api.themoviedb.org/3/search/movie?query="
tmdb_key_string = "api_key=" + tmdb_api_key

#CE Note: tested the queries required in the next cell with one movie below to make all 
#the components would work for the loop.


# movie = "Sublime"

movie_url = "https://api.themoviedb.org/3/movie/"

#search_query = url + movie + "&" + tmdb_key_string

# tmdb_json = requests.get(search_query).json()

#print(json.dumps(tmdb_json, indent = 5))

# movie_key = tmdb_json["results"][0]["id"]

# movie_query = movie_url + str(movie_key) + "?" + tmdb_key_string

# movie_details_json = requests.get(movie_query).json()

#print(json.dumps(movie_details_json, indent = 5))

# print(titles)

#CE NOTE: testing genre.languages, countries list for next cell's instructions.
# print(movie_details_json["genres"])
genres = movie_details_json["genres"]
languages = movie_details_json["spoken_languages"]
countries = movie_details_json["production_countries"]

# genres_list = [g["name"] for g in genres if "name" in g]
# print(genres_list)

# #CE NOTE: testing spoken_languages for the next step.
# spoken_languages = [s["name"] for s in languages if "name" in s]
# print(spoken_languages)


# #CE NOTE: testing production_countries for the next step.
# production_countries = [p["name"] for p in countries if "name" in p]
# print(production_countries)

## CE NOTE: testing dictionary addition for next step.


movie_data = ["title", "original_title", "budget", "original_language", "homepage", "overview", "popularity", "runtime", "revenue", "release_date", "vote_average", "vote_count"]

# for movie in movie_data:
#     movie_data_dict[str(movie)] = movie_details_json[movie]

# movie_data_dict["genres"] = genres_list
# movie_data_dict["spoken_languages"] = spoken_languages
# movie_data_dict["production_countries"] = production_countries
    
# print(movie_data_dict)

#CE NOTE: printed the title to see the info I should expect.
for title in titles:
    print(title)

Sublime
Kangaroo: A Love-Hate Story’ Exposes a Wildlife Massacr
Short-Order Cookery And Dreams of Lov
Review/Film; It's Harry  [ Loves ]  Sally in a Romance Of New Yorkers and Neurose
Love’s Eternal Masquerad
The Tomorrow Man
FILM REVIEW;Equal Opportunity in Games of Lov
Marriage Story
Bones and All
Love, Antosha
You Can Live Forever
From Tapestried Villa to Sylvan Glade, Aristocratic Women in Lov
A Love Triangle Like No Other in 'Two Drifters'
Teenager Dies, Leaving Love, and a Life, Behin
FILM REVIEW;Of a Circus, Church and Lesbian Lov
Homage to a Poet Who Loved Polan
Review/Film;   Love and Pain Among the Gentry in Irelan
Prem Ratan Dhan Payo,’ a Bollywood Tale of a Prince-and-Plebe Doubl
Review/Film; Mysticism and Love Inside a Hammer and Sickl
A Promoter Finds a New Cause: Go
Now They’ll Love Me, a Twin Sister Scheme
On the Streets of Lisbon, Love, Romance and Desir
They’ll Love Me When I’m Dead’ Documents Orson Welles’s Last Fil
The Trivially Hip: A Music Geek's Warped Love Lif
A

In [127]:
# Create an empty list to store the results
tmdb_movies_list = []


'''
CE NOTE: Pasting the other additional directions. Create a variable called request_counter and initialize it with the value of 1. This counter should do the following:

Increment by one every time you iterate through the titles list.

Use time.sleep(1) when it reaches a multiple of 50.

Print a message to indicate that the application is sleeping.'''

# CE NOTE: checked my code in chatgpt due to it not working after I've added everything. Chatgpt advised to put the search query variable within the loop.

# Create a request counter to sleep the requests after a multiple
# of 50 requests
request_counter = 1



# Loop through the titles
for title in titles:
   
    # Check if we need to sleep before making a request
    if request_counter % 50 == 0:
        time.sleep(1)
      
    # Add 1 to the request counter
    request_counter += 1
    
    search_query = url + str(title) + "&" + tmdb_key_string
    
    # Perform a "GET" request for The Movie Database
    tmdb_json = requests.get(search_query).json()

    # Include a try clause to search for the full movie details.
    # Use the except clause to print out a statement if a movie
    # is not found.
    try:
        movie = str(title)
        # Get movie id
        movie_key = tmdb_json["results"][0]["id"]
        
        # Make a request for a the full movie details
        movie_query = movie_url + str(movie_key) + "?" + tmdb_key_string

        # Execute "GET" request with url
        movie_details_json = requests.get(movie_query).json()
        
        # Extract the genre names into a list
        genres_list = [g["name"] for g in genres if "name" in g]

        # Extract the spoken_languages' English name into a list
        spoken_languages = [s["name"] for s in languages if "name" in s]

        # Extract the production_countries' name into a list
        production_countries = [p["name"] for p in countries if "name" in p]

        # Add the relevant data to a dictionary and
        # append it to the tmdb_movies_list list
        
        """Create a dictionary with the following results: title, original_title, budget, original_language, homepage, overview, popularity, runtime, revenue, release_date, vote_average, vote_count, as well as the genres, spoken_languages, and production_countries lists you just created.
        
        Append this dictionary to tmdb_movies_list."""
        movie_data_dict = {}
        
        for data in movie_data:
            movie_data_dict[str(data)] = movie_details_json[data]

        movie_data_dict["genres"] = genres_list
        movie_data_dict["spoken_languages"] = spoken_languages
        movie_data_dict["production_countries"] = production_countries
        
        tmdb_movies_list.append(movie_data_dict)
        
        # Print out the title that was found
        print(f"{movie_details_json['title']} was found and appended.")
        
        """Print out the name of the movie and a message to indicate that the title was found. 
        Use the except clause to print out a statement if a movie is not found."""
        
    except:
        print(f"{title} was not found.")


Sublime was found and appended.
Kangaroo: A Love-Hate Story’ Exposes a Wildlife Massacr was not found.
Short-Order Cookery And Dreams of Lov was not found.
Review/Film; It's Harry  [ Loves ]  Sally in a Romance Of New Yorkers and Neurose was not found.
Love’s Eternal Masquerad was not found.
The Tomorrow Man was found and appended.
FILM REVIEW;Equal Opportunity in Games of Lov was not found.
Marriage Story was found and appended.
Bones and All was found and appended.
Love, Antosha was found and appended.
You Can Live Forever was found and appended.
From Tapestried Villa to Sylvan Glade, Aristocratic Women in Lov was not found.
A Love Triangle Like No Other in 'Two Drifters' was not found.
Teenager Dies, Leaving Love, and a Life, Behin was not found.
FILM REVIEW;Of a Circus, Church and Lesbian Lov was not found.
Homage to a Poet Who Loved Polan was not found.
Review/Film;   Love and Pain Among the Gentry in Irelan was not found.
Prem Ratan Dhan Payo,’ a Bollywood Tale of a Prince-and-Pl

Love Phlows Between Phish and Phan was not found.
Cyrano, My Love was found and appended.
Young Love Caught in the Grips of 9/1 was not found.
An Offer of Love He Can't Refus was not found.
Intertwining Love and Obsessio was not found.
Quincy’ Captures a Lifelong Love Affair With Musi was not found.
Mother's Message on Love and Deat was not found.
Sex and the City, From Hong Kong to Beijin was not found.
Wife of a Spy was found and appended.
When Mother Love Fails, The Community Steps I was not found.
The Mountain Between Us was found and appended.
Hope Gap was found and appended.
How to Train Your Dragon: The Hidden World was found and appended.
A Timeless Tale of True Love in Digital Vide was not found.
They Be Foolish Mortals Who Love the Nightlif was not found.
May-December Love Can Lead to Disconten was not found.
Finding Love On the Road Not Take was not found.
Cassandro was found and appended.
My Love Affair with Marriage was found and appended.
HOW 'SMASH PALACE' PLUMBS THE RUI

In [137]:
# Preview the first 5 results in JSON format
# Use json.dumps with argument indent=4 to format data

# CE NOTE: sliced the list to get the first 5 results only.
print(json.dumps(tmdb_movies_list[:5], indent = 4))

[
    {
        "title": "Sublime",
        "original_title": "Sublime",
        "budget": 0,
        "original_language": "en",
        "homepage": "",
        "overview": "Admitted to Mt. Abaddon Hospital for a routine procedure, George Grieves discovers that his condition is much more serious and complicated than originally expected; and as his own fears begin to manifest around him, he learns that Mt. Abaddon is not a place where people come to get better... it is a place where people come to die.",
        "popularity": 7.733,
        "runtime": 113,
        "revenue": 0,
        "release_date": "2007-06-15",
        "vote_average": 5.257,
        "vote_count": 109,
        "genres": [
            "Horror",
            "Thriller"
        ],
        "spoken_languages": [
            "English"
        ],
        "production_countries": [
            "United States of America"
        ]
    },
    {
        "title": "The Tomorrow Man",
        "original_title": "The Tomorrow Man",
  

CE Note - Pasted part of the Challenge documentation: Convert the results to a DataFrame called tmdb_df with pd.DataFrame(). You don't need to use json_normalize() this time because we don't have nested objects.

In [138]:
# Convert the results to a DataFrame
tmdb_df = pd.DataFrame(tmdb_movies_list)
tmdb_df

Unnamed: 0,title,original_title,budget,original_language,homepage,overview,popularity,runtime,revenue,release_date,vote_average,vote_count,genres,spoken_languages,production_countries
0,Sublime,Sublime,0,en,,Admitted to Mt. Abaddon Hospital for a routine...,7.733,113,0,2007-06-15,5.257,109,"[Horror, Thriller]",[English],[United States of America]
1,The Tomorrow Man,The Tomorrow Man,0,en,https://bleeckerstreetmedia.com/thetomorrowman,Ed Hemsler spends his life preparing for a dis...,7.427,94,0,2019-05-22,5.639,54,"[Horror, Thriller]",[English],[United States of America]
2,Marriage Story,Marriage Story,18000000,en,https://www.marriagestorymovie.com,A stage director and an actress struggle throu...,21.789,137,2300000,2019-09-28,7.746,6607,"[Horror, Thriller]",[English],[United States of America]
3,Bones and All,Bones and All,18000000,it,https://www.bonesandallfilm.net/,"Abandoned by her father, a young woman embarks...",23.093,131,15100000,2022-11-18,7.086,1127,"[Horror, Thriller]",[English],[United States of America]
4,"Love, Antosha","Love, Antosha",0,en,https://antonyelchindoc.com/,"From a prolific career in film and television,...",6.771,92,0,2019-08-02,7.255,49,"[Horror, Thriller]",[English],[United States of America]
5,You Can Live Forever,You Can Live Forever,0,en,https://gooddeedentertainment.com/you-can-live...,"When Jaime, a gay teenager, is sent to live in...",4.879,96,15055,2023-03-24,6.8,28,"[Horror, Thriller]",[English],[United States of America]
6,A Journal for Jordan,A Journal for Jordan,25000000,en,https://www.sonypictures.com/movies/ajournalfo...,Based on the true story of First Sergeant Char...,10.841,131,6700000,2021-12-22,7.133,98,"[Horror, Thriller]",[English],[United States of America]
7,"DeRosa: Life, Love & Art in Transition","DeRosa: Life, Love & Art in Transition",8000,en,,Chronicles the extraordinary life of artist Fe...,0.6,66,0,2021-12-04,0.0,0,"[Horror, Thriller]",[English],[United States of America]
8,"To Me, the One Who Loved You",君を愛したひとりの僕へ,0,ja,https://www.crunchyroll.com/series/GVDHX8QM4/t...,Koyomi Hidaka and Shiori Sato meet at his fath...,29.154,98,0,2022-10-07,5.7,55,"[Horror, Thriller]",[English],[United States of America]
9,"Love, the Italian Way",Intrigo a Taormina,0,it,,A group of passengers traveling around the Med...,2.214,100,0,1960-12-22,5.25,4,"[Horror, Thriller]",[English],[United States of America]


### Merge and Clean the Data for Export

In [140]:
# Merge the New York Times reviews and TMDB DataFrames on title
nyt_tmdb = pd.merge(tmdb_df, reviews_df, on = "title")
nyt_tmdb.columns


Index(['title', 'original_title', 'budget', 'original_language', 'homepage',
       'overview', 'popularity', 'runtime', 'revenue', 'release_date',
       'vote_average', 'vote_count', 'genres', 'spoken_languages',
       'production_countries', 'web_url', 'snippet', 'source', 'keywords',
       'pub_date', 'word_count', 'headline.main', 'headline.kicker',
       'headline.content_kicker', 'headline.print_headline', 'headline.name',
       'headline.seo', 'headline.sub', 'byline.original', 'byline.person',
       'byline.organization'],
      dtype='object')

In [151]:
"""The genres, spoken_languages, and production_countries columns were saved as lists, but we want the columns to be 
strings without the list characters ([, ], and '). To fix these columns, perform the following actions:

Create a list of the columns that need fixing called columns_to_fix.

Create a list of characters to remove called characters_to_remove.

Loop through columns_to_fix and do the following:

Use astype() to convert the column to a string.

Loop through the characters_to_remove and use the Pandas str.replace() method to remove the character from the string.

Print the head of the updated DataFrame to confirm the list characters were removed.

Delete any duplicate rows and reset the index.

Export data to a CSV file without the DataFrame's index."""
# Remove list brackets and quotation marks on the columns containing lists
# Create a list of the columns that need fixing
columns_to_fix = ["genres", "spoken_languages", "production_countries"]

# Create a list of characters to remove
characters_to_remove = ['\u005B', '\u005D','\u0027']

# Loop through the list of columns to fix
for col in columns_to_fix:

    # Convert the column to type 'str'
    nyt_tmdb[col] = nyt_tmdb[col].astype(str)

    # Loop through characters to remove
    for char in characters_to_remove:
        nyt_tmdb[col] = nyt_tmdb[col].str.replace(char,"")

# Display the fixed DataFrame
# CE NOTE: used an iloc to display the columns where the change took place.
nyt_tmdb.iloc[:,12:18].head()

Unnamed: 0,genres,spoken_languages,production_countries,web_url,snippet,source
0,"Horror, Thriller",English,United States of America,https://www.nytimes.com/2023/06/20/movies/subl...,"A teenager dreams of pop songs, and his best f...",The New York Times
1,"Horror, Thriller",English,United States of America,https://www.nytimes.com/2019/05/21/movies/the-...,Blythe Danner and John Lithgow strain to eleva...,The New York Times
2,"Horror, Thriller",English,United States of America,https://www.nytimes.com/2019/11/05/movies/marr...,Adam Driver and Scarlett Johansson self-consci...,The New York Times
3,"Horror, Thriller",English,United States of America,https://www.nytimes.com/2022/11/17/movies/bone...,Luca Guadagnino’s latest stars Timothée Chalam...,The New York Times
4,"Horror, Thriller",English,United States of America,https://www.nytimes.com/2019/08/08/movies/love...,"Anton Yelchin, who played Chekov in the J.J. A...",The New York Times


In [155]:
# Drop "byline.person" column
nyt_tmdb = nyt_tmdb.drop(columns = ["byline.person"], axis = 1)
nyt_tmdb.columns

Index(['title', 'original_title', 'budget', 'original_language', 'homepage',
       'overview', 'popularity', 'runtime', 'revenue', 'release_date',
       'vote_average', 'vote_count', 'genres', 'spoken_languages',
       'production_countries', 'web_url', 'snippet', 'source', 'keywords',
       'pub_date', 'word_count', 'headline.main', 'headline.kicker',
       'headline.content_kicker', 'headline.print_headline', 'headline.name',
       'headline.seo', 'headline.sub', 'byline.original',
       'byline.organization'],
      dtype='object')

In [158]:
# Delete duplicate rows and reset index
#CE Note: printed before and after info details
print(nyt_tmdb.info())
nyt_tmdb_df = nyt_tmdb.drop_duplicates()
nyt_tmdb_df = nyt_tmdb_df.reset_index(drop=True)
nyt_tmdb_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 33 entries, 0 to 32
Data columns (total 30 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   title                    33 non-null     object 
 1   original_title           33 non-null     object 
 2   budget                   33 non-null     int64  
 3   original_language        33 non-null     object 
 4   homepage                 33 non-null     object 
 5   overview                 33 non-null     object 
 6   popularity               33 non-null     float64
 7   runtime                  33 non-null     int64  
 8   revenue                  33 non-null     int64  
 9   release_date             33 non-null     object 
 10  vote_average             33 non-null     float64
 11  vote_count               33 non-null     int64  
 12  genres                   33 non-null     object 
 13  spoken_languages         33 non-null     object 
 14  production_countries     33 

In [159]:
# Export data to CSV without the index
nyt_tmdb_df.to_csv("movie_data_output.csv", index=False)