### Import Required Libraries and Set Up Environment Variables

In [1]:
# Dependencies
import requests
import time
from dotenv import load_dotenv
import os
import pandas as pd
import json
import re
import urllib.parse

In [2]:
# Set environment variables from the .env in the local environment
load_dotenv('example.env')

nyt_api_key = os.getenv("NYT_API_KEY")
tmdb_api_key = os.getenv("TMDB_API_KEY")

print(nyt_api_key)


Z16TFGUI2bJAkYkAjxrjV8H3hdBokzZE


### Access the New York Times API

In [3]:
# Set the base URL
url = "https://api.nytimes.com/svc/search/v2/articlesearch.json?"

# Filter for movie reviews with "love" in the headline
# section_name should be "Movies"
# type_of_material should be "Review"
filter_query = 'section_name:"Movies" AND type_of_material:"Review" AND headline:"love"'

# Use a sort filter, sort by newest
sort = "newest"

# Select the following fields to return:
# headline, web_url, snippet, source, keywords, pub_date, byline, word_count
field_list = 'headline,web_url,snippet,source,keywords,pub_date,byline,word_count'

# Search for reviews published between a begin and end date
begin_date = "20130101"
end_date = "20230531"

# Build URL
query_url = (
    f'{url}api-key={nyt_api_key}&begin_date={begin_date}&end_date={end_date}'
    + f'&fq={filter_query}&sort={sort}&fl={field_list}'
)

#query_url = f'{url}q={filter_query}&api-key={nyt_api_key}'
query_url

'https://api.nytimes.com/svc/search/v2/articlesearch.json?api-key=Z16TFGUI2bJAkYkAjxrjV8H3hdBokzZE&begin_date=20130101&end_date=20230531&fq=section_name:"Movies" AND type_of_material:"Review" AND headline:"love"&sort=newest&fl=headline,web_url,snippet,source,keywords,pub_date,byline,word_count'

In [4]:
#get response
response = requests.get(query_url)
response

<Response [200]>

In [10]:
# Create an empty list to store the reviews
articles_list = []

# loop through pages 0-19

# create query with a page number
for page in range(0, 20):
    query_url = (
    f'{url}api-key={nyt_api_key}&begin_date={begin_date}&end_date={end_date}'
    + f'&fq={filter_query}&sort={sort}&fl={field_list}')
    
    #plus pagenation
    request_url = f"{query_url}&page={str(page)}"
    
    # API results show 10 articles at a time
    # Make a "GET" request and retrieve the JSON
    request_reviews = requests.get(request_url).json()
  
    # Add a twelve second interval between queries to stay within API query limits
    time.sleep(12)
    
   # Try and save the reviews to the reviews_list
    try:
        
        # loop through the reviews["response"]["docs"] and append each review to the list
        for article in request_reviews['response']['docs']:
            articles_list.append(article)
            
            
        # Print the page that was just retrieved
            print(article['snippet'])
            print('---------------------------')
        # Print the page number that had no results then break from the loop
    except ValueError:
        print(f'{page}')
        break
  

A gynecologist and her patient form a horrifyingly twisted connection in this batty, bloody Argentine melodrama.
---------------------------
Two childhood friends navigate cultural differences in this pleasantly uncontentious romantic comedy.
---------------------------
Religion comes between two girls falling in love in the 1990s in this sweet coming-of-age film bathed in grunge hues.
---------------------------
Rachael Leigh Cook stars in this bland rom-com as a travel executive exploring Vietnam and getting over a breakup.
---------------------------
A radiant Virginie Efira stars as a Parisian teacher who blissfully falls for a man and his 4-year-old daughter, complicating everyone’s lives.
---------------------------
A film adaptation of Taylor Jenkins Reid’s novel has potential for drama, but it stumbles on stock melodrama.
---------------------------
There’s not much Lennon music heard in this doc about his affair with May Pang, and given how much Pang trashes his wife, Yoko Ono

An HBO documentary examines a music festival that went so far off the rails that it defined an era.
---------------------------
Benoît Jacquot’s erotic costume drama envisions the Italian playboy as a weathered sad sack living in exile.
---------------------------
Two childhood best friends reunite in this unfocused yet uplifting sports drama by the filmmaker Jan Philipp Weyl.
---------------------------
This drama about a young single mother and a terminally ill daughter avoids sentimentalism but indulges heavily in dourness.
---------------------------
Paula Beer and Franz Rogowski, who made an impression in 2019’s “Transit,” are reunited by the director Christian Petzold for this adaptation of a European myth.
---------------------------
This small-town comedy from New Zealand looks at the romance between a country girl and a man accused of killing his family.
---------------------------
A conservative father who could not accept his son’s sexuality is led on a contemplative tour of

The story of a romance between two Kenyan women was shown at Cannes after being banned in Kenya.
---------------------------
Mary Kay Place is astonishing as a weary widow dealing with guilt, heartache and a diminishing circle of friends.
---------------------------
Haley Lu Richardson and Cole Sprouse navigate the rules of attraction and of a hospital in this film about living with cystic fibrosis.
---------------------------
In Jia Zhangke’s enthralling new feature, Zhao Tao and Liao Fan portray an underworld power couple at the mercy of time and fate.
---------------------------
The Chilean writer-director Sebastián Lelio finds a perfect partner in Moore for this fantastic remake of his 2013 movie “Gloria.”
---------------------------
In Khalik Allah’s poetic movie, the most prevalent perspective is not that of a documentarian, but of a filmmaker’s love.
---------------------------
Something strange is going on in the Irish countryside, and it’s not just the giant roiling pit of dir

Twenty-five years after they split, two former high school sweethearts reluctantly reunite for an awkward night.
---------------------------
A couple break up, but remain together, frustratingly, because of the husband’s financial woes.
---------------------------
In war-torn Liberia, Charlize Theron and Javier Bardem face a big problem: how to keep their knotty relationship from fizzling.
---------------------------
In Ingrid Jungermann’s droll comedy, ex-lovers who produce a podcast about serial killers find themselves at odds when a new romantic interest emerges.
---------------------------
Isabelle Huppert and Louis Garrel star in a screen version of an 18th-century Marivaux play and the last film directed by Luc Bondy.
---------------------------
In “My Journey Through French Cinema,” Mr. Tavernier’s documentary about movies doubles as an autobiography.
---------------------------
Kumail Nanjiani stars opposite Zoe Kazan in this very funny, moving true story about how he found and

In [13]:
# Preview the first 5 results in JSON format
    
# Use json.dumps with argument indent=4 to format data
print(json.dumps(articles_list[0:5], indent=4))


[
    {
        "web_url": "https://www.nytimes.com/2023/05/25/movies/the-attachment-diaries-review.html",
        "snippet": "A gynecologist and her patient form a horrifyingly twisted connection in this batty, bloody Argentine melodrama.",
        "source": "The New York Times",
        "headline": {
            "main": "\u2018The Attachment Diaries\u2019 Review: Love, Sick",
            "kicker": null,
            "content_kicker": null,
            "print_headline": "The Attachment Diaries",
            "name": null,
            "seo": null,
            "sub": null
        },
        "keywords": [
            {
                "name": "subject",
                "value": "Movies",
                "rank": 1,
                "major": "N"
            },
            {
                "name": "creative_works",
                "value": "The Attachment Diaries (Movie)",
                "rank": 2,
                "major": "N"
            },
            {
                "name": "persons",
 

In [14]:
# Convert results list to JSON normalized Pandas DataFrame
reviews_df = pd.json_normalize(articles_list)
reviews_df.head(5)

Unnamed: 0,web_url,snippet,source,keywords,pub_date,word_count,headline.main,headline.kicker,headline.content_kicker,headline.print_headline,headline.name,headline.seo,headline.sub,byline.original,byline.person,byline.organization
0,https://www.nytimes.com/2023/05/25/movies/the-...,A gynecologist and her patient form a horrifyi...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-05-25T11:00:03+0000,295,"‘The Attachment Diaries’ Review: Love, Sick",,,The Attachment Diaries,,,,By Jeannette Catsoulis,"[{'firstname': 'Jeannette', 'middlename': None...",
1,https://www.nytimes.com/2023/05/04/movies/what...,Two childhood friends navigate cultural differ...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-05-04T17:16:45+0000,287,Review: ‘What’s Love Got to Do With It?’ Proba...,,,What’s Love Got to Do With It?,,,,By Jeannette Catsoulis,"[{'firstname': 'Jeannette', 'middlename': None...",
2,https://www.nytimes.com/2023/05/04/movies/you-...,Religion comes between two girls falling in lo...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-05-04T11:00:08+0000,294,‘You Can Live Forever’ Review: Do You Love Me ...,,,You Can Live Forever,,,,By Elisabeth Vincentelli,"[{'firstname': 'Elisabeth', 'middlename': None...",
3,https://www.nytimes.com/2023/04/21/movies/a-to...,Rachael Leigh Cook stars in this bland rom-com...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-04-21T07:03:25+0000,276,‘A Tourist’s Guide to Love’ Review: A Wearying...,,,A Tourist’s Guide to Love,,,,By Elisabeth Vincentelli,"[{'firstname': 'Elisabeth', 'middlename': None...",
4,https://www.nytimes.com/2023/04/20/movies/othe...,A radiant Virginie Efira stars as a Parisian t...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-04-20T15:35:13+0000,801,‘Other People’s Children’ Review: True Romance,Critic’s pick,,Intoxicating Love With a Sobering Turn,,,,By Manohla Dargis,"[{'firstname': 'Manohla', 'middlename': None, ...",


In [15]:
# Extract the title from the "headline.main" column and
# save it to a new column "title"
# Title is between unicode characters \u2018 and \u2019. 
# End string should include " Review" to avoid cutting title early

#this was way too advanced for me, i used chatgpt, and it was apparently too advanced for chatgpt... 
#took us about 45 min to figure out. we definitely never covered this stuff, this homework is way out of control
#way too time consuming

#title_extract = reviews_df['headline.main'].str.extract(r"(?:‘|“)(.*?(?:Review)?)['”]|Review: (.*?)$", flags=re.UNICODE)
#reviews_df['title'] = title_extract.iloc[:, 0].fillna('') + title_extract.iloc[:, 1].fillna('')

# Ensure that titles end with the word "Review"
#reviews_df['title'] = reviews_df['title'].apply(lambda x: x + ' Review' if x and not x.endswith('Review') else x)


#title_extract = reviews_df['headline.main'].str.extract(r"(?:|“)(.*?)(?:Review: |Review$)", flags=re.UNICODE)
#reviews_df['title'] = title_extract.iloc[:, 0].fillna('')

pattern = r"(?:‘|“)(.*?)(?:’|”)(?:\s*Review|$)"
title_extract = reviews_df['headline.main'].str.extract(pattern, flags=re.UNICODE)
reviews_df['title'] = title_extract.iloc[:, 0].str.strip()

# Display the DataFrame with the new "title" column
reviews_df

Unnamed: 0,web_url,snippet,source,keywords,pub_date,word_count,headline.main,headline.kicker,headline.content_kicker,headline.print_headline,headline.name,headline.seo,headline.sub,byline.original,byline.person,byline.organization,title
0,https://www.nytimes.com/2023/05/25/movies/the-...,A gynecologist and her patient form a horrifyi...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-05-25T11:00:03+0000,295,"‘The Attachment Diaries’ Review: Love, Sick",,,The Attachment Diaries,,,,By Jeannette Catsoulis,"[{'firstname': 'Jeannette', 'middlename': None...",,The Attachment Diaries
1,https://www.nytimes.com/2023/05/04/movies/what...,Two childhood friends navigate cultural differ...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-05-04T17:16:45+0000,287,Review: ‘What’s Love Got to Do With It?’ Proba...,,,What’s Love Got to Do With It?,,,,By Jeannette Catsoulis,"[{'firstname': 'Jeannette', 'middlename': None...",,
2,https://www.nytimes.com/2023/05/04/movies/you-...,Religion comes between two girls falling in lo...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-05-04T11:00:08+0000,294,‘You Can Live Forever’ Review: Do You Love Me ...,,,You Can Live Forever,,,,By Elisabeth Vincentelli,"[{'firstname': 'Elisabeth', 'middlename': None...",,You Can Live Forever
3,https://www.nytimes.com/2023/04/21/movies/a-to...,Rachael Leigh Cook stars in this bland rom-com...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-04-21T07:03:25+0000,276,‘A Tourist’s Guide to Love’ Review: A Wearying...,,,A Tourist’s Guide to Love,,,,By Elisabeth Vincentelli,"[{'firstname': 'Elisabeth', 'middlename': None...",,A Tourist’s Guide to Love
4,https://www.nytimes.com/2023/04/20/movies/othe...,A radiant Virginie Efira stars as a Parisian t...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-04-20T15:35:13+0000,801,‘Other People’s Children’ Review: True Romance,Critic’s pick,,Intoxicating Love With a Sobering Turn,,,,By Manohla Dargis,"[{'firstname': 'Manohla', 'middlename': None, ...",,Other People’s Children
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,https://www.nytimes.com/2017/03/09/movies/the-...,This moody romance stars Tatiana Maslany (“Orp...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2017-03-09T21:54:58+0000,251,Review: A Combustible Pair Find Love in ‘The O...,,,Review: A Combustible Pair Find Love in ‘The O...,,,,By Andy Webster,"[{'firstname': 'Andy', 'middlename': None, 'la...",,The Other Half
196,https://www.nytimes.com/2017/03/09/movies/revi...,A nurse travels to the Ottoman Empire on the e...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2017-03-09T21:53:12+0000,267,"Review: Love as the World Wars, in ‘The Ottoma...",,,"Review: Love as the World Wars, in ‘The Ottoma...",,,,By Neil Genzlinger,"[{'firstname': 'Neil', 'middlename': None, 'la...",,The Ottoman Lieutenant
197,https://www.nytimes.com/2017/03/02/movies/love...,Josh Kornbluth runs afoul of the Internal Reve...,The New York Times,"[{'name': 'creative_works', 'value': 'Love & T...",2017-03-02T21:44:18+0000,246,Review: It’s All Mirth and Taxes in ‘Love & Ta...,,,"It’s Inevitable, Mirth and Taxes",,,,By Ken Jaworowski,"[{'firstname': 'Ken', 'middlename': None, 'las...",,Love & Taxes
198,https://www.nytimes.com/2017/02/16/movies/ever...,A messed-up heroine is asked to choose between...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2017-02-16T21:45:50+0000,256,"Review: ‘Everybody Loves Somebody,’ a Rom-Com ...",,,Everybody Loves Somebody,,,,By Jeannette Catsoulis,"[{'firstname': 'Jeannette', 'middlename': None...",,


In [16]:
# Extract 'name' and 'value' from items in "keywords" column
def extract_keywords(keyword_list):
    extracted_keywords = ""
    for item in keyword_list:
        # Extract 'name' and 'value'
        keyword = f"{item['name']}: {item['value']};" 
        # Append the keyword item to the extracted_keywords list
        extracted_keywords += keyword
    return extracted_keywords

# Fix the "keywords" column by converting cells from a list to a string
#reviews_df['keywords'] = reviews_df['keywords'].apply(extract_keywords)

# Display the DataFrame with the updated "keywords" column
#reviews_df[['headline.main', 'keywords']]

#I feel like these are the correct answers, but it keeps returning "TypeError: string indices must be integers"
#so its back t ochat gpt, this homework really is much more advanced than i feel like the class has prepared the students for

# Replace the existing 'keywords' column with a string representation
reviews_df['keywords'] = reviews_df['keywords'].apply(lambda x: str(x) if isinstance(x, list) else x)

# Display the DataFrame with the updated "keywords" column
reviews_df[['headline.main', 'keywords']]

Unnamed: 0,headline.main,keywords
0,"‘The Attachment Diaries’ Review: Love, Sick","[{'name': 'subject', 'value': 'Movies', 'rank'..."
1,Review: ‘What’s Love Got to Do With It?’ Proba...,"[{'name': 'subject', 'value': 'Movies', 'rank'..."
2,‘You Can Live Forever’ Review: Do You Love Me ...,"[{'name': 'subject', 'value': 'Movies', 'rank'..."
3,‘A Tourist’s Guide to Love’ Review: A Wearying...,"[{'name': 'subject', 'value': 'Movies', 'rank'..."
4,‘Other People’s Children’ Review: True Romance,"[{'name': 'subject', 'value': 'Movies', 'rank'..."
...,...,...
195,Review: A Combustible Pair Find Love in ‘The O...,"[{'name': 'subject', 'value': 'Movies', 'rank'..."
196,"Review: Love as the World Wars, in ‘The Ottoma...","[{'name': 'subject', 'value': 'Movies', 'rank'..."
197,Review: It’s All Mirth and Taxes in ‘Love & Ta...,"[{'name': 'creative_works', 'value': 'Love & T..."
198,"Review: ‘Everybody Loves Somebody,’ a Rom-Com ...","[{'name': 'subject', 'value': 'Movies', 'rank'..."


In [17]:
# Create a list from the "title" column using to_list()
# These titles will be used in the query for The Movie Database
tmd_list = reviews_df['title'].astype(str).to_list()
tmd_list

['The Attachment Diaries',
 'nan',
 'You Can Live Forever',
 'A Tourist’s Guide to Love',
 'Other People’s Children',
 'One True Loves',
 'The Lost Weekend: A Love Story',
 'A Thousand and One',
 'Your Place or Mine',
 'Love in the Time of Fentanyl',
 'Pamela, a Love Story',
 'In From the Side',
 'After Love',
 'Alcarràs',
 'Nelly & Nadine',
 'Lady Chatterley’s Lover',
 'The Sound of Christmas',
 'The Inspection',
 'Bones and All',
 'My Policeman',
 'About Fate',
 'Waiting for Bojangles',
 'I Love My Dad',
 'A Love Song',
 'Alone Together',
 'Art of Love',
 'The Wheel',
 'Thor: Love and Thunder',
 'Both Sides of the Blade',
 'Fire of Love',
 'Love & Gelato',
 'Stay Prayed Up',
 'Benediction',
 'Dinner in America',
 'In a New York Minute',
 'Anaïs in Love',
 'I Love America',
 'See You Then',
 'La Mami',
 'Love After Love',
 'Deep Water',
 'Lucy and Desi',
 'Cyrano',
 'The In Between',
 'Book of Love',
 'Lingui, the Sacred Bonds',
 'The Pink Cloud',
 'A Journal for Jordan',
 'West Side 

### Access The Movie Database API

In [18]:
# Prepare The Movie Database query

url = "https://api.themoviedb.org/3/search/movie?query="
tmdb_key_string = "&api_key=" + tmdb_api_key


In [24]:
# Create an empty list to store the results
movie_data = []

# Create a request counter to sleep the requests after a multiple
# of 50 requests
request_counter = 1

# Loop through the titles
for movie in tmd_list:
    #tmdb_request = f'{url}+"{movie}"+{tmdb_key_string}'
    tmdb_request = f"{url}+{movie.replace(' ', '+')}+{tmdb_key_string}"
    # Check if we need to sleep before making a request
    if request_counter % 50 == 0 and request_counter > 0:
        print(f"Pausing for a moment after {request_counter} requests...")
        time.sleep(12)
    # Add 1 to the request counter
    request_counter += 1
    
    # Perform a "GET" request for The Movie Database
    response = requests.get(tmdb_request).json()

    # Include a try clause to search for the full movie details.
    # Use the except clause to print out a statement if a movie
    # is not found.
    try:
        # Get movie id
        movie_id = response['results'][0]['id']

        # Make a request for the full movie details
        movie_details_request = f'https://api.themoviedb.org/3/movie/{movie_id}?api_key={tmdb_api_key}'
        #https://api.themoviedb.org/3/movie/343611?api_key=6966c789fc247e4a29bf3336097831b9
        # Execute "GET" request with url
        movie_details = requests.get(movie_details_request).json()
        
        # Extract the genre names into a list
        genres = []
        for genre in movie_details['genres']:
            genres.append(genre['name'])
            
        # Extract the spoken_languages' English name into a list
        spoken_languages = []
        for spoken_language in movie_details['spoken_languages']:
            spoken_languages.append(spoken_language['english_name'])
                
        # Extract the production_countries' name into a list
        production_countries = []
        for production_countrie in movie_details['production_countries']:
            production_countries.append(production_countrie['name'])
        
        # Add the relevant data to a dictionary and
        # append it to the results list
        movie_data.append({
            'title': movie_details['title'],
            'genres': genres,
            'spoken_languages': spoken_languages,
            'production_countries': production_countries
        })
      #  results.append(movie_data)
       
        print(f"Found movie: {movie}")

    #except KeyError: 
   #     print(f"Movie not found: {movie}")
    except (KeyError, IndexError) as e:
        print(f"KeyError: {e}")
        print(f"Movie details response: {response}")
            
else:
    print(f"Invalid data type for movie: {type(movie)}")

Found movie: The Attachment Diaries
Found movie: nan
Found movie: You Can Live Forever
Found movie: A Tourist’s Guide to Love
Found movie: Other People’s Children
Found movie: One True Loves
Found movie: The Lost Weekend: A Love Story
Found movie: A Thousand and One
Found movie: Your Place or Mine
Found movie: Love in the Time of Fentanyl
Found movie: Pamela, a Love Story
Found movie: In From the Side
Found movie: After Love
Found movie: Alcarràs
Found movie: Nelly & Nadine
Found movie: Lady Chatterley’s Lover
Found movie: The Sound of Christmas
Found movie: The Inspection
Found movie: Bones and All
Found movie: My Policeman
Found movie: About Fate
Found movie: Waiting for Bojangles
Found movie: I Love My Dad
Found movie: A Love Song
Found movie: Alone Together
Found movie: Art of Love
Found movie: The Wheel
Found movie: Thor: Love and Thunder
Found movie: Both Sides of the Blade
Found movie: Fire of Love
Found movie: Love & Gelato
Found movie: Stay Prayed Up
Found movie: Benediction
F

In [26]:
# Preview the first 5 results in JSON format
# Use json.dumps with argument indent=4 to format data
preview_results = movie_data[:5]
formatted_results = json.dumps(preview_results, indent=4)
print(formatted_results)

[
    {
        "title": "The Attachment Diaries",
        "genres": [
            "Drama",
            "Mystery",
            "Thriller",
            "Horror"
        ],
        "spoken_languages": [
            "Spanish"
        ],
        "production_countries": [
            "Argentina"
        ]
    },
    {
        "title": "The Time of Huan Nan",
        "genres": [
            "Drama",
            "Fantasy"
        ],
        "spoken_languages": [
            "Mandarin"
        ],
        "production_countries": [
            "Taiwan"
        ]
    },
    {
        "title": "You Can Live Forever",
        "genres": [
            "Drama",
            "Romance"
        ],
        "spoken_languages": [
            "French",
            "English"
        ],
        "production_countries": [
            "Canada"
        ]
    },
    {
        "title": "A Tourist's Guide to Love",
        "genres": [
            "Romance",
            "Comedy",
            "Adventure"
        ],
    

In [27]:
# Convert the results to a DataFrame
tmdb_df = pd.DataFrame(movie_data)
tmdb_df.head()

Unnamed: 0,title,genres,spoken_languages,production_countries
0,The Attachment Diaries,"[Drama, Mystery, Thriller, Horror]",[Spanish],[Argentina]
1,The Time of Huan Nan,"[Drama, Fantasy]",[Mandarin],[Taiwan]
2,You Can Live Forever,"[Drama, Romance]","[French, English]",[Canada]
3,A Tourist's Guide to Love,"[Romance, Comedy, Adventure]",[English],[United States of America]
4,Other People's Children,[Drama],[French],[France]


### Merge and Clean the Data for Export

In [79]:
# Merge the New York Times reviews and TMDB DataFrames on title
merged_df = pd.merge(tmdb_df, reviews_df, on="title")
merged_df.head(5)

Unnamed: 0,title,genres,spoken_languages,production_countries,web_url,snippet,source,keywords,pub_date,word_count,headline.main,headline.kicker,headline.content_kicker,headline.print_headline,headline.name,headline.seo,headline.sub,byline.original,byline.person,byline.organization
0,The Attachment Diaries,"[Drama, Mystery, Thriller, Horror]",[Spanish],[Argentina],https://www.nytimes.com/2023/05/25/movies/the-...,A gynecologist and her patient form a horrifyi...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-05-25T11:00:03+0000,295,"‘The Attachment Diaries’ Review: Love, Sick",,,The Attachment Diaries,,,,By Jeannette Catsoulis,"[{'firstname': 'Jeannette', 'middlename': None...",
1,You Can Live Forever,"[Drama, Romance]","[French, English]",[Canada],https://www.nytimes.com/2023/05/04/movies/you-...,Religion comes between two girls falling in lo...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-05-04T11:00:08+0000,294,‘You Can Live Forever’ Review: Do You Love Me ...,,,You Can Live Forever,,,,By Elisabeth Vincentelli,"[{'firstname': 'Elisabeth', 'middlename': None...",
2,One True Loves,"[Romance, Drama, Comedy]",[English],"[Germany, United States of America]",https://www.nytimes.com/2023/04/13/movies/one-...,A film adaptation of Taylor Jenkins Reid’s nov...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-04-13T11:00:06+0000,320,‘One True Loves’ Review: A Romance Lost at Sea,,,One True Loves,,,,By Brandon Yu,"[{'firstname': 'Brandon', 'middlename': None, ...",
3,The Lost Weekend: A Love Story,[Documentary],[English],[United States of America],https://www.nytimes.com/2023/04/13/movies/the-...,There’s not much Lennon music heard in this do...,The New York Times,"[{'name': 'subject', 'value': 'Documentary Fil...",2023-04-13T11:00:03+0000,327,‘The Lost Weekend: A Love Story’ Review: When ...,,,The Lost Weekend: A Love Story,,,,By Glenn Kenny,"[{'firstname': 'Glenn', 'middlename': None, 'l...",
4,Your Place or Mine,"[Romance, Comedy]",[English],[United States of America],https://www.nytimes.com/2023/02/09/movies/your...,This humdrum Netflix romantic comedy features ...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-02-10T00:00:05+0000,569,‘Your Place or Mine’ Review: Try Neither,,,They Have a Humdrum Kind of Love,,,,By Amy Nicholson,"[{'firstname': 'Amy', 'middlename': None, 'las...",


In [80]:
merged_df.columns

Index(['title', 'genres', 'spoken_languages', 'production_countries',
       'web_url', 'snippet', 'source', 'keywords', 'pub_date', 'word_count',
       'headline.main', 'headline.kicker', 'headline.content_kicker',
       'headline.print_headline', 'headline.name', 'headline.seo',
       'headline.sub', 'byline.original', 'byline.person',
       'byline.organization'],
      dtype='object')

In [81]:
# Remove list brackets and quotation marks on the columns containing lists
# Create a list of the columns that need fixing

columns_to_fix = ['genres', 'spoken_languages', 'production_countries',
       'web_url', 'keywords',
       'headline.main', 'headline.kicker', 'headline.content_kicker',
       'headline.print_headline', 'headline.name', 'headline.seo',
       'headline.sub',  'byline.person',
       ] 
# Create a list of characters to remove
characters_to_remove = ['[', ']', '"', '{','}', "'"]

# Loop through the list of columns to fix
for column in columns_to_fix:
    # Convert the column to type 'str'
    merged_df[column] = merged_df[column].astype(str)

    # Loop through characters to remove
    for char in characters_to_remove:
        merged_df[column] = merged_df[column].str.replace(char, '')

# Display the fixed DataFrame
merged_df

Unnamed: 0,title,genres,spoken_languages,production_countries,web_url,snippet,source,keywords,pub_date,word_count,headline.main,headline.kicker,headline.content_kicker,headline.print_headline,headline.name,headline.seo,headline.sub,byline.original,byline.person,byline.organization
0,The Attachment Diaries,"Drama, Mystery, Thriller, Horror",Spanish,Argentina,https://www.nytimes.com/2023/05/25/movies/the-...,A gynecologist and her patient form a horrifyi...,The New York Times,"name: subject, value: Movies, rank: 1, major: ...",2023-05-25T11:00:03+0000,295,"‘The Attachment Diaries’ Review: Love, Sick",,,The Attachment Diaries,,,,By Jeannette Catsoulis,"firstname: Jeannette, middlename: None, lastna...",
1,You Can Live Forever,"Drama, Romance","French, English",Canada,https://www.nytimes.com/2023/05/04/movies/you-...,Religion comes between two girls falling in lo...,The New York Times,"name: subject, value: Movies, rank: 1, major: ...",2023-05-04T11:00:08+0000,294,‘You Can Live Forever’ Review: Do You Love Me ...,,,You Can Live Forever,,,,By Elisabeth Vincentelli,"firstname: Elisabeth, middlename: None, lastna...",
2,One True Loves,"Romance, Drama, Comedy",English,"Germany, United States of America",https://www.nytimes.com/2023/04/13/movies/one-...,A film adaptation of Taylor Jenkins Reid’s nov...,The New York Times,"name: subject, value: Movies, rank: 1, major: ...",2023-04-13T11:00:06+0000,320,‘One True Loves’ Review: A Romance Lost at Sea,,,One True Loves,,,,By Brandon Yu,"firstname: Brandon, middlename: None, lastname...",
3,The Lost Weekend: A Love Story,Documentary,English,United States of America,https://www.nytimes.com/2023/04/13/movies/the-...,There’s not much Lennon music heard in this do...,The New York Times,"name: subject, value: Documentary Films and Pr...",2023-04-13T11:00:03+0000,327,‘The Lost Weekend: A Love Story’ Review: When ...,,,The Lost Weekend: A Love Story,,,,By Glenn Kenny,"firstname: Glenn, middlename: None, lastname: ...",
4,Your Place or Mine,"Romance, Comedy",English,United States of America,https://www.nytimes.com/2023/02/09/movies/your...,This humdrum Netflix romantic comedy features ...,The New York Times,"name: subject, value: Movies, rank: 1, major: ...",2023-02-10T00:00:05+0000,569,‘Your Place or Mine’ Review: Try Neither,,,They Have a Humdrum Kind of Love,,,,By Amy Nicholson,"firstname: Amy, middlename: None, lastname: Ni...",
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99,In Search of Fellini,"Drama, Adventure",English,,https://www.nytimes.com/2017/09/14/movies/in-s...,A sheltered young woman enamored of the films ...,The New York Times,"name: subject, value: Movies, rank: 1, major: ...",2017-09-14T11:00:24+0000,272,Review: Love of Il Maestro Drives ‘In Search o...,,,In Search of Fellini,,,,By Monica Castillo,"firstname: Monica, middlename: None, lastname:...",
100,The Last Face,Drama,"English, Portuguese",United States of America,https://www.nytimes.com/2017/07/27/movies/the-...,"In war-torn Liberia, Charlize Theron and Javie...",The New York Times,"name: subject, value: Movies, rank: 1, major: ...",2017-07-27T20:29:01+0000,280,Review: Aid Workers in Love and War in Sean Pe...,,,The Last Face,,,,By Glenn Kenny,"firstname: Glenn, middlename: None, lastname: ...",
101,Lost in Paris,Comedy,French,"Belgium, France",https://www.nytimes.com/2017/06/15/movies/lost...,The filmmakers Fiona Gordon and Dominique Abel...,The New York Times,"name: creative_works, value: Lost in Paris (Mo...",2017-06-15T19:37:09+0000,254,Review: Finding Love (and Slapstick) While ‘Lo...,,,Lost in Paris,,,,By Ben Kenigsberg,"firstname: Ben, middlename: None, lastname: Ke...",
102,The Other Half,"Drama, Romance",English,Canada,https://www.nytimes.com/2017/03/09/movies/the-...,This moody romance stars Tatiana Maslany (“Orp...,The New York Times,"name: subject, value: Movies, rank: 1, major: ...",2017-03-09T21:54:58+0000,251,Review: A Combustible Pair Find Love in ‘The O...,,,Review: A Combustible Pair Find Love in ‘The O...,,,,By Andy Webster,"firstname: Andy, middlename: None, lastname: W...",


In [82]:
merged_df = merged_df.dropna(axis=1, how='all')
merged_df.head(3)

Unnamed: 0,title,genres,spoken_languages,production_countries,web_url,snippet,source,keywords,pub_date,word_count,headline.main,headline.kicker,headline.content_kicker,headline.print_headline,headline.name,headline.seo,headline.sub,byline.original,byline.person
0,The Attachment Diaries,"Drama, Mystery, Thriller, Horror",Spanish,Argentina,https://www.nytimes.com/2023/05/25/movies/the-...,A gynecologist and her patient form a horrifyi...,The New York Times,"name: subject, value: Movies, rank: 1, major: ...",2023-05-25T11:00:03+0000,295,"‘The Attachment Diaries’ Review: Love, Sick",,,The Attachment Diaries,,,,By Jeannette Catsoulis,"firstname: Jeannette, middlename: None, lastna..."
1,You Can Live Forever,"Drama, Romance","French, English",Canada,https://www.nytimes.com/2023/05/04/movies/you-...,Religion comes between two girls falling in lo...,The New York Times,"name: subject, value: Movies, rank: 1, major: ...",2023-05-04T11:00:08+0000,294,‘You Can Live Forever’ Review: Do You Love Me ...,,,You Can Live Forever,,,,By Elisabeth Vincentelli,"firstname: Elisabeth, middlename: None, lastna..."
2,One True Loves,"Romance, Drama, Comedy",English,"Germany, United States of America",https://www.nytimes.com/2023/04/13/movies/one-...,A film adaptation of Taylor Jenkins Reid’s nov...,The New York Times,"name: subject, value: Movies, rank: 1, major: ...",2023-04-13T11:00:06+0000,320,‘One True Loves’ Review: A Romance Lost at Sea,,,One True Loves,,,,By Brandon Yu,"firstname: Brandon, middlename: None, lastname..."


In [83]:
# Drop "byline.person" column
merged_df = merged_df.drop(columns=['byline.person'], axis=1)
merged_df.head()

Unnamed: 0,title,genres,spoken_languages,production_countries,web_url,snippet,source,keywords,pub_date,word_count,headline.main,headline.kicker,headline.content_kicker,headline.print_headline,headline.name,headline.seo,headline.sub,byline.original
0,The Attachment Diaries,"Drama, Mystery, Thriller, Horror",Spanish,Argentina,https://www.nytimes.com/2023/05/25/movies/the-...,A gynecologist and her patient form a horrifyi...,The New York Times,"name: subject, value: Movies, rank: 1, major: ...",2023-05-25T11:00:03+0000,295,"‘The Attachment Diaries’ Review: Love, Sick",,,The Attachment Diaries,,,,By Jeannette Catsoulis
1,You Can Live Forever,"Drama, Romance","French, English",Canada,https://www.nytimes.com/2023/05/04/movies/you-...,Religion comes between two girls falling in lo...,The New York Times,"name: subject, value: Movies, rank: 1, major: ...",2023-05-04T11:00:08+0000,294,‘You Can Live Forever’ Review: Do You Love Me ...,,,You Can Live Forever,,,,By Elisabeth Vincentelli
2,One True Loves,"Romance, Drama, Comedy",English,"Germany, United States of America",https://www.nytimes.com/2023/04/13/movies/one-...,A film adaptation of Taylor Jenkins Reid’s nov...,The New York Times,"name: subject, value: Movies, rank: 1, major: ...",2023-04-13T11:00:06+0000,320,‘One True Loves’ Review: A Romance Lost at Sea,,,One True Loves,,,,By Brandon Yu
3,The Lost Weekend: A Love Story,Documentary,English,United States of America,https://www.nytimes.com/2023/04/13/movies/the-...,There’s not much Lennon music heard in this do...,The New York Times,"name: subject, value: Documentary Films and Pr...",2023-04-13T11:00:03+0000,327,‘The Lost Weekend: A Love Story’ Review: When ...,,,The Lost Weekend: A Love Story,,,,By Glenn Kenny
4,Your Place or Mine,"Romance, Comedy",English,United States of America,https://www.nytimes.com/2023/02/09/movies/your...,This humdrum Netflix romantic comedy features ...,The New York Times,"name: subject, value: Movies, rank: 1, major: ...",2023-02-10T00:00:05+0000,569,‘Your Place or Mine’ Review: Try Neither,,,They Have a Humdrum Kind of Love,,,,By Amy Nicholson


In [84]:
# Delete duplicate rows and reset index
merged_df.drop_duplicates().reset_index(drop=True)
merged_df.head(1)

Unnamed: 0,title,genres,spoken_languages,production_countries,web_url,snippet,source,keywords,pub_date,word_count,headline.main,headline.kicker,headline.content_kicker,headline.print_headline,headline.name,headline.seo,headline.sub,byline.original
0,The Attachment Diaries,"Drama, Mystery, Thriller, Horror",Spanish,Argentina,https://www.nytimes.com/2023/05/25/movies/the-...,A gynecologist and her patient form a horrifyi...,The New York Times,"name: subject, value: Movies, rank: 1, major: ...",2023-05-25T11:00:03+0000,295,"‘The Attachment Diaries’ Review: Love, Sick",,,The Attachment Diaries,,,,By Jeannette Catsoulis


In [95]:
almost_final_df = merged_df.drop(columns=['headline.kicker', 'headline.content_kicker',\
                                    'headline.name', 'headline.seo','headline.sub'], axis=1)
almost_final_df.head()

Unnamed: 0,title,genres,spoken_languages,production_countries,web_url,snippet,source,keywords,pub_date,word_count,headline.main,headline.print_headline,byline.original
0,The Attachment Diaries,"Drama, Mystery, Thriller, Horror",Spanish,Argentina,https://www.nytimes.com/2023/05/25/movies/the-...,A gynecologist and her patient form a horrifyi...,The New York Times,"name: subject, value: Movies, rank: 1, major: ...",2023-05-25T11:00:03+0000,295,"‘The Attachment Diaries’ Review: Love, Sick",The Attachment Diaries,By Jeannette Catsoulis
1,You Can Live Forever,"Drama, Romance","French, English",Canada,https://www.nytimes.com/2023/05/04/movies/you-...,Religion comes between two girls falling in lo...,The New York Times,"name: subject, value: Movies, rank: 1, major: ...",2023-05-04T11:00:08+0000,294,‘You Can Live Forever’ Review: Do You Love Me ...,You Can Live Forever,By Elisabeth Vincentelli
2,One True Loves,"Romance, Drama, Comedy",English,"Germany, United States of America",https://www.nytimes.com/2023/04/13/movies/one-...,A film adaptation of Taylor Jenkins Reid’s nov...,The New York Times,"name: subject, value: Movies, rank: 1, major: ...",2023-04-13T11:00:06+0000,320,‘One True Loves’ Review: A Romance Lost at Sea,One True Loves,By Brandon Yu
3,The Lost Weekend: A Love Story,Documentary,English,United States of America,https://www.nytimes.com/2023/04/13/movies/the-...,There’s not much Lennon music heard in this do...,The New York Times,"name: subject, value: Documentary Films and Pr...",2023-04-13T11:00:03+0000,327,‘The Lost Weekend: A Love Story’ Review: When ...,The Lost Weekend: A Love Story,By Glenn Kenny
4,Your Place or Mine,"Romance, Comedy",English,United States of America,https://www.nytimes.com/2023/02/09/movies/your...,This humdrum Netflix romantic comedy features ...,The New York Times,"name: subject, value: Movies, rank: 1, major: ...",2023-02-10T00:00:05+0000,569,‘Your Place or Mine’ Review: Try Neither,They Have a Humdrum Kind of Love,By Amy Nicholson


In [96]:
# Export data to CSV without the index
almost_final_df.to_csv('output.csv', index=False)