In [295]:
# Import the requests library and API key
import pandas as pd
import requests
from config import tmdb_api_key

# Part 1: Preliminary Code

In [296]:
# Starting URL for TMDB API discover method including the API key from config.py. Using the language (English),
# region (US), release date start (01-01-2000) and end (12-31-2022), sort by (release date descending) parameters.
# Not included are the "page" and "certification" parameters which will be included in the function later below.
url = "https://api.themoviedb.org/3/discover/movie?api_key=" + tmdb_api_key + "&language=en-US&region=US&sort_by=primary_release_date.desc&certification_country=US&include_adult=false&include_video=false&primary_release_date.gte=2000-01-01&primary_release_date.lte=2022-12-31&with_original_language=en"

In [297]:
# Create a function that can take different URLs that you input to iterate through multiple pages of 
# the API call and to constrain the search to particular certification ratings. Specifying the params 
# for params with a dictionary containing the key/value pair for "page" and "certification".

def get_movies(url_name, page_num, rating):
    movies = requests.get(
        url_name,
        params={
            "page": page_num,
            "certification": rating}
    )
    # Create a variable to hold the JSON text of the 'get' request
    movies_data = movies.json()
    # Create a Pandas Dataframe for 'results' from the JSON dictionary
    return pd.DataFrame(movies_data['results'])

In [298]:
# Create a function to get the additional movie information for every individual movie ID in the movies dataframe.

def get_info(movie_url,id_list):
#     Create a function to input each individual movie ID into the movie_id_url.
    def get_id_url(id_index):
        movie_id_url = "https://api.themoviedb.org/3/movie/" + str(id_list[id_index]) + "?api_key=" + tmdb_api_key
        return movie_id_url
#     Create a list of movie URLs based on each movie's ID.
    movie_id_urls = [get_id_url(n) for n in range(len(id_list))]
    info_request = requests.get(movie_id_urls[movie_url])
    info_data = info_request.json()
    return pd.json_normalize(info_data)

# Part 2: LGBT Movies

## Create dataframes for all LGBT movies and LGBT movies that received a certain MPAA rating

In [299]:
# Create URL for movies tagged with keywords "LGBT, gay, lesbian, trans, transgender, and gay teen."
lgbt_url = url + "&with_keywords=158718%7C264384%7C264386%7C273637%7C290527%7C163037"

In [300]:
# The number of pages you want. (Note: the max number of pages for the ratings comes from the total amount of
# pages that the API call will iterate through to get all movies regardless of their certification rating).
pages_requested = 65

# Get all pages for each US rating as dataframes by calling the function "get_ratings()" 
# where "page_num" is variable n + 1. The list comprehension will iterate through the range 
# of "pages_requested" which is 12, adding 1 to each iteration to make the count 1-12. The get_ratings function
# uses the second variable to specify the rating needed in the URL parameter.
pages_lgbt = [get_movies(lgbt_url, n + 1, "") for n in range(pages_requested)]
pages_nr_lgbt = [get_movies(lgbt_url, n + 1, "NR") for n in range(pages_requested)]
pages_g_lgbt = [get_movies(lgbt_url, n + 1, "G") for n in range(pages_requested)]
pages_pg_lgbt = [get_movies(lgbt_url, n + 1, "PG") for n in range(pages_requested)]
pages_pg13_lgbt = [get_movies(lgbt_url, n + 1, "PG-13") for n in range(pages_requested)]
pages_r_lgbt = [get_movies(lgbt_url, n + 1, "R") for n in range(pages_requested)]
pages_nc17_lgbt = [get_movies(lgbt_url, n + 1, "NC-17") for n in range(pages_requested)]

# Combine the pages for each dataframe to single dataframe using the concat function (aka concatenate).
lgbt_movies_df = pd.concat(pages_lgbt)
nr_lgbt_df = pd.concat(pages_nr_lgbt)
g_lgbt_df = pd.concat(pages_g_lgbt) 
pg_lgbt_df = pd.concat(pages_pg_lgbt)
pg13_lgbt_df = pd.concat(pages_pg13_lgbt)
r_lgbt_df = pd.concat(pages_r_lgbt)
nc17_lgbt_df = pd.concat(pages_nc17_lgbt)

In [301]:
# Verify that the lgbt_movies_df was created correctly with all the necessary results.
print(len(lgbt_movies_df))
lgbt_movies_df.head()

1299


Unnamed: 0,adult,backdrop_path,genre_ids,id,original_language,original_title,overview,popularity,poster_path,release_date,title,video,vote_average,vote_count
0,False,,"[35, 18]",507903,en,Spoiler Alert,The story of Michael Ausiello and Kit Cowan's ...,2.796,/np3y8wm2K7YK5RT6e68hw6IzhBw.jpg,2022-12-31,Spoiler Alert,False,0.0,0
1,False,,"[27, 53]",876802,en,Saint Drogo,Saint Drogo is a supernatural slowburn themati...,1.828,/ghq9XVBvmlbqrsKA7KDMsYOPjAu.jpg,2022-10-01,Saint Drogo,False,0.0,0
2,False,,[18],793992,en,Three Months,"The story of Caleb, a South Florida teen who l...",2.05,/AoMOVVkuVctRLyjfzjrEnPploxp.jpg,2022-02-23,Three Months,False,0.0,0
3,False,,[10749],929477,en,Heart Shot,Teenagers Nikki and Sam are in love and planni...,5.582,,2022-02-17,Heart Shot,False,0.0,0
4,False,/qQFvUg92Uh1jRXCNgqo9LduBDFD.jpg,[18],920345,en,EMPATHY (Or: The Girl with The Pearled Hair),After running away from her emotionally abusiv...,2.406,/59C1l5dqtKuKuKy5qlE23PV4yC6.jpg,2022-01-25,EMPATHY (Or: The Girl with The Pearled Hair),False,0.0,0


In [302]:
# Check for duplicates
lgbt_movies_df[lgbt_movies_df.id.duplicated()]

Unnamed: 0,adult,backdrop_path,genre_ids,id,original_language,original_title,overview,popularity,poster_path,release_date,title,video,vote_average,vote_count


In [303]:
# Verify that the r_lgbt_df was created correctly with all the necessary results. (This will ensure all other
# ratings dataframes were created correctly as well).
print(len(r_lgbt_df))
r_lgbt_df.head()

109


Unnamed: 0,adult,backdrop_path,genre_ids,id,original_language,original_title,overview,popularity,poster_path,release_date,title,video,vote_average,vote_count
0,False,,"[27, 53]",876802,en,Saint Drogo,Saint Drogo is a supernatural slowburn themati...,1.828,/ghq9XVBvmlbqrsKA7KDMsYOPjAu.jpg,2022-10-01,Saint Drogo,False,0.0,0
1,False,/Ainzn99TYR1LZWL7pKEBx3AH8Oy.jpg,[35],915164,en,TWO BEERS,"Two lifelong friends, Graham and Adrian, have ...",0.6,/pXEgPcmE1XRwF6543iOFM2XLGzU.jpg,2021-12-10,TWO BEERS,False,10.0,1
2,False,/nsyyWY7F3wjZT6aCkwWIsZXI4wV.jpg,[99],857731,en,Untold: Caitlyn Jenner,Caitlyn Jenner's unlikely path to Olympic glor...,7.671,/gSOXqj8HLTMzdifHOOQnbjgVEVr.jpg,2021-08-24,Untold: Caitlyn Jenner,False,6.5,24
3,False,/zKD0XxAjtK0VYX7j1elDtSNSNbR.jpg,"[18, 12]",552269,en,Joe Bell,"The true story of a small town, working class ...",15.152,/9HfQD60w6f6PPkfdiI6ipJSw0bD.jpg,2021-07-23,Joe Bell,False,6.0,50
4,False,/5C8bfwglg91uZhc2fbfpSjNGamV.jpg,"[27, 9648]",591273,en,Fear Street: 1994,"After a series of brutal slayings, a teen and ...",148.336,/9J9Wy39ZjrVmfk7yMkulpcI5sy0.jpg,2021-06-28,Fear Street: 1994,False,6.8,1390


## Ratings Data

### Create a new column to hold rating info for each individual dataframe

In [304]:
# Create a variable to hold the list of US ratings names.
ratings = ["NR", "G", "PG", "PG-13", "R", "NC-17"]

In [305]:
# Verify the length of each dataframe.
# (Note: there are no films in the G rating dataframe, meaning there are no LGBT films with a G rating).)
print(len(nr_lgbt_df))
print(len(g_lgbt_df))
print(len(pg_lgbt_df))
print(len(pg13_lgbt_df))
print(len(r_lgbt_df))
print(len(nc17_lgbt_df))

225
0
12
47
109
3


In [306]:
# Add a new column in each specific dataframe to label each movie in the dataframe with their corresponding rating
# (this will be important to have later when joining these dataframes into the larger lgbt_movies_df).
nr_lgbt_df["rating"] = ratings[0]
pg_lgbt_df["rating"] = ratings[2]
pg13_lgbt_df["rating"] = ratings[3]
r_lgbt_df["rating"] = ratings[4]
nc17_lgbt_df["rating"] = ratings[5]

In [307]:
# Verify the new column was created correctly on one of the dataframes.
nr_lgbt_df.head(1)

Unnamed: 0,adult,backdrop_path,genre_ids,id,original_language,original_title,overview,popularity,poster_path,release_date,title,video,vote_average,vote_count,rating
0,False,/qQFvUg92Uh1jRXCNgqo9LduBDFD.jpg,[18],920345,en,EMPATHY (Or: The Girl with The Pearled Hair),After running away from her emotionally abusiv...,2.406,/59C1l5dqtKuKuKy5qlE23PV4yC6.jpg,2022-01-25,EMPATHY (Or: The Girl with The Pearled Hair),False,0.0,0,NR


In [308]:
# Drop all columns except for "id" and "rating" from the individual rating dataframes. Column "id" will be needed
# later for joining the dataframes to lgbt_movies_df. (Note: the dataframe for "G" ratings is not included as the 
# dataframe is empty as shown above).
nr_lgbt_df = nr_lgbt_df[["id", "rating"]]
pg_lgbt_df = pg_lgbt_df[["id", "rating"]]
pg13_lgbt_df = pg13_lgbt_df[["id", "rating"]]
r_lgbt_df = r_lgbt_df[["id", "rating"]]
nc17_lgbt_df = nc17_lgbt_df[["id", "rating"]]

In [309]:
# Verify "r" rating dataframe dropped the correct columns.
r_lgbt_df.head()

Unnamed: 0,id,rating
0,876802,R
1,915164,R
2,857731,R
3,552269,R
4,591273,R


### Create singlular dataframe to hold all rated movies.

In [310]:
# Join the individual ratings dataframes into a single ratings dataframe.
lgbt_ratings_df = pd.concat([nr_lgbt_df, pg_lgbt_df, pg13_lgbt_df, r_lgbt_df, nc17_lgbt_df])

# Reset the index and drop the former index column for clearer legibility.
lgbt_ratings_df = lgbt_ratings_df.reset_index().drop(columns='index')
lgbt_ratings_df.tail()

Unnamed: 0,id,rating
391,95048,R
392,22597,R
393,526034,NC-17
394,94025,NC-17
395,1378,NC-17


In [311]:
# Check for duplicates in ratings dataframe
lgbt_ratings_df[lgbt_ratings_df.id.duplicated()]

Unnamed: 0,id,rating
328,157370,R
349,39958,R
382,16802,R


In [312]:
# Determine how many duplicates there are of each and why.
lgbt_ratings_df.loc[lgbt_ratings_df.id.isin([157370,39958,16802])]

Unnamed: 0,id,rating
106,157370,NR
159,39958,NR
216,16802,NR
328,157370,R
349,39958,R
382,16802,R


In [313]:
# Drop duplicates and verify that they correctly dropped.
lgbt_ratings_df = lgbt_ratings_df.drop_duplicates(subset=['id'], keep='last')
lgbt_ratings_df.loc[lgbt_ratings_df.id.isin([157370,39958,16802])]

Unnamed: 0,id,rating
328,157370,R
349,39958,R
382,16802,R


### Join ratings dataframe into main movies dataframe.

In [314]:
# Set index to be "id" for all dataframes.
lgbt_movies_df = lgbt_movies_df.set_index('id')
lgbt_ratings_df = lgbt_ratings_df.set_index('id')

In [315]:
# Verify that index was set correctly
lgbt_ratings_df.head()

Unnamed: 0_level_0,rating
id,Unnamed: 1_level_1
920345,NR
723377,NR
879943,NR
849934,NR
776586,NR


In [316]:
# Join the ratings dataframe into the movies dataframe to add the certification rating to the movies dataframe.
lgbt_movies_df = lgbt_movies_df.join(lgbt_ratings_df, on='id', how='left')

In [317]:
# Verify the length of the dataframe and that the ratings (for movies that had ratings) were added.
print(len(lgbt_movies_df))
lgbt_movies_df.head()

1299


Unnamed: 0_level_0,adult,backdrop_path,genre_ids,original_language,original_title,overview,popularity,poster_path,release_date,title,video,vote_average,vote_count,rating
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
507903,False,,"[35, 18]",en,Spoiler Alert,The story of Michael Ausiello and Kit Cowan's ...,2.796,/np3y8wm2K7YK5RT6e68hw6IzhBw.jpg,2022-12-31,Spoiler Alert,False,0.0,0,
876802,False,,"[27, 53]",en,Saint Drogo,Saint Drogo is a supernatural slowburn themati...,1.828,/ghq9XVBvmlbqrsKA7KDMsYOPjAu.jpg,2022-10-01,Saint Drogo,False,0.0,0,R
793992,False,,[18],en,Three Months,"The story of Caleb, a South Florida teen who l...",2.05,/AoMOVVkuVctRLyjfzjrEnPploxp.jpg,2022-02-23,Three Months,False,0.0,0,
929477,False,,[10749],en,Heart Shot,Teenagers Nikki and Sam are in love and planni...,5.582,,2022-02-17,Heart Shot,False,0.0,0,PG-13
920345,False,/qQFvUg92Uh1jRXCNgqo9LduBDFD.jpg,[18],en,EMPATHY (Or: The Girl with The Pearled Hair),After running away from her emotionally abusiv...,2.406,/59C1l5dqtKuKuKy5qlE23PV4yC6.jpg,2022-01-25,EMPATHY (Or: The Girl with The Pearled Hair),False,0.0,0,NR


In [318]:
# Swap null values in ratings column for NR
lgbt_movies_df["rating"].fillna("NR", inplace=True)

In [319]:
# Verify null values were replaced.
lgbt_movies_df.head()

Unnamed: 0_level_0,adult,backdrop_path,genre_ids,original_language,original_title,overview,popularity,poster_path,release_date,title,video,vote_average,vote_count,rating
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
507903,False,,"[35, 18]",en,Spoiler Alert,The story of Michael Ausiello and Kit Cowan's ...,2.796,/np3y8wm2K7YK5RT6e68hw6IzhBw.jpg,2022-12-31,Spoiler Alert,False,0.0,0,NR
876802,False,,"[27, 53]",en,Saint Drogo,Saint Drogo is a supernatural slowburn themati...,1.828,/ghq9XVBvmlbqrsKA7KDMsYOPjAu.jpg,2022-10-01,Saint Drogo,False,0.0,0,R
793992,False,,[18],en,Three Months,"The story of Caleb, a South Florida teen who l...",2.05,/AoMOVVkuVctRLyjfzjrEnPploxp.jpg,2022-02-23,Three Months,False,0.0,0,NR
929477,False,,[10749],en,Heart Shot,Teenagers Nikki and Sam are in love and planni...,5.582,,2022-02-17,Heart Shot,False,0.0,0,PG-13
920345,False,/qQFvUg92Uh1jRXCNgqo9LduBDFD.jpg,[18],en,EMPATHY (Or: The Girl with The Pearled Hair),After running away from her emotionally abusiv...,2.406,/59C1l5dqtKuKuKy5qlE23PV4yC6.jpg,2022-01-25,EMPATHY (Or: The Girl with The Pearled Hair),False,0.0,0,NR


In [320]:
# Reset index.
lgbt_movies_df = lgbt_movies_df.reset_index()

In [321]:
# Verify index reset correctly.
lgbt_movies_df.head()

Unnamed: 0,id,adult,backdrop_path,genre_ids,original_language,original_title,overview,popularity,poster_path,release_date,title,video,vote_average,vote_count,rating
0,507903,False,,"[35, 18]",en,Spoiler Alert,The story of Michael Ausiello and Kit Cowan's ...,2.796,/np3y8wm2K7YK5RT6e68hw6IzhBw.jpg,2022-12-31,Spoiler Alert,False,0.0,0,NR
1,876802,False,,"[27, 53]",en,Saint Drogo,Saint Drogo is a supernatural slowburn themati...,1.828,/ghq9XVBvmlbqrsKA7KDMsYOPjAu.jpg,2022-10-01,Saint Drogo,False,0.0,0,R
2,793992,False,,[18],en,Three Months,"The story of Caleb, a South Florida teen who l...",2.05,/AoMOVVkuVctRLyjfzjrEnPploxp.jpg,2022-02-23,Three Months,False,0.0,0,NR
3,929477,False,,[10749],en,Heart Shot,Teenagers Nikki and Sam are in love and planni...,5.582,,2022-02-17,Heart Shot,False,0.0,0,PG-13
4,920345,False,/qQFvUg92Uh1jRXCNgqo9LduBDFD.jpg,[18],en,EMPATHY (Or: The Girl with The Pearled Hair),After running away from her emotionally abusiv...,2.406,/59C1l5dqtKuKuKy5qlE23PV4yC6.jpg,2022-01-25,EMPATHY (Or: The Girl with The Pearled Hair),False,0.0,0,NR


### Drop columns from main dataframe

In [322]:
# Drop unnecessary columns from lgbt_movies_df dataframe.
lgbt_movies_df = lgbt_movies_df.drop(columns=[
    "adult", "backdrop_path", "original_language", "poster_path", "video", "vote_average", "vote_count"])

In [323]:
# Verify columns dropped and index reset correctly.
lgbt_movies_df.loc[[0, 360, 792]]

Unnamed: 0,id,genre_ids,original_title,overview,popularity,release_date,title,rating
0,507903,"[35, 18]",Spoiler Alert,The story of Michael Ausiello and Kit Cowan's ...,2.796,2022-12-31,Spoiler Alert,NR
360,449176,"[35, 18, 10749]","Love, Simon",Everyone deserves a great love story. But for ...,42.282,2018-03-16,"Love, Simon",PG-13
792,84892,[18],The Perks of Being a Wallflower,"Pittsburgh, Pennsylvania, 1991. High school fr...",41.253,2012-09-21,The Perks of Being a Wallflower,PG-13


### Clean "overview" column

In [324]:
# Ensure that there are no "\r" substrings in the string of the "overview" column. This would cause formatting
# issues when the dataframe is exported to a CSV
lgbt_movies_df['overview'] = lgbt_movies_df['overview'].str.replace("\r", "")

In [325]:
lgbt_movies_df

Unnamed: 0,id,genre_ids,original_title,overview,popularity,release_date,title,rating
0,507903,"[35, 18]",Spoiler Alert,The story of Michael Ausiello and Kit Cowan's ...,2.796,2022-12-31,Spoiler Alert,NR
1,876802,"[27, 53]",Saint Drogo,Saint Drogo is a supernatural slowburn themati...,1.828,2022-10-01,Saint Drogo,R
2,793992,[18],Three Months,"The story of Caleb, a South Florida teen who l...",2.050,2022-02-23,Three Months,NR
3,929477,[10749],Heart Shot,Teenagers Nikki and Sam are in love and planni...,5.582,2022-02-17,Heart Shot,PG-13
4,920345,[18],EMPATHY (Or: The Girl with The Pearled Hair),After running away from her emotionally abusiv...,2.406,2022-01-25,EMPATHY (Or: The Girl with The Pearled Hair),NR
...,...,...,...,...,...,...,...,...
1294,248757,"[35, 10749]",Punks,"In his directorial debut, Patrik-Ian Polk chro...",1.126,2000-01-24,Punks,PG-13
1295,515728,[18],Hitch,Two friends are on a road trip and a one-sided...,1.074,2000-01-01,Hitch,NR
1296,306484,[],Meet Joe Gay,Director Benjamin Morgan takes a hard look at ...,1.078,2000-01-01,Meet Joe Gay,NR
1297,262942,[99],A Boy Named Sue,A Boy Named Sue chronicles the transformation ...,1.298,2000-01-01,A Boy Named Sue,NR


## Gather Genre Info


### Create dataframe to hold genre names based on genre IDs

In [326]:
# Create genre_url variable
genre_url = "https://api.themoviedb.org/3/genre/movie/list?api_key=" + tmdb_api_key + "&language=en-US"

In [327]:
# Get the genre JSON info into a list.
genre_request = requests.get(genre_url)
genre_data = genre_request.json()
genre_info = genre_data["genres"]
genre_info

[{'id': 28, 'name': 'Action'},
 {'id': 12, 'name': 'Adventure'},
 {'id': 16, 'name': 'Animation'},
 {'id': 35, 'name': 'Comedy'},
 {'id': 80, 'name': 'Crime'},
 {'id': 99, 'name': 'Documentary'},
 {'id': 18, 'name': 'Drama'},
 {'id': 10751, 'name': 'Family'},
 {'id': 14, 'name': 'Fantasy'},
 {'id': 36, 'name': 'History'},
 {'id': 27, 'name': 'Horror'},
 {'id': 10402, 'name': 'Music'},
 {'id': 9648, 'name': 'Mystery'},
 {'id': 10749, 'name': 'Romance'},
 {'id': 878, 'name': 'Science Fiction'},
 {'id': 10770, 'name': 'TV Movie'},
 {'id': 53, 'name': 'Thriller'},
 {'id': 10752, 'name': 'War'},
 {'id': 37, 'name': 'Western'}]

In [328]:
# Transform the list of dictionaries of genre information into a individual lists.
ids = [i["id"] for i in genre_info]
names = [n["name"] for n in genre_info]

print(ids)
print(names)

[28, 12, 16, 35, 80, 99, 18, 10751, 14, 36, 27, 10402, 9648, 10749, 878, 10770, 53, 10752, 37]
['Action', 'Adventure', 'Animation', 'Comedy', 'Crime', 'Documentary', 'Drama', 'Family', 'Fantasy', 'History', 'Horror', 'Music', 'Mystery', 'Romance', 'Science Fiction', 'TV Movie', 'Thriller', 'War', 'Western']


In [329]:
# # Combine the lists "ids" and "names" into a single dictionary named "genre_dict".
# genres_dict = {"ids": ids, "names": names}

### Duplicate genre_ids column into a new dataframe

In [330]:
# Duplicate genre_ids column into new dataframe.
genres_df = pd.DataFrame()
genres_df[["id", "genres"]] = lgbt_movies_df[['id','genre_ids']]
genres_df.head()

Unnamed: 0,id,genres
0,507903,"[35, 18]"
1,876802,"[27, 53]"
2,793992,[18]
3,929477,[10749]
4,920345,[18]


In [331]:
print(len(genres_df))

1299


In [332]:
# Separate the lists of genres in the genre column to hold one individual genre per column
sep_genres_df = pd.DataFrame(
    genres_df['genres'].to_list(), columns=['genre1', 'genre2', 'genre3', 'genre4', 'genre5', 'genre6']
)

In [333]:
# Verify the columns split.
print(len(sep_genres_df))
sep_genres_df.head()

1299


Unnamed: 0,genre1,genre2,genre3,genre4,genre5,genre6
0,35.0,18.0,,,,
1,27.0,53.0,,,,
2,18.0,,,,,
3,10749.0,,,,,
4,18.0,,,,,


In [334]:
# sep_genres_df['id'] = genres_df['id']
# sep_genres_df = sep_genres_df.set_index('id')

In [335]:
print(len(sep_genres_df))
sep_genres_df.head()

1299


Unnamed: 0,genre1,genre2,genre3,genre4,genre5,genre6
0,35.0,18.0,,,,
1,27.0,53.0,,,,
2,18.0,,,,,
3,10749.0,,,,,
4,18.0,,,,,


### Transform Genre IDs into Genre Names.

In [336]:
# Replace all genre integer values for "genre_ids" to their associated string in genres_df.
sep_genres_df = sep_genres_df.replace(ids, names)

In [337]:
# Verify that genre names were replaced.
print(len(sep_genres_df))
sep_genres_df.loc[724:730]

1299


Unnamed: 0,genre1,genre2,genre3,genre4,genre5,genre6
724,Comedy,Drama,,,,
725,Drama,Romance,Thriller,,,
726,Drama,,,,,
727,Documentary,,,,,
728,Romance,Comedy,,,,
729,Comedy,Romance,,,,
730,Documentary,Comedy,,,,


In [338]:
# Create a new column that puts all genre names in every column of the dataframe into a list per movie.
sep_genres_df['genres'] = sep_genres_df.values.tolist()

In [339]:
# Check work.
print(len(sep_genres_df))
sep_genres_df.loc[724:730]

1299


Unnamed: 0,genre1,genre2,genre3,genre4,genre5,genre6,genres
724,Comedy,Drama,,,,,"[Comedy, Drama, nan, nan, nan, nan]"
725,Drama,Romance,Thriller,,,,"[Drama, Romance, Thriller, nan, nan, nan]"
726,Drama,,,,,,"[Drama, nan, nan, nan, nan, nan]"
727,Documentary,,,,,,"[Documentary, nan, nan, nan, nan, nan]"
728,Romance,Comedy,,,,,"[Romance, Comedy, nan, nan, nan, nan]"
729,Comedy,Romance,,,,,"[Comedy, Romance, nan, nan, nan, nan]"
730,Documentary,Comedy,,,,,"[Documentary, Comedy, nan, nan, nan, nan]"


In [340]:
# Remove the null values from the list.
sep_genres_df['genres'] = sep_genres_df.genres.apply(lambda x: [name for name in x if not pd.isnull(name)])

In [341]:
# Check work.
print(len(sep_genres_df))
sep_genres_df.loc[724:730]

1299


Unnamed: 0,genre1,genre2,genre3,genre4,genre5,genre6,genres
724,Comedy,Drama,,,,,"[Comedy, Drama]"
725,Drama,Romance,Thriller,,,,"[Drama, Romance, Thriller]"
726,Drama,,,,,,[Drama]
727,Documentary,,,,,,[Documentary]
728,Romance,Comedy,,,,,"[Romance, Comedy]"
729,Comedy,Romance,,,,,"[Comedy, Romance]"
730,Documentary,Comedy,,,,,"[Documentary, Comedy]"


In [342]:
# Drop individual genre columns.
sep_genres_df = sep_genres_df.drop(columns=['genre1', 'genre2', 'genre3', 'genre4', 'genre5', 'genre6'])

In [343]:
# Check work.
print(len(sep_genres_df))
sep_genres_df.loc[724:730]

1299


Unnamed: 0,genres
724,"[Comedy, Drama]"
725,"[Drama, Romance, Thriller]"
726,[Drama]
727,[Documentary]
728,"[Romance, Comedy]"
729,"[Comedy, Romance]"
730,"[Documentary, Comedy]"


### Add Genre Names to LGBT movies dataframe.

In [344]:
sep_genres_df['id'] = genres_df['id']
sep_genres_df = sep_genres_df.set_index('id')

In [345]:
sep_genres_df.loc[[156713, 157370, 221801, 212849, 212721, 216138, 270698]]

Unnamed: 0_level_0,genres
id,Unnamed: 1_level_1
156713,"[Comedy, Drama]"
157370,"[Drama, Romance, Thriller]"
221801,[Drama]
212849,[Documentary]
212721,"[Romance, Comedy]"
216138,"[Comedy, Romance]"
270698,"[Documentary, Comedy]"


In [346]:
# Add the genres column from genres_df into lgbt_movies_df.
lgbt_movies_df = lgbt_movies_df.join(sep_genres_df, on='id', how='left')
lgbt_movies_df.head()

Unnamed: 0,id,genre_ids,original_title,overview,popularity,release_date,title,rating,genres
0,507903,"[35, 18]",Spoiler Alert,The story of Michael Ausiello and Kit Cowan's ...,2.796,2022-12-31,Spoiler Alert,NR,"[Comedy, Drama]"
1,876802,"[27, 53]",Saint Drogo,Saint Drogo is a supernatural slowburn themati...,1.828,2022-10-01,Saint Drogo,R,"[Horror, Thriller]"
2,793992,[18],Three Months,"The story of Caleb, a South Florida teen who l...",2.05,2022-02-23,Three Months,NR,[Drama]
3,929477,[10749],Heart Shot,Teenagers Nikki and Sam are in love and planni...,5.582,2022-02-17,Heart Shot,PG-13,[Romance]
4,920345,[18],EMPATHY (Or: The Girl with The Pearled Hair),After running away from her emotionally abusiv...,2.406,2022-01-25,EMPATHY (Or: The Girl with The Pearled Hair),NR,[Drama]


In [347]:
lgbt_movies_df.loc[724:730]

Unnamed: 0,id,genre_ids,original_title,overview,popularity,release_date,title,rating,genres
724,156713,"[35, 18]",C.O.G.,A gay cocky young man travels to Oregon to wor...,5.207,2013-09-20,C.O.G.,R,"[Comedy, Drama]"
725,157370,"[18, 10749, 53]",Kill Your Darlings,A murder in 1944 draws together the great poet...,18.652,2013-10-16,Kill Your Darlings,R,"[Drama, Romance, Thriller]"
726,221801,[18],Naked as We Came,"Love, loss and hope are tumultuously explored ...",6.859,2013-09-13,Naked as We Came,NR,[Drama]
727,212849,[99],The Dog,"In 1972, John Wojtowicz attempted to rob a Bro...",4.18,2014-08-05,The Dog,NR,[Documentary]
728,212721,"[10749, 35]",Gerontophilia,Lake is in a straight relationship with Desire...,10.49,2014-05-24,Gerontophilia,NR,"[Romance, Comedy]"
729,216138,"[35, 10749]",10 Rules for Sleeping Around,"By following ten simple rules, 20-somethings ...",10.611,2014-04-04,10 Rules for Sleeping Around,R,"[Comedy, Romance]"
730,270698,"[99, 35]",I'm a Porn Star,I'm a Porn Star follows the lives of guys in t...,14.984,2013-10-04,I'm a Porn Star,NR,"[Documentary, Comedy]"


In [49]:
# Set up the new column order.
columns = [
    'id', 'genre_ids', 'genre', 'original_title', 'rating', 'overview', 'popularity', 'release_date', 'title']

In [50]:
# Reorder columns in lgbt_movies_df.
lgbt_movies_df = lgbt_movies_df[columns]

In [146]:
# Check work.
print(len(lgbt_movies_df))
lgbt_movies_df.head()

1299


Unnamed: 0,id,genre_ids,original_title,overview,popularity,release_date,title,rating,genres
0,507903,"[35, 18]",Spoiler Alert,The story of Michael Ausiello and Kit Cowan's ...,3.248,2022-12-31,Spoiler Alert,NR,"[Comedy, Drama]"
1,876802,"[27, 53]",Saint Drogo,Saint Drogo is a supernatural slowburn themati...,1.644,2022-10-01,Saint Drogo,R,"[Horror, Thriller]"
2,793992,[18],Three Months,"The story of Caleb, a South Florida teen who l...",1.464,2022-02-23,Three Months,NR,[Drama]
3,929477,[10749],Heart Shot,Teenagers Nikki and Sam are in love and planni...,4.903,2022-02-17,Heart Shot,PG-13,[Romance]
4,920345,[18],EMPATHY (Or: The Girl with The Pearled Hair),After running away from her emotionally abusiv...,2.928,2022-01-25,EMPATHY (Or: The Girl with The Pearled Hair),NR,[Drama]


## Get Additional Movie Info

In [52]:
# Create a list of just the movie IDs.
lgbt_ids = lgbt_movies_df['id'].tolist()

In [53]:
lgbt_info = [get_info(n, lgbt_ids) for n in range(len(lgbt_ids))]

lgbt_info_df = pd.concat(lgbt_info)

In [54]:
print(len(lgbt_info_df))
lgbt_info_df.head()

1298


Unnamed: 0,adult,backdrop_path,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,...,status,tagline,title,video,vote_average,vote_count,belongs_to_collection.id,belongs_to_collection.name,belongs_to_collection.poster_path,belongs_to_collection.backdrop_path
0,False,,,0,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",,507903,tt7775720,en,Spoiler Alert,...,In Production,,Spoiler Alert,False,0.0,0,,,,
0,False,,,20000,"[{'id': 27, 'name': 'Horror'}, {'id': 53, 'nam...",https://www.monstermakeupllc.com,876802,tt15470856,en,Saint Drogo,...,In Production,,Saint Drogo,False,0.0,0,,,,
0,False,,,0,"[{'id': 18, 'name': 'Drama'}]",,793992,tt5322004,en,Three Months,...,Post Production,It's amazing what you can discover when life g...,Three Months,False,0.0,0,,,,
0,False,,,0,"[{'id': 10749, 'name': 'Romance'}]",https://www.netflix.com/title/81343173,929477,tt17162546,en,Heart Shot,...,Post Production,,Heart Shot,False,0.0,0,,,,
0,False,/qQFvUg92Uh1jRXCNgqo9LduBDFD.jpg,,20000,"[{'id': 18, 'name': 'Drama'}]",,920345,,en,EMPATHY (Or: The Girl with The Pearled Hair),...,Released,work in symmetry,EMPATHY (Or: The Girl with The Pearled Hair),False,0.0,0,,,,


In [55]:
# Clean the additional movie info dataframe.
lgbt_info_df = lgbt_info_df[['budget', 'id', 'imdb_id', 'revenue', 'runtime', 'status']].reset_index().drop(columns="index")

In [56]:
# Check that a movie ("Love, Simon") has the name index number as in the main movies dataframe.
lgbt_info_df.loc[[360]]

Unnamed: 0,budget,id,imdb_id,revenue,runtime,status
360,17000000,449176,tt5164432,66316289,110,Released


In [57]:
# Drop id column now that you know it was indexed correctly.
lgbt_info_df = lgbt_info_df.drop(columns="id")

In [58]:
lgbt_info_df.loc[[360]]

Unnamed: 0,budget,imdb_id,revenue,runtime,status
360,17000000,tt5164432,66316289,110,Released


## Join Additional Info into Movies Dataframe

In [59]:
# Join the additional info into the movies dataframe.
lgbt_movies_df = lgbt_movies_df.join(lgbt_info_df, how='left')

In [60]:
# Check to make sure items joined correctly.
lgbt_movies_df.loc[[360]]

Unnamed: 0,id,genre_ids,genre,original_title,rating,overview,popularity,release_date,title,budget,imdb_id,revenue,runtime,status
360,449176,"[35, 18, 10749]","[Comedy, Drama, Romance]","Love, Simon",PG-13,Everyone deserves a great love story. But for ...,38.465,2018-03-16,"Love, Simon",17000000,tt5164432,66316289,110,Released


In [61]:
# Reorder columns.
new_columns = [
    'id', 'original_title', 'imdb_id', 'genre_ids',
    'genre', 'rating',
    'overview', 'popularity', 'release_date','budget',
    'revenue', 'runtime', 'status', 'title']

In [62]:
# Assign new column order to lgbt_movies_df.
lgbt_movies_df = lgbt_movies_df[new_columns]

In [63]:
# Check column order.
lgbt_movies_df.loc[[360]]

Unnamed: 0,id,original_title,imdb_id,genre_ids,genre,rating,overview,popularity,release_date,budget,revenue,runtime,status,title
360,449176,"Love, Simon",tt5164432,"[35, 18, 10749]","[Comedy, Drama, Romance]",PG-13,Everyone deserves a great love story. But for ...,38.465,2018-03-16,17000000,66316289,110,Released,"Love, Simon"


##  Create a CSV file for the LGBT movies.

In [64]:
# Export the movies_df into a CSV file.
# lgbt_movies_df.to_csv("../CSVs/lgbt_movies.csv")

# Part 2: All Movies

In [65]:
# Create a new URL to use the 

## Create Dataframe to hold all movies released from 2000-2022

In [66]:
# Assign new number to the pages_requested variable
pages_requested = 

SyntaxError: invalid syntax (<ipython-input-66-74f5b340df87>, line 2)