In [1]:
# Import the requests library and API key
import pandas as pd
import requests
from config import tmdb_api_key

In [2]:
# Starting URL for TMDB API including the API key from config.py
url = "https://api.themoviedb.org/3/discover/movie?api_key=" + tmdb_api_key

In [3]:
# URL for movies tagged with keywords "LGBT, gay, lesbian, trans, and transgender"
# and using the language (English), region (US), release date start (01-01-2000) and end (12-31-2022), 
# sort by (release date descending) parameters and leaving the page parameter blank at the end (so it can be filled 
# in the steps below).
lgbt_url = url + "&language=en-US&region=US&sort_by=primary_release_date.desc&include_adult=false&include_video=false&primary_release_date.gte=2000-01-01&primary_release_date.lte=2022-12-31&with_keywords=158718%7C264384%7C264386%7C273637%7C290527%7C163037&with_original_language=en"

In [4]:
# Create a function to iterate through multiple pages of the API call. 
# Specifying the params for params with a dictionary containing the key/value pair for "page".
# In lgbt_url, "page=" is the final parameter in the API call. 

def get_page(page_num):
    lgbt_movies = requests.get(
        lgbt_url,
        params={"page": page_num}
    )
    # Create a variable to hold the JSON text of the 'get' request
    lgbt_movies_data = lgbt_movies.json()
    # Create a Pandas Dataframe for 'results' from the JSON dictionary
    return pd.DataFrame(lgbt_movies_data['results'])

# The number of pages you want
pages_requested = 65

# Get all pages as dataframes by calling the function "get_page()" where "page_num" is variable n + 1.
# The list comprehension will iterate through the range of "pages_requested" which is 65, adding 1 to
# each iteration to make the count 1-65.
pages = [get_page(n + 1) for n in range(pages_requested)]

# Combine pages to single dataframe using the concat function (aka concatenate)
lgbt_movies_df = pd.concat(pages)

In [5]:
# Check first 25 results to ensure dataframe was created correctly
lgbt_movies_df.head(25)

Unnamed: 0,adult,backdrop_path,genre_ids,id,original_language,original_title,overview,popularity,poster_path,release_date,title,video,vote_average,vote_count
0,False,,"[35, 18]",507903,en,Spoiler Alert,The story of Michael Ausiello and Kit Cowan's ...,3.39,/np3y8wm2K7YK5RT6e68hw6IzhBw.jpg,2022-12-31,Spoiler Alert,False,0.0,0
1,False,,"[27, 53]",876802,en,Saint Drogo,Saint Drogo is a supernatural slowburn themati...,1.539,/ghq9XVBvmlbqrsKA7KDMsYOPjAu.jpg,2022-10-01,Saint Drogo,False,0.0,0
2,False,,[10749],929477,en,Heart Shot,Teenagers Nikki and Sam are in love and planni...,1.784,,2022-02-17,Heart Shot,False,0.0,0
3,False,/qQFvUg92Uh1jRXCNgqo9LduBDFD.jpg,[18],920345,en,EMPATHY (Or: The Girl with The Pearled Hair),After running away from her emotionally abusiv...,14.296,/59C1l5dqtKuKuKy5qlE23PV4yC6.jpg,2022-01-25,EMPATHY (Or: The Girl with The Pearled Hair),False,0.0,0
4,False,/yADB6tXh5g2kXK111rNa8eYLgEq.jpg,[18],914281,en,Starfuckers,An intimate evening between a film director an...,5.803,/m9wG2LfRVogDQ9xcUuY32SHIGxi.jpg,2022-01-22,Starfuckers,False,0.0,0
5,False,/f7zrgyGgdrSQgPEzgn9GlJEoGw.jpg,[99],913854,en,Framing Agnes,"Agnes, the pioneering, pseudonymized, transgen...",6.102,/oQGTNDGsXpcWygsJrVHqqcTrudx.jpg,2022-01-22,Framing Agnes,False,0.0,0
6,False,,[18],927872,en,Outside,"Set in Seattle, OUTSIDE examines the intersect...",11.85,/m8TQ7CMBz4uDGLOjMaouOittPtn.jpg,2022-01-16,Outside,False,4.5,2
7,False,/hBDJ1qiZwmA7QJC3WdZePpYem48.jpg,"[35, 18]",756044,en,Egghead & Twinkie,Egghead & Twinkie is a feature-length coming o...,3.1,/3GgA8xDAs65MHAtP1qnA3SNM19a.jpg,2022-01-01,Egghead & Twinkie,False,0.0,0
8,False,/7y6R59LHLDahZdQw5ZjKbsSTdjV.jpg,"[53, 27, 9648]",723377,en,The Scary of Sixty-First,Two roommates’ lives are upended after finding...,10.306,/iP2x1rjh6H2dKSm5eFxAo2e46Jb.jpg,2021-12-17,The Scary of Sixty-First,False,5.7,33
9,False,/Ainzn99TYR1LZWL7pKEBx3AH8Oy.jpg,[35],915164,en,TWO BEERS,"Two lifelong friends, Graham and Adrian, have ...",0.843,/pXEgPcmE1XRwF6543iOFM2XLGzU.jpg,2021-12-10,TWO BEERS,False,10.0,1


In [6]:
# Check final 20 results to ensure dataframe was created correctly
lgbt_movies_df.tail(20)

Unnamed: 0,adult,backdrop_path,genre_ids,id,original_language,original_title,overview,popularity,poster_path,release_date,title,video,vote_average,vote_count
16,False,/jnJ4i6vPQETcfjxOvAODxzsdQfX.jpg,[18],5001,en,Before Night Falls,"Spanning several decades, this powerful biopic...",9.366,/kb2dqj73cfmt6qHAh1ySIlVm6v0.jpg,2000-09-03,Before Night Falls,False,6.6,207
17,False,/f4ZgIaMpOWd6rewhCfoIIMqWquh.jpg,"[35, 18, 10749]",20770,en,But I'm a Cheerleader,Megan is an all-American girl. A cheerleader. ...,12.879,/2TBadRiMH15R3UFnc7hSBkbYH8S.jpg,2000-07-07,But I'm a Cheerleader,False,7.0,409
18,False,/tXChoIzVsQHeXqsrILUuxjxBGXr.jpg,"[18, 10749]",186791,en,Birthday Time,"Tom, a married closet case and parent who trol...",0.6,/ogKcf96A4h7UXG6Vex9lgvKKzVy.jpg,2000-06-21,Birthday Time,False,3.3,3
19,False,,"[18, 10749]",41636,en,Get Your Stuff,"A wealthy professional gay couple, who wish to...",4.487,/8xfUJieDjvBSqM3q1UBYj9dxYpy.jpg,2000-06-17,Get Your Stuff,False,5.3,13
0,False,,[18],41644,en,Forgive and Forget,"David O'Neil, a plasterer and mature student T...",3.088,/ps9ZZPP5rwf1AvraJRrXq9HJyEM.jpg,2000-06-12,Forgive and Forget,False,5.3,11
1,False,,[18],27100,en,Eban and Charley,29-year-old Eban has retreated home to his par...,2.853,/4jvVDWA0wHudVA6TH4rmnfigCnN.jpg,2000-06-12,Eban and Charley,False,4.7,12
2,False,,[99],49491,en,101 Rent Boys,"Paid 50 dollars for their time, 101 male prost...",2.466,/1j1tE1qPCR3uaTEsj6jcgwicbY7.jpg,2000-06-07,101 Rent Boys,False,6.5,6
3,False,,"[10749, 18]",598274,en,3 Play,"""3 Play"" adds a new unilateral twist to the cl...",0.882,/7Gci16QvTw3ajCY0VlxPRGGkWSr.jpg,2000-06-05,3 Play,False,1.0,1
4,False,,"[35, 18, 10749]",44508,en,Spin the Bottle,When five childhood friends reunite for a week...,2.862,/AsLrkoOKGyw5smXxPab4XYVzlK7.jpg,2000-05-04,Spin the Bottle,False,5.7,9
5,False,/nFTu2wKCs2AEuDUL4GuFxuCNBU0.jpg,"[18, 10749]",21056,en,Big Eden,Henry Hart is a young gay artist living in New...,5.22,/tZ3f8MNLwk38Ydla8GMcQ6yopoj.jpg,2000-04-18,Big Eden,False,6.5,32


In [7]:
# Reset index
lgbt_movies_df = lgbt_movies_df.reset_index()

In [8]:
# Ensure index reset correctly
lgbt_movies_df.head(1)

Unnamed: 0,index,adult,backdrop_path,genre_ids,id,original_language,original_title,overview,popularity,poster_path,release_date,title,video,vote_average,vote_count
0,0,False,,"[35, 18]",507903,en,Spoiler Alert,The story of Michael Ausiello and Kit Cowan's ...,3.39,/np3y8wm2K7YK5RT6e68hw6IzhBw.jpg,2022-12-31,Spoiler Alert,False,0.0,0


In [9]:
# Drop original index column.
lgbt_movies_df = lgbt_movies_df.drop(columns=["index"])

In [10]:
# Test to ensure original index column dropped correctly
lgbt_movies_df.iloc[[349, 777]]

Unnamed: 0,adult,backdrop_path,genre_ids,id,original_language,original_title,overview,popularity,poster_path,release_date,title,video,vote_average,vote_count
349,False,,[99],635068,en,Fluidity,What is queer now? This simple documentary fea...,0.6,/8ier7wXeXcB3V9nZr73OG37LfNH.jpg,2018-03-19,Fluidity,False,0.0,0
777,False,/izvD0x0E07F7BuosKKqtJocdV2p.jpg,"[10749, 35, 18]",146778,en,The Men Next Door,This is a fast-paced comedy about a 40-year-ol...,6.292,/kh0ZojSGZGWRmEuU1VZ7siL43ao.jpg,2012-12-11,The Men Next Door,False,4.6,14


In [11]:
# Create variable to hold length of the id column
i = len(lgbt_movies_df["id"])

# Create empty list for movie_ids
movie_ids = []

# Iterate through every movie id in the id column to put each id in movie_ids list.
for n in range(i):
    movie_ids.append(lgbt_movies_df["id"][n])

In [12]:
# Create a function to create a list of URLs to get release dates for each movie in the lgbt_movies_df.
# The release dates method holds certification information, which is needed.

def get_id_url(movie_id):
    # TMDB API url base with movie IDs being converted to a string. 
    movie_id_url = "https://api.themoviedb.org/3/movie/" + str(movie_ids[movie_id]) + "/release_dates?api_key=" + tmdb_api_key
    return movie_id_url

# A variable for the list of URLs for each specific movie ID as the above function iterates through each index in
# the list of movie IDs and creates a unique URL.
movie_id_urls = [get_id_url(n) for n in range(i)]

In [13]:
# Create a function to create a dataframe for each movie's release date data.

def get_release(movie_url):
    # The function will iterate through the list of URLs to get the JSON for each movie's release data.
    lgbt_movies_release = requests.get(movie_id_urls[movie_url])
    lgbt_release_data = lgbt_movies_release.json()
    return pd.DataFrame(lgbt_release_data['results'])

# A variable to hold the dataframes of release date data for each specific movie ID as the above function
# iterates through each index in the list of unique URLs based on movie IDs.
release_info = [get_release(n) for n in range(i)]

# Combine the dataframes in release_info into a singular dataframe.
lgbt_release_df = pd.concat(release_info)

In [14]:
# Set the index to 'iso_3166_1' AKA the country code for each country the film was released in.
lgbt_release_df = lgbt_release_df.set_index('iso_3166_1')
lgbt_release_df.head()

Unnamed: 0_level_0,release_dates
iso_3166_1,Unnamed: 1_level_1
US,"[{'certification': '', 'iso_639_1': 'en', 'not..."
US,"[{'certification': 'R', 'iso_639_1': 'en', 'no..."
US,"[{'certification': 'PG-13', 'iso_639_1': '', '..."
SG,"[{'certification': 'R21', 'iso_639_1': '', 'no..."
US,"[{'certification': 'NR', 'iso_639_1': 'en', 'n..."


In [15]:
# Verify length of lgbt_release_df (only including US releases) is the same as the length for the lgbt_movies_df.
len(lgbt_release_df.loc['US'])

1296

In [16]:
# Edit release dataframe to only include US release dates.
lgbt_release_df = lgbt_release_df.loc['US']
lgbt_release_df

Unnamed: 0_level_0,release_dates
iso_3166_1,Unnamed: 1_level_1
US,"[{'certification': '', 'iso_639_1': 'en', 'not..."
US,"[{'certification': 'R', 'iso_639_1': 'en', 'no..."
US,"[{'certification': 'PG-13', 'iso_639_1': '', '..."
US,"[{'certification': 'NR', 'iso_639_1': 'en', 'n..."
US,"[{'certification': '', 'iso_639_1': '', 'note'..."
...,...
US,"[{'certification': 'PG-13', 'iso_639_1': '', '..."
US,"[{'certification': '', 'iso_639_1': 'en', 'not..."
US,"[{'certification': '', 'iso_639_1': '', 'note'..."
US,"[{'certification': '', 'iso_639_1': '', 'note'..."


In [17]:
# Reset the index of lgbt_release_df so that the indices match the indices of lgbt_movies_df.
lgbt_release_df = lgbt_release_df.reset_index()

In [18]:
# Drop the 'iso_3166_1' column as it is now unnecessary.
lgbt_release_df = lgbt_release_df.drop(columns='iso_3166_1')

In [19]:
# Verify the lgbt_release_df looks correct.
lgbt_release_df

Unnamed: 0,release_dates
0,"[{'certification': '', 'iso_639_1': 'en', 'not..."
1,"[{'certification': 'R', 'iso_639_1': 'en', 'no..."
2,"[{'certification': 'PG-13', 'iso_639_1': '', '..."
3,"[{'certification': 'NR', 'iso_639_1': 'en', 'n..."
4,"[{'certification': '', 'iso_639_1': '', 'note'..."
...,...
1291,"[{'certification': 'PG-13', 'iso_639_1': '', '..."
1292,"[{'certification': '', 'iso_639_1': 'en', 'not..."
1293,"[{'certification': '', 'iso_639_1': '', 'note'..."
1294,"[{'certification': '', 'iso_639_1': '', 'note'..."
