In [173]:
# Import the requests library and API key
import pandas as pd
import requests
from config import tmdb_api_key

In [174]:
# Starting URL for TMDB API discover method including the API key from config.py. Using the language (English),
# region (US), release date start (01-01-2000) and end (12-31-2022), sort by (release date descending) parameters.
# Not included are the "page" and "certification" parameters which will be included in the function later below.
url = "https://api.themoviedb.org/3/discover/movie?api_key=" + tmdb_api_key + "&language=en-US&region=US&sort_by=primary_release_date.desc&certification_country=US&include_adult=false&include_video=false&primary_release_date.gte=2000-01-01&primary_release_date.lte=2022-12-31&with_original_language=en"

In [175]:
# Create a function that can take different URLs that you input to iterate through multiple pages of 
# the API call and to constrain the search to particular certification ratings. Specifying the params 
# for params with a dictionary containing the key/value pair for "page" and "certification".

def get_movies(url_name, page_num, rating):
    movies = requests.get(
        url_name,
        params={
            "page": page_num,
            "certification": rating}
    )
    # Create a variable to hold the JSON text of the 'get' request
    movies_data = movies.json()
    # Create a Pandas Dataframe for 'results' from the JSON dictionary
    return pd.DataFrame(movies_data['results'])

In [176]:
# Create a function that adds ratings into each individual rating dataframe.
def isolate_ratings(rating_df, rating):
    rating_df['rating'] = rating
    rating_df = rating_df.loc[:, ('id', 'rating')]
    return rating_df

In [299]:
# Create a function to get the additional movie information for every individual movie ID in the movies dataframe.

def get_info(movie_df):
    def gather_info(movie_url,id_list):
    #     Create a function to input each individual movie ID into the movie_id_url.
        def get_id_url(id_index):
            movie_id_url = "https://api.themoviedb.org/3/movie/" + str(id_list[id_index]) + "?api_key=" + tmdb_api_key
            return movie_id_url
    #     Create a list of movie URLs based on each movie's ID.
        movie_id_urls = [get_id_url(n) for n in range(len(id_list))]
        info_request = requests.get(movie_id_urls[movie_url])
        info_data = info_request.json()
        return pd.json_normalize(info_data)
    
    # Create a list of just the movie IDs so it can be called upon.
    id_list = movie_df['id'].tolist()
    
    # Get each movie's additional info by calling the function where n is the index of the ids list
    # and lgbts_ids is each movie's individual id.
    info = [gather_info(n, id_list) for n in range(len(id_list))]

    # Combine each movie's additional info dataframe into one.
    info_df = pd.concat(info)
    return info_df.reset_index().drop(columns='index')

In [282]:
# Create function that combines all individual rating dataframes into one.
# def combine_ratings(rating_df_list):
#     ratings_df = pd.concat(rating_df_list).set_index('id')
#     return ratings_df

def combine_clean_data(main_df, rating_df_list, info_df):
    
    # Create a nondestructive copy of the main_df
    movie_df = main_df.set_index('id')
    
    # Ensure that there are no "\r" substrings in the string of the "overview" column. This would cause formatting
    # issues when the dataframe is exported to a CSV
    movie_df['overview'] = movie_df['overview'].str.replace("\r", "")
    
    ratings_df = pd.concat(rating_df_list).set_index('id')
    movie_df = movie_df.join(ratings_df, on='id', how='left').reset_index()

    # Drop duplicates 
    movie_df = movie_df.drop_duplicates(subset=['id'], keep='last').reset_index().drop(columns='index')
    
    movie_df['rating'].fillna('NR', inplace=True)
    
    
    # Add genre names
    # Create genre_url variable
    genre_url = "https://api.themoviedb.org/3/genre/movie/list?api_key=" + tmdb_api_key + "&language=en-US"
    genre_request = requests.get(genre_url)
    genre_data = genre_request.json()
    genre_info = genre_data['genres']
    
    ids = [i['id'] for i in genre_info]
    names = [n['name'] for n in genre_info]
    
    genres_df = pd.DataFrame()
    genres_df[["id", "genres"]] = movie_df[['id','genre_ids']]
    sep_genres_df = pd.DataFrame(
        genres_df['genres'].to_list(), columns=['genre1', 'genre2', 'genre3', 'genre4', 'genre5', 'genre6'])
    
    sep_genres_df = sep_genres_df.replace(ids, names)
    sep_genres_df['genres'] = sep_genres_df.values.tolist()
    
    # Remove the null values from the list.
    sep_genres_df['genres'] = sep_genres_df.genres.apply(lambda x: [name for name in x if not pd.isnull(name)])
    
    # Drop individual genre columns.
    sep_genres_df = sep_genres_df.drop(columns=['genre1', 'genre2', 'genre3', 'genre4', 'genre5', 'genre6'])

    sep_genres_df['id'] = genres_df['id']
    sep_genres_df = sep_genres_df.set_index('id')
    
    # Join genres column from sep_genres_df into the main movie_df
    movie_df = movie_df.join(sep_genres_df, on='id', how='left')
    
    add_info_df = info_df[['id', 'budget', 'imdb_id', 'revenue', 'runtime', 'status']].set_index('id')
    movie_df = movie_df.join(add_info_df, on='id', how='left')
    
    # Reorder columns.
    new_columns = [
        'id', 'original_title', 'imdb_id', 'genre_ids', 'genres', 'rating', 'overview', 'popularity',
        'release_date','budget', 'revenue', 'runtime', 'status', 'title']
    return movie_df[new_columns]
    

In [256]:
# COPY COPY COPY COPY COPY COPY COPY COPY COPY COPY COPY COPY COPY COPY COPY COPY 
# COPY COPY COPY COPY COPY COPY COPY COPY COPY COPY COPY COPY COPY COPY COPY COPY 



# Create function that combines all individual rating dataframes into one.
# def combine_ratings(rating_df_list):
#     ratings_df = pd.concat(rating_df_list).set_index('id')
#     return ratings_df

# def combine_clean_data(main_df, rating_df_list, info_df):


def join_ratings(main_df, rating_df_list):
    # Create a nondestructive copy of the main_df
    movie_df = main_df
    
    # Ensure that there are no "\r" substrings in the string of the "overview" column. This would cause formatting
    # issues when the dataframe is exported to a CSV
    movie_df['overview'] = movie_df['overview'].str.replace("\r", "")
    
    ratings_df = pd.concat(rating_df_list).set_index('id')
    movie_df = movie_df.join(ratings_df, on='id', how='left').reset_index()

    # Drop duplicates 
    movie_df = movie_df.drop_duplicates(subset=['id'], keep='last').reset_index().drop(columns='index')
    
    movie_df['rating'].fillna('NR', inplace=True)
    return movie_df
    

    
    
    
    
def get_genres(main_df):
    
    # Add genre names
    # Create genre_url variable
    
    genre_url = "https://api.themoviedb.org/3/genre/movie/list?api_key=" + tmdb_api_key + "&language=en-US"
    genre_request = requests.get(genre_url)
    genre_data = genre_request.json()
    genre_info = genre_data['genres']
    
    ids = [i['id'] for i in genre_info]
    names = [n['name'] for n in genre_info]
    
    genres_df = pd.DataFrame()
    genres_df[["id", "genres"]] = main_df[['id','genre_ids']]
#     movie_df = main_df.set_index('id')
    sep_genres_df = pd.DataFrame(
        genres_df['genres'].to_list(), columns=['genre1', 'genre2', 'genre3', 'genre4', 'genre5', 'genre6'])
    
    sep_genres_df['id'] = genres_df['id']
    sep_genres_df = sep_genres_df.set_index('id')
    sep_genres_df = sep_genres_df.replace(ids, names)
    sep_genres_df['genres'] = sep_genres_df.values.tolist()
    
    # Remove the null values from the list.
    sep_genres_df['genres'] = sep_genres_df.genres.apply(lambda x: [name for name in x if not pd.isnull(name)])
    sep_genres_df = sep_genres_df.drop(columns=['genre1', 'genre2', 'genre3', 'genre4', 'genre5', 'genre6'])
    
    # Join genres column from sep_genres_df into the main movie_df
    movie_df = main_df.join(sep_genres_df, how='left')
    return movie_df.reset_index()


#     add_info_df = info_df[['budget', 'imdb_id', 'revenue', 'runtime', 'status']].reset_index()
#     movie_df = movie_df.join(add_info_df, on='id', how='left')
    
#     # Reorder columns.
#     new_columns = [
#         'id', 'original_title', 'imdb_id', 'genre_ids', 'genres', 'rating', 'overview', 'popularity',
#         'release_date','budget', 'revenue', 'runtime', 'status', 'title']
#     return movie_df[new_columns]
    

In [179]:
# Create URL for movies tagged with keywords "LGBT, gay, lesbian, trans, transgender, and gay teen."
lgbt_url = url + "&with_keywords=158718%7C264384%7C264386%7C273637%7C290527%7C163037"

In [180]:
# The number of pages you want. (Note: the max number of pages for the ratings comes from the total amount of
# pages that the API call will iterate through to get all movies regardless of their certification rating).
pages_requested = 65

# Get all pages for each US rating as dataframes by calling the function "get_ratings()" 
# where "page_num" is variable n + 1. The list comprehension will iterate through the range 
# of "pages_requested" which is 12, adding 1 to each iteration to make the count 1-12. The get_ratings function
# uses the second variable to specify the rating needed in the URL parameter.
pages_lgbt = [get_movies(lgbt_url, n + 1, "") for n in range(pages_requested)]
pages_nr_lgbt = [get_movies(lgbt_url, n + 1, "NR") for n in range(pages_requested)]
pages_g_lgbt = [get_movies(lgbt_url, n + 1, "G") for n in range(pages_requested)]
pages_pg_lgbt = [get_movies(lgbt_url, n + 1, "PG") for n in range(pages_requested)]
pages_pg13_lgbt = [get_movies(lgbt_url, n + 1, "PG-13") for n in range(pages_requested)]
pages_r_lgbt = [get_movies(lgbt_url, n + 1, "R") for n in range(pages_requested)]
pages_nc17_lgbt = [get_movies(lgbt_url, n + 1, "NC-17") for n in range(pages_requested)]

# Combine the pages for each dataframe to single dataframe using the concat function (aka concatenate).
lgbt_movies_df = pd.concat(pages_lgbt)
nr_lgbt_df = pd.concat(pages_nr_lgbt)
g_lgbt_df = pd.concat(pages_g_lgbt) 
pg_lgbt_df = pd.concat(pages_pg_lgbt)
pg13_lgbt_df = pd.concat(pages_pg13_lgbt)
r_lgbt_df = pd.concat(pages_r_lgbt)
nc17_lgbt_df = pd.concat(pages_nc17_lgbt)

In [181]:
# Verify the length of each dataframe.
# (Note: there are no films in the G rating dataframe, meaning there are no LGBT films with a G rating.
# Therefore, g_lgbt_df will not be included in subsequent data wrangling).
print(len(nr_lgbt_df))
print(len(g_lgbt_df))
print(len(pg_lgbt_df))
print(len(pg13_lgbt_df))
print(len(r_lgbt_df))
print(len(nc17_lgbt_df))

225
0
12
47
109
3


In [182]:
nr_lgbt_df = isolate_ratings(nr_lgbt_df, "NR")
pg_lgbt_df = isolate_ratings(pg_lgbt_df, "PG")
pg13_lgbt_df = isolate_ratings(pg13_lgbt_df, "PG-13")
r_lgbt_df = isolate_ratings(r_lgbt_df, "R")
nc17_lgbt_df = isolate_ratings(nc17_lgbt_df, "NC-17")

In [183]:
r_lgbt_df.head()

Unnamed: 0,id,rating
0,876802,R
1,915164,R
2,857731,R
3,552269,R
4,591273,R


In [108]:
# # Create a list to store all the ratings dataframes.
# ratings_df_list = [nr_lgbt_df, g_lgbt_df,
#                    pg_lgbt_df, pg13_lgbt_df,
#                    r_lgbt_df, nc17_lgbt_df]

In [301]:
# # Create a list of just the movie IDs so it can be called upon.
# lgbt_ids = lgbt_movies_df['id'].tolist()

# # Get each movie's additional info by calling the functioon where n is the index of the ids list
# # and lgbts_ids is each movie's individual id.
# lgbt_info = [get_info(n, lgbt_ids) for n in range(len(lgbt_ids))]

# # Combine each movie's additional info dataframe into one.
# lgbt_info_df = pd.concat(lgbt_info)
lgbt_info_df = get_info(lgbt_movies_df)

In [302]:
# print(len(lgbt_info_df))
lgbt_info_df[['id', 'budget', 'imdb_id', 'revenue', 'runtime', 'status']].set_index('id')

Unnamed: 0_level_0,budget,imdb_id,revenue,runtime,status
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
507903,0,tt7775720,0,0,In Production
876802,20000,tt15470856,0,0,In Production
793992,0,tt5322004,0,0,Post Production
929477,0,tt17162546,0,19,Post Production
920345,20000,,0,19,Released
...,...,...,...,...,...
248757,0,tt0160710,0,91,Released
515728,0,tt0191181,0,18,Released
306484,0,tt1063334,0,25,Released
262942,0,tt0297034,0,69,Released


In [281]:
print(len(lgbt_info_df))
lgbt_info_df[['id', 'budget', 'imdb_id', 'revenue', 'runtime', 'status']].set_index('id')

1299


Unnamed: 0_level_0,budget,imdb_id,revenue,runtime,status
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
507903,0,tt7775720,0,0,In Production
876802,20000,tt15470856,0,0,In Production
793992,0,tt5322004,0,0,Post Production
929477,0,tt17162546,0,19,Post Production
920345,20000,,0,19,Released
...,...,...,...,...,...
248757,0,tt0160710,0,91,Released
515728,0,tt0191181,0,18,Released
306484,0,tt1063334,0,25,Released
262942,0,tt0297034,0,69,Released


In [257]:
test_df_ratings = join_ratings(lgbt_movies_df, [nr_lgbt_df, pg_lgbt_df, pg13_lgbt_df, r_lgbt_df, nc17_lgbt_df])

In [258]:
print(len(test_df_ratings))
test_df_ratings.head()

1299


Unnamed: 0,level_0,adult,backdrop_path,genre_ids,id,original_language,original_title,overview,popularity,poster_path,release_date,title,video,vote_average,vote_count,rating
0,0,False,,"[35, 18]",507903,en,Spoiler Alert,The story of Michael Ausiello and Kit Cowan's ...,3.248,/np3y8wm2K7YK5RT6e68hw6IzhBw.jpg,2022-12-31,Spoiler Alert,False,0.0,0,NR
1,1,False,,"[27, 53]",876802,en,Saint Drogo,Saint Drogo is a supernatural slowburn themati...,1.644,/ghq9XVBvmlbqrsKA7KDMsYOPjAu.jpg,2022-10-01,Saint Drogo,False,0.0,0,R
2,2,False,,[18],793992,en,Three Months,"The story of Caleb, a South Florida teen who l...",1.464,/AoMOVVkuVctRLyjfzjrEnPploxp.jpg,2022-02-23,Three Months,False,0.0,0,NR
3,3,False,,[10749],929477,en,Heart Shot,Teenagers Nikki and Sam are in love and planni...,4.903,,2022-02-17,Heart Shot,False,0.0,0,PG-13
4,4,False,/qQFvUg92Uh1jRXCNgqo9LduBDFD.jpg,[18],920345,en,EMPATHY (Or: The Girl with The Pearled Hair),After running away from her emotionally abusiv...,2.928,/59C1l5dqtKuKuKy5qlE23PV4yC6.jpg,2022-01-25,EMPATHY (Or: The Girl with The Pearled Hair),False,0.0,0,NR


In [259]:
test_df_ratings.loc[724:730]

Unnamed: 0,level_0,adult,backdrop_path,genre_ids,id,original_language,original_title,overview,popularity,poster_path,release_date,title,video,vote_average,vote_count,rating
724,724,False,/om54JgwkKYxJsgLve3y1ne7TExH.jpg,"[35, 18]",156713,en,C.O.G.,A gay cocky young man travels to Oregon to wor...,6.681,/sa0iJ5XAPZzxsZWo0fEoMjrujaZ.jpg,2013-09-20,C.O.G.,False,5.5,65,R
725,726,False,/lkQ5ZZsCrQ1S7TOzujXM2Ct7NvZ.jpg,"[18, 10749, 53]",157370,en,Kill Your Darlings,A murder in 1944 draws together the great poet...,15.979,/yswbLHDNm4gdbIk3DVEKT7Yzu4T.jpg,2013-10-16,Kill Your Darlings,False,7.1,1148,R
726,727,False,/myYMBzPXgeZ3zPnYtyUB1dYvxgU.jpg,[18],221801,en,Naked as We Came,"Love, loss and hope are tumultuously explored ...",6.163,/1OeU7vYojFopPUvh9wjNgUx1uhG.jpg,2013-09-13,Naked as We Came,False,5.7,31,NR
727,728,False,,[99],212849,en,The Dog,"In 1972, John Wojtowicz attempted to rob a Bro...",2.986,/sgaFoabatyHxumdzTKSw6AAKdmQ.jpg,2014-08-05,The Dog,False,6.8,23,NR
728,729,False,/mSUnNIjJEkqkWxbklDjWCD2RUdy.jpg,"[10749, 35]",212721,en,Gerontophilia,Lake is in a straight relationship with Desire...,9.834,/1TpLW6DWtsRHScA8bWe0AUNE7n5.jpg,2014-05-24,Gerontophilia,False,6.4,67,NR
729,730,False,/iFGpIJq7i3jAewxqJg8zihphUfN.jpg,"[35, 10749]",216138,en,10 Rules for Sleeping Around,"By following ten simple rules, 20-somethings ...",8.173,/dX8IMDXbVWJ7fq0AX0efog1SMX5.jpg,2014-04-04,10 Rules for Sleeping Around,False,3.6,46,R
730,731,False,/uWW6bLyvXt7TpHe6VYRneFficI7.jpg,"[99, 35]",270698,en,I'm a Porn Star,I'm a Porn Star follows the lives of guys in t...,14.795,/28ZqVlvpYJ1hEPuICiNsrZbDV3i.jpg,2013-10-04,I'm a Porn Star,False,5.6,17,NR


In [240]:
get_genres(test_df_ratings).loc[724:730]

Unnamed: 0,index,id,adult,backdrop_path,genre_ids,original_language,original_title,overview,popularity,poster_path,release_date,title,video,vote_average,vote_count,rating,genres
724,724,156713,False,/om54JgwkKYxJsgLve3y1ne7TExH.jpg,"[35, 18]",en,C.O.G.,A gay cocky young man travels to Oregon to wor...,6.681,/sa0iJ5XAPZzxsZWo0fEoMjrujaZ.jpg,2013-09-20,C.O.G.,False,5.5,65,R,
725,726,157370,False,/lkQ5ZZsCrQ1S7TOzujXM2Ct7NvZ.jpg,"[18, 10749, 53]",en,Kill Your Darlings,A murder in 1944 draws together the great poet...,15.979,/yswbLHDNm4gdbIk3DVEKT7Yzu4T.jpg,2013-10-16,Kill Your Darlings,False,7.1,1148,R,
726,727,221801,False,/myYMBzPXgeZ3zPnYtyUB1dYvxgU.jpg,[18],en,Naked as We Came,"Love, loss and hope are tumultuously explored ...",6.163,/1OeU7vYojFopPUvh9wjNgUx1uhG.jpg,2013-09-13,Naked as We Came,False,5.7,31,NR,
727,728,212849,False,,[99],en,The Dog,"In 1972, John Wojtowicz attempted to rob a Bro...",2.986,/sgaFoabatyHxumdzTKSw6AAKdmQ.jpg,2014-08-05,The Dog,False,6.8,23,NR,
728,729,212721,False,/mSUnNIjJEkqkWxbklDjWCD2RUdy.jpg,"[10749, 35]",en,Gerontophilia,Lake is in a straight relationship with Desire...,9.834,/1TpLW6DWtsRHScA8bWe0AUNE7n5.jpg,2014-05-24,Gerontophilia,False,6.4,67,NR,
729,730,216138,False,/iFGpIJq7i3jAewxqJg8zihphUfN.jpg,"[35, 10749]",en,10 Rules for Sleeping Around,"By following ten simple rules, 20-somethings ...",8.173,/dX8IMDXbVWJ7fq0AX0efog1SMX5.jpg,2014-04-04,10 Rules for Sleeping Around,False,3.6,46,R,
730,731,270698,False,/uWW6bLyvXt7TpHe6VYRneFficI7.jpg,"[99, 35]",en,I'm a Porn Star,I'm a Porn Star follows the lives of guys in t...,14.795,/28ZqVlvpYJ1hEPuICiNsrZbDV3i.jpg,2013-10-04,I'm a Porn Star,False,5.6,17,NR,


In [303]:
lgbt_movie_df = combine_clean_data(lgbt_movies_df, [nr_lgbt_df, pg_lgbt_df, pg13_lgbt_df, r_lgbt_df, nc17_lgbt_df], lgbt_info_df)

In [304]:
# Check to make sure items joined correctly.
print(len(lgbt_movie_df))
lgbt_movie_df.loc[lgbt_movie_df['title'] == "Rent"]

1299


Unnamed: 0,id,original_title,imdb_id,genre_ids,genres,rating,overview,popularity,release_date,budget,revenue,runtime,status,title
260,557648,Rent,tt6881910,"[10402, 10770, 18]","[Music, TV Movie, Drama]",NR,The story of several friends in New York City ...,7.69,2019-01-27,0,0,135,Released,Rent
1115,1833,Rent,tt0294870,"[18, 10749]","[Drama, Romance]",PG-13,This rock opera tells the story of one year in...,15.783,2005-11-17,40000000,31670620,135,Released,Rent


In [278]:
lgbt_movie_df.loc[724:730]

Unnamed: 0,id,original_title,imdb_id,genre_ids,genres,rating,overview,popularity,release_date,budget,revenue,runtime,status,title
724,156713,C.O.G.,,"[35, 18]","[Comedy, Drama]",R,A gay cocky young man travels to Oregon to wor...,6.681,2013-09-20,,,,,C.O.G.
725,157370,Kill Your Darlings,,"[18, 10749, 53]","[Drama, Romance, Thriller]",R,A murder in 1944 draws together the great poet...,15.979,2013-10-16,,,,,Kill Your Darlings
726,221801,Naked as We Came,,[18],[Drama],NR,"Love, loss and hope are tumultuously explored ...",6.163,2013-09-13,,,,,Naked as We Came
727,212849,The Dog,,[99],[Documentary],NR,"In 1972, John Wojtowicz attempted to rob a Bro...",2.986,2014-08-05,,,,,The Dog
728,212721,Gerontophilia,,"[10749, 35]","[Romance, Comedy]",NR,Lake is in a straight relationship with Desire...,9.834,2014-05-24,,,,,Gerontophilia
729,216138,10 Rules for Sleeping Around,,"[35, 10749]","[Comedy, Romance]",R,"By following ten simple rules, 20-somethings ...",8.173,2014-04-04,,,,,10 Rules for Sleeping Around
730,270698,I'm a Porn Star,,"[99, 35]","[Documentary, Comedy]",NR,I'm a Porn Star follows the lives of guys in t...,14.795,2013-10-04,,,,,I'm a Porn Star


In [163]:
lgbt_movies_df.reset_index().loc[724:730]

Unnamed: 0,index,adult,backdrop_path,genre_ids,id,original_language,original_title,overview,popularity,poster_path,release_date,title,video,vote_average,vote_count,genre
724,4,False,/om54JgwkKYxJsgLve3y1ne7TExH.jpg,"[35, 18]",156713,en,C.O.G.,A gay cocky young man travels to Oregon to wor...,6.681,/sa0iJ5XAPZzxsZWo0fEoMjrujaZ.jpg,2013-09-20,C.O.G.,False,5.5,65,[Drama]
725,5,False,/lkQ5ZZsCrQ1S7TOzujXM2Ct7NvZ.jpg,"[18, 10749, 53]",157370,en,Kill Your Darlings,A murder in 1944 draws together the great poet...,15.979,/yswbLHDNm4gdbIk3DVEKT7Yzu4T.jpg,2013-10-16,Kill Your Darlings,False,7.1,1148,"[Drama, Comedy]"
726,6,False,/myYMBzPXgeZ3zPnYtyUB1dYvxgU.jpg,[18],221801,en,Naked as We Came,"Love, loss and hope are tumultuously explored ...",6.163,/1OeU7vYojFopPUvh9wjNgUx1uhG.jpg,2013-09-13,Naked as We Came,False,5.7,31,[Drama]
727,7,False,,[99],212849,en,The Dog,"In 1972, John Wojtowicz attempted to rob a Bro...",2.986,/sgaFoabatyHxumdzTKSw6AAKdmQ.jpg,2014-08-05,The Dog,False,6.8,23,[Documentary]
728,8,False,/mSUnNIjJEkqkWxbklDjWCD2RUdy.jpg,"[10749, 35]",212721,en,Gerontophilia,Lake is in a straight relationship with Desire...,9.834,/1TpLW6DWtsRHScA8bWe0AUNE7n5.jpg,2014-05-24,Gerontophilia,False,6.4,67,[Drama]
729,9,False,/iFGpIJq7i3jAewxqJg8zihphUfN.jpg,"[35, 10749]",216138,en,10 Rules for Sleeping Around,"By following ten simple rules, 20-somethings ...",8.173,/dX8IMDXbVWJ7fq0AX0efog1SMX5.jpg,2014-04-04,10 Rules for Sleeping Around,False,3.6,46,"[Comedy, Drama]"
730,10,False,/uWW6bLyvXt7TpHe6VYRneFficI7.jpg,"[99, 35]",270698,en,I'm a Porn Star,I'm a Porn Star follows the lives of guys in t...,14.795,/28ZqVlvpYJ1hEPuICiNsrZbDV3i.jpg,2013-10-04,I'm a Porn Star,False,5.6,17,"[Thriller, Horror, Mystery]"


In [162]:
test.loc[[724:730]]

SyntaxError: invalid syntax (<ipython-input-162-bc91630e1ca2>, line 1)

In [133]:
lgbt_movie_df[lgbt_movie_df.title.duplicated()]

Unnamed: 0,id,original_title,imdb_id,genre_ids,genre,rating,overview,popularity,release_date,budget,revenue,runtime,status,title
438,490232,Ace,tt7176320,[],[],NR,An unlikely pair of teenagers perform an awkwa...,0.877,2017-04-30,0.0,0.0,,Released,Ace
845,665061,Pride,tt1727252,[35],"[Comedy, Horror]",NR,"On the morning of his annual gay pride party, ...",1.165,2011-10-22,0.0,0.0,120.0,Released,Pride
896,356753,AWOL,tt1762363,[18],"[Drama, Mystery]",NR,"Days before deployment to Afghanistan, Joey re...",0.994,2011-01-21,0.0,0.0,14.0,Released,AWOL
928,753451,Remission,tt1655387,[27],"[Romance, Science Fiction]",NR,Sam is waiting for a doctor's results of his l...,0.6,2010-04-16,0.0,0.0,93.0,Released,Remission
1116,1833,Rent,tt0424496,"[18, 10749]",[Documentary],PG-13,This rock opera tells the story of one year in...,15.783,2005-11-17,0.0,19738.0,95.0,Released,Rent
1128,49113,Dare,tt0446051,[18],"[Action, Crime, Drama, Mystery, Thriller]",NR,High school senior Ben secretly lusts after ba...,3.287,2005-08-13,0.0,0.0,98.0,Released,Dare
1230,178516,D.E.B.S.,tt0314039,"[35, 28, 10749]","[Drama, Thriller]",NR,A quartet of girls from a prep high school are...,2.406,2003-01-16,0.0,0.0,114.0,Released,D.E.B.S.


In [12]:
# Create genre_url variable
# genre_url = "https://api.themoviedb.org/3/genre/movie/list?api_key=" + tmdb_api_key + "&language=en-US"

In [96]:
def get_genres(genre_url):
    genre_request = requests.get(genre_url)
    genre_data = genre_request.json()
    genre_info = genre_data['genres']
    return genre_info
ids = [i['id'] for i in get_genres(genre_url)]
names = [n['name'] for n in get_genres(genre_url)]

In [98]:
ids = [i['id'] for i in get_genres(genre_url)]
names = [n['name'] for n in get_genres(genre_url)]

print(ids)
print(names)

[28, 12, 16, 35, 80, 99, 18, 10751, 14, 36, 27, 10402, 9648, 10749, 878, 10770, 53, 10752, 37]
['Action', 'Adventure', 'Animation', 'Comedy', 'Crime', 'Documentary', 'Drama', 'Family', 'Fantasy', 'History', 'Horror', 'Music', 'Mystery', 'Romance', 'Science Fiction', 'TV Movie', 'Thriller', 'War', 'Western']


In [100]:
add_genre_names(lgbt_movies_df, genre_url)

Unnamed: 0,adult,backdrop_path,genre_ids,id,original_language,original_title,overview,popularity,poster_path,release_date,title,video,vote_average,vote_count,genre
0,False,,"[35, 18]",507903,en,Spoiler Alert,The story of Michael Ausiello and Kit Cowan's ...,3.248,/np3y8wm2K7YK5RT6e68hw6IzhBw.jpg,2022-12-31,Spoiler Alert,False,0.0,0,"[Comedy, Drama]"
1,False,,"[27, 53]",876802,en,Saint Drogo,Saint Drogo is a supernatural slowburn themati...,1.644,/ghq9XVBvmlbqrsKA7KDMsYOPjAu.jpg,2022-10-01,Saint Drogo,False,0.0,0,"[Horror, Thriller]"
2,False,,[18],793992,en,Three Months,"The story of Caleb, a South Florida teen who l...",1.464,/AoMOVVkuVctRLyjfzjrEnPploxp.jpg,2022-02-23,Three Months,False,0.0,0,[Drama]
3,False,,[10749],929477,en,Heart Shot,Teenagers Nikki and Sam are in love and planni...,4.903,,2022-02-17,Heart Shot,False,0.0,0,[Romance]
4,False,/qQFvUg92Uh1jRXCNgqo9LduBDFD.jpg,[18],920345,en,EMPATHY (Or: The Girl with The Pearled Hair),After running away from her emotionally abusiv...,2.928,/59C1l5dqtKuKuKy5qlE23PV4yC6.jpg,2022-01-25,EMPATHY (Or: The Girl with The Pearled Hair),False,0.0,0,[Drama]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13,False,/ti0VLq5Xv8PicnLAfcrnFmNsYmY.jpg,"[35, 10749]",248757,en,Punks,"In his directorial debut, Patrik-Ian Polk chro...",1.303,/3an3l9X95zTx8vtzxqYMhCxuBOz.jpg,2000-01-24,Punks,False,4.7,6,[Documentary]
14,False,/dD90r6NQ8cFgYjjYGSLRQLCdJWN.jpg,[18],515728,en,Hitch,Two friends are on a road trip and a one-sided...,1.790,/4kH0kEKmrDtxxJh0rSUtecJYOpK.jpg,2000-01-01,Hitch,False,5.1,8,"[Romance, Comedy, Drama]"
15,False,/n6kR7P93mUNj70zSdQLOWtExbyw.jpg,[],306484,en,Meet Joe Gay,Director Benjamin Morgan takes a hard look at ...,1.181,/pJYcgatiDE6eYwvO25baVoDkb0a.jpg,2000-01-01,Meet Joe Gay,False,4.0,5,"[Romance, Comedy]"
16,False,,[99],262942,en,A Boy Named Sue,A Boy Named Sue chronicles the transformation ...,1.583,/9uTNEluw8FAos3trAXqE7ffllZq.jpg,2000-01-01,A Boy Named Sue,False,3.0,2,[Documentary]


In [13]:
# Create function to add genre names
def add_genre_names(main_df):
     genres_df = pd.DataFrame()
    genres_df['genre'] = main_df['genre_ids']
    genres_df = pd.DataFrame(
        genres_df['genre'].to_list(), columns=['genre1', 'genre2', 'genre3', 'genre4', 'genre5', 'genre6'])
    def get_genres(url_for_genres):
        genre_request = requests.get(url_for_genres)
        genre_data = genre_request.json()
        genre_info = genre_data['genres']
        return genre_info
    ids = [i['id'] for i in get_genres(genre_url)]
    names = [n['name'] for n in get_genres(genre_url)]
    genres_df = genres_df.replace(ids, names)
    genres_df['genres'] = genres_df.values.tolist()
    genres_df['genres'] = genres_df.genres.apply(lambda x: [genre for genre in x if not pd.isnull(genre)])
    main_df['genre'] = genres_df['genres']
    return main_df

In [14]:
add_genre_names(lgbt_movies_df)

Unnamed: 0,adult,backdrop_path,genre_ids,id,original_language,original_title,overview,popularity,poster_path,release_date,title,video,vote_average,vote_count,genre
0,False,,"[35, 18]",507903,en,Spoiler Alert,The story of Michael Ausiello and Kit Cowan's ...,3.248,/np3y8wm2K7YK5RT6e68hw6IzhBw.jpg,2022-12-31,Spoiler Alert,False,0.0,0,"[Comedy, Drama]"
1,False,,"[27, 53]",876802,en,Saint Drogo,Saint Drogo is a supernatural slowburn themati...,1.644,/ghq9XVBvmlbqrsKA7KDMsYOPjAu.jpg,2022-10-01,Saint Drogo,False,0.0,0,"[Horror, Thriller]"
2,False,,[18],793992,en,Three Months,"The story of Caleb, a South Florida teen who l...",1.464,/AoMOVVkuVctRLyjfzjrEnPploxp.jpg,2022-02-23,Three Months,False,0.0,0,[Drama]
3,False,,[10749],929477,en,Heart Shot,Teenagers Nikki and Sam are in love and planni...,4.903,,2022-02-17,Heart Shot,False,0.0,0,[Romance]
4,False,/qQFvUg92Uh1jRXCNgqo9LduBDFD.jpg,[18],920345,en,EMPATHY (Or: The Girl with The Pearled Hair),After running away from her emotionally abusiv...,2.928,/59C1l5dqtKuKuKy5qlE23PV4yC6.jpg,2022-01-25,EMPATHY (Or: The Girl with The Pearled Hair),False,0.0,0,[Drama]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13,False,/ti0VLq5Xv8PicnLAfcrnFmNsYmY.jpg,"[35, 10749]",248757,en,Punks,"In his directorial debut, Patrik-Ian Polk chro...",1.303,/3an3l9X95zTx8vtzxqYMhCxuBOz.jpg,2000-01-24,Punks,False,4.7,6,[Documentary]
14,False,/dD90r6NQ8cFgYjjYGSLRQLCdJWN.jpg,[18],515728,en,Hitch,Two friends are on a road trip and a one-sided...,1.790,/4kH0kEKmrDtxxJh0rSUtecJYOpK.jpg,2000-01-01,Hitch,False,5.1,8,"[Romance, Comedy, Drama]"
15,False,/n6kR7P93mUNj70zSdQLOWtExbyw.jpg,[],306484,en,Meet Joe Gay,Director Benjamin Morgan takes a hard look at ...,1.181,/pJYcgatiDE6eYwvO25baVoDkb0a.jpg,2000-01-01,Meet Joe Gay,False,4.0,5,"[Romance, Comedy]"
16,False,,[99],262942,en,A Boy Named Sue,A Boy Named Sue chronicles the transformation ...,1.583,/9uTNEluw8FAos3trAXqE7ffllZq.jpg,2000-01-01,A Boy Named Sue,False,3.0,2,[Documentary]


In [66]:
# Create function that combines all individual rating dataframes into one.
# def combine_ratings(rating_df_list):
#     ratings_df = pd.concat(rating_df_list).set_index('id')
#     return ratings_df

def combining_data(main_df, rating_df_list, info_df):
    # Reorder columns.
    new_columns = [
        'id', 'original_title', 'imdb_id', 'genre_ids',
        'genre', 'rating',
        'overview', 'popularity', 'release_date','budget',
        'revenue', 'runtime', 'status', 'title']
    main_df = main_df.set_index('id')
    def combine_ratings(rating_df_list):
        ratings_df = pd.concat(rating_df_list).set_index('id')
        return ratings_df
    
    # Create function to add genre names
    def add_genre_names(main_df):
        # Create genre_url variable
        genre_url = "https://api.themoviedb.org/3/genre/movie/list?api_key=" + tmdb_api_key + "&language=en-US"
        genres_df = pd.DataFrame()
        genres_df['genre'] = main_df['genre_ids']
        genres_df = pd.DataFrame(
        genres_df['genre'].to_list(), columns=['genre1', 'genre2', 'genre3', 'genre4', 'genre5', 'genre6'])
        def get_genres(url_for_genres):
            genre_request = requests.get(url_for_genres)
            genre_data = genre_request.json()
            genre_info = genre_data['genres']
            return genre_info
        ids = [i['id'] for i in get_genres(genre_url)]
        names = [n['name'] for n in get_genres(genre_url)]
        genres_df = genres_df.replace(ids, names)
        genres_df['genres'] = genres_df.values.tolist()
        genres_df['genres'] = genres_df.genres.apply(lambda x: [genre for genre in x if not pd.isnull(genre)])
        main_df['genre'] = genres_df['genres']
        return main_df
    
    main_df = main_df.join(ratings_df, on='id', how='left').reset_index()
    main_df['rating'].fillna('NR', inplace=True)
    main_df = main_df.join(info_df, how='left')
    main_df = main_df[new_columns]
    return main_df

In [101]:
nc17_lgbt_df = isolate_ratings(nc17_lgbt_df, "NC-17")
r_lgbt_df = isolate_ratings(r_lgbt_df, "R")

In [68]:
lgbt_ratings_df = combine_ratings([r_lgbt_df, nc17_lgbt_df])

In [69]:
lgbt_ratings_df

Unnamed: 0_level_0,rating
id,Unnamed: 1_level_1
876802,R
915164,R
857731,R
552269,R
591273,R
...,...
95048,R
22597,R
526034,NC-17
94025,NC-17


In [102]:
r_lgbt_df.head()

Unnamed: 0,id,rating
0,876802,R
1,915164,R
2,857731,R
3,552269,R
4,591273,R


In [14]:
[ratings[ratings[n] for n in range(len(ratings))]]

SyntaxError: invalid syntax (<ipython-input-14-cc956e510930>, line 1)

In [1]:
ratings = ["NR", "G", "PG", "PG-13", "R", "NC-17"]