# TMdB API

This notebook focuses on gathering movie information from the TMdB API. By the end of the notebook, the final dataframe should consist of the following columns: film name, director, Letterboxd ID, Letterboxd link, TMdB ID, year, tagline, genres, run time, average rating, overview, and languages.

In [1]:
# import necessary libraries
import pandas as pd
import tmdbsimple as tmdb


In [2]:
# Input api key. The key is kept in a local txt file so it never appears in the notebook.
# The file is read as plain text (first non-empty line, whitespace stripped) rather than
# through a CSV parser, which would cut the key at a comma and treat it as a column header.
# NOTE(review): the absolute path below is machine-specific.
with open('/Users/Crisitna/Desktop/FlatIron/Capstone/api.txt') as key_file:
    api_key = next((line.strip() for line in key_file if line.strip()), None)
if api_key is None:
    raise ValueError('API key file is empty')
tmdb.API_KEY = api_key

In [3]:
# Load the Letterboxd dataframe that the TMdB information will be added to
film_df = pd.read_csv('panda_dataframes/letterboxd_film_data_director.csv')
# info() prints its summary itself and returns None, so wrapping it in print()
# only adds a stray "None" line to the output
film_df.info()
film_df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 71928 entries, 0 to 71927
Data columns (total 7 columns):
film_name       71872 non-null object
lb_id           71928 non-null int64
lb_link         71928 non-null object
tmdb_id         71928 non-null int64
movie_tv        71928 non-null object
release_year    71927 non-null object
director        71909 non-null object
dtypes: int64(2), object(5)
memory usage: 3.8+ MB
None


Unnamed: 0,film_name,lb_id,lb_link,tmdb_id,movie_tv,release_year,director
0,Parasite,426406,https://letterboxd.com//film/parasite-2019/,496243,movie,2019,Bong Joon-ho
1,Joker,406775,https://letterboxd.com//film/joker-2019/,475557,movie,2019,Todd Phillips
2,Knives Out,475370,https://letterboxd.com//film/knives-out-2019/,546554,movie,2019,Rian Johnson
3,Pulp Fiction,51444,https://letterboxd.com//film/pulp-fiction/,680,movie,1994,Quentin Tarantino
4,Inception,34722,https://letterboxd.com//film/inception/,27205,movie,2010,Christopher Nolan


We want the final database to have the following columns: film name, director, Letterboxd ID, Letterboxd link, TMdB ID, year, tagline, genres, run time, average rating, overview, and languages.

This means we still need to add these columns: tagline, genres, run time, average rating, overview, and languages.

In [4]:
# A quick look at the data any one movie will contain.
# 496243 is the tmdb_id of the first row of film_df (Parasite).
movie = tmdb.Movies(496243)

# info() returns the movie's details as a dict, shown as the cell output
movie.info()

{'adult': False,
 'backdrop_path': '/TU9NIjwzjoKPwQHoHshkFcQUCG.jpg',
 'belongs_to_collection': None,
 'budget': 11363000,
 'genres': [{'id': 35, 'name': 'Comedy'},
  {'id': 53, 'name': 'Thriller'},
  {'id': 18, 'name': 'Drama'}],
 'homepage': 'https://www.parasite-movie.com/',
 'id': 496243,
 'imdb_id': 'tt6751668',
 'original_language': 'ko',
 'original_title': '기생충',
 'overview': "All unemployed, Ki-taek's family takes peculiar interest in the wealthy and glamorous Parks for their livelihood until they get entangled in an unexpected incident.",
 'popularity': 101.267,
 'poster_path': '/7IiTTgloJzvGI1TAYymCfbfl3vT.jpg',
 'production_companies': [{'id': 4399,
   'logo_path': '/7bWmbWfxFNSGTCjLHkHn3UjspZS.png',
   'name': 'Barunson E&A',
   'origin_country': 'KR'},
  {'id': 7036,
   'logo_path': '/javbyY0ZCvlFJtly3tpZqf2NwLX.png',
   'name': 'CJ Entertainment',
   'origin_country': 'KR'}],
 'production_countries': [{'iso_3166_1': 'KR', 'name': 'South Korea'}],
 'release_date': '2019-05

In [5]:
# Isolating the title: read it as an attribute of the movie object
# (presumably populated by the movie.info() call in the previous cell - confirm)
movie.title

'Parasite'

In [11]:
# Take an independent copy of the first 50 rows so the code can be tried out
# on a small sample before it is run against the full dataframe
test = film_df.iloc[:50].copy()
test.head()

Unnamed: 0,film_name,lb_id,lb_link,tmdb_id,movie_tv,release_year,director
0,Parasite,426406,https://letterboxd.com//film/parasite-2019/,496243,movie,2019,Bong Joon-ho
1,Joker,406775,https://letterboxd.com//film/joker-2019/,475557,movie,2019,Todd Phillips
2,Knives Out,475370,https://letterboxd.com//film/knives-out-2019/,546554,movie,2019,Rian Johnson
3,Pulp Fiction,51444,https://letterboxd.com//film/pulp-fiction/,680,movie,1994,Quentin Tarantino
4,Inception,34722,https://letterboxd.com//film/inception/,27205,movie,2010,Christopher Nolan


In [12]:
# Enrich a film dataframe with details fetched from the TMdB API
def _fetch_movie_details(tmdb_id):
    """Fetch one movie from TMdB and return its values keyed by output column.

    Any failure (request error, missing key in the response) is raised to the
    caller, which decides how to record the row.
    """
    response = tmdb.Movies(tmdb_id).info()
    return {
        'run_time': response['runtime'],
        'vote_average': response['vote_average'],
        'tagline': response['tagline'],
        'overview': response['overview'],
        # Names are read by key instead of by position inside each dict, so the
        # result does not depend on the order of keys in the API response.
        # NOTE(review): prefers `english_name` and falls back to `name` -
        # confirm against the spoken_languages entries TMdB currently returns.
        'languages': [
            language.get('english_name') or language['name']
            for language in response['spoken_languages']
        ],
        'genres': [genre['name'] for genre in response['genres']],
    }


def create_film_df(df):
    """Add TMdB details to every row of ``df`` and return ``df``.

    For each value in ``df['tmdb_id']`` the movie is looked up through
    ``tmdb.Movies`` and six columns are filled in: ``run_time``,
    ``vote_average``, ``tagline``, ``overview``, ``languages`` (list of
    language names) and ``genres`` (list of genre names).

    Rows whose lookup fails get the string ``'None'`` in all six columns.
    A failure part-way through a row can no longer leave the per-column
    lists with different lengths, because each row is appended exactly once.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``tmdb_id`` column. Any index is accepted; rows are
        processed in order. The frame is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with the six columns added or overwritten.
    """
    columns = ('run_time', 'vote_average', 'tagline', 'overview', 'languages', 'genres')
    fetched = {column: [] for column in columns}

    for tmdb_id in df['tmdb_id']:
        try:
            details = _fetch_movie_details(tmdb_id)
        except Exception:
            # Best effort: keep going when one title cannot be fetched.
            # `Exception` (not a bare except) lets KeyboardInterrupt stop a long run.
            details = dict.fromkeys(columns, 'None')
        for column in columns:
            fetched[column].append(details[column])

    # Assign whole columns at once; this works for any index and avoids
    # writing strings into numeric columns one cell at a time.
    for column in columns:
        df[column] = fetched[column]

    return df

In [13]:
# Try the function on the small `test` sample first; the returned dataframe is
# displayed as the cell output. Note that `test` itself gains the new columns.
create_film_df(test)

Unnamed: 0,film_name,lb_id,lb_link,tmdb_id,movie_tv,release_year,director,run_time,vote_average,tagline,overview,languages,genres
0,Parasite,426406,https://letterboxd.com//film/parasite-2019/,496243,movie,2019,Bong Joon-ho,133.0,8.5,Act like you own the place.,"All unemployed, Ki-taek's family takes peculia...","[English, German, Korean]","[Comedy, Thriller, Drama]"
1,Joker,406775,https://letterboxd.com//film/joker-2019/,475557,movie,2019,Todd Phillips,122.0,8.2,Put on a happy face.,"During the 1980s, a failed stand-up comedian i...",[English],"[Crime, Thriller, Drama]"
2,Knives Out,475370,https://letterboxd.com//film/knives-out-2019/,546554,movie,2019,Rian Johnson,131.0,7.8,"Hell, any of them could have done it.",When renowned crime novelist Harlan Thrombey i...,"[Spanish, English]","[Comedy, Crime, Drama, Mystery]"
3,Pulp Fiction,51444,https://letterboxd.com//film/pulp-fiction/,680,movie,1994,Quentin Tarantino,154.0,8.5,Just because you are a character doesn't mean ...,"A burger-loving hit man, his philosophical par...","[English, Spanish, French]","[Thriller, Crime]"
4,Inception,34722,https://letterboxd.com//film/inception/,27205,movie,2010,Christopher Nolan,148.0,8.3,Your mind is the scene of the crime.,"Cobb, a skilled thief who commits corporate es...","[English, Japanese, French]","[Action, Science Fiction, Adventure]"
5,Get Out,353117,https://letterboxd.com//film/get-out-2017/,419430,movie,2017,Jordan Peele,104.0,7.6,"Just because you're invited, doesn't mean you'...",Chris and his girlfriend Rose go upstate to vi...,[English],"[Mystery, Thriller, Horror]"
6,Once Upon a Time… in Hollywood,397859,https://letterboxd.com//film/once-upon-a-time-...,466272,movie,2019,Quentin Tarantino,162.0,7.5,"In this town, it can all change… like that","Los Angeles, 1969. TV star Rick Dalton, a stru...","[English, Italian, Spanish]","[Comedy, Drama, Thriller]"
7,Lady Bird,326279,https://letterboxd.com//film/lady-bird/,391713,movie,2017,Greta Gerwig,94.0,7.3,Fly Away Home.,"Lady Bird McPherson, a strong willed, deeply o...",[English],"[Drama, Comedy]"
8,Spider-Man: Into the Spider-Verse,251943,https://letterboxd.com//film/spider-man-into-t...,324857,movie,2018,Rodney Rothman,117.0,8.4,More Than One Wears the Mask,Miles Morales is juggling his life between bei...,"[English, Spanish]","[Action, Adventure, Animation, Science Fiction..."
9,Midsommar,459564,https://letterboxd.com//film/midsommar/,530385,movie,2019,Ari Aster,148.0,7.1,Let the festivities begin.,Several friends travel to Sweden to study as a...,"[English, Swedish]","[Horror, Drama, Mystery]"


In [None]:
# Run the enrichment on the full dataframe (one TMdB request per row).
# create_film_df adds the columns to film_df itself and returns that same
# object, so `df` and `film_df` refer to one dataframe afterwards.
df = create_film_df(film_df)

In [None]:
# Fetch details for every row of film_df, choosing the TMdB endpoint from the
# `movie_tv` column: 'movie' rows use tmdb.Movies, all other rows use tmdb.TV.
# Results go into the same columns create_film_df fills (run_time,
# vote_average, tagline, overview, languages, genres), so rows that could not
# be found as movies are filled in from the TV endpoint.
# Note: this re-requests every row, including movies that were already fetched.
def _tmdb_details(tmdb_id, kind):
    """Fetch one title from TMdB and return its values keyed by output column.

    ``kind`` is the row's ``movie_tv`` value. Errors are raised to the caller.
    """
    if kind == 'movie':
        response = tmdb.Movies(tmdb_id).info()
        run_time = response.get('runtime')
    else:
        response = tmdb.TV(tmdb_id).info()
        # NOTE(review): assumes TV responses carry a list `episode_run_time`
        # instead of a single `runtime` (per the TMdB TV details docs) - confirm.
        episode_run_times = response.get('episode_run_time') or []
        run_time = episode_run_times[0] if episode_run_times else None

    return {
        'run_time': run_time,
        'vote_average': response.get('vote_average'),
        'tagline': response.get('tagline'),
        'overview': response.get('overview'),
        # Read names by key, not by position inside each dict
        'languages': [
            language.get('english_name') or language.get('name')
            for language in response.get('spoken_languages') or []
        ],
        'genres': [genre['name'] for genre in response.get('genres') or []],
    }


detail_columns = ['run_time', 'vote_average', 'tagline', 'overview', 'languages', 'genres']
fetched = {column: [] for column in detail_columns}

for tmdb_id, kind in zip(film_df['tmdb_id'], film_df['movie_tv']):
    try:
        details = _tmdb_details(tmdb_id, kind)
    except Exception:
        # Best effort: record the same 'None' placeholder create_film_df uses
        # and continue with the next title
        details = dict.fromkeys(detail_columns, 'None')
    for column in detail_columns:
        fetched[column].append(details[column])

# Assign whole columns at once; writing through film_df.iloc[x][col] would only
# modify a temporary copy of the row
for column in detail_columns:
    film_df[column] = fetched[column]


In [None]:
# Save the enriched dataframe (`df`) to a CSV file, leaving out the index column
df.to_csv(r'panda_dataframes/letterboxd_film_data.csv', index = False)