# API Calls
### APIs Used:
- [OMDB](https://www.omdbapi.com/)
- [TMDB](https://www.themoviedb.org/documentation/api?language=en-US)

### Movie Lists:
- [MovieLens](https://grouplens.org/datasets/movielens/)
- want to also do top 1000 grossing movies as well just to make sure they're included

In [10]:
import os
import requests
import json
import pandas as pd
from pandas import json_normalize
import numpy as np

In [23]:
# TMDB API Key
tmdb_secret = os.environ['TMDB_API']

# OMDB API Key
omdb_secret = os.environ['OMDB_API']

In [3]:
# importing movielens data for our main sample
list_df = pd.read_csv('Data/MovieLens/links.csv')

In [4]:
list_df.head()

Unnamed: 0,movieId,imdbId,tmdbId
0,1,114709,862.0
1,2,113497,8844.0
2,3,113228,15602.0
3,4,114885,31357.0
4,5,113041,11862.0


In [57]:
list_df.shape[0]

9742

In [6]:
list_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9742 entries, 0 to 9741
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   movieId  9742 non-null   int64  
 1   imdbId   9742 non-null   int64  
 2   tmdbId   9734 non-null   float64
dtypes: float64(1), int64(2)
memory usage: 228.5 KB


In [80]:
# removing decimals from TMDB ids (just converting to integers for convenience) & then converting to strings for API call
#list_df['tmdbId'] = list_df['tmdbId'].astype('Int64')
list_df['tmdbId'] = list_df['tmdbId'].astype(str)

In [81]:
list_df.iloc[0,2]

'862'

In [82]:
list_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9742 entries, 0 to 9741
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   movieId  9742 non-null   int64 
 1   imdbId   9742 non-null   int64 
 2   tmdbId   9742 non-null   object
dtypes: int64(2), object(1)
memory usage: 228.5+ KB


### TMDB API Request

In [50]:
def tmdb_request(tmdb_id):
    """
    Constructing an api request for TMDB (The Movie Database) based on a given tmdb id, 
    then returning the json file
    
    Parameters:
        tmdb_id: movie id from TMDB
        
    Returns:
        tmdb_data: json data on given id
    """
    # construct url
    url = 'https://api.themoviedb.org/3/movie/' + tmdb_id + '?&api_key=' + tmdb_secret
    
    payload = {}
    headers = {}
    
    # perform get request
    response = requests.get(url, headers=headers, data = payload)
    
    # process request into usable JSON file
    tmdb_response = json.loads(response.text)
    tmdb_data.append(tmdb_response)

    return

In [None]:
# testing call

In [51]:
tmdb_data = []
tmdb_request('862')

In [52]:
tmdb_data

[{'adult': False,
  'backdrop_path': '/3Rfvhy1Nl6sSGJwyjb0QiZzZYlB.jpg',
  'belongs_to_collection': {'id': 10194,
   'name': 'Toy Story Collection',
   'poster_path': '/7G9915LfUQ2lVfwMEEhDsn3kT4B.jpg',
   'backdrop_path': '/9FBwqcd9IRruEDUrTdcaafOMKUq.jpg'},
  'budget': 30000000,
  'genres': [{'id': 16, 'name': 'Animation'},
   {'id': 12, 'name': 'Adventure'},
   {'id': 10751, 'name': 'Family'},
   {'id': 35, 'name': 'Comedy'}],
  'homepage': 'http://toystory.disney.com/toy-story',
  'id': 862,
  'imdb_id': 'tt0114709',
  'original_language': 'en',
  'original_title': 'Toy Story',
  'overview': "Led by Woody, Andy's toys live happily in his room until Andy's birthday brings Buzz Lightyear onto the scene. Afraid of losing his place in Andy's heart, Woody plots against Buzz. But when circumstances separate Buzz and Woody from their owner, the duo eventually learns to put aside their differences.",
  'popularity': 121.595,
  'poster_path': '/uXDfjJbdP4ijW5hWSBrPrlKpxab.jpg',
  'productio

In [83]:
tmdb_data = []
def tmdb_req_loop():
    """
    Iterates through the movielens dataframe & runs an API request to tmdb for each movie id
           
    Returns:
        Complete json data for every id in our movie dataframe
    """
    
    num_rows = list_df.shape[0]
    row_count = 0
    
    while row_count < num_rows:
        tmdb_request((list_df.iloc[row_count,2]))
        row_count +=1

In [84]:
tmdb_req_loop()

In [85]:
tmdb_data

[{'adult': False,
  'backdrop_path': '/3Rfvhy1Nl6sSGJwyjb0QiZzZYlB.jpg',
  'belongs_to_collection': {'id': 10194,
   'name': 'Toy Story Collection',
   'poster_path': '/7G9915LfUQ2lVfwMEEhDsn3kT4B.jpg',
   'backdrop_path': '/9FBwqcd9IRruEDUrTdcaafOMKUq.jpg'},
  'budget': 30000000,
  'genres': [{'id': 16, 'name': 'Animation'},
   {'id': 12, 'name': 'Adventure'},
   {'id': 10751, 'name': 'Family'},
   {'id': 35, 'name': 'Comedy'}],
  'homepage': 'http://toystory.disney.com/toy-story',
  'id': 862,
  'imdb_id': 'tt0114709',
  'original_language': 'en',
  'original_title': 'Toy Story',
  'overview': "Led by Woody, Andy's toys live happily in his room until Andy's birthday brings Buzz Lightyear onto the scene. Afraid of losing his place in Andy's heart, Woody plots against Buzz. But when circumstances separate Buzz and Woody from their owner, the duo eventually learns to put aside their differences.",
  'popularity': 121.595,
  'poster_path': '/uXDfjJbdP4ijW5hWSBrPrlKpxab.jpg',
  'productio

In [91]:
tmdb_df_raw = pd.json_normalize(tmdb_data)

In [92]:
tmdb_df_raw.head()

Unnamed: 0,adult,backdrop_path,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,vote_average,vote_count,belongs_to_collection.id,belongs_to_collection.name,belongs_to_collection.poster_path,belongs_to_collection.backdrop_path,belongs_to_collection,success,status_code,status_message
0,False,/3Rfvhy1Nl6sSGJwyjb0QiZzZYlB.jpg,30000000.0,"[{'id': 16, 'name': 'Animation'}, {'id': 12, '...",http://toystory.disney.com/toy-story,862.0,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",...,7.965,16207.0,10194.0,Toy Story Collection,/7G9915LfUQ2lVfwMEEhDsn3kT4B.jpg,/9FBwqcd9IRruEDUrTdcaafOMKUq.jpg,,,,
1,False,/pYw10zrqfkdm3yD9JTO6vEGQhKy.jpg,65000000.0,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",http://www.sonypictures.com/movies/jumanji/,8844.0,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,...,7.2,9368.0,495527.0,Jumanji Collection,/6sjMsBcIuqU44GpG5tL33KUFOQR.jpg,/9PCsWrw1GvrZkrd1GCxRqscgZu0.jpg,,,,
2,False,/1J4Z7VhdAgtdd97nCxY7dcBpjGT.jpg,25000000.0,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...",,15602.0,tt0113228,en,Grumpier Old Men,A family wedding reignites the ancient feud be...,...,6.441,316.0,119050.0,Grumpy Old Men Collection,/nLvUdqgPgm3F85NMCii9gVFUcet.jpg,/hypTnLot2z8wpFS7qwsQHW1uV8u.jpg,,,,
3,False,/jZjoEKXMTDoZAGdkjhAdJaKtXSN.jpg,16000000.0,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",,31357.0,tt0114885,en,Waiting to Exhale,"Cheated on, mistreated and stepped on, the wom...",...,6.3,128.0,,,,,,,,
4,False,/lEsjVrGU21BeJjF5AF9EWsihDpw.jpg,0.0,"[{'id': 35, 'name': 'Comedy'}, {'id': 10751, '...",,11862.0,tt0113041,en,Father of the Bride Part II,Just when George Banks has recovered from his ...,...,6.228,621.0,96871.0,Father of the Bride (Steve Martin) Collection,/nts4iOmNnq7GNicycMJ9pSAn204.jpg,/7qwE57OVZmMJChBpLEbJEmzUydk.jpg,,,,


In [98]:
# exporting raw dataframe
tmdb_df_raw.to_csv('tmdb_raw_data.csv', index=False)

### OMDB API Request

In [100]:
# will be using TMDB data to get data from OMDB, as the IMDB ids in the movielens data appear to be slightly incorrect

In [127]:
tmdb_df_raw = pd.read_csv('tmdb_raw_data.csv')

In [128]:
tmdb_df_raw.head()

Unnamed: 0,adult,backdrop_path,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,vote_average,vote_count,belongs_to_collection.id,belongs_to_collection.name,belongs_to_collection.poster_path,belongs_to_collection.backdrop_path,belongs_to_collection,success,status_code,status_message
0,False,/3Rfvhy1Nl6sSGJwyjb0QiZzZYlB.jpg,30000000,"[{'id': 16, 'name': 'Animation'}, {'id': 12, '...",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",...,7.965,16207,10194.0,Toy Story Collection,/7G9915LfUQ2lVfwMEEhDsn3kT4B.jpg,/9FBwqcd9IRruEDUrTdcaafOMKUq.jpg,,,,
1,False,/pYw10zrqfkdm3yD9JTO6vEGQhKy.jpg,65000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",http://www.sonypictures.com/movies/jumanji/,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,...,7.2,9368,495527.0,Jumanji Collection,/6sjMsBcIuqU44GpG5tL33KUFOQR.jpg,/9PCsWrw1GvrZkrd1GCxRqscgZu0.jpg,,,,
2,False,/1J4Z7VhdAgtdd97nCxY7dcBpjGT.jpg,25000000,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...",,15602,tt0113228,en,Grumpier Old Men,A family wedding reignites the ancient feud be...,...,6.441,316,119050.0,Grumpy Old Men Collection,/nLvUdqgPgm3F85NMCii9gVFUcet.jpg,/hypTnLot2z8wpFS7qwsQHW1uV8u.jpg,,,,
3,False,/jZjoEKXMTDoZAGdkjhAdJaKtXSN.jpg,16000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",,31357,tt0114885,en,Waiting to Exhale,"Cheated on, mistreated and stepped on, the wom...",...,6.3,128,,,,,,,,
4,False,/lEsjVrGU21BeJjF5AF9EWsihDpw.jpg,0,"[{'id': 35, 'name': 'Comedy'}, {'id': 10751, '...",,11862,tt0113041,en,Father of the Bride Part II,Just when George Banks has recovered from his ...,...,6.228,621,96871.0,Father of the Bride (Steve Martin) Collection,/nts4iOmNnq7GNicycMJ9pSAn204.jpg,/7qwE57OVZmMJChBpLEbJEmzUydk.jpg,,,,


In [129]:
def omdb_request(imdb_id):
    """
    Constructing an api request for OMDB (the Open Movie Database) based on a given id, 
    then returning the json file
    
    Parameters:
        imdb_id: movie id from IMDB
        
    Returns:
        omdb_data: json data on given id
    """
    # construct url
    url = 'http://www.omdbapi.com/?apikey=' + omdb_secret + '&i=' + imdb_id
        
    payload = {}
    headers = {}
    
    # perform get request
    response = requests.get(url, headers=headers, data = payload)
    
    # process request into usable JSON file
    omdb_response = json.loads(response.text)
    omdb_data.append(omdb_response)

    return

In [130]:
# testing call
omdb_data = []
omdb_request('tt0114709')

In [131]:
omdb_data

[{'Title': 'Toy Story',
  'Year': '1995',
  'Rated': 'G',
  'Released': '22 Nov 1995',
  'Runtime': '81 min',
  'Genre': 'Animation, Adventure, Comedy',
  'Director': 'John Lasseter',
  'Writer': 'John Lasseter, Pete Docter, Andrew Stanton',
  'Actors': 'Tom Hanks, Tim Allen, Don Rickles',
  'Plot': "A cowboy doll is profoundly threatened and jealous when a new spaceman action figure supplants him as top toy in a boy's bedroom.",
  'Language': 'English',
  'Country': 'United States',
  'Awards': 'Nominated for 3 Oscars. 28 wins & 23 nominations total',
  'Poster': 'https://m.media-amazon.com/images/M/MV5BMDU2ZWJlMjktMTRhMy00ZTA5LWEzNDgtYmNmZTEwZTViZWJkXkEyXkFqcGdeQXVyNDQ2OTk4MzI@._V1_SX300.jpg',
  'Ratings': [{'Source': 'Internet Movie Database', 'Value': '8.3/10'},
   {'Source': 'Rotten Tomatoes', 'Value': '100%'},
   {'Source': 'Metacritic', 'Value': '95/100'}],
  'Metascore': '95',
  'imdbRating': '8.3',
  'imdbVotes': '994,483',
  'imdbID': 'tt0114709',
  'Type': 'movie',
  'DVD': 

In [135]:
omdb_data = []
def omdb_req_loop():
    """
    Iterates through the tmdb dataframe & runs an API request to omdb for each movie id
           
    Returns:
        Complete json data for every id in our movie dataframe
    """
    
    num_rows = tmdb_df_raw.shape[0]
    row_count = 0
    
    while row_count < num_rows:
        omdb_request((tmdb_df_raw.iloc[row_count,6]))
        row_count +=1

In [136]:
omdb_req_loop()

IndexError: index 9626 is out of bounds for axis 0 with size 9626

In [137]:
omdb_data

[{'Title': 'Toy Story',
  'Year': '1995',
  'Rated': 'G',
  'Released': '22 Nov 1995',
  'Runtime': '81 min',
  'Genre': 'Animation, Adventure, Comedy',
  'Director': 'John Lasseter',
  'Writer': 'John Lasseter, Pete Docter, Andrew Stanton',
  'Actors': 'Tom Hanks, Tim Allen, Don Rickles',
  'Plot': "A cowboy doll is profoundly threatened and jealous when a new spaceman action figure supplants him as top toy in a boy's bedroom.",
  'Language': 'English',
  'Country': 'United States',
  'Awards': 'Nominated for 3 Oscars. 28 wins & 23 nominations total',
  'Poster': 'https://m.media-amazon.com/images/M/MV5BMDU2ZWJlMjktMTRhMy00ZTA5LWEzNDgtYmNmZTEwZTViZWJkXkEyXkFqcGdeQXVyNDQ2OTk4MzI@._V1_SX300.jpg',
  'Ratings': [{'Source': 'Internet Movie Database', 'Value': '8.3/10'},
   {'Source': 'Rotten Tomatoes', 'Value': '100%'},
   {'Source': 'Metacritic', 'Value': '95/100'}],
  'Metascore': '95',
  'imdbRating': '8.3',
  'imdbVotes': '994,483',
  'imdbID': 'tt0114709',
  'Type': 'movie',
  'DVD': 

In [139]:
omdb_df_raw = pd.json_normalize(omdb_data)

In [140]:
omdb_df_raw.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9626 entries, 0 to 9625
Data columns (total 29 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Title         9626 non-null   object
 1   Year          9626 non-null   object
 2   Rated         9626 non-null   object
 3   Released      9626 non-null   object
 4   Runtime       9626 non-null   object
 5   Genre         9626 non-null   object
 6   Director      9626 non-null   object
 7   Writer        9626 non-null   object
 8   Actors        9626 non-null   object
 9   Plot          9626 non-null   object
 10  Language      9626 non-null   object
 11  Country       9626 non-null   object
 12  Awards        9626 non-null   object
 13  Poster        9626 non-null   object
 14  Ratings       9626 non-null   object
 15  Metascore     9626 non-null   object
 16  imdbRating    9626 non-null   object
 17  imdbVotes     9626 non-null   object
 18  imdbID        9626 non-null   object
 19  Type  

In [142]:
omdb_df_raw.head()

Unnamed: 0,Title,Year,Rated,Released,Runtime,Genre,Director,Writer,Actors,Plot,...,Type,DVD,BoxOffice,Production,Website,Response,Season,Episode,seriesID,totalSeasons
0,Toy Story,1995,G,22 Nov 1995,81 min,"Animation, Adventure, Comedy",John Lasseter,"John Lasseter, Pete Docter, Andrew Stanton","Tom Hanks, Tim Allen, Don Rickles",A cowboy doll is profoundly threatened and jea...,...,movie,23 Mar 2010,"$223,225,679",,,True,,,,
1,Jumanji,1995,PG,15 Dec 1995,104 min,"Adventure, Comedy, Family",Joe Johnston,"Jonathan Hensleigh, Greg Taylor, Jim Strain","Robin Williams, Kirsten Dunst, Bonnie Hunt",When two kids find and play a magical board ga...,...,movie,,"$100,499,940",,,True,,,,
2,Grumpier Old Men,1995,PG-13,22 Dec 1995,101 min,"Comedy, Romance",Howard Deutch,Mark Steven Johnson,"Walter Matthau, Jack Lemmon, Ann-Margret",John and Max resolve to save their beloved bai...,...,movie,15 May 2007,"$71,518,503",,,True,,,,
3,Waiting to Exhale,1995,R,22 Dec 1995,124 min,"Comedy, Drama, Romance",Forest Whitaker,"Terry McMillan, Ron Bass","Whitney Houston, Angela Bassett, Loretta Devine","Based on Terry McMillan's novel, this film fol...",...,movie,20 Jul 1999,"$67,052,156",,,True,,,,
4,Father of the Bride Part II,1995,PG,08 Dec 1995,106 min,"Comedy, Family, Romance",Charles Shyer,"Albert Hackett, Frances Goodrich, Nancy Meyers","Steve Martin, Diane Keaton, Martin Short",George Banks must deal not only with his daugh...,...,movie,25 Jan 2005,"$76,594,107",,,True,,,,


In [143]:
# exporting raw dataframe
omdb_df_raw.to_csv('omdb_raw_data.csv', index=False)