# Extracting Movies with tmbd api


## References

- Getting Started 
    - https://developers.themoviedb.org/3/getting-started/introduction

- API info saved in ~/.secret/tmdg_api.json
    - https://www.themoviedb.org/settings/api
```python
f"https://api.themoviedb.org/3/movie/76341?api_key={api_key}"
```

- How to authenticate?
    - [Authentification](https://developers.themoviedb.org/3/getting-started/authentication)
    - [Create auth token](https://developers.themoviedb.org/3/authentication/create-request-token)

In [1]:
import json
import pandas as pd
import requests



In [2]:
with open('/Users/jamesirving/.secret/tmdb_api.json') as f:
    api_info = json.load(f)
api_key = api_info['api-key']

## Searching With Discover

In [3]:
url = f"https://api.themoviedb.org/3/discover/movie?api_key={api_key}&language=en-US&sort_by=popularity.desc&include_adult=false&include_video=false&page=1"
url+='&year=2018'

In [4]:
resp = requests.get(url)
resp

<Response [200]>

In [5]:
resp.json()

{'page': 1,
 'results': [{'adult': False,
   'backdrop_path': '/VuukZLgaCrho2Ar8Scl9HtV3yD.jpg',
   'genre_ids': [878, 28],
   'id': 335983,
   'original_language': 'en',
   'original_title': 'Venom',
   'overview': 'Investigative journalist Eddie Brock attempts a comeback following a scandal, but accidentally becomes the host of Venom, a violent, super powerful alien symbiote. Soon, he must rely on his newfound powers to protect the world from a shadowy organization looking for a symbiote of their own.',
   'popularity': 433.549,
   'poster_path': '/2uNW4WbgBXL25BAbXGLnLqX71Sw.jpg',
   'release_date': '2018-09-28',
   'title': 'Venom',
   'video': False,
   'vote_average': 6.8,
   'vote_count': 11239},
  {'adult': False,
   'backdrop_path': '/lmZFxXgJE3vgrciwuDib0N8CfQo.jpg',
   'genre_ids': [12, 28, 878],
   'id': 299536,
   'original_language': 'en',
   'original_title': 'Avengers: Infinity War',
   'overview': 'As the Avengers and their allies have continued to protect the world fr

In [7]:
df = pd.read_csv('movies_combined_tmdb_id_for_api.csv')
df

Unnamed: 0,tmdb_id,tconst,primary_title,original_title,movie,runtime_minutes,genres,genre_ids,averagerating,numvotes,release_date_x,release_date_y,vote_average,vote_count,popularity,domestic_gross,production_budget,worldwide_gross
0,116977,tt0249516,Foodfight!,Foodfight!,Foodfight!,91.0,"Action,Animation,Comedy","[16, 28, 35, 10751]",1.9,8248,2012-12-31,2013-05-07,2.1,46,4.705,0,45000000,73706
1,308024,tt0326592,The Overnight,The Overnight,The Overnight,88.0,MISSING,"[9648, 35]",7.5,24,2015-06-19,2015-06-19,6.0,200,6.576,1109808,200000,1165996
2,308024,tt3844362,The Overnight,The Overnight,The Overnight,79.0,"Comedy,Mystery","[9648, 35]",6.1,14828,2015-06-19,2015-06-19,6.0,200,6.576,1109808,200000,1165996
3,83770,tt0337692,On the Road,On the Road,On the Road,124.0,"Adventure,Drama,Romance","[12, 18]",6.1,37886,2013-03-22,2012-12-21,5.6,518,8.919,720828,25000000,9313302
4,83770,tt4339118,On the Road,On the Road,On the Road,89.0,Drama,"[12, 18]",6.0,6,2013-03-22,2012-12-21,5.6,518,8.919,720828,25000000,9313302
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1862,547590,tt7374952,El Chicano,El Chicano,El Chicano,108.0,Drama,"[18, 28, 80]",5.7,290,2019-05-03,2019-05-03,9.0,1,5.274,700261,8000000,700261
1863,476968,tt7388562,"Paul, Apostle of Christ","Paul, Apostle of Christ","Paul, Apostle of Christ",108.0,"Adventure,Biography,Drama",[36],6.7,5662,2018-03-23,2018-03-28,7.1,98,12.005,17547999,5000000,25529498
1864,491418,tt7401588,Instant Family,Instant Family,Instant Family,118.0,"Comedy,Drama","[35, 18]",7.4,46728,2018-11-16,2018-11-16,7.6,782,22.634,67363237,48000000,119736188
1865,493922,tt7784604,Hereditary,Hereditary,Hereditary,127.0,"Drama,Horror,Mystery","[27, 9648, 53]",7.3,151571,2018-06-08,2018-06-08,7.0,2491,26.185,44069456,10000000,70133905


In [8]:
import os,glob
dftm = pd.read_csv('zippedData/tmdb.movies.csv.gz',index_col=0)

dftm = dftm.loc[dftm['original_language']=='en']
dftm['release_date'] = pd.to_datetime(dftm['release_date'])
dftm

Unnamed: 0,genre_ids,id,original_language,original_title,popularity,release_date,title,vote_average,vote_count
0,"[12, 14, 10751]",12444,en,Harry Potter and the Deathly Hallows: Part 1,33.533,2010-11-19,Harry Potter and the Deathly Hallows: Part 1,7.7,10788
1,"[14, 12, 16, 10751]",10191,en,How to Train Your Dragon,28.734,2010-03-26,How to Train Your Dragon,7.7,7610
2,"[12, 28, 878]",10138,en,Iron Man 2,28.515,2010-05-07,Iron Man 2,6.8,12368
3,"[16, 35, 10751]",862,en,Toy Story,28.005,1995-11-22,Toy Story,7.9,10174
4,"[28, 878, 12]",27205,en,Inception,27.920,2010-07-16,Inception,8.3,22186
...,...,...,...,...,...,...,...,...,...
26512,"[27, 18]",488143,en,Laboratory Conditions,0.600,2018-10-13,Laboratory Conditions,0.0,1
26513,"[18, 53]",485975,en,_EXHIBIT_84xxx_,0.600,2018-05-01,_EXHIBIT_84xxx_,0.0,1
26514,"[14, 28, 12]",381231,en,The Last One,0.600,2018-10-01,The Last One,0.0,1
26515,"[10751, 12, 28]",366854,en,Trailer Made,0.600,2018-06-22,Trailer Made,0.0,1


In [9]:
dftm = dftm.set_index('release_date').sort_index()#.index
dftm = dftm.loc['2010':]
dftm

Unnamed: 0_level_0,genre_ids,id,original_language,original_title,popularity,title,vote_average,vote_count
release_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2010-01-01,[],433335,en,Servants of Mercy,0.624,Servants of Mercy,8.0,1
2010-01-01,"[28, 878]",100689,en,Swine,0.624,Swine,6.0,1
2010-01-01,[35],59982,en,Meeting Spencer,0.624,Meeting Spencer,6.0,1
2010-01-01,[],141991,en,My Eyes Won't Dry 3 - Tubular Addiction,0.600,My Eyes Won't Dry 3 - Tubular Addiction,8.5,1
2010-01-01,[35],144907,en,Tetherball: The Movie,0.624,Tetherball: The Movie,5.0,1
...,...,...,...,...,...,...,...,...
2019-05-17,[18],481880,en,Trial by Fire,4.480,Trial by Fire,7.0,3
2019-05-17,"[18, 9648, 53]",411144,en,We Have Always Lived in the Castle,14.028,We Have Always Lived in the Castle,5.2,24
2019-06-28,[99],541577,en,This Changes Everything,3.955,This Changes Everything,0.0,1
2019-06-28,"[10749, 18]",428836,en,Ophelia,8.715,Ophelia,0.0,4


### API Authentification
- Two options
    - Api Key ( v3)
    - Bearer Token (v4 - better?)

In [10]:
base_url = f"https://api.themoviedb.org/3/movie/76341?api_key={api_key}"
# base_url

In [11]:
import requests
## get session id
def get_session_id():
    token_url = f"https://api.themoviedb.org/3/authentication/token/new?api_key={api_key}"
    response = requests.get(token_url)
    display(response)
    return response.json()

In [12]:
sid = get_session_id()
sid.keys()

<Response [200]>

dict_keys(['success', 'expires_at', 'request_token'])

In [13]:
sid_token = sid['request_token'];

In [14]:
#?f"Authorization: Bearer {access_token}"

# def get_bearer_token(sid=None):
#     """https://developers.themoviedb.org/3/authentication/how-do-i-generate-a-session-id
#     1. Create a new 
#     2. Get the user to authorize the request token
#     3. Create a new  with the athorized request token"""
#     if sid is None:
#         sid = get_session_id()
        
#     if isinstance(sid,dict):
#         token = sid['request_token']
#     elif isintance(sid,str):
#         token = sid
        
#     header = f"Authorization: Bearer {token}"
#     requests.get(url = , headers=header )
        
# auth_url - f"https://www.themoviedb.org/authenticate/{REQUEST_TOKEN}"


In [15]:
ratings

NameError: name 'ratings' is not defined

df['tmdb_id']

### Make DF

In [16]:
DF = pd.merge(df,dftm,left_on='tmdb_id',right_on='id',how='inner')
DF

Unnamed: 0,tmdb_id,tconst,primary_title,original_title_x,movie,runtime_minutes,genres,genre_ids_x,averagerating,numvotes,...,production_budget,worldwide_gross,genre_ids_y,id,original_language,original_title_y,popularity_y,title,vote_average_y,vote_count_y
0,116977,tt0249516,Foodfight!,Foodfight!,Foodfight!,91.0,"Action,Animation,Comedy","[16, 28, 35, 10751]",1.9,8248,...,45000000,73706,"[16, 28, 35, 10751]",116977,en,Foodfight!,4.705,Foodfight!,2.1,46
1,308024,tt0326592,The Overnight,The Overnight,The Overnight,88.0,MISSING,"[9648, 35]",7.5,24,...,200000,1165996,"[9648, 35]",308024,en,The Overnight,6.576,The Overnight,6.0,200
2,308024,tt3844362,The Overnight,The Overnight,The Overnight,79.0,"Comedy,Mystery","[9648, 35]",6.1,14828,...,200000,1165996,"[9648, 35]",308024,en,The Overnight,6.576,The Overnight,6.0,200
3,83770,tt0337692,On the Road,On the Road,On the Road,124.0,"Adventure,Drama,Romance","[12, 18]",6.1,37886,...,25000000,9313302,"[12, 18]",83770,en,On the Road,8.919,On the Road,5.6,518
4,83770,tt4339118,On the Road,On the Road,On the Road,89.0,Drama,"[12, 18]",6.0,6,...,25000000,9313302,"[12, 18]",83770,en,On the Road,8.919,On the Road,5.6,518
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2030,547590,tt7374952,El Chicano,El Chicano,El Chicano,108.0,Drama,"[18, 28, 80]",5.7,290,...,8000000,700261,"[18, 28, 80]",547590,en,El Chicano,5.274,El Chicano,9.0,1
2031,476968,tt7388562,"Paul, Apostle of Christ","Paul, Apostle of Christ","Paul, Apostle of Christ",108.0,"Adventure,Biography,Drama",[36],6.7,5662,...,5000000,25529498,[36],476968,en,"Paul, Apostle of Christ",12.005,"Paul, Apostle of Christ",7.1,98
2032,491418,tt7401588,Instant Family,Instant Family,Instant Family,118.0,"Comedy,Drama","[35, 18]",7.4,46728,...,48000000,119736188,"[35, 18]",491418,en,Instant Family,22.634,Instant Family,7.6,782
2033,493922,tt7784604,Hereditary,Hereditary,Hereditary,127.0,"Drama,Horror,Mystery","[27, 9648, 53]",7.3,151571,...,10000000,70133905,"[27, 9648, 53]",493922,en,Hereditary,26.185,Hereditary,7.0,2491


In [17]:
DF.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2035 entries, 0 to 2034
Data columns (total 26 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   tmdb_id            2035 non-null   int64  
 1   tconst             2035 non-null   object 
 2   primary_title      2035 non-null   object 
 3   original_title_x   2035 non-null   object 
 4   movie              2035 non-null   object 
 5   runtime_minutes    2035 non-null   float64
 6   genres             2035 non-null   object 
 7   genre_ids_x        2035 non-null   object 
 8   averagerating      2035 non-null   float64
 9   numvotes           2035 non-null   int64  
 10  release_date_x     2035 non-null   object 
 11  release_date_y     2035 non-null   object 
 12  vote_average_x     2035 non-null   float64
 13  vote_count_x       2035 non-null   int64  
 14  popularity_x       2035 non-null   float64
 15  domestic_gross     2035 non-null   int64  
 16  production_budget  2035 

In [18]:
dftm['id']

release_date
2010-01-01    433335
2010-01-01    100689
2010-01-01     59982
2010-01-01    141991
2010-01-01    144907
               ...  
2019-05-17    481880
2019-05-17    411144
2019-06-28    541577
2019-06-28    428836
2020-12-25    570704
Name: id, Length: 23106, dtype: int64

In [19]:
movie_id = dftm['id'][0]
print(dftm.iloc[0])
print(movie_id)

genre_ids                           []
id                              433335
original_language                   en
original_title       Servants of Mercy
popularity                       0.624
title                Servants of Mercy
vote_average                         8
vote_count                           1
Name: 2010-01-01 00:00:00, dtype: object
433335


In [20]:
movie_id = DF['tmdb_id'][0]
movie_id

116977

In [21]:
movie_url = f"https://api.themoviedb.org/3/movie/{movie_id}?api_key={api_key}&language=en-US"
movie_url

'https://api.themoviedb.org/3/movie/116977?api_key=e9a24ea6085f06fdd272dab49ad70c2e&language=en-US'

In [22]:
response = requests.get(movie_url)
response

<Response [200]>

In [23]:
response.json().keys()

dict_keys(['adult', 'backdrop_path', 'belongs_to_collection', 'budget', 'genres', 'homepage', 'id', 'imdb_id', 'original_language', 'original_title', 'overview', 'popularity', 'poster_path', 'production_companies', 'production_countries', 'release_date', 'revenue', 'runtime', 'spoken_languages', 'status', 'tagline', 'title', 'video', 'vote_average', 'vote_count'])

In [24]:
test_ids = DF['tmdb_id'].iloc[:4]
test_ids

0    116977
1    308024
2    308024
3     83770
Name: tmdb_id, dtype: int64

In [25]:
DF.head()

Unnamed: 0,tmdb_id,tconst,primary_title,original_title_x,movie,runtime_minutes,genres,genre_ids_x,averagerating,numvotes,...,production_budget,worldwide_gross,genre_ids_y,id,original_language,original_title_y,popularity_y,title,vote_average_y,vote_count_y
0,116977,tt0249516,Foodfight!,Foodfight!,Foodfight!,91.0,"Action,Animation,Comedy","[16, 28, 35, 10751]",1.9,8248,...,45000000,73706,"[16, 28, 35, 10751]",116977,en,Foodfight!,4.705,Foodfight!,2.1,46
1,308024,tt0326592,The Overnight,The Overnight,The Overnight,88.0,MISSING,"[9648, 35]",7.5,24,...,200000,1165996,"[9648, 35]",308024,en,The Overnight,6.576,The Overnight,6.0,200
2,308024,tt3844362,The Overnight,The Overnight,The Overnight,79.0,"Comedy,Mystery","[9648, 35]",6.1,14828,...,200000,1165996,"[9648, 35]",308024,en,The Overnight,6.576,The Overnight,6.0,200
3,83770,tt0337692,On the Road,On the Road,On the Road,124.0,"Adventure,Drama,Romance","[12, 18]",6.1,37886,...,25000000,9313302,"[12, 18]",83770,en,On the Road,8.919,On the Road,5.6,518
4,83770,tt4339118,On the Road,On the Road,On the Road,89.0,Drama,"[12, 18]",6.0,6,...,25000000,9313302,"[12, 18]",83770,en,On the Road,8.919,On the Road,5.6,518


In [26]:
movie_id = test_ids[0]
movie_url = f"https://api.themoviedb.org/3/movie/{movie_id}?api_key={api_key}&language=en-US"
response = requests.get(movie_url)

In [27]:
response.json()

{'adult': False,
 'backdrop_path': '/w5XEKGEGG2AXp7tiXjcsEuxn7i5.jpg',
 'belongs_to_collection': None,
 'budget': 65000000,
 'genres': [{'id': 16, 'name': 'Animation'},
  {'id': 28, 'name': 'Action'},
  {'id': 35, 'name': 'Comedy'},
  {'id': 10751, 'name': 'Family'}],
 'homepage': 'http://www.thresholdanimationstudios.com/video.php?id=foodfight',
 'id': 116977,
 'imdb_id': 'tt0249516',
 'original_language': 'en',
 'original_title': 'Foodfight!',
 'overview': 'The evil Brand X joins a supermarket that becomes a city after closing time.',
 'popularity': 8.324,
 'poster_path': '/R1zn75Yz4wpdd85byqhP7mT6No.jpg',
 'production_companies': [{'id': 4174,
   'logo_path': None,
   'name': 'Threshold Entertainment',
   'origin_country': ''},
  {'id': 1632,
   'logo_path': '/cisLn1YAUuptXVBa0xjq7ST9cH0.png',
   'name': 'Lionsgate',
   'origin_country': 'US'},
  {'id': 133802,
   'logo_path': None,
   'name': 'C47 Productions',
   'origin_country': ''},
  {'id': 133803,
   'logo_path': None,
   'na

In [28]:
def get_movie_data(movie_id):
    """Returns specific keys of interest by tmbd movie_id"""
    movie_url = f"https://api.themoviedb.org/3/movie/{movie_id}?api_key={api_key}&language=en-US"
    response = requests.get(movie_url)

    keys_of_interest = ['id','imdb_id','title','budget','release_date',
                        'revenue','runtime', 'original_language',
                        'original_title', ('spoken_languages','name'),
                        'vote_average','vote_count', 
                        'genres','poster_path','production_companies']
    data ={}
    for key in keys_of_interest:
        
        if isinstance(key,tuple):
            key=key[0]
            subkey = key[1]
            
            val = response.json()[key]

        else:
            val = response.json()[key]
        data[key] = val
    return data

In [29]:
get_movie_data(test_ids[0])

{'id': 116977,
 'imdb_id': 'tt0249516',
 'title': 'Foodfight!',
 'budget': 65000000,
 'release_date': '2012-06-15',
 'revenue': 73706,
 'runtime': 87,
 'original_language': 'en',
 'original_title': 'Foodfight!',
 'spoken_languages': [{'iso_639_1': 'en', 'name': 'English'}],
 'vote_average': 1.7,
 'vote_count': 79,
 'genres': [{'id': 16, 'name': 'Animation'},
  {'id': 28, 'name': 'Action'},
  {'id': 35, 'name': 'Comedy'},
  {'id': 10751, 'name': 'Family'}],
 'poster_path': '/R1zn75Yz4wpdd85byqhP7mT6No.jpg',
 'production_companies': [{'id': 4174,
   'logo_path': None,
   'name': 'Threshold Entertainment',
   'origin_country': ''},
  {'id': 1632,
   'logo_path': '/cisLn1YAUuptXVBa0xjq7ST9cH0.png',
   'name': 'Lionsgate',
   'origin_country': 'US'},
  {'id': 133802,
   'logo_path': None,
   'name': 'C47 Productions',
   'origin_country': ''},
  {'id': 133803,
   'logo_path': None,
   'name': 'Natural Image',
   'origin_country': ''},
  {'id': 133804,
   'logo_path': None,
   'name': 'Story

## [BOOKMARK] Rated Movies
- **https://www.themoviedb.org/talk/57f7aa18925141763600106e?language=en-US**
- 
https://developers.themoviedb.org/3/movies/get-movie-details

In [30]:
import numpy as np

## Get records for test_ids
import time 
sleep_times = np.linspace(0.5,1.2,5)
sleep_times

array([0.5  , 0.675, 0.85 , 1.025, 1.2  ])

In [31]:
DF['tmdb_id']

0       116977
1       308024
2       308024
3        83770
4        83770
         ...  
2030    547590
2031    476968
2032    491418
2033    493922
2034    532908
Name: tmdb_id, Length: 2035, dtype: int64

In [32]:
from tqdm import trange,tqdm
# tqdm()

In [33]:
indices = range(0,len(dftm['id']))
indices

range(0, 23106)

## Extraction Loop

In [34]:
## Extracting IDs
really_run_it = input('Are you SURE you want to run this? It took 35 mins')

if really_run_it:
    records = []

    for i in trange(len(DF['id'])):
        id_ = DF['id'][i]
        time.sleep(np.random.choice(sleep_times))

        try:
            records.append(get_movie_data(id_))
        except:
            print(f"Error retrieving {id_}")
            
        if i%500==0:
            try:
                results = pd.DataFrame.from_records(records)
                results.to_csv(f"tmdb_extract_{i}_of_{len(DF['id'])}.csv")
            except:
                print(f'ERROR SAVING OUTPUT ON ID#{i}')
    full_results = pd.DataFrame.from_records(records)
    full_results.to_csv('full_extract_tmdb.csv')
                            
                               

Are you SURE you want to run this? It took 35 mins


# BOOKMARK 06/03

In [35]:
full_results = pd.DataFrame.from_records(records)
# full_results.to_csv('example_extract_tmdb.csv')
full_results

NameError: name 'records' is not defined

In [None]:
full_results

In [None]:
def unnest_cell(cell,key):
    output=[]
    for item in cell:
        output.append(item[key])
    return output

In [None]:
results=full_results.copy()
cols_to_explode={'genres':['id','name'],
                'spoken_languages':['name'],
                'production_companies':['id','name','origin_country']}

for col,keys in cols_to_explode.items():
    for key in keys:
        results[col+'_'+key] = results[col].apply(lambda x: unnest_cell(x,key))
    results.drop(col,axis=1,inplace=True)
results

In [None]:
results.to_csv('api_results_060320.csv',index=False)

In [None]:
# i=1260
# results.to_csv(f"tmdb_extract_{i}_of_{len(dftm['id'])}.csv")

In [None]:
# results.to_csv('cleaned_results_example.csv')

In [None]:
results

In [None]:
results.info()

In [None]:
results['production_companies'][0]

# [i/o] BOOKMARK 0603-Ratings

In [15]:
df= pd.read_csv('api_results_060320.csv')

df.head()

Unnamed: 0,id,imdb_id,title,budget,release_date,revenue,runtime,original_language,original_title,vote_average,vote_count,poster_path,genres_id,genres_name,spoken_languages_name,production_companies_id,production_companies_name,production_companies_origin_country
0,116977,tt0249516,Foodfight!,65000000,2012-06-15,73706,87.0,en,Foodfight!,1.7,76,/R1zn75Yz4wpdd85byqhP7mT6No.jpg,"[16, 28, 35, 10751]","['Animation', 'Action', 'Comedy', 'Family']",['English'],"[4174, 1632, 133802, 133803, 133804]","['Threshold Entertainment', 'Lionsgate', 'C47 ...","['', 'US', '', '', '']"
1,308024,tt3844362,The Overnight,0,2015-06-19,1100000,80.0,en,The Overnight,6.0,222,/65L466UR3h1jw8mTJhGmMZlj0eT.jpg,"[9648, 35]","['Mystery', 'Comedy']","['English', 'Français']","[1974, 63631]","['Duplass Brothers Productions', ""Gettin' Rad ...","['US', '']"
2,308024,tt3844362,The Overnight,0,2015-06-19,1100000,80.0,en,The Overnight,6.0,222,/65L466UR3h1jw8mTJhGmMZlj0eT.jpg,"[9648, 35]","['Mystery', 'Comedy']","['English', 'Français']","[1974, 63631]","['Duplass Brothers Productions', ""Gettin' Rad ...","['US', '']"
3,83770,tt0337692,On the Road,25000000,2012-05-22,8784318,137.0,en,On the Road,5.6,600,/k7LQteD02p3VHixbS6NXHkFdFwT.jpg,"[12, 18]","['Adventure', 'Drama']","['English', 'Français']","[8372, 70, 79077, 614, 83, 10611, 346, 21914, ...","['SPAD Films', 'American Zoetrope', 'Jerry Lei...","['', 'US', '', '', 'FR', 'FR', 'BR', 'AR', 'CA..."
4,83770,tt0337692,On the Road,25000000,2012-05-22,8784318,137.0,en,On the Road,5.6,600,/k7LQteD02p3VHixbS6NXHkFdFwT.jpg,"[12, 18]","['Adventure', 'Drama']","['English', 'Français']","[8372, 70, 79077, 614, 83, 10611, 346, 21914, ...","['SPAD Films', 'American Zoetrope', 'Jerry Lei...","['', 'US', '', '', 'FR', 'FR', 'BR', 'AR', 'CA..."


In [16]:
movie_id = df['id'][0]

def get_release_date_ratings(movie_id,as_json=True):
    request_url = f"https://api.themoviedb.org/3/movie/{movie_id}/release_dates?api_key={api_key}"
    response =requests.get(request_url)
    
    if as_json:
        response = response.json()
    return response

In [17]:
response = get_release_date_ratings(movie_id)
print(response.keys())
response['results']

dict_keys(['id', 'results'])


[{'iso_3166_1': 'US',
  'release_dates': [{'certification': 'PG',
    'iso_639_1': '',
    'note': '',
    'release_date': '2013-05-07T00:00:00.000Z',
    'type': 3}]},
 {'iso_3166_1': 'GB',
  'release_dates': [{'certification': 'PG',
    'iso_639_1': '',
    'note': '',
    'release_date': '2012-06-15T00:00:00.000Z',
    'type': 3}]}]

In [20]:
response['results'][1]['iso_3166_1']

'GB'

In [21]:
pd.DataFrame.from_records(response['results'])

Unnamed: 0,iso_3166_1,release_dates
0,US,"[{'certification': 'PG', 'iso_639_1': '', 'not..."
1,GB,"[{'certification': 'PG', 'iso_639_1': '', 'not..."


In [22]:
pd.DataFrame.from_dict(response['results'])

Unnamed: 0,iso_3166_1,release_dates
0,US,"[{'certification': 'PG', 'iso_639_1': '', 'not..."
1,GB,"[{'certification': 'PG', 'iso_639_1': '', 'not..."


## Using `tmdbsimple` package

In [10]:
# !pip install tmdbsimple
import tmdbsimple as tmdb
tmdb.API_KEY = api_key

ModuleNotFoundError: No module named 'tmdbsimple'

In [40]:
movie = tmdb.Movies(movie_id)
help(movie)
dict(movie)

Help on Movies in module tmdbsimple.movies object:

class Movies(tmdbsimple.base.TMDB)
 |  Movies functionality.
 |  
 |  See: https://developers.themoviedb.org/3/movies
 |  
 |  Method resolution order:
 |      Movies
 |      tmdbsimple.base.TMDB
 |      builtins.object
 |  
 |  Methods defined here:
 |  
 |  __init__(self, id=0)
 |      Initialize self.  See help(type(self)) for accurate signature.
 |  
 |  account_states(self, **kwargs)
 |      This method lets users get the status of whether or not the movie has
 |      been rated or added to their favourite or watch lists. A valid session
 |      id is required.
 |      
 |      Args:
 |          session_id: see Authentication.
 |      
 |      Returns:
 |          A dict representation of the JSON returned from the API.
 |  
 |  alternative_titles(self, **kwargs)
 |      Get the alternative titles for a specific movie id.
 |      
 |      Args:
 |          country: (optional) ISO 3166-1 code.
 |          append_to_response: (opti

TypeError: 'Movies' object is not iterable

In [38]:
movie.external_ids()

{'id': 116977,
 'imdb_id': 'tt0249516',
 'facebook_id': None,
 'instagram_id': None,
 'twitter_id': None}

In [26]:
ids = df['id']
ids

0       116977
1       308024
2       308024
3        83770
4        83770
         ...  
2030    547590
2031    476968
2032    491418
2033    493922
2034    532908
Name: id, Length: 2035, dtype: int64

In [27]:
releases = movie.release_dates()

releases['results']

[{'iso_3166_1': 'US',
  'release_dates': [{'certification': 'PG',
    'iso_639_1': '',
    'note': '',
    'release_date': '2013-05-07T00:00:00.000Z',
    'type': 3}]},
 {'iso_3166_1': 'GB',
  'release_dates': [{'certification': 'PG',
    'iso_639_1': '',
    'note': '',
    'release_date': '2012-06-15T00:00:00.000Z',
    'type': 3}]}]

In [41]:
movie_info = {}
movie_info['id'] = releases['id']

results = releases['results']
results

NameError: name 'releases' is not defined

In [42]:
for r in results:
#     display(r)
    if r['iso_3166_1' ] =='US':
        dates = r['release_dates']
        movie_info['certification'] = [dates[i]['certification'] for i in range(len(dates))]
        movie_info['certification_release_date'] = [dates[i]['release_date'] for i in range(len(dates))]

pd.DataFrame(movie_info)

NameError: name 'results' is not defined

In [30]:
def get_certification_by_id(movie_id,as_df=False):
    ## Get movie and release dates
    movie = tmdb.Movies(movie_id)
    releases = movie.release_dates()

    ## Construct output dict
    movie_info = {}
    movie_info['id'] = releases['id']

    ## Loop through results lists
    results = releases['results']
    
    for r in results:

        if r['iso_3166_1' ] =='US':
            dates = r['release_dates']
            
            date_index = range(len(dates))
            movie_info['certification'] = [dates[i]['certification'] for i in date_index]
            movie_info['certification_release_date'] = [dates[i]['release_date'] for i in date_index]
    if as_df:
        return pd.DataFrame(movie_info)
    else:
        return movie_info
    

In [31]:
df= pd.read_csv('api_results_060320.csv')
df

Unnamed: 0,id,imdb_id,title,budget,release_date,revenue,runtime,original_language,original_title,vote_average,vote_count,poster_path,genres_id,genres_name,spoken_languages_name,production_companies_id,production_companies_name,production_companies_origin_country
0,116977,tt0249516,Foodfight!,65000000,2012-06-15,73706,87.0,en,Foodfight!,1.7,76,/R1zn75Yz4wpdd85byqhP7mT6No.jpg,"[16, 28, 35, 10751]","['Animation', 'Action', 'Comedy', 'Family']",['English'],"[4174, 1632, 133802, 133803, 133804]","['Threshold Entertainment', 'Lionsgate', 'C47 ...","['', 'US', '', '', '']"
1,308024,tt3844362,The Overnight,0,2015-06-19,1100000,80.0,en,The Overnight,6.0,222,/65L466UR3h1jw8mTJhGmMZlj0eT.jpg,"[9648, 35]","['Mystery', 'Comedy']","['English', 'Français']","[1974, 63631]","['Duplass Brothers Productions', ""Gettin' Rad ...","['US', '']"
2,308024,tt3844362,The Overnight,0,2015-06-19,1100000,80.0,en,The Overnight,6.0,222,/65L466UR3h1jw8mTJhGmMZlj0eT.jpg,"[9648, 35]","['Mystery', 'Comedy']","['English', 'Français']","[1974, 63631]","['Duplass Brothers Productions', ""Gettin' Rad ...","['US', '']"
3,83770,tt0337692,On the Road,25000000,2012-05-22,8784318,137.0,en,On the Road,5.6,600,/k7LQteD02p3VHixbS6NXHkFdFwT.jpg,"[12, 18]","['Adventure', 'Drama']","['English', 'Français']","[8372, 70, 79077, 614, 83, 10611, 346, 21914, ...","['SPAD Films', 'American Zoetrope', 'Jerry Lei...","['', 'US', '', '', 'FR', 'FR', 'BR', 'AR', 'CA..."
4,83770,tt0337692,On the Road,25000000,2012-05-22,8784318,137.0,en,On the Road,5.6,600,/k7LQteD02p3VHixbS6NXHkFdFwT.jpg,"[12, 18]","['Adventure', 'Drama']","['English', 'Français']","[8372, 70, 79077, 614, 83, 10611, 346, 21914, ...","['SPAD Films', 'American Zoetrope', 'Jerry Lei...","['', 'US', '', '', 'FR', 'FR', 'BR', 'AR', 'CA..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2030,547590,tt7374952,El Chicano,0,2019-05-03,1370000,148.0,en,El Chicano,7.5,32,/6LSJL2KHUAxSjDTWA5V5Cr7sNNd.jpg,"[18, 28, 80]","['Drama', 'Action', 'Crime']",['English'],[99321],['WarParty Films'],['US']
2031,476968,tt7388562,"Paul, Apostle of Christ",5000000,2018-03-23,22525668,108.0,en,"Paul, Apostle of Christ",6.8,123,/2NUXG94dGMKYgJL1BkJGKynMb3l.jpg,[36],['History'],"['Español', 'English']","[81520, 10156, 105388]","['Outside Da Box', 'Affirm Films', 'ODB Films']","['', 'US', '']"
2032,491418,tt7401588,Instant Family,48000000,2018-11-16,14700000,118.0,en,Instant Family,7.5,1437,/dic3GdmMpxxfkCQfvZnasb5ZkSG.jpg,"[35, 18]","['Comedy', 'Drama']",['English'],"[8537, 4, 119509]","['Closest to the Hole Productions', 'Paramount...","['US', 'US', 'US']"
2033,493922,tt7784604,Hereditary,10000000,2018-06-07,79336821,127.0,en,Hereditary,7.1,3950,/lHV8HHlhwNup2VbpiACtlKzaGIQ.jpg,"[27, 9648, 53]","['Horror', 'Mystery', 'Thriller']",['English'],[24277],['PalmStar Media'],['US']


In [33]:
from tqdm import trange
import time
movie_ratings = []

for i in trange(len(df['id'])):
    movie_id  = df['id'][i]
    try:
        time.sleep(0.1)
        movie_ratings.append(get_certification_by_id(movie_id))
    except:
        print(f"Error when retreiving {movie_id}")


100%|██████████| 2035/2035 [08:43<00:00,  3.89it/s]


In [35]:
ratings_df = pd.DataFrame.from_records(movie_ratings)
ratings_df.head()

Unnamed: 0,id,certification,certification_release_date
0,116977,[PG],[2013-05-07T00:00:00.000Z]
1,308024,"[, R]","[2015-01-23T00:00:00.000Z, 2015-06-19T00:00:00..."
2,308024,"[, R]","[2015-01-23T00:00:00.000Z, 2015-06-19T00:00:00..."
3,83770,[R],[2012-12-21T00:00:00.000Z]
4,83770,[R],[2012-12-21T00:00:00.000Z]


In [50]:
ratings_df

Unnamed: 0,id,certification,certification_release_date
0,116977,[PG],[2013-05-07T00:00:00.000Z]
1,308024,"[, R]","[2015-01-23T00:00:00.000Z, 2015-06-19T00:00:00..."
2,308024,"[, R]","[2015-01-23T00:00:00.000Z, 2015-06-19T00:00:00..."
3,83770,[R],[2012-12-21T00:00:00.000Z]
4,83770,[R],[2012-12-21T00:00:00.000Z]
...,...,...,...
2030,547590,"[, , ]","[2018-09-22T00:00:00.000Z, 2019-05-03T00:00:00..."
2031,476968,"[PG-13, PG-13]","[2018-03-28T00:00:00.000Z, 2018-06-19T00:00:00..."
2032,491418,"[, , PG-13]","[2019-03-05T00:00:00.000Z, 2019-02-19T00:00:00..."
2033,493922,"[, R, R, R]","[2018-09-04T00:00:00.000Z, 2018-01-21T00:00:00..."


In [67]:
def clean_certification(row):
    if isinstance(row,list):
        out = ','.join(set([str(x) for x in row if len(x)>0]))
    else:
        return row
    return out

In [68]:
clean_certification(ratings_df['certification'][1])

'R'

In [70]:
ratings_df['rating'] = ratings_df['certification'].apply(clean_certification)

In [82]:
import numpy as np
dates = ratings_df['certification_release_date'].apply(lambda x:\
                                               x[0] if isinstance(x,list)\
                                               else np.nan)
ratings_df['release_date_cert'] = pd.to_datetime(dates)
ratings_df = ratings_df.drop(['certification','certification_release_date'],axis=1)
ratings_df

Unnamed: 0,id,rating,release_date_cert
0,116977,PG,2013-05-07 00:00:00+00:00
1,308024,R,2015-01-23 00:00:00+00:00
2,308024,R,2015-01-23 00:00:00+00:00
3,83770,R,2012-12-21 00:00:00+00:00
4,83770,R,2012-12-21 00:00:00+00:00
...,...,...,...
2030,547590,,2018-09-22 00:00:00+00:00
2031,476968,PG-13,2018-03-28 00:00:00+00:00
2032,491418,PG-13,2019-03-05 00:00:00+00:00
2033,493922,R,2018-09-04 00:00:00+00:00


In [83]:
# ratings_df.to_csv('api_ratings_060320.csv')

In [88]:
df_merged = pd.merge(ratings_df,df,on='id',how="right")
df_merged

Unnamed: 0,id,rating,release_date_cert,imdb_id,title,budget,release_date,revenue,runtime,original_language,original_title,vote_average,vote_count,poster_path,genres_id,genres_name,spoken_languages_name,production_companies_id,production_companies_name,production_companies_origin_country
0,116977,PG,2013-05-07 00:00:00+00:00,tt0249516,Foodfight!,65000000,2012-06-15,73706,87.0,en,Foodfight!,1.7,76,/R1zn75Yz4wpdd85byqhP7mT6No.jpg,"[16, 28, 35, 10751]","['Animation', 'Action', 'Comedy', 'Family']",['English'],"[4174, 1632, 133802, 133803, 133804]","['Threshold Entertainment', 'Lionsgate', 'C47 ...","['', 'US', '', '', '']"
1,308024,R,2015-01-23 00:00:00+00:00,tt3844362,The Overnight,0,2015-06-19,1100000,80.0,en,The Overnight,6.0,222,/65L466UR3h1jw8mTJhGmMZlj0eT.jpg,"[9648, 35]","['Mystery', 'Comedy']","['English', 'Français']","[1974, 63631]","['Duplass Brothers Productions', ""Gettin' Rad ...","['US', '']"
2,308024,R,2015-01-23 00:00:00+00:00,tt3844362,The Overnight,0,2015-06-19,1100000,80.0,en,The Overnight,6.0,222,/65L466UR3h1jw8mTJhGmMZlj0eT.jpg,"[9648, 35]","['Mystery', 'Comedy']","['English', 'Français']","[1974, 63631]","['Duplass Brothers Productions', ""Gettin' Rad ...","['US', '']"
3,308024,R,2015-01-23 00:00:00+00:00,tt3844362,The Overnight,0,2015-06-19,1100000,80.0,en,The Overnight,6.0,222,/65L466UR3h1jw8mTJhGmMZlj0eT.jpg,"[9648, 35]","['Mystery', 'Comedy']","['English', 'Français']","[1974, 63631]","['Duplass Brothers Productions', ""Gettin' Rad ...","['US', '']"
4,308024,R,2015-01-23 00:00:00+00:00,tt3844362,The Overnight,0,2015-06-19,1100000,80.0,en,The Overnight,6.0,222,/65L466UR3h1jw8mTJhGmMZlj0eT.jpg,"[9648, 35]","['Mystery', 'Comedy']","['English', 'Français']","[1974, 63631]","['Duplass Brothers Productions', ""Gettin' Rad ...","['US', '']"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3362,547590,,2018-09-22 00:00:00+00:00,tt7374952,El Chicano,0,2019-05-03,1370000,148.0,en,El Chicano,7.5,32,/6LSJL2KHUAxSjDTWA5V5Cr7sNNd.jpg,"[18, 28, 80]","['Drama', 'Action', 'Crime']",['English'],[99321],['WarParty Films'],['US']
3363,476968,PG-13,2018-03-28 00:00:00+00:00,tt7388562,"Paul, Apostle of Christ",5000000,2018-03-23,22525668,108.0,en,"Paul, Apostle of Christ",6.8,123,/2NUXG94dGMKYgJL1BkJGKynMb3l.jpg,[36],['History'],"['Español', 'English']","[81520, 10156, 105388]","['Outside Da Box', 'Affirm Films', 'ODB Films']","['', 'US', '']"
3364,491418,PG-13,2019-03-05 00:00:00+00:00,tt7401588,Instant Family,48000000,2018-11-16,14700000,118.0,en,Instant Family,7.5,1437,/dic3GdmMpxxfkCQfvZnasb5ZkSG.jpg,"[35, 18]","['Comedy', 'Drama']",['English'],"[8537, 4, 119509]","['Closest to the Hole Productions', 'Paramount...","['US', 'US', 'US']"
3365,493922,R,2018-09-04 00:00:00+00:00,tt7784604,Hereditary,10000000,2018-06-07,79336821,127.0,en,Hereditary,7.1,3950,/lHV8HHlhwNup2VbpiACtlKzaGIQ.jpg,"[27, 9648, 53]","['Horror', 'Mystery', 'Thriller']",['English'],[24277],['PalmStar Media'],['US']


In [90]:
df_merged =df_merged.drop_duplicates()
df_merged

Unnamed: 0,id,rating,release_date_cert,imdb_id,title,budget,release_date,revenue,runtime,original_language,original_title,vote_average,vote_count,poster_path,genres_id,genres_name,spoken_languages_name,production_companies_id,production_companies_name,production_companies_origin_country
0,116977,PG,2013-05-07 00:00:00+00:00,tt0249516,Foodfight!,65000000,2012-06-15,73706,87.0,en,Foodfight!,1.7,76,/R1zn75Yz4wpdd85byqhP7mT6No.jpg,"[16, 28, 35, 10751]","['Animation', 'Action', 'Comedy', 'Family']",['English'],"[4174, 1632, 133802, 133803, 133804]","['Threshold Entertainment', 'Lionsgate', 'C47 ...","['', 'US', '', '', '']"
1,308024,R,2015-01-23 00:00:00+00:00,tt3844362,The Overnight,0,2015-06-19,1100000,80.0,en,The Overnight,6.0,222,/65L466UR3h1jw8mTJhGmMZlj0eT.jpg,"[9648, 35]","['Mystery', 'Comedy']","['English', 'Français']","[1974, 63631]","['Duplass Brothers Productions', ""Gettin' Rad ...","['US', '']"
5,83770,R,2012-12-21 00:00:00+00:00,tt0337692,On the Road,25000000,2012-05-22,8784318,137.0,en,On the Road,5.6,600,/k7LQteD02p3VHixbS6NXHkFdFwT.jpg,"[12, 18]","['Adventure', 'Drama']","['English', 'Français']","[8372, 70, 79077, 614, 83, 10611, 346, 21914, ...","['SPAD Films', 'American Zoetrope', 'Jerry Lei...","['', 'US', '', '', 'FR', 'FR', 'BR', 'AR', 'CA..."
14,116745,PG,2013-12-25 00:00:00+00:00,tt0359950,The Secret Life of Walter Mitty,90000000,2013-12-18,188133322,114.0,en,The Secret Life of Walter Mitty,7.1,5561,/tY6ypjKOOtujhxiSwTmvA4OZ5IE.jpg,"[12, 35, 18, 14]","['Adventure', 'Comedy', 'Drama', 'Fantasy']",['English'],"[290, 22213, 9118, 12, 10893, 37336, 25, 2932]","['Ingenious Media', 'TSG Entertainment', 'Samu...","['GB', 'US', 'US', 'US', '', '', 'US', 'US']"
15,169917,R,2014-09-19 00:00:00+00:00,tt0365907,A Walk Among the Tombstones,28000000,2014-09-18,53181600,113.0,en,A Walk Among the Tombstones,6.3,1926,/bQTHTZezSudf27mMQtedHf1XpgO.jpg,"[80, 18, 9648, 53]","['Crime', 'Drama', 'Mystery', 'Thriller']",['English'],"[39043, 216, 40106, 11448, 10246, 8532, 40107,...","['Traveling Picture Show Company (TPSC)', 'Jer...","['', '', '', 'US', 'US', '', '', '']"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3362,547590,,2018-09-22 00:00:00+00:00,tt7374952,El Chicano,0,2019-05-03,1370000,148.0,en,El Chicano,7.5,32,/6LSJL2KHUAxSjDTWA5V5Cr7sNNd.jpg,"[18, 28, 80]","['Drama', 'Action', 'Crime']",['English'],[99321],['WarParty Films'],['US']
3363,476968,PG-13,2018-03-28 00:00:00+00:00,tt7388562,"Paul, Apostle of Christ",5000000,2018-03-23,22525668,108.0,en,"Paul, Apostle of Christ",6.8,123,/2NUXG94dGMKYgJL1BkJGKynMb3l.jpg,[36],['History'],"['Español', 'English']","[81520, 10156, 105388]","['Outside Da Box', 'Affirm Films', 'ODB Films']","['', 'US', '']"
3364,491418,PG-13,2019-03-05 00:00:00+00:00,tt7401588,Instant Family,48000000,2018-11-16,14700000,118.0,en,Instant Family,7.5,1437,/dic3GdmMpxxfkCQfvZnasb5ZkSG.jpg,"[35, 18]","['Comedy', 'Drama']",['English'],"[8537, 4, 119509]","['Closest to the Hole Productions', 'Paramount...","['US', 'US', 'US']"
3365,493922,R,2018-09-04 00:00:00+00:00,tt7784604,Hereditary,10000000,2018-06-07,79336821,127.0,en,Hereditary,7.1,3950,/lHV8HHlhwNup2VbpiACtlKzaGIQ.jpg,"[27, 9648, 53]","['Horror', 'Mystery', 'Thriller']",['English'],[24277],['PalmStar Media'],['US']


In [91]:
df_merged.to_csv('__api_results_combined.csv')

In [44]:
ratings_df['certification'].map(len)

TypeError: object of type 'float' has no len()

# Loop to get all IDs' release dates and certifications

In [128]:
pd.DataFrame(movie.release_dates())

Unnamed: 0,id,results
0,116977,"{'iso_3166_1': 'US', 'release_dates': [{'certi..."
1,116977,"{'iso_3166_1': 'GB', 'release_dates': [{'certi..."


In [121]:
# list(filter(lambda k: 'US' in k , movie.release_dates()['results'])#[0]['release_dates']

In [None]:

for movie_id in ids:
    movie = tmdb.Movies(movie_id)
    release_dates = movie.release_dates()['results']
    

In [None]:

search = tmdb.Search()
search.movie(query='Avatar')

# OLD

## Find by id
https://developers.themoviedb.org/3/find/find-by-id

- Need: 
    - external_df
    
    - api_key
    - language
    - external_source  (imdb_id, freebase_mid, freebase_id, tvdb_id, tvrage_id, facebook_id, twitter_id, instagram_id)
    
```python
url = f"https://api.themoviedb.org/3/find/tt0111414?api_key={api_key}&language=en-US&external_source={imdb_id}"
```

In [None]:
id_ = None
source = "imdb_id"

base_find_url = "https://api.themoviedb.org/3/find/"
find_query = f"{base_find_url}{id_}?api_key={api_key}&language=en-US&external_source={source}"
find_query

https://api.themoviedb.org/3/movie/550?api_key=e9a24ea6085f06fdd272dab49ad70c2e