**Setup**

In [None]:
import tmdbsimple as tmdb

In [36]:
import os
from dotenv import load_dotenv, find_dotenv

# Locate the nearest .env file (searching upward through parent directories)
# and export its entries as environment variables before reading the API key.
dotenv_path = find_dotenv()
load_dotenv(dotenv_path)
TMDB_API_KEY = os.getenv("TMDB_API_KEY")

In [None]:
# Set configurations here
# Hand the key read from the environment to tmdbsimple. TMDB_API_KEY is None
# when the environment variable is missing.
tmdb.API_KEY = TMDB_API_KEY

**Github Example**

In [37]:
# Smoke test (the "Github Example" named in the heading): look up movie id 603.
# The title is read after info() has been called; presumably info() is what
# populates attributes such as `title` on the Movies object.
movie = tmdb.Movies(603)
r = movie.info()
movie.title

'The Matrix'

**Test API for Required Data**

In [None]:
# 0. Configuration (base URL and image size)
# 1. Genres
# 2. Discover
# 3. Image

In [1]:
import tmdbsimple as tmdb
import os
from dotenv import load_dotenv, find_dotenv
from urllib.error import HTTPError
from urllib.request import urlretrieve

In [38]:
# Pull settings from the nearest .env file (found by searching upward through
# parent directories) into the process environment.
load_dotenv(find_dotenv())

# API credential, and the path prefix later used as the destination for
# downloaded posters.
TMDB_API_KEY = os.getenv("TMDB_API_KEY")
TMDB_RAW_DATA_PATH = os.getenv("TMDB_RAW_DATA_PATH")

# Register the key with tmdbsimple.
tmdb.API_KEY = TMDB_API_KEY

In [3]:
# Get base URL and image size
# `config.images` is read in the cells below, after info() has been called;
# the response itself (r) is not used.
config = tmdb.Configuration()
r = config.info()

In [None]:
# Inspect the image settings; the next cell reads its 'secure_base_url' and
# 'poster_sizes' entries.
print(config.images)

In [4]:
# Build the URL prefix shared by every poster: secure base URL + size segment.
base_url = config.images['secure_base_url']
poster_size = config.images['poster_sizes'][4] # index 4 = fifth entry of poster_sizes
base_image_url = base_url+poster_size

In [None]:
# Display the poster URL prefix to check it by eye.
base_image_url

In [None]:
# Get genres
# Point the 'list' endpoint at the movie genre list before calling list().
# NOTE(review): this writes into genre.URLS; if URLS is a class-level dict the
# change is shared by every Genres instance — confirm against tmdbsimple.
genre = tmdb.Genres()
genre.URLS['list'] = '/movie/list'
r = genre.list()

In [None]:
# Show the genre entries available after the list() call above.
genre.genres

In [6]:
def sci_fi_genre_ids(genres):
    '''
    Pick out the ids of every genre whose name contains "sci" (case-insensitive).

    Args:
        genres: list of dictionaries, each with an 'id' and a 'name' entry
    Returns:
        List of id(s) for the science fiction and/or sci-fi & fantasy genres,
        in the order they appear in `genres`
    '''
    matching_ids = []
    for genre_entry in genres:
        if 'sci' in genre_entry['name'].lower():
            matching_ids.append(genre_entry['id'])
    return matching_ids

In [7]:
# Get genre ids for 'science fiction' and 'sci-fi & fantasy'
genre = tmdb.Genres()

# The genre list is requested separately per medium (default is movie), and
# the matching ids are stored keyed by medium ('tv' / 'movie').
type_of_shows = ['tv', 'movie']
genre_ids_of_sci_fi = {}
for show_type in type_of_shows:
    # Re-point the 'list' endpoint at this medium before requesting it
    genre.URLS['list'] = '/' + show_type + '/list'
    r = genre.list()  # TODO: remove this, look for 'status_code' attribute instead
    genre_ids_of_sci_fi[show_type] = sci_fi_genre_ids(genre.genres)

print(genre_ids_of_sci_fi)

{'tv': [10765], 'movie': [878]}


In [8]:
# Discover list of movies/tv shows based on genre ids
# with_genres only accepts strings (sep = ',' , '|'), so join the ids into one
tv_genre_ids = genre_ids_of_sci_fi['tv']
with_genres = ",".join(map(str, tv_genre_ids))
print(with_genres)

10765


In [9]:
# Request page 1 of TV shows filtered by the selected genre id(s).
discover = tmdb.Discover()
r = discover.tv(page=1, with_genres=with_genres)

In [None]:
# Report the total number of pages and results for the discover query.
print(discover.total_pages, discover.total_results)

In [None]:
# Full poster URL for the first result: URL prefix + that result's poster_path.
base_image_url+discover.results[0]['poster_path']

In [None]:
# List the fields available on a single discover result.
discover.results[0].keys()

In [10]:
# do everything in a chunk and then refactor
# with_genres only accepts strings (sep = ',' , '|')
with_genres = ",".join(map(str, genre_ids_of_sci_fi['tv']))

# Fresh first-page TV query; keep the totals it reports.
discover = tmdb.Discover()
r = discover.tv(page=1, with_genres=with_genres)
total_pages, total_results = discover.total_pages, discover.total_results

In [11]:
# Number of result pages reported for the TV discover query.
print(total_pages)

98


In [67]:
def build_poster_url_and_filename(poster_path, title, date, tmdb_id, base_url=None):
    '''
    Build the full image url and new filename for poster
    Args:
        poster_path: '/hwih4=283d.jpg' path of poster (must be a string)
        title: name of tv show or movie
        date: yyyy-mm-dd of first air date (tv) or release date (movie)
        tmdb_id: id associated to tv show or movie
        base_url: url prefix (base url + image size) to put in front of
            poster_path; defaults to the notebook-level base_image_url
    Returns: strings poster_url and poster_filename, where poster_filename
        is '[title]_[date]_[tmdb_id].jpg'
    '''
    if base_url is None:
        base_url = base_image_url

    def _filename_part(value):
        # Spaces become '-' as before. Path separators are replaced as well so
        # that a title such as 'Face/Off' cannot point into another directory.
        text = str(value)
        for unsafe in (' ', '/', '\\'):
            text = text.replace(unsafe, '-')
        return text

    poster_filename = '_'.join(_filename_part(part) for part in (title, date, tmdb_id)) + '.jpg'
    poster_url = base_url + poster_path

    return poster_url, poster_filename

In [63]:
def download_image(image_url, local_path):
    '''
    Download image_url and save it to local_path
    Args:
        image_url: full url of the image to fetch
        local_path: destination path (including filename) on disk
    Returns: True if the file was saved, False if a handled error occurred
        (the error is printed). Other exceptions still propagate.
    '''
    # https://stackoverflow.com/a/39594029
    try:
        urlretrieve(image_url, local_path)
    except FileNotFoundError as err:
        # something wrong with local path
        print(err, image_url, local_path)
        return False
    except HTTPError as err:
        # something wrong with url
        print(err, image_url)
        return False
    return True

In [61]:
def get_tv_movie_results(medium, with_genres, discvr=None, page=None):
    '''
    Run a discover query for one page of tv shows or movies
    Args:
        medium: 'tv' queries tv shows; any other value queries movies
        with_genres: string of genre ids (sep = ',' , '|')
        discvr: Discover object to reuse; a new one is created when omitted
        page: page number to request; when omitted, falls back to the
            notebook-level `page` variable (the previous implicit behavior),
            or 1 if no such variable exists
    Returns: the Discover object that made the request
    '''
    if discvr is None:
        # Created per call: a tmdb.Discover() default in the signature would be
        # evaluated once at definition time and shared by every call
        discvr = tmdb.Discover()
    if page is None:
        # Keeps existing callers working that rely on a global loop variable
        page = globals().get('page', 1)

    if medium == 'tv':
        discvr.tv(page=page, with_genres=with_genres)
    else:
        discvr.movie(page=page, with_genres=with_genres)

    return discvr

In [None]:
# Download image and replace file name, [name]_[first_air_date]_[tmdb_id].jpg
# NOTE(review): scratch version of download_image(). It reads poster_url and
# poster_filename, which in the visible notebook are only assigned at top level
# inside the download loop further down, so running the cells top to bottom on
# a fresh kernel would raise NameError here. Consider removing this cell.

# https://stackoverflow.com/a/39594029
try:
    urlretrieve(poster_url, poster_filename)
except FileNotFoundError as err:
    print(err)   # something wrong with local path
except HTTPError as err:
    print(err)  # something wrong with url

In [65]:
# Keys to read from a discover result, per medium, in the order
# poster path, name/title, first air/release date, id.
tv_movie_attr = dict(
    tv=['poster_path', 'name', 'first_air_date', 'id'],
    movie=['poster_path', 'title', 'release_date', 'id'],
)

In [68]:
# Download the poster of every result on each page, saving each file as
# [name]_[first_air_date]_[tmdb_id].jpg under the raw-data path.

# The env var seems to be loaded with surrounding quote characters. Strip only
# quotes, so an unquoted value does not lose its first and last character.
raw_data_dir = TMDB_RAW_DATA_PATH.strip("'\"")
poster_key, title_key, date_key, id_key = tv_movie_attr['tv']

# Loop over each page (get page one again) and then all results on each page
total_pages = 1  # overrides the total fetched earlier: only the first page is downloaded
# get_tv_movie_results is called without a page argument and picks up this
# notebook-level `page`, so the loop variable must keep that name.
for page in range(1, total_pages+1):
    discover = get_tv_movie_results('tv', with_genres, discover)
    for result in discover.results:
        if not result.get(poster_key):
            # No poster for this title: nothing to download
            continue

        poster_url, poster_filename = build_poster_url_and_filename(
            result[poster_key],  # poster_path
            result[title_key],   # name / title
            result[date_key],    # first_air_date / release_date
            result[id_key],      # id
        )

        local_path = raw_data_dir+poster_filename
        download_image(poster_url, local_path) # could return t/f and sum them to assert len(discover.results) == sum

    print('**********'+' Completed Page '+str(page)+'**********')

********** Completed Page 1**********


In [None]:
# put everything in 'main' and pass args to file to run tv or movie
# something like filename.py -m tv