In [1]:
import requests
from tqdm.notebook import tqdm
from concurrent.futures import ThreadPoolExecutor
from collections import defaultdict

In [10]:
API_KEY = "4f7e050b4d9ee8f88f4b60cc46ec39b3"
HEB = 'he-IL'
ENG = 'en-US'
EARLIEST_YEAR = 1984
MAX_MOVIES = 2974
MOVIES_PER_PAGE = 20

## Download top voted TV Series

In [12]:
top_rated_request = 'https://api.themoviedb.org/3/tv/top_rated?api_key={api_key}&language={lang}&page={page}'

In [29]:

top_rated_tvs = []

for page in tqdm(range(1, MAX_MOVIES // 20 +1), "Downloading TV Series"):
    request_url = top_rated_request.format(api_key=API_KEY, lang=HEB, page=page)
    response = requests.get(request_url)
    if response.status_code == 200:
        json_data = response.json()
        top_rated_tvs += json_data['results']


Downloading TV Series:   0%|          | 0/148 [00:00<?, ?it/s]

## Downloading metadata

In [14]:

def download_tv_data(movie_id):
    details_url = f'https://api.themoviedb.org/3/tv/{movie_id}'
    credits_url = f'https://api.themoviedb.org/3/tv/{movie_id}/credits'
    keywords_url = f'https://api.themoviedb.org/3/tv/{movie_id}/keywords'
    params = {'api_key': API_KEY}

    movie_data = {}

    # Downloading movie details
    response = requests.get(details_url, params=params)
    if response.status_code == 200:
        movie_data['details'] = response.json()

    # Downloading cast data
    response = requests.get(credits_url, params=params)
    if response.status_code == 200:
        movie_data['credits'] = response.json()

    # Downloading cast data
    response = requests.get(keywords_url, params=params)
    if response.status_code == 200:
        movie_data['keywords'] = response.json()

    return movie_id, movie_data

movie_ids = [movie['id'] for movie in top_rated_tvs]
movie_details_map = defaultdict(dict)

with ThreadPoolExecutor() as executor:
    for movie_id, movie_data in tqdm(executor.map(download_tv_data, movie_ids), total=len(movie_ids), desc="Downloading details and cast"):
        movie_details_map[movie_id] = movie_data


Downloading details and cast:   0%|          | 0/2960 [00:00<?, ?it/s]

In [28]:
movie_details_map[95515]

{'details': {'adult': False,
  'backdrop_path': '/ylOY2q6pW5OV5kTyoxcuuFY5Xy4.jpg',
  'created_by': [],
  'episode_run_time': [43],
  'first_air_date': '2020-01-07',
  'genres': [{'id': 18, 'name': 'Drama'}],
  'homepage': 'https://www.qub.ca/tvaplus/tva/epidemie',
  'id': 95515,
  'in_production': False,
  'languages': ['fr'],
  'last_air_date': '2020-03-10',
  'last_episode_to_air': {'id': 2163499,
   'name': 'Scars',
   'overview': 'Two days before the official announcement that the epidemic is over, the infection of two little girls brings Anne-Marie back to square one. The epidemiological investigation resumes, but the girls are uncooperative. Will Nelli get them to talk and prevent COVA from claiming more victims?',
   'vote_average': 0.0,
   'vote_count': 0,
   'air_date': '2020-03-10',
   'episode_number': 10,
   'episode_type': 'finale',
   'production_code': '',
   'runtime': 43,
   'season_number': 1,
   'show_id': 95515,
   'still_path': '/7xOizJ0zxoJvSHHQHj2e2NaPiqT.jpg'},

## Compose union json

In [30]:
def extract_keys(entry, keys):
    return {key: entry[key] for key in keys if key in entry}

def extract_list_be_key(entries, key):
    return [entry[key] for entry in entries]


def populate_movie(movie, details, credits, keywords):
    movie['genres'] = extract_list_be_key(details['genres'], 'name')
    movie['name_he'] = movie['name']
    movie['name'] = details['name']
    movie['year'] = details['first_air_date'].split('-')[0]
    
    movie['overview_he'] = movie['overview']
    movie['overview'] = details['overview']
  
    movie['production_companies'] = extract_list_be_key(details['production_companies'], 'name')
    movie['networks'] = extract_list_be_key(details['production_companies'], 'name')

    movie['cast'] = [extract_keys(p, ['name', 'character', 'profile_path','order']) for p in credits['cast'][:8]]
    movie['creators'] =  extract_list_be_key(details['created_by'], 'name')

    movie['keywords'] = extract_list_be_key(keywords, 'name')

    if 'genre_ids' in movie:
        del movie['genre_ids']


In [31]:
for movie in top_rated_tvs:
    id = movie['id']
    populate_movie(movie, movie_details_map[id]['details'], movie_details_map[id]['credits'], movie_details_map[id]['keywords']['results'])

In [32]:
top_rated_tvs[2224]

{'backdrop_path': '/1QTP2ZIiT6oIrlZvZcCILA86Op.jpg',
 'first_air_date': '2020-01-07',
 'id': 95515,
 'name': 'Outbreak',
 'origin_country': ['CA'],
 'original_language': 'fr',
 'original_name': 'Épidémie',
 'overview': 'A dangerous virus appears in a group of homeless people, causing a risk of outbreak. How long will it take Anne-Marie Leclerc, director of the Emergency Public Health Laboratory, to realize that an actual epidemic is starting to take shape?',
 'popularity': 35.767,
 'poster_path': '/aYHqgip3sct7nHw5AdLV4PYJzG6.jpg',
 'vote_average': 7.3,
 'vote_count': 126,
 'genres': ['Drama'],
 'name_he': 'Épidémie',
 'year': '2020',
 'overview_he': '',
 'production_companies': ['Sphère Média'],
 'networks': ['Sphère Média'],
 'cast': [{'name': 'Julie Le Breton',
   'character': 'Anne-Marie Lerclerc',
   'profile_path': '/pMSYAWIYfdEWlLU8dHejp93mRIy.jpg',
   'order': 3}],
 'creators': [],
 'keywords': ['montreal, canada', 'epidemic', 'virus']}

## Export json and CSV

In [33]:
import json
with open('top-voted-tvs.json', 'w', encoding='utf-8') as f1:
    json.dump(top_rated_tvs, f1, ensure_ascii=False)

with open('tvs-map.json', 'w', encoding='utf-8') as f2:
    tvs_map = {m['id']: m for m in top_rated_tvs}
    json.dump(tvs_map, f2, ensure_ascii=False)

In [80]:
import pandas as pd
df = pd.DataFrame.from_dict(top_rated_tvs)
df.drop(columns=['name_he', 'overview_he', 'original_name'], inplace=True)
df.to_csv('top_voted_tvs.csv', encoding='utf-8', index=False)