In [1]:
import pandas as pd
import os
import json
import tmdbsimple as tmdb
from tqdm.notebook import tqdm_notebook

In [2]:
def write_json(new_data, file_name):
    """Adapted from: https://www.geeksforgeeks.org/append/-to-json-file-using-python/"""
    with open(file_name, "r+") as file:
        file_data = json.load(file)
        
        if (type(new_data) == list) & (type(file_data) == list):
            file_data.extend(new_data)
        else:
            file_data.append(new_data)
        file.seek(0)
        
        json.dump(file_data, file)

In [6]:
def get_movie_with_rating(movie_id):
    movie = tmdb.Movies(movie_id)
    movie_info = movie.info()
    releases = movie.releases()

    for c in releases["countries"]:
        if c["iso_3166_1"] == "US":
            movie_info["certification"] = c["certification"]
    return movie_info

In [7]:
folder = "Data/"
os.makedirs(folder, exist_ok=True)
os.listdir(folder)

['.ipynb_checkpoints',
 'title_akas.csv.gz',
 'title_basics.csv.gz',
 'title_ratings.csv.gz']

In [25]:
YEARS_TO_GET = list(range(2000, 2023))

In [26]:
YEARS_TO_GET

[2000,
 2001,
 2002,
 2003,
 2004,
 2005,
 2006,
 2007,
 2008,
 2009,
 2010,
 2011,
 2012,
 2013,
 2014,
 2015,
 2016,
 2017,
 2018,
 2019,
 2020,
 2021,
 2022]

In [27]:
with open("c:/Users/oscar/.secret/tmdb_api.json") as f:
    login = json.load(f)
login.keys()

dict_keys(['api_key'])

In [28]:
tmdb.API_KEY = login["api_key"]

In [29]:
for YEAR in tqdm_notebook(YEARS_TO_GET, desc="YEARS", position=0):
    JSON_FILE = f"{folder}tmdb_api_results{YEAR}.json"
    file_exist = os.path.isfile(JSON_FILE)
    
    if file_exist == False:
        with open(JSON_FILE, "w") as f:
            json.dump([{"imdb_id":0}], f)
            
        print(f"Creating {JSON_FILE} with imdb_id:0")
        
    else:
        print(f"{JSON_FILE} already exist!")
    
    basics = pd.read_csv("Data/title_basics.csv.gz")
    df = basics.loc[ basics["startYear"] == YEAR].copy()
    movie_ids = df["tconst"].copy()
    previous_df = pd.read_json(JSON_FILE)
    movie_ids_to_get = movie_ids[~movie_ids.isin(previous_df["imdb_id"])]
    
    for movie_id in tqdm_notebook(movie_ids_to_get,
                                 desc=f"Movies from {YEAR}",
                                 position=1,
                                 leave=True):
        try:
            temp = get_movie_with_rating(movie_id)
            write_json(temp, JSON_FILE)
            
            time.sleep(0.02)
        except Exception as e:
                continue

YEARS:   0%|          | 0/23 [00:00<?, ?it/s]

Data/tmdb_api_results2000.json already exist!


Movies from 2000:   0%|          | 0/207 [00:00<?, ?it/s]

Data/tmdb_api_results2001.json already exist!


Movies from 2001:   0%|          | 0/241 [00:00<?, ?it/s]

Creating Data/tmdb_api_results2002.json with imdb_id:0


Movies from 2002:   0%|          | 0/1504 [00:00<?, ?it/s]

Creating Data/tmdb_api_results2003.json with imdb_id:0


Movies from 2003:   0%|          | 0/1631 [00:00<?, ?it/s]

Creating Data/tmdb_api_results2004.json with imdb_id:0


Movies from 2004:   0%|          | 0/1833 [00:00<?, ?it/s]

Creating Data/tmdb_api_results2005.json with imdb_id:0


Movies from 2005:   0%|          | 0/2124 [00:00<?, ?it/s]

Creating Data/tmdb_api_results2006.json with imdb_id:0


Movies from 2006:   0%|          | 0/2347 [00:00<?, ?it/s]

Creating Data/tmdb_api_results2007.json with imdb_id:0


Movies from 2007:   0%|          | 0/2483 [00:00<?, ?it/s]

Creating Data/tmdb_api_results2008.json with imdb_id:0


Movies from 2008:   0%|          | 0/2824 [00:00<?, ?it/s]

Creating Data/tmdb_api_results2009.json with imdb_id:0


Movies from 2009:   0%|          | 0/3451 [00:00<?, ?it/s]

Creating Data/tmdb_api_results2010.json with imdb_id:0


Movies from 2010:   0%|          | 0/3749 [00:00<?, ?it/s]

Creating Data/tmdb_api_results2011.json with imdb_id:0


Movies from 2011:   0%|          | 0/4143 [00:00<?, ?it/s]

Creating Data/tmdb_api_results2012.json with imdb_id:0


Movies from 2012:   0%|          | 0/4425 [00:00<?, ?it/s]

Creating Data/tmdb_api_results2013.json with imdb_id:0


Movies from 2013:   0%|          | 0/4621 [00:00<?, ?it/s]

Creating Data/tmdb_api_results2014.json with imdb_id:0


Movies from 2014:   0%|          | 0/4771 [00:00<?, ?it/s]

Creating Data/tmdb_api_results2015.json with imdb_id:0


Movies from 2015:   0%|          | 0/4934 [00:00<?, ?it/s]

Creating Data/tmdb_api_results2016.json with imdb_id:0


Movies from 2016:   0%|          | 0/5146 [00:00<?, ?it/s]

Creating Data/tmdb_api_results2017.json with imdb_id:0


Movies from 2017:   0%|          | 0/5509 [00:00<?, ?it/s]

Creating Data/tmdb_api_results2018.json with imdb_id:0


Movies from 2018:   0%|          | 0/5630 [00:00<?, ?it/s]

Creating Data/tmdb_api_results2019.json with imdb_id:0


Movies from 2019:   0%|          | 0/5676 [00:00<?, ?it/s]

Creating Data/tmdb_api_results2020.json with imdb_id:0


Movies from 2020:   0%|          | 0/4768 [00:00<?, ?it/s]

Creating Data/tmdb_api_results2021.json with imdb_id:0


Movies from 2021:   0%|          | 0/4719 [00:00<?, ?it/s]

Creating Data/tmdb_api_results2022.json with imdb_id:0


Movies from 2022:   0%|          | 0/1601 [00:00<?, ?it/s]