In [None]:
# import all the packages needed
import urllib.request
import urllib.error
import json
import datetime
import time
import sys, os
import logging
import glob
from urllib.error import HTTPError
import configparser
from configparser import SafeConfigParser

In [None]:
# generating the date range
def daterange( start_date, end_date ):
    """Yield one value per day between two dates, both endpoints included.

    Iteration always begins at ``start_date``.  When ``start_date`` is not
    after ``end_date`` the days come out in ascending order; otherwise they
    come out in descending order, stepping back one day at a time.
    """
    if start_date <= end_date:
        day_count, direction = ( end_date - start_date ).days, 1
    else:
        day_count, direction = ( start_date - end_date ).days, -1

    for offset in range( day_count + 1 ):
        yield start_date + datetime.timedelta( direction * offset )
            

In [None]:
# recursively rebuild decoded JSON data as plain dicts and lists
def convert(input):
    """Return a recursively rebuilt copy of a nested dict/list structure.

    Dicts (and dict subclasses) come back as plain ``dict`` objects and lists
    (and list subclasses) as plain ``list`` objects, with their keys, values
    and elements converted the same way.  Any other value is returned as is.
    """
    if isinstance(input, dict):
        rebuilt = {}
        for key, value in input.items():
            rebuilt[convert(key)] = convert(value)
        return rebuilt

    if isinstance(input, list):
        return list(map(convert, input))

    return input

In [None]:
# getting the json file name
def getJsonFileName(date, now, page, json_file_path):
    """Build the path of the JSON file that stores one page of results.

    The file is named ``<date>_<now>_<page>.json`` and is placed directly
    inside ``json_file_path``.

    Args:
        date: Date the page belongs to (converted with ``str``).
        now: Timestamp of the download (converted with ``str``).
        page: Page number (converted with ``str``).
        json_file_path: Folder that will contain the file.

    Returns:
        str: ``json_file_path`` joined with the generated file name.
    """
    json_file_name = str(date) + '_' + str(now) + '_' + str(page) + '.json'
    # os.path.join uses the separator of the running platform; the previous
    # hard-coded backslash only produced a usable path on Windows.
    return os.path.join(json_file_path, json_file_name)

In [None]:
# get the movie IDs from the TMDB API
def getMovieID(start,end, api_key, json_file_path):
    """Download the TMDB "movie changes" listing and save each non-empty page.

    Pages 1 to 150 of ``/3/movie/changes`` are requested for the window
    ``start``..``end``.  The raw body of every page whose ``results`` list is
    non-empty is written to the file named by ``getJsonFileName``; the first
    page with an empty ``results`` list ends the download.

    Args:
        start: Value sent as ``start_date`` (converted with ``str``).
        end: Value sent as ``end_date``; also used in the output file names
            and in log messages.
        api_key: TMDB API key appended to every request.
        json_file_path: Folder handed to ``getJsonFileName`` for the output.

    Returns:
        None.  HTTP 403 aborts the download, HTTP 429 is retried after a
        30 second pause (at most five tries per page), and any other error
        is logged before moving on to the next page.
    """
    max_pages = 150    # upper bound on the number of pages requested
    max_attempts = 5   # tries per page while the API keeps answering 429

    for page in range(1, max_pages + 1):
        # NOTE(review): this URL embeds the API key and is written to the log
        # whenever an HTTPError occurs.
        request_string = "https://api.themoviedb.org/3/movie/changes?start_date="+ str(start) +"&end_date=" + str(end) + "&page="+ str(page) +"&api_key=" + str(api_key) +""
        for _attempt in range(max_attempts):
            try:
                with urllib.request.urlopen(request_string) as response:
                    content = response.read().decode('utf-8')
                if content:
                    movies = convert(json.loads(content))
                    if len(movies["results"]) < 1:
                        # no more movies for this window: stop paging
                        return
                    now = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
                    json_file_name = getJsonFileName(end, now, page, json_file_path)
                    # The body was decoded as UTF-8, so it is written back as
                    # UTF-8 instead of the platform default encoding.
                    with open(json_file_name, 'w', encoding='utf-8') as json_file:
                        json_file.write(content)
                time.sleep(3)  # pause before requesting the next page
                break
            except HTTPError as e:
                # Five placeholders need five arguments; the date was missing.
                logging.error("HTTPError on page %s on %s (err no. %s: %s) Here's the URL of the call: %s", page, end, e.code, e.reason, request_string)
                if e.code == 403:
                    print("Script hit a snag and got an HTTPError 403. Check your log file for more info.")
                    return
                if e.code != 429:
                    break  # other HTTP errors: skip this page
                print ("Waiting. You've probably reached an API limit.")
                time.sleep(30) # wait 30 seconds, then retry the same page
            except Exception as exc:
                # `Exception` (not a bare except) so that KeyboardInterrupt
                # and SystemExit can still stop the download.
                logging.error("Error on %s page %s: %r", end, page, exc)
                break
        else:
            # Every attempt ended in a 429 response.
            logging.error("Giving up on page %s on %s after %s rate-limited attempts", page, end, max_attempts)


In [None]:
# main() which generates the TMDB movie IDs
def main():
    """Read ``config/config.cfg`` and start the movie-ID download.

    The configuration file is looked up relative to the current working
    directory.  It must provide ``json_folder`` and ``logfile`` in section
    ``[files]`` and ``api_key``, ``start_year``/``start_month``/``start_day``
    and ``end_year``/``end_month``/``end_day`` in section ``[tmdb]``.

    Logging is directed to the configured log file at INFO level before
    ``getMovieID`` is called with the configured date window.
    """
    # ConfigParser is the supported class name; SafeConfigParser was only a
    # deprecated alias of it and no longer exists in Python 3.12.
    config = configparser.ConfigParser()
    # Join the components instead of embedding a backslash: 'config\config.cfg'
    # contains an invalid escape sequence and only resolves on Windows.
    config_file = os.path.join(os.getcwd(), 'config', 'config.cfg')
    config.read(config_file)

    json_file_path = config.get('files', 'json_folder')
    log_file = config.get('files', 'logfile')
    api_key = config.get('tmdb', 'api_key')
    start = datetime.date(
        year=int(config.get('tmdb', 'start_year')),
        month=int(config.get('tmdb', 'start_month')),
        day=int(config.get('tmdb', 'start_day')),
    )
    end = datetime.date(
        year=int(config.get('tmdb', 'end_year')),
        month=int(config.get('tmdb', 'end_month')),
        day=int(config.get('tmdb', 'end_day')),
    )

    logging.basicConfig(filename=log_file, level=logging.INFO)
    logging.info("Getting started.")
    getMovieID(start, end, api_key, json_file_path)
       
# Entry point: start the movie-ID download only when this code runs as the
# top-level program, not when it is imported as a module.
if __name__ == '__main__' :
    main()

In [None]:
# read the downloaded movie ID files to get a list of IDs
def getMovieIDs(script_dir):
    """Collect the ids stored in the downloaded "Movie ID" JSON files.

    Every ``*.json`` file in ``<script_dir>/Data/Movie ID`` is parsed and the
    ``id`` of each entry of its ``results`` list is collected.

    Args:
        script_dir: Base directory that contains the ``Data`` folder.

    Returns:
        list: The ids, file by file in the order ``glob`` returns the files
        and, within a file, in ``results`` order.  Empty when no file matches.
    """
    # Build the pattern from components: the previous literal relied on
    # invalid escape sequences (\D, \M, \*) and on the Windows-only backslash.
    pattern = os.path.join(script_dir, 'Data', 'Movie ID', '*.json')
    ids = []
    for file_name in glob.glob(pattern):
        with open(file_name, encoding='utf-8') as json_file:
            json_data = json.load(json_file)
        ids.extend(entry['id'] for entry in json_data['results'])
    return ids

In [None]:
# download the details for each TMDB movie ID
def getMovieDetails():
    """Download the TMDB detail record of every collected movie id.

    Reads ``config/config.cfg`` from the current working directory (keys
    ``json_folder_details`` in ``[files]`` and ``api_key`` in ``[tmdb]``),
    obtains the ids from ``getMovieIDs`` and requests ``/3/movie/<id>`` with
    credits, release dates, alternative titles, translations and keywords
    appended.  Each non-empty answer is written verbatim, as UTF-8, to
    ``<json_folder_details>/<id>.json``.

    Returns:
        None.  An HTTP 404 prints a notice and any other HTTP error is
        logged; in both cases the id is skipped.  An empty response body
        ends the whole run.
    """
    # ConfigParser is the supported class name; SafeConfigParser was only a
    # deprecated alias of it and no longer exists in Python 3.12.
    config = configparser.ConfigParser()
    script_dir = os.getcwd()
    # Joined from components: 'config\config.cfg' contains an invalid escape
    # sequence and only resolves on Windows.
    config.read(os.path.join(script_dir, 'config', 'config.cfg'))
    json_file_path = config.get('files', 'json_folder_details')
    api_key = config.get('tmdb', 'api_key')

    for ID in getMovieIDs(script_dir):
        request_string = "https://api.themoviedb.org/3/movie/"+str(ID)+"?api_key="+api_key+"&language=en-US&append_to_response=credits,release_dates,alternative_titles,translations,keywords"
        try:
            with urllib.request.urlopen(request_string) as response:
                content = response.read().decode('utf-8')
        except HTTPError as e:
            if e.code == 404:
                print("Page is not found")
            else:
                # Previously every non-404 failure was dropped silently.
                logging.error("HTTPError for movie id %s (err no. %s: %s)", ID, e.code, e.reason)
            continue

        if not content:
            # NOTE(review): an empty body stops processing of all remaining
            # ids (kept from the original) - confirm this is intended rather
            # than skipping just this id.
            return

        movies = convert(json.loads(content))
        if len(movies) >= 1:
            json_file_name = os.path.join(json_file_path, str(ID) + '.json')
            with open(json_file_name, 'w', encoding='utf-8') as json_file:
                json_file.write(content)
        time.sleep(3)  # pause before requesting the next id

In [None]:
# Run the detail download: requests one JSON document per movie ID from the
# TMDB API and writes it to the configured details folder.
getMovieDetails()

In [None]:
# finding the imdb movie ID's
def getMovieIDs(script_dir):
    """Collect the ids stored in the downloaded "Movie Details" JSON files.

    Every ``*.json`` file in ``<script_dir>/Final Project Data/Movie Details``
    is parsed and the ``id`` of each entry of its ``results`` list is
    collected.

    NOTE(review): this definition reuses the name of the earlier
    ``getMovieIDs`` and replaces it once this cell has run - confirm that is
    intended.  The comment above this cell mentions IMDb ids, while the code
    reads ``results[*]['id']``; verify that against the stored files.

    Args:
        script_dir: Base directory that contains ``Final Project Data``.

    Returns:
        list: The ids, file by file in the order ``glob`` returns the files
        and, within a file, in ``results`` order.  Empty when no file matches.
    """
    # Build the pattern from components: the previous literal relied on
    # invalid escape sequences (\F, \M, \*) and on the Windows-only backslash.
    pattern = os.path.join(script_dir, 'Final Project Data', 'Movie Details', '*.json')
    ids = []
    for file_name in glob.glob(pattern):
        # The detail files are written as UTF-8, so read them back as UTF-8
        # instead of the platform default encoding.
        with open(file_name, encoding='utf-8') as json_file:
            json_data = json.load(json_file)
        ids.extend(entry['id'] for entry in json_data['results'])
    return ids