In [1]:
import time
import logging
import os
import json
import requests
import pandas as pd
from s3fs.core import S3FileSystem 

In [2]:
# Set AWS_CONFIG_FILE before the filesystem object is created
# (presumably so s3fs/botocore reads credentials from it; confirm).
os.environ['AWS_CONFIG_FILE'] = 'aws_config.ini'

# anon=False: connect with credentials rather than anonymously.
s3 = S3FileSystem(anon=False)
key = 'TheNumbers_budgets.csv'
bucket = 'movie-torrents'

# Open the object inside a context manager so the S3 file handle is closed
# as soon as pandas has finished parsing instead of being left open.
with s3.open('{}/{}'.format(bucket, key), mode='rb') as budget_file:
    # The first CSV column is used as the row index.
    df = pd.read_csv(budget_file, index_col=0)

df.head()

Unnamed: 0,title,release_date,production_budget,domestic_gross,world_gross
1,Avatar,2009-12-18,425000000,760507625,2783918982
2,Star Wars Ep. VII: The Force Awakens,2015-12-18,306000000,936662225,2058662225
3,Pirates of the Caribbean: At Worlds End,2007-05-24,300000000,309420425,963420425
4,Spectre,2015-11-06,300000000,200074175,879620923
5,The Dark Knight Rises,2012-07-20,275000000,448139099,1084439099


In [3]:
# Dedicated logger for the OMDb lookups.
logger = logging.getLogger('OMDB_API')
logger.setLevel(logging.INFO)

# logging.getLogger returns the same object on every call, so re-running this
# cell would stack file handlers and write every record several times.
# Drop file handlers left over from a previous run first.
for stale_handler in list(logger.handlers):
    if isinstance(stale_handler, logging.FileHandler):
        logger.removeHandler(stale_handler)
        stale_handler.close()

# create a file handler
handler = logging.FileHandler('omdb_api.log')
handler.setLevel(logging.INFO)

# create a logging format
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)

# Attach the handler to the logger; without this the INFO records emitted
# through `logger` are never written to omdb_api.log.
logger.addHandler(handler)

In [4]:
# Build the list of (title, release year) tuples.
title = df['title']
# The year is taken as the first four characters of each release_date value.
year = [release_date[:4] for release_date in df['release_date']]
movie_tup = list(zip(title, year))

In [5]:
# Read the OMDb API key from a local file so it is not hardcoded in the notebook.
with open('./scripts/omdb_api.key', 'r') as read_file:
    omdb_api_key = read_file.read().strip()

# Seconds to wait for OMDb before giving up on a single request; without a
# timeout one stalled connection would hang the whole loop indefinitely.
REQUEST_TIMEOUT_S = 10

# Successful OMDb responses, one parsed JSON object per movie.
lst = []

# Loop variables are deliberately not called `title` / `year`, so the
# same-named globals from the cell that builds movie_tup are not overwritten.
for movie_title, release_year in movie_tup:
    # meter number of requests to omdb api
    time.sleep(0.5)

    # omdb api query: t = title, y = year
    # NOTE(review): the key is sent over plain http; consider https if OMDb supports it.
    payload = {'t': movie_title, 'y': release_year, 'apikey': omdb_api_key}
    try:
        html = requests.get('http://www.omdbapi.com', params=payload,
                            timeout=REQUEST_TIMEOUT_S)
    except requests.RequestException as exc:
        # Log only the exception type: the exception text can contain the
        # request URL, which includes the API key.
        logger.info('Year:{0} - Title:{1} - request failed: {2}'.format(
            release_year, movie_title, type(exc).__name__))
        continue

    # Check the status code BEFORE parsing: an error page is not guaranteed to
    # be JSON, and parsing it first would raise and abort the whole loop.
    resp = None
    if html.status_code == 200:
        try:
            resp = json.loads(html.text)
        except ValueError:
            # json.JSONDecodeError is a ValueError subclass
            resp = None

    # A payload containing an 'Error' key is treated as a failed lookup.
    if resp is None or 'Error' in resp:
        logger.info('Year:{0} - Title:{1}'.format(release_year, movie_title))
        continue

    lst.append(resp)

    # Stop once more than 20 results are collected (i.e. at 21).
    # NOTE(review): looks like a development cap; confirm before a full run.
    if len(lst) > 20:
        break

In [6]:
# Columns to keep from the OMDb records, in display order.
omdb_columns = [
    'Actors', 'Awards', 'BoxOffice', 'Country', 'DVD', 'Director', 'Genre',
    'Language', 'Metascore', 'Production', 'Rated', 'Released', 'Runtime',
    'Title', 'Type', 'Writer', 'imdbID', 'imdbRating', 'imdbVotes',
]

# One row per collected record; then restrict to the selected columns.
df = pd.DataFrame(lst).loc[:, omdb_columns]

In [7]:
# Each column is reassigned from a chained expression rather than mutated
# with `df[col].replace(..., inplace=True)`: the in-place form operates on a
# temporary Series and does not update `df` under pandas Copy-on-Write.

# Integer-like text columns: 'N/A' becomes 0, then '$' and ',' are stripped.
for col in ['BoxOffice', 'imdbVotes']:
    df[col] = (
        df[col]
        .replace(to_replace='N/A', value='0')
        .replace(r'[\$,]', '', regex=True)
        .astype(int)
    )

# Runtime looks like '162 min'. Strip the trailing unit with an anchored
# pattern (the previous '[ min]' was a character class that removed any of
# the characters ' ', 'm', 'i', 'n' anywhere in the value) and map 'N/A' to 0
# in the same way as the other numeric columns.
df['Runtime'] = (
    df['Runtime']
    .replace(to_replace='N/A', value='0')
    .replace(r'\s*min\s*$', '', regex=True)
    .astype(int)
)

# Dates are parsed with the '%d %b %Y' format; values that do not match
# (including 'N/A') become NaT because errors='coerce'.
for col in ['DVD', 'Released']:
    df[col] = pd.to_datetime(df[col], errors='coerce', format='%d %b %Y')

# Score columns: 'N/A' becomes 0, then cast to float.
for col in ['Metascore', 'imdbRating']:
    df[col] = df[col].replace(to_replace='N/A', value='0').astype(float)

In [8]:
# Show the first five rows of df.
df.head(n=5)

Unnamed: 0,Actors,Awards,BoxOffice,Country,DVD,Director,Genre,Language,Metascore,Production,Rated,Released,Runtime,Title,Type,Writer,imdbID,imdbRating,imdbVotes
0,"Sam Worthington, Zoe Saldana, Sigourney Weaver...",Won 3 Oscars. Another 85 wins & 128 nominations.,749700000,"UK, USA",2010-04-22,James Cameron,"Action, Adventure, Fantasy","English, Spanish",83.0,20th Century Fox,PG-13,2009-12-18,162,Avatar,movie,James Cameron,tt0499549,7.8,944150
1,"Daniel Craig, Christoph Waltz, Léa Seydoux, Ra...",Won 1 Oscar. Another 7 wins & 31 nominations.,208777731,"UK, USA",2016-02-09,Sam Mendes,"Action, Adventure, Thriller","English, Spanish, Italian, German, French",60.0,Sony Pictures,PG-13,2015-11-06,148,Spectre,movie,"John Logan (screenplay), Neal Purvis (screenpl...",tt2379713,6.8,313528
2,"Christian Bale, Gary Oldman, Tom Hardy, Joseph...",Nominated for 1 BAFTA Film Award. Another 38 w...,448130642,"UK, USA",2012-12-03,Christopher Nolan,"Action, Thriller","English, Arabic",78.0,Warner Bros. Pictures,PG-13,2012-07-20,164,The Dark Knight Rises,movie,"Jonathan Nolan (screenplay), Christopher Nolan...",tt1345836,8.4,1244136
3,"Johnny Depp, Armie Hammer, William Fichtner, T...",Nominated for 2 Oscars. Another 4 wins & 17 no...,89289910,USA,2013-12-17,Gore Verbinski,"Action, Adventure, Western","English, North American Indian",37.0,Walt Disney Pictures,PG-13,2013-07-03,150,The Lone Ranger,movie,"Justin Haythe (screenplay), Ted Elliott (scree...",tt1210819,6.5,193685
4,"Taylor Kitsch, Lynn Collins, Samantha Morton, ...",2 wins & 8 nominations.,73058679,USA,2012-06-05,Andrew Stanton,"Action, Adventure, Sci-Fi",English,51.0,Walt Disney Pictures,PG-13,2012-03-09,132,John Carter,movie,"Andrew Stanton (screenplay), Mark Andrews (scr...",tt0401729,6.6,222756
