In [20]:
# import everything we may need
import mysql.connector
from mysql.connector import errorcode
import config
import requests
import time
import json
import re

In [21]:
#get data from box office mojo as json file
f = open('mojo_data.json')
data = json.load(f)

In [6]:
# connect to AWS
cnx = mysql.connector.connect(
    host = config.host,
    user = config.user,
    passwd = config.password
)

# Establish cursor
cursor = cnx.cursor()

In [None]:
## function to close the connection
def close_connections():
    cursor.close()
    conn.close()

In [7]:
#name the db
db_name = 'Mod_1_Project'

In [8]:
# create database in AWS
def create_database(cursor, database_name):
    try:
        cursor.execute(
            "CREATE DATABASE {} DEFAULT CHARACTER SET 'utf8'".format(database_name))
    except mysql.connector.Error as err:
        ## Catch the error if an error occurs.
        print("Failed creating database: {}".format(err))
        exit(1)
        
try:
    cursor.execute("USE {}".format(db_name))
except mysql.connector.Error as err:
    print("Database {} does not exist.".format(db_name))
    if err.errno == errorcode.ER_BAD_DB_ERROR:
        create_database(cursor, db_name)
        print("Database {} created successfully.".format(db_name))
        cnx.database = db_name
    else:
        print(err)
        exit(1)

In [23]:
#get list of movies from box office mojo data
titles = [x['title'] for x in data]
len(titles)

2050

In [None]:
key = config.apiKey
url = 'http://www.omdbapi.com/?i=tt3896198&apikey='

# Make call to API
# Iterate over list of titles (from box office mojo) to retrieve all

def OMDB_call(titles):
    all_titles = []
    for title in titles: 
        parameters = {'t': {title}}
        response = requests.get(url + key, params=parameters)
        data = response.json()
        all_titles.append(data)
        time.sleep(.5)
    return all_titles

movie_dict = OMDB_call(titles[:1000])

In [19]:
# print(movie_dict[0])
# # movie_dict[17]
# movie_dict[18]
def check_for_title(data):
    movies_dict = []
    for movie in movie_dict:
        if 'Title' in movie.keys():
            movies_dict.append(movie)
    return movies_dict
movies_dict = check_for_title(movie_dict)
movies_dict

[{'Title': 'The Boss Baby',
  'Year': '2017',
  'Rated': 'PG',
  'Released': '31 Mar 2017',
  'Runtime': '97 min',
  'Genre': 'Animation, Adventure, Comedy, Family, Fantasy',
  'Director': 'Tom McGrath',
  'Writer': 'Michael McCullers, Marla Frazee (based on the book by)',
  'Actors': 'Alec Baldwin, Steve Buscemi, Jimmy Kimmel, Lisa Kudrow',
  'Plot': 'A suit-wearing, briefcase-carrying baby pairs up with his 7-year old brother to stop the dastardly plot of the CEO of Puppy Co.',
  'Language': 'English, Spanish',
  'Country': 'USA',
  'Awards': 'Nominated for 1 Oscar. Another 1 win & 19 nominations.',
  'Poster': 'https://m.media-amazon.com/images/M/MV5BMTg5MzUxNzgxNV5BMl5BanBnXkFtZTgwMTM2NzQ3MjI@._V1_SX300.jpg',
  'Ratings': [{'Source': 'Internet Movie Database', 'Value': '6.3/10'},
   {'Source': 'Rotten Tomatoes', 'Value': '52%'},
   {'Source': 'Metacritic', 'Value': '50/100'}],
  'Metascore': '50',
  'imdbRating': '6.3',
  'imdbVotes': '94,906',
  'imdbID': 'tt3874544',
  'Type': 'm

In [35]:
#function to create tables in our database
def create_table(dict_of_tables, db_name):
    for table_name in dict_of_tables:
        table_query = dict_of_tables[table_name]
        cursor.execute("USE {}".format(db_name))
        try:
            print("Creating table {}: ".format(table_name), end='')
            cursor.execute(table_query)
        except mysql.connector.Error as err:
            if err.errno == errorcode.ER_TABLE_EXISTS_ERROR:
                print("already exists.")
            else:
                print(err.msg)
        else:
            print("OK")


In [187]:
#Create tables to input data
TABLES = {}
TABLES['main_movie_table'] = (
    "CREATE TABLE main_movie_table ("
    "  movie_id int NOT NULL AUTO_INCREMENT,"
    "  title text NOT NULL,"
    "  budget int(10),"
    "  release_date date,"
    "  award_wins int(3),"
    "  award_nominations int(3),"
    "  RT_rating int(2),"
    "  IMDB_rating real,"
    "  PRIMARY KEY (movie_id)"
    ") ENGINE=InnoDB")

TABLES['genres'] = (
    "CREATE TABLE genres ("
    "  genre_id int NOT NULL AUTO_INCREMENT,"
    "  genre text,"
    "  PRIMARY KEY (genre_id)"
    ") ENGINE=InnoDB")

TABLES['key_words'] = (
    "CREATE TABLE key_words ("
    "  keyword_id int NOT NULL AUTO_INCREMENT,"
    "  key_words text,"
    "  PRIMARY KEY (keyword_id)"
    ") ENGINE=InnoDB")

TABLES['movies_genres'] = (
    "CREATE TABLE movies_genres ("
      " movie_id int NOT NULL,"
      " genre_id int NOT NULL"
      ") ENGINE=InnoDB")

TABLES['movies_keywords'] = (
    "CREATE TABLE movies_keywords ("
      " movie_id int NOT NULL,"
      " keyword_id text NOT NULL"
      ") ENGINE=InnoDB")

TABLES['actors'] = (
    "CREATE TABLE actors ("
    " actor_id int NOT NULL AUTO_INCREMENT," 
    " actor_name text NOT NULL,"
    " PRIMARY KEY (actor_id)"
    ") ENGINE=InnoDB")

TABLES['movie_actors'] = (
    "CREATE TABLE movie_actors("
    " movie_id int NOT NULL,"
    " actor_id int NOT NULL"
    ") ENGINE=InnoDB")

create_table(TABLES, 'Mod_1_Project')

Creating table main_movie_table: OK
Creating table genres: already exists.
Creating table key_words: already exists.
Creating table movies_genres: already exists.
Creating table movies_keywords: already exists.
Creating table actors: already exists.
Creating table movie_actors: already exists.


In [150]:
## Function to drop all tables to refresh
def drop_everything():
    cursor.execute("""DROP TABLE Reviews;""")
    conn.commit()
    cursor.execute("""DROP TABLE Businesses;""")
    conn.commit()


In [18]:
# function for OMDB data
# function will take awards data and a dict of awards
def award_results(omdb_data):
    results_dict = {}
    
    for movie in omdb_data:
        title = movie['Title']
        award_string = movie['Awards']
      
        # create a dictionary that holds award info for current movie
        movie_awards = {}
        
        # get numbers in string, cast them to int
        nums = map(int, re.findall('\d+', award_string))
        
        # create list of words/numbers in award string to iterate on
        award_list = award_string.split(' ')
        
        for num in nums:
            # the word following the number describes the award,
            # so we get that index
            descrip_index = award_list.index(str(num)) + 1
            
            # add dictionary entry - key is description, value 
            # is number of that award
            movie_awards[award_list[descrip_index].strip('.')] = num
            
            # deletes number from award_list in case there are
            # repeated numbers -- num removes first element matching
            # that index
            award_list.remove(str(num))
            
        # adds all awards that aren't nominations to get total wins
        
        for award in movie_awards.keys():
            if (award.lower() != 'nominations' or award.lower() != 'nomination') and (award.lower() != 'wins' or award.lower() != 'win'):
                if 'win' in movie_awards.keys():
                    movie_awards['win'] += movie_awards[award]
                elif 'wins' in movie_awards.keys():
                    movie_awards['wins'] += movie_awards[award]
                      
        results_dict[title] = movie_awards
    
    return results_dict
    
awards = award_results(movies_dict)
awards

['Nominated', 'for', '1', 'Oscar.', 'Another', '1', 'win', '&', '19', 'nominations.']
['3', 'nominations.']
['N/A']
['Nominated', 'for', '1', 'Oscar.', 'Another', '12', 'wins', '&', '42', 'nominations.']
['8', 'wins', '&', '19', 'nominations.']
['2', 'wins', '&', '3', 'nominations.']
['1', 'nomination.']
['Nominated', 'for', '1', 'Golden', 'Globe.', 'Another', '1', 'win', '&', '14', 'nominations.']
['3', 'nominations.']
['25', 'nominations.']
['4', 'wins', '&', '9', 'nominations.']
['Nominated', 'for', '1', 'Oscar.', 'Another', '3', 'wins', '&', '40', 'nominations.']
['11', 'wins', '&', '58', 'nominations.']
['1', 'nomination.']
['N/A']
['Nominated', 'for', '4', 'Oscars.', 'Another', '12', 'wins', '&', '71', 'nominations.']
['16', 'nominations.']
['Nominated', 'for', '3', 'Oscars.', 'Another', '31', 'wins', '&', '44', 'nominations.']
['6', 'wins', '&', '15', 'nominations.']
['1', 'win', '&', '6', 'nominations.']
['2', 'wins', '&', '9', 'nominations.']
['2', 'wins.']
['2', 'nominations.

{'The Boss Baby': {'Oscar': 1, 'win': 23, 'nominations': 19},
 "A Dog's Purpose": {'nominations': 3},
 'Jigsaw': {},
 'Guardians of the Galaxy Vol. 2': {'Oscar': 1, 'wins': 68, 'nominations': 42},
 'Split': {'wins': 35, 'nominations': 19},
 'Jumanji: Welcome to the Jungle': {'wins': 7, 'nominations': 3},
 'All Eyez on Me': {'nomination': 1},
 'Downsizing': {'Golden': 1, 'win': 18, 'nominations': 14},
 'Annabelle: Creation': {'nominations': 3},
 'Despicable Me 3': {'nominations': 25},
 'Spider-Man: Homecoming': {'wins': 17, 'nominations': 9},
 "Molly's Game": {'Oscar': 1, 'wins': 48, 'nominations': 40},
 'The Lego Batman Movie': {'wins': 80, 'nominations': 58},
 'American Assassin': {'nomination': 1},
 'Geostorm': {},
 'Star Wars: The Last Jedi': {'Oscars': 4, 'wins': 103, 'nominations': 71},
 'Transformers: The Last Knight': {'nominations': 16},
 'Baby Driver': {'Oscars': 3, 'wins': 112, 'nominations': 44},
 'Atomic Blonde': {'wins': 27, 'nominations': 15},
 'Justice League': {'win': 8

In [None]:
#insert some of the movie data into tables
def add_movies_toDB():
    for movie in movies_dict:
        cursor.execute('''insert into main_movie_table 
        (title, release_date, RT_rating, IMDB_rating) values(%s,%s,%s,%s)''',
        movie['Title'], movie['Released'], movie['Rating'][1]['Value'].replace('%', ''), movie['imdbRating']) 
        conn.commit

def add_awards_toDB():
    for title in awards.keys():
        cursor.execute('''update main_movie_table  
        set award_wins = %s, set award_nominations = %s, where title = %s''', (awards, award[], title) 
        conn.commit
        
        

    
#insert the genre data into genre table         
def add_genres_toDB():
    for movie in movies_dict:
        ('''insert into genres (genre) values(%s)''', movie['Genre'])
        conn.commit
            
