In [1]:
## Connect to DB server on AWS
import mysql.connector
from mysql.connector import errorcode
import config
import requests
import time

db_name= 'Movies'
cnx = mysql.connector.connect(
    host = config.host,
    user = config.user,
    passwd = config.password,
    
)
cursor = cnx.cursor()
print(cnx)

<mysql.connector.connection.MySQLConnection object at 0x10599cf98>


In [2]:
## Create new DB 
def create_database(cursor, database):
    try:
        cursor.execute(
            "CREATE DATABASE {} DEFAULT CHARACTER SET 'utf8'".format(database))
    except mysql.connector.Error as err:
        print("Failed creating database: {}".format(err))
        exit(1)

try:
    cursor.execute("USE {}".format(db_name))
except mysql.connector.Error as err:
    print("Database {} does not exists.".format(db_name))
    if err.errno == errorcode.ER_BAD_DB_ERROR:
        create_database(cursor, db_name)
        print("Database {} created successfully.".format(db_name))
        cnx.database = db_name
    else:
        print(err)
        exit(1)

In [3]:
db_name= 'Movies'

TABLES = {}
TABLES['people'] = (
    "CREATE TABLE people ("
    "  personId int NOT NULL AUTO_INCREMENT,"
    "  personName varchar(35) NOT NULL,"     
    "  personRole varchar(10) NOT NULL,"   
    "  PRIMARY KEY (personId),"
    "  UNIQUE KEY  (personName,personRole)"
    ") ENGINE=InnoDB")

TABLES['movies_people'] = """CREATE TABLE movies_people (
        movieId INT NOT NULL,
        personId INT NOT NULL,
        PRIMARY KEY (movieId,personId),
        INDEX movieId_idx (movieId ASC),
        CONSTRAINT movieId
            FOREIGN KEY (movieId)
            REFERENCES movies (movieId)
            ON DELETE CASCADE
            ON UPDATE CASCADE,
        CONSTRAINT personId
            FOREIGN KEY (personId)
            REFERENCES people (personId)
            ON DELETE CASCADE
            ON UPDATE CASCADE
) ENGINE=InnoDB;
"""

In [4]:
def create_table(dict_of_tables):
    for table_name in dict_of_tables:
        table_description = dict_of_tables[table_name]
        try:
            print("Creating table {}: ".format(table_name), end='')  
            cursor.execute(table_description)
        except mysql.connector.Error as err:
            if err.errno == errorcode.ER_TABLE_EXISTS_ERROR:
                print("already exists.")
            else:
                print(err.msg)
        else:
            print("OK")
            
def drop_everything():
    cursor.execute("""DROP TABLE movies_people;""")
    cursor.execute("""DROP TABLE people;""")
    cnx.commit()
    
def close_connections():
    cursor.close()
    cnx.close()

In [5]:
create_table(TABLES)

Creating table people: already exists.
Creating table movies_people: already exists.


In [7]:
def all_results(movies):
    results = []
    
    for i in range(len(movies)):
        title = movies[i][1]
        
        url_params = {'term': title.replace(' ', '+')}
        url = f'http://www.omdbapi.com/?apikey={config.api_key}&t={title}'
        response = requests.get(url, params=url_params)
        results.append(response.json())
        time.sleep(1) # wait a second
    return results

def get_movie_titles():
    cursor.execute("""SELECT movieId, movieTitle FROM movies;""")
    return cursor.fetchall()

In [8]:
movie_titles = get_movie_titles()
titles_only = [movie_tuple[1].replace('.','') for movie_tuple in movie_titles]
print(len(movie_titles))
print(titles_only)

906
['10,000 BC', '102 Dalmatians', '2 Fast 2 Furious', '2 Guns', '2012', '300', '300: Rise of an Empire', '47 Ronin', '50 First Dates', 'A Beautiful Mind', 'A Civil Action', 'A Dogâ\x80\x99s Way Home', 'A Good Day to Die Hard', 'A Sound of Thunder', 'A Wrinkle in Time', 'Abraham Lincoln: Vampire Hunter', 'After Earth', 'After the Sunset', 'Air Force One', 'Air Strike', 'Aladdin', 'Alexander', 'Ali', 'Alice in Wonderland', 'Alice Through the Looking Glass', 'Alien: Covenant', 'Alien: Resurrection', 'Alita: Battle Angel', 'Allied', 'Alvin and the Chipmunks', 'Alvin and the Chipmunks: Chipwrecked', 'Alvin and the Chipmunks: The Road Chip', 'Alvin and the Chipmunks: The Squeakquel', 'American Assassin', 'American Gangster', 'American Sniper', 'American Wedding', 'Analyze That', 'Angels & Demons', 'Anger Management', 'Annie', 'Ant-Man', 'Ant-Man and the Wasp', 'Antz', 'Any Given Sunday', 'Apollo 13', 'Aquaman', 'Armageddon', 'Around the World in 80 Days', 'Arthur Christmas', 'Arthur et les

In [29]:
# raw_movie_data = all_results(movie_titles)
# print(len(raw_movie_data) == len(movie_titles))
counter = 0
for temp in raw_movie_data:
#     print(temp)
    if temp['Response'] == 'True':
        if temp['Title'] in titles_only:
            counter += 1
        else:
            print(temp['Title'])

Batman
Battle Los Angeles
Cats
Cats
Die Hard with a Vengeance
Dr. Dolittle 2
Edtv
Fast & Furious 6
Fun with Dick and Jane
G.I. Joe: Retaliation
G.I. Joe: The Rise of Cobra
George of the Jungle
GoldenEye
Guardians of the Galaxy Vol. 2
Horton Hears a Who!
I Am Legend
Just Go with It
Lara Croft Tomb Raider: The Cradle of Life
Lost in Space
Marley
Pen huo mei ren yu
All Access: Making Michael Jackson's This Is It
Mission: Impossible - Ghost Protocol
Monsters vs. Aliens
Monsters, Inc.
Mr. and Mrs. Smith
Mr. Peabody & Sherman
Percy Jackson
R.I.P.D.
Ralph Breaks the Internet
RED
S.W.A.T.
Scott Pilgrim vs. the World
Spider-Man: Far from Home
You Nazty Spy!
Terminator Genisys
The Lego Movie 2: The Second Part
The Man from U.N.C.L.E.
The Twilight Saga: Breaking Dawn - Part 1
The Twilight Saga: Breaking Dawn - Part 2
The World Is Not Enough
TRON: Legacy
Underworld Awakening
Up Close
WALL·E
Warcraft: The Beginning
Who Framed Roger Rabbit
xXx: State of the Union
You Don't Mess with the Zohan
Zathur

In [10]:
def moviesfromOM(initialdata):
    movies=[]
    for movie in initialdata:
        if movie['Response'] == 'True':
            movieinfo={}
            movieinfo['Title']=movie['Title']
            movieinfo["Director"]=[director.strip() for director in movie['Director'].split(',')] # how does it add quotation to each? 
            movieinfo['Writer']=[writer.split('(')[0].strip() for writer in movie['Writer'].split(',')]
            #movieinfo['Writer'] = movie['Writer'].split(',')
            movieinfo['Actors']=[actor.strip() for actor in movie['Actors'].split(',')]
            movies.append(movieinfo)
    return movies

In [11]:
movie_data = moviesfromOM(raw_movie_data)
movie_data

[{'Title': '10,000 BC',
  'Director': ['Roland Emmerich'],
  'Writer': ['Roland Emmerich', 'Harald Kloser'],
  'Actors': ['Steven Strait', 'Camilla Belle', 'Cliff Curtis', 'Joel Virgel']},
 {'Title': '102 Dalmatians',
  'Director': ['Kevin Lima'],
  'Writer': ['Dodie Smith',
   'Kristen Buckley',
   'Brian Regan',
   'Kristen Buckley',
   'Brian Regan',
   'Bob Tzudiker',
   'Noni White'],
  'Actors': ['Glenn Close',
   'Gérard Depardieu',
   'Ioan Gruffudd',
   'Alice Evans']},
 {'Title': '2 Fast 2 Furious',
  'Director': ['John Singleton'],
  'Writer': ['Gary Scott Thompson',
   'Michael Brandt',
   'Derek Haas',
   'Gary Scott Thompson',
   'Michael Brandt',
   'Derek Haas'],
  'Actors': ['Paul Walker', 'Tyrese Gibson', 'Eva Mendes', 'Cole Hauser']},
 {'Title': '2 Guns',
  'Director': ['Baltasar Kormákur'],
  'Writer': ['Blake Masters', 'Steven Grant'],
  'Actors': ['Denzel Washington',
   'Mark Wahlberg',
   'Paula Patton',
   'Edward James Olmos']},
 {'Title': '2012',
  'Director'

In [12]:
def make_people_tuples(movie_dict):    
    retVal = []
    title = movie_dict['Title'] ## wh
    for key in movie_dict.keys():
        if key.title() == 'Director' or key.title() == 'Writer' or key.title() == 'Actors':
            for person in movie_dict[key]:
                retVal.append((person,key,title))
    return retVal

In [12]:
temp = []
for movie in movie_data:
    temp.extend(make_people_tuples(movie))
print(len(temp))

887


In [16]:
def add_people(list_of_people):
#     for person in list_of_people:
#         print(person)
#         cursor.execute("""INSERT IGNORE INTO people (personName,personRole) VALUES (%s,%s)""", person[:2])
#         cnx.commit()
    for person in list_of_people:
        connect_people_to_movies(person)

In [24]:
def connect_people_to_movies(person_info):
#     try:
    movie_id = movie_titles[titles_only.index(person_info[2])][0] ## movieId of movie
#     print(person_info)
    query = f"""SELECT personId FROM people WHERE personName = "{person_info[0]}" AND personRole = "{person_info[1]}";"""
#     print(query)
    cursor.execute(query)
#     print("EXECUTED")
    person_id = cursor.fetchall()
#     print(movie_id,person_id)
    cursor.execute("""INSERT IGNORE INTO movies_people (movieId,personId) VALUES (%s,%s);""",(movie_id,person_id[0][0]))
#     print("INSERTED")
    cnx.commit()
#     print("COMMITED")
#     except:
#         print("Gone")

In [25]:
for movie in movie_data:
    add_people(make_people_tuples(movie))
# movie_titles

InterfaceError: No result set to fetch from.

In [23]:
# cursor.execute("""SELECT * FROM people LIMIT 5;""")
cursor.fetchall()

[]

In [6]:
# drop_everything()
# create_table(TABLES)

Creating table people: OK
Creating table movies_people: OK


In [18]:
close_connections()