In [None]:
import mysql.connector
from mysql.connector import errorcode
import pandas as pd
import numpy as np

In [None]:
# Function to insert 'list_oscars_BS' into the 'oscars' table.
def insert_oscars():
    """Insert every row of 'data/oscars/list_oscars_BS.csv' into the `oscars` table.

    The CSV must provide the columns ceremony_year, best_movie, best_director,
    best_actor and best_actress. One parameterized INSERT is executed per row
    and a single commit is issued after the last row, so no commit happens if
    any statement raises.

    MySQL errors raised while inserting are caught and printed; the cursor and
    the connection are closed in every case. Returns None.
    """
    # NOTE(review): credentials are hardcoded in the notebook; consider reading
    # them from environment variables or getpass before sharing this file.
    cnx = mysql.connector.connect(user='root', password='AlumnaAdalab', host='127.0.0.1', database='cinem_extract')
    mycursor = cnx.cursor()

    try:
        df = pd.read_csv('data/oscars/list_oscars_BS.csv')

        # One %s placeholder per column listed in the first parenthesis.
        sql_insert_query = """ INSERT INTO oscars (ceremony_year, best_movie, best_director, best_actor, best_actress) VALUES (%s, %s, %s, %s, %s)"""

        inserted_rows = 0
        for _, row in df.iterrows():
            mycursor.execute(sql_insert_query, (row['ceremony_year'], row['best_movie'], row['best_director'], row['best_actor'], row['best_actress']))
            # rowcount only describes the most recent execute(), so it has to
            # be accumulated to report the total for the whole file.
            inserted_rows += mycursor.rowcount

        cnx.commit()
        print(f"{inserted_rows} registro(s) insertado(s)📝")

    except mysql.connector.Error as err:
        print(f"Error al insertar en la base de datos: {err} ❌")

    finally:
        if cnx.is_connected():
            mycursor.close()
            cnx.close()
            print("Conexión a la base de datos cerrada🔚")

In [None]:
# Run the insert for the 'oscars' table; the function prints the outcome itself.
insert_oscars()

In [None]:
# 🎭 "ACTOR DETAILS" TABLE --> DEFINE ✍️ FUNCTIONS

# function for cleaning actor_details .csv: replaces 'none' and NaN with 0

def clean_ad_data(genre, start_page, end_page):
    """Clean one scraped actor CSV and save the result under the 'clean' folder.

    Reads 'data/selenium_actors/list_{genre}_range_{start_page}_{end_page}_sel_actor.csv',
    replaces the literal string "none" in the 'birth_year' column with 0,
    fills every missing value in the frame with 0, and writes the result to
    the same file name inside 'data/selenium_actors/clean/' with 'actor_name'
    as the first column.

    Args:
        genre: Genre part of the file name (e.g. "action", "comedy", "drama").
        start_page: First page number in the file name.
        end_page: Second page number in the file name.

    Returns None; progress is reported with print().
    """
    file_name = f'list_{genre}_range_{start_page}_{end_page}_sel_actor.csv'
    raw_path = f'data/selenium_actors/{file_name}'
    clean_path = f'data/selenium_actors/clean/{file_name}'

    # 1- read csv
    df = pd.read_csv(raw_path)
    print (f"{raw_path} opened")

    # 2- replace 'none' with 0 in birth_year, then fill remaining NaN with 0.
    # The result is assigned back: an inplace replace on df["birth_year"] acts
    # on a column selection and is not guaranteed to update df itself
    # (it does not under pandas Copy-on-Write).
    df["birth_year"] = df["birth_year"].replace("none", 0)
    df = df.fillna(value=0).set_index('actor_name')
    print ("csv updated")

    # 3- save the updated data as .csv (the index, actor_name, is written as the first column)
    df.to_csv(clean_path)
    print (f"{clean_path} CREATED✅")

# function to insert the data into 'actor_details' table

def insert_actor_details(genre, start_page, end_page):
    """Upsert one cleaned actor CSV into the `actor_details` table.

    Reads 'data/selenium_actors/clean/list_{genre}_range_{start_page}_{end_page}_sel_actor.csv'
    and runs one INSERT ... ON DUPLICATE KEY UPDATE per row, so a row whose
    key already exists is overwritten with the CSV values instead of failing.
    (Which column is the unique key is defined by the table schema, not here.)
    A single commit is issued after the last row.

    Args:
        genre: Genre part of the file name (e.g. "action", "comedy", "drama").
        start_page: First page number in the file name.
        end_page: Second page number in the file name.

    MySQL errors raised while inserting are caught and printed; the cursor and
    the connection are closed in every case. Returns None.
    """
    # NOTE(review): credentials are hardcoded in the notebook; consider reading
    # them from environment variables or getpass before sharing this file.
    cnx = mysql.connector.connect(user='root', password='AlumnaAdalab', host='127.0.0.1', database='cinem_extract')
    mycursor = cnx.cursor()

    try:
        df = pd.read_csv(f'data/selenium_actors/clean/list_{genre}_range_{start_page}_{end_page}_sel_actor.csv')

        sql_insert_query = """ 
        INSERT INTO actor_details (actor_name, birth_year, known_for, actor_role, actor_awards)
        VALUES (%s, %s, %s, %s, %s) 
        ON DUPLICATE KEY UPDATE 
            actor_name = VALUES(actor_name),
            birth_year = VALUES(birth_year),
            known_for = VALUES(known_for),
            actor_role = VALUES(actor_role),
            actor_awards = VALUES(actor_awards)"""

        for _, row in df.iterrows():
            mycursor.execute(sql_insert_query, (row['actor_name'],row['birth_year'],row['known_for'],row['actor_role'],row['actor_awards']))

        cnx.commit()
        # cursor.rowcount only describes the last execute(), and for an upsert
        # MySQL reports 2 affected rows for an updated row, so the number of
        # CSV rows sent is the meaningful total to report.
        print(f"{len(df)} registro(s) insertado(s)📝")

    except mysql.connector.Error as err:
        print(f"Error al insertar en la base de datos: {err} ❌")

    finally:
        if cnx.is_connected():
            mycursor.close()
            cnx.close()
            print("Conexión a la base de datos cerrada🔚")

In [None]:
# 🎭 "ACTOR DETAILS" TABLE --> EXECUTE ▶️ FUNCTIONS


# Enter the corresponding genre and the start and end page.
genre = 'drama'       # enter "action" "comedy" "drama" "oscars"
start_page = 1
end_page = 5

# Iterate over the list of files. range() excludes end_page, so each iteration
# handles the file named for the page pair (i, i+1): with 1 and 5 that is
# 1-2, 2-3, 3-4 and 4-5.
for i in range(start_page, end_page):
    
    print(f"Iteracion {i}")
    
    # clean data: writes the cleaned CSV under data/selenium_actors/clean/
    clean_ad_data (genre, i, i+1)

    # insert data: loads that cleaned CSV into the 'actor_details' table
    insert_actor_details(genre, i, i+1)
    

In [None]:
# Function to insert the data from 'actor_awards.csv' into the 'actor_awards' table. 
# It will not work if we don't have data in the 'actor_details' table.
def insert_actor_awards():
    """Insert every row of 'data/oscars/actor_awards.csv' into the `actor_awards` table.

    The CSV must provide the columns actor_name, ceremony_year and award_type.
    One parameterized INSERT is executed per row and a single commit is issued
    after the last row, so no commit happens if any statement raises.

    MySQL errors raised while inserting are caught and printed; the cursor and
    the connection are closed in every case. Returns None.
    """
    # NOTE(review): credentials are hardcoded in the notebook; consider reading
    # them from environment variables or getpass before sharing this file.
    cnx = mysql.connector.connect(user='root', password='AlumnaAdalab', host='127.0.0.1', database='cinem_extract')
    mycursor = cnx.cursor()

    try:
        df = pd.read_csv('data/oscars/actor_awards.csv')

        sql_insert_query = """ INSERT INTO actor_awards (actor_name, ceremony_year, award_type) VALUES (%s, %s, %s)"""

        inserted_rows = 0
        for _, row in df.iterrows():
            mycursor.execute(sql_insert_query, (row['actor_name'], row['ceremony_year'], row['award_type']))
            # rowcount only describes the most recent execute(), so it has to
            # be accumulated to report the total for the whole file.
            inserted_rows += mycursor.rowcount

        cnx.commit()
        print(f"{inserted_rows} registro(s) insertado(s)📝")

    except mysql.connector.Error as err:
        print(f"Error al insertar en la base de datos: {err} ❌")

    finally:
        if cnx.is_connected():
            mycursor.close()
            cnx.close()
            print("Conexión a la base de datos cerrada🔚")

In [None]:
# Call the insert function for the 'actor_awards' table.
# Per the note on its definition, this presumably needs 'actor_details' to be populated first.
insert_actor_awards()

In [None]:
# insert data into relation_movie_oscars

def insert_relation_movie_oscars():
    """Insert every row of 'data/oscars/relation_movie_oscars.csv' into `relation_movie_oscars`.

    The CSV must provide the columns id_imdb, best_movie and ceremony_year.
    One parameterized INSERT is executed per row and a single commit is issued
    after the last row, so no commit happens if any statement raises.

    MySQL errors raised while inserting are caught and printed; the cursor and
    the connection are closed in every case. Returns None.
    """
    # NOTE(review): credentials are hardcoded in the notebook; consider reading
    # them from environment variables or getpass before sharing this file.
    cnx = mysql.connector.connect(user='root', password='AlumnaAdalab', host='127.0.0.1', database='cinem_extract')
    mycursor = cnx.cursor()

    try:
        df = pd.read_csv('data/oscars/relation_movie_oscars.csv')

        # One %s placeholder per column listed in the first parenthesis.
        sql_insert_query = """ INSERT INTO relation_movie_oscars (id_imdb, best_movie, ceremony_year) VALUES (%s, %s, %s)"""

        inserted_rows = 0
        for _, row in df.iterrows():
            mycursor.execute(sql_insert_query, (row['id_imdb'], row['best_movie'], row['ceremony_year']))
            # rowcount only describes the most recent execute(), so it has to
            # be accumulated to report the total for the whole file.
            inserted_rows += mycursor.rowcount

        cnx.commit()
        print(f"{inserted_rows} registro(s) insertado(s)📝")

    except mysql.connector.Error as err:
        print(f"Error al insertar en la base de datos: {err} ❌")

    finally:
        if cnx.is_connected():
            mycursor.close()
            cnx.close()
            print("Conexión a la base de datos cerrada🔚")


In [None]:
# Call the insert function for the 'relation_movie_oscars' table; it prints the outcome itself.

insert_relation_movie_oscars()

In [None]:
# 📽️ "MOVIE DETAILS" TABLE --> DEFINE ✍️ FUNCTIONS

# we will probably need to define a function to clean up the data


# function to insert the data into 'movie_details' table

def insert_movie_details(genre, start_page, end_page):
    """Upsert one scraped movie CSV into the `movie_details` table.

    Reads 'data/selenium_movies/list_{genre}_range_{start_page}_{end_page}_sel_movies.csv'
    and runs one INSERT ... ON DUPLICATE KEY UPDATE per row, so a row whose
    key already exists is overwritten with the CSV values instead of failing.
    (Which column is the unique key is defined by the table schema, not here.)
    A single commit is issued after the last row.

    Args:
        genre: Genre part of the file name (e.g. "action", "comedy", "drama").
        start_page: First page number in the file name.
        end_page: Second page number in the file name.

    MySQL errors raised while inserting are caught and printed; the cursor and
    the connection are closed in every case. Returns None.
    """
    # NOTE(review): credentials are hardcoded in the notebook; consider reading
    # them from environment variables or getpass before sharing this file.
    cnx = mysql.connector.connect(user='root', password='AlumnaAdalab', host='127.0.0.1', database='cinem_extract')
    mycursor = cnx.cursor()

    try:
        df = pd.read_csv(f'data/selenium_movies/list_{genre}_range_{start_page}_{end_page}_sel_movies.csv')

        # Every assignment in the UPDATE list except the last must end with a
        # comma; a missing separator is a SQL syntax error on every execute.
        sql_insert_query = """ 
        INSERT INTO movie_details (id_imdb, score_imdb, score_rt, director_imdb, screenwriters_imdb, plot_rt, duration_imdb, title_imdb)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
        ON DUPLICATE KEY UPDATE 
            id_imdb = VALUES(id_imdb),
            score_imdb = VALUES(score_imdb),
            score_rt = VALUES(score_rt),
            director_imdb = VALUES(director_imdb),
            screenwriters_imdb = VALUES(screenwriters_imdb),
            plot_rt = VALUES(plot_rt),
            duration_imdb = VALUES(duration_imdb),
            title_imdb = VALUES(title_imdb)"""

        for _, row in df.iterrows():
            mycursor.execute(sql_insert_query, (row['id_imdb'],row['score_imdb'],row['score_rt'],row['director_imdb'],row['screenwriters_imdb'],row['plot_rt'],row['duration_imdb'],row['title_imdb']))

        cnx.commit()
        # cursor.rowcount only describes the last execute(), and for an upsert
        # MySQL reports 2 affected rows for an updated row, so the number of
        # CSV rows sent is the meaningful total to report.
        print(f"{len(df)} registro(s) insertado(s)📝")

    except mysql.connector.Error as err:
        print(f"Error al insertar en la base de datos: {err} ❌")

    finally:
        if cnx.is_connected():
            mycursor.close()
            cnx.close()
            print("Conexión a la base de datos cerrada🔚")

In [None]:
# 📽️ "MOVIE DETAILS" TABLE --> EXECUTE ▶️ FUNCTIONS


# Enter the corresponding genre and the start and end page.
genre = 'action'       # enter "action" "comedy" "drama" "oscars"
start_page = 1
end_page = 2

# Iterate over the list of files. range() excludes end_page, so each iteration
# handles the file named for the page pair (i, i+1); with 1 and 2 only the
# 1-2 file is loaded.
for i in range(start_page, end_page):
    
    print(f"Iteracion {i}")

    # insert data: loads that CSV into the 'movie_details' table
    insert_movie_details(genre, i, i+1)