In [None]:
import os
import pandas as pd
from IPython.display import Markdown, display
import mysql.connector as ms

In [None]:
# # Usar no Google Colab
# from google.colab import auth
# auth.authenticate_user()
# path_folder_gold_dim = '/content/'
# path_folder_gold_fact = '/content/'

In [None]:
# Local (VSCode) run: point GOOGLE_APPLICATION_CREDENTIALS at the key file and
# set the folders that hold the gold dimension and fact CSV files.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "../gcp_key.json"
# Plain string literals: these paths contain no placeholders, so no f-prefix.
path_folder_gold_dim = '../data/temp/gold/dim/'
path_folder_gold_fact = '../data/temp/gold/fact/'

In [None]:
def extract_year_month(date):
    """Format ``date`` as two-digit year followed by two-digit month.

    Args:
        date: Any object with a ``strftime`` method (e.g. a pandas Timestamp).

    Returns:
        A four-character string such as ``'1706'`` for June 2017.
    """
    year_month_pattern = '%y%m'
    return date.strftime(year_month_pattern)


# Example usage: behave as if the run date were 1 June 2017.
year_month = extract_year_month(
    pd.to_datetime('2017-06-01')
)
print(f'Year-Month: {year_month}')

In [None]:
# CSV file names of the two dimension files (read from path_folder_gold_dim).
file_name_dim_Line = 'dim_mta_PublishedLine.csv'
file_name_dim_Vehicle = 'dim_mta_VehicleRef.csv'
# The fact file name embeds year_month, e.g. 'fact_mta_1706.csv' for June 2017.
file_name_fact = f'fact_mta_{year_month}.csv'

In [None]:
def read_csv_modeled_data(path_folder_gold, file_name):
    """Read one CSV file from a folder into a DataFrame and log that it was read.

    Args:
        path_folder_gold: Folder that contains the file.
        file_name: Name of the CSV file inside that folder.

    Returns:
        The DataFrame produced by ``pandas.read_csv``.
    """
    csv_path = os.path.join(path_folder_gold, file_name)
    loaded_frame = pd.read_csv(csv_path)
    print(f'Arquivo {file_name} lido.')
    return loaded_frame

df_dim_Line = read_csv_modeled_data(path_folder_gold_dim, file_name_dim_Line)
df_dim_Vehicle = read_csv_modeled_data(path_folder_gold_dim, file_name_dim_Vehicle)
df_fact = read_csv_modeled_data(path_folder_gold_fact, file_name_fact)

# Preview every loaded table: heading, column summary, then the first rows.
for preview_name, preview_df in (
    (file_name_dim_Line, df_dim_Line),
    (file_name_dim_Vehicle, df_dim_Vehicle),
    (file_name_fact, df_fact),
):
    display(Markdown(f'# {preview_name}'))
    # DataFrame.info() prints its report itself and returns None, so it is
    # called directly; wrapping it in display() adds a stray "None" output.
    preview_df.info()
    display(preview_df.head())

In [None]:
def connect_to_mysql():
    """Open a connection to the MySQL server.

    Host, user and password are taken from the MYSQL_HOST, MYSQL_USER and
    MYSQL_PASSWORD environment variables when they are set.

    Returns:
        The open connection, or None when ``ms.connect`` raises ``ms.Error``
        (the error is printed).
    """
    # NOTE(review): the fallback values keep the notebook running exactly as
    # before, but they still leave a credential in the source; remove them once
    # the environment variables are configured.
    host = os.environ.get('MYSQL_HOST', 'localhost')
    user = os.environ.get('MYSQL_USER', 'diego')
    password = os.environ.get('MYSQL_PASSWORD', 'diego')
    try:
        connection = ms.connect(
            host=host,
            user=user,
            password=password,
        )
        print("Conexão ao MySQL estabelecida.")
        return connection
    except ms.Error as err:
        print(f"Erro ao conectar ao MySQL: {err}")
        return None
    
# Open the MySQL connection used by the following cells; connect_to_mysql()
# prints the error and returns None when ms.connect raises ms.Error.
ms_connection = connect_to_mysql()

In [None]:
def create_cursor(ms_connection):
    """Create a cursor on a MySQL connection.

    Args:
        ms_connection: Object exposing ``cursor()``. It may be None when the
            earlier connection attempt failed; that case is reported like any
            other failure.

    Returns:
        The new cursor, or None when it could not be created (the cause is
        printed).
    """
    try:
        ms_cursor = ms_connection.cursor()
        print("Cursor do MySQL criado.")
        return ms_cursor
    except Exception as err:
        # Exception instead of a bare except: KeyboardInterrupt and SystemExit
        # must still propagate, and the cause of the failure is now shown.
        print(f"Erro ao criar o cursor do MySQL: {err}")
        return None
    
# Cursor shared by the following cells; create_cursor() returns None on failure.
ms_cursor = create_cursor(ms_connection)

In [None]:
def drop_db(ms_cursor, db_name):
    """Drop a database if it exists and report the outcome.

    Args:
        ms_cursor: Cursor used to execute the statement.
        db_name: Name of the database, interpolated into the statement as-is.

    Returns:
        ``ms_cursor`` on success; None when ``ms.Error`` is raised (the error
        is printed).
    """
    statement = f"DROP DATABASE IF EXISTS {db_name}"
    try:
        ms_cursor.execute(statement)
    except ms.Error as err:
        print(f"Erro ao excluir o banco de dados {db_name}: {err}")
        return None
    print(f"Banco de dados {db_name} excluído, se existia.")
    return ms_cursor

In [None]:
def create_db(db_name, ms_cursor):
    """Create a database if it does not exist and select it with ``USE``.

    Args:
        db_name: Name of the database, interpolated into the statements as-is.
        ms_cursor: Cursor used to execute the statements.

    Returns:
        ``ms_cursor`` on success; None on failure (the cause is printed).
    """
    sql = f"CREATE DATABASE IF NOT EXISTS {db_name}"
    try:
        ms_cursor.execute(sql)
        ms_cursor.execute(f"USE {db_name}")
        print(f"Banco de dados '{db_name}' criado/selecionado.")
        return ms_cursor
    except Exception as err:
        # Exception instead of a bare except: KeyboardInterrupt and SystemExit
        # must still propagate, and the cause of the failure is now shown.
        print(f"Erro ao criar o banco de dados '{db_name}': {err}")
        return None
        
# create_db returns the cursor it was given on success and None on failure.
# Stop here on failure instead of rebinding ms_cursor to None, which would make
# the following cells fail with an unrelated AttributeError.
if create_db('db_bus_gps', ms_cursor) is None:
    raise RuntimeError("Falha ao criar/selecionar o banco de dados 'db_bus_gps'.")

In [None]:
def create_tb(tb_name, sql, ms_cursor):
    """Execute a table-creation statement and report the outcome.

    Args:
        tb_name: Table name; used only in the printed messages.
        sql: Statement passed to ``ms_cursor.execute``.
        ms_cursor: Cursor used to execute the statement.

    Returns:
        ``ms_cursor`` on success; None when ``ms.Error`` is raised (the error
        is printed).
    """
    try:
        ms_cursor.execute(sql)
    except ms.Error as err:
        print(f"Erro ao criar a tabela '{tb_name}': {err}")
        return None
    print(f"Tabela '{tb_name}' criada ou já existe.")
    return ms_cursor

In [None]:
tb_name = 'tb_line'

# Single-column table keyed by Published_Line_Name.
sql = f'''
    CREATE TABLE IF NOT EXISTS {tb_name} (
        Published_Line_Name VARCHAR(10) NOT NULL PRIMARY KEY
    )
'''

# create_tb returns the cursor it was given on success and None on failure.
# Stop here on failure instead of rebinding ms_cursor to None, so the working
# cursor stays usable when the cell is re-run.
if create_tb(tb_name, sql, ms_cursor) is None:
    raise RuntimeError(f"Falha ao criar a tabela '{tb_name}'.")

# List the tables of the selected database as a quick check.
ms_cursor.execute("SHOW TABLES")
print([table for table in ms_cursor])

In [None]:
tb_name = 'tb_vehicle'

# Single-column table keyed by Vehicle_Ref.
sql = f'''
    CREATE TABLE IF NOT EXISTS {tb_name} (
        Vehicle_Ref VARCHAR(15) NOT NULL PRIMARY KEY
    )
'''

# create_tb returns the cursor it was given on success and None on failure.
# Stop here on failure instead of rebinding ms_cursor to None, so the working
# cursor stays usable when the cell is re-run.
if create_tb(tb_name, sql, ms_cursor) is None:
    raise RuntimeError(f"Falha ao criar a tabela '{tb_name}'.")

# List the tables of the selected database as a quick check.
ms_cursor.execute("SHOW TABLES")
print([table for table in ms_cursor])

In [None]:
tb_name = 'tb_bus_gps'

# The two foreign-key columns use the same VARCHAR lengths as the keys they
# reference: tb_line.Published_Line_Name is VARCHAR(10) and
# tb_vehicle.Vehicle_Ref is VARCHAR(15). A shorter child column could not hold
# every value the parent key allows.
# CREATE TABLE IF NOT EXISTS leaves an already existing tb_bus_gps untouched.
# NOTE(review): "Recorded_A_tDate" looks like a typo for "Recorded_At_Date" and
# "ScheduledTimeRange" lacks the underscores used by the other columns. Both are
# left unchanged because code outside this cell may refer to these names;
# confirm before renaming.
sql = f'''CREATE TABLE IF NOT EXISTS {tb_name} (
        Recorded_At_Time        DATETIME NOT NULL,
        Direction_Ref           INT NOT NULL,
        Published_Line_Name     VARCHAR(10) NOT NULL,
        Vehicle_Ref             VARCHAR(15) NOT NULL,
        Scheduled_Arrival_Time  DATETIME NOT NULL,
        Recorded_A_tDate        DATE NOT NULL,
        Diff_Arrival_Mins       INT NOT NULL,
        Recorded_Time_Range     INT NOT NULL,
        ScheduledTimeRange      INT NOT NULL,
        
        FOREIGN KEY (Vehicle_Ref) REFERENCES tb_vehicle(Vehicle_Ref),
        FOREIGN KEY (Published_Line_Name) REFERENCES tb_line(Published_Line_Name)
    )
    '''

# create_tb returns the cursor it was given on success and None on failure.
# Stop here on failure instead of rebinding ms_cursor to None, so the working
# cursor stays usable when the cell is re-run.
if create_tb(tb_name, sql, ms_cursor) is None:
    raise RuntimeError(f"Falha ao criar a tabela '{tb_name}'.")

# List the tables of the selected database as a quick check.
ms_cursor.execute("SHOW TABLES")
print([table for table in ms_cursor])