In [None]:
# Importing standard libraries
import os
from urllib.parse import quote_plus

# Importing third party libraries
import yaml
import pandas as pd
import mysql.connector
from sqlalchemy import create_engine

In [None]:
# Absolute path of the credentials file, resolved against the current working directory.
FILENAME = os.path.join(os.getcwd(), "creds.yaml")

In [None]:
# Parse the credentials YAML into the module-level `creds` mapping.
# safe_load is used so the file cannot construct arbitrary Python objects.
with open(FILENAME, mode="r") as creds_file:
    creds = yaml.safe_load(creds_file)

In [None]:
# DDL for the zipcode dimension table: an auto-incremented surrogate key
# (zipcode_id) plus columns for the zipcode, its latitude/longitude, the
# autonomous community (and its "_nk" variant) and the province.
# All columns are NOT NULL. weather_dim and sales_fact declare foreign keys
# on zipcode_id, so this table has to be created before them.
QUERY_CREATE_ZIPCODE_DIM_TABLE ='''
CREATE TABLE IF NOT EXISTS zipcode_dim (
  zipcode_id int NOT NULL AUTO_INCREMENT PRIMARY KEY,
  zipcode varchar(50) NOT NULL,
  zc_latitude float NOT NULL,
  zc_longitude float NOT NULL,
  autonomous_community varchar(50) NOT NULL,
  autonomous_community_nk varchar(50) NOT NULL,
  province varchar(50) NOT NULL
);
'''

In [None]:
# DDL for the weather dimension table: an auto-incremented row id, a
# zipcode_id foreign key into zipcode_dim, a year, and four averaged measures
# (temperature, relative humidity, precipitation rate, wind speed).
# Because of the foreign key, zipcode_dim must already exist when this
# statement is executed.
QUERY_CREATE_WEATHER_DIM_TABLE = '''
CREATE TABLE IF NOT EXISTS weather_dim (
  weather_id int NOT NULL AUTO_INCREMENT PRIMARY KEY,
  zipcode_id int NOT NULL,
  year int NOT NULL,
  avg_temperature float NOT NULL,
  avg_relative_humidity float NOT NULL,
  avg_precipitation_rate float NOT NULL,
  avg_wind_speed float NOT NULL,
  Foreign Key (zipcode_id) references zipcode_dim(zipcode_id)
);
'''

In [None]:
# DDL for the sales fact table: an auto-incremented sales_id, a zipcode_id
# foreign key into zipcode_dim, the lead id, phase/financing descriptors,
# a series of process dates, and installation figures (peak power, price,
# number of panels). zipcode_dim must exist before this statement runs.
# Only is_modified and n_panels are nullable; every other column is NOT NULL.
# NOTE(review): all date columns (including ko_date and sale_dismissal_date)
# are NOT NULL -- confirm every lead really carries a value for each of them,
# otherwise inserts will be rejected.
QUERY_CREATE_SALES_FT_TABLE = '''
CREATE TABLE IF NOT EXISTS sales_fact (
  sales_id int NOT NULL AUTO_INCREMENT PRIMARY KEY,
  zipcode_id int NOT NULL,
  lead_id varchar(50) NOT NULL,
  financing_type varchar(50) NOT NULL,
  current_phase varchar(50) NOT NULL,
  phase_pre_ko varchar(50) NOT NULL,
  is_modified int,
  offer_sent_date date NOT NULL,
  contract_1_dispatch_date date NOT NULL,
  contract_2_dispatch_date date NOT NULL,
  contract_1_signature_date date NOT NULL,
  contract_2_signature_date date NOT NULL,
  most_recent_contract_signature_date date NOT NULL,
  visit_date date NOT NULL,
  technical_review_date date NOT NULL,
  project_validation_date date NOT NULL,
  sale_dismissal_date date NOT NULL,
  ko_date date NOT NULL,
  visiting_company varchar(50) NOT NULL,
  ko_reason varchar(50) NOT NULL,
  installation_peak_power_kwf float NOT NULL,
  installation_price float NOT NULL,
  n_panels smallint,
  customer_type varchar(50) NOT NULL,
  Foreign Key (zipcode_id) references zipcode_dim(zipcode_id)
);
'''						

In [None]:
def create_table():
    """
    Create the zipcode_dim, weather_dim and sales_fact tables if they are missing.

    Connects to MySQL with the settings found in the 'mysql-db' section of the
    module-level ``creds`` mapping and executes the three
    CREATE TABLE IF NOT EXISTS statements defined in this file. zipcode_dim is
    created first because the other two tables declare foreign keys on it.

    The cursor and the connection are always closed, even when a statement
    fails; the original exception then propagates to the caller.
    """
    db_settings = creds['mysql-db']
    connection = mysql.connector.connect(
        user=db_settings['username'],
        password=db_settings['password'],
        host=db_settings['host'],
        database=db_settings['database'],
    )
    try:
        cursor = connection.cursor()
        try:
            # Order matters: the dimension table referenced by the foreign
            # keys has to exist before the tables that reference it.
            for ddl_statement in (
                QUERY_CREATE_ZIPCODE_DIM_TABLE,
                QUERY_CREATE_WEATHER_DIM_TABLE,
                QUERY_CREATE_SALES_FT_TABLE,
            ):
                cursor.execute(ddl_statement)
            connection.commit()
            print("Table structures created successfully.")
        finally:
            cursor.close()
    finally:
        # Release the connection even if connecting succeeded but a
        # statement (or the commit) raised.
        connection.close()

In [None]:
def write_to_database(dfs_dict, if_exists='append'):
    """
    Write one or more dataframes into MySQL tables.

    Connection settings are read from the 'mysql-db' section of the
    module-level ``creds`` mapping, the same section ``create_table`` uses.

    Args:
        dfs_dict (dict): Maps each target table name to the DataFrame to
            insert. Tables are written in the dict's iteration order, so a
            table referenced by foreign keys (e.g. zipcode_dim) should come
            before the tables that reference it. Values that are not
            DataFrames are skipped with a message.
        if_exists (str): Default 'append'. Other values are 'fail' and 'replace'
    """
    db_settings = creds['mysql-db']

    # URL-escape user and password so characters such as '@', ':' or '/' in
    # the credentials cannot corrupt the connection URL. str() keeps values
    # that YAML parsed as numbers working, as they did with plain f-string
    # interpolation.
    _db_user = quote_plus(str(db_settings['username']))
    _db_password = quote_plus(str(db_settings['password']))
    _db_host = db_settings['host']
    _db_name = db_settings['database']

    # NOTE(review): this URL selects the PyMySQL driver, which this file does
    # not import (create_table uses mysql.connector); it has to be installed
    # separately.
    engine = create_engine(f"mysql+pymysql://{_db_user}:{_db_password}@{_db_host}:3306/{_db_name}")
    try:
        with engine.connect() as connection:
            for table_name, df in dfs_dict.items():
                if isinstance(df, pd.DataFrame):
                    df.to_sql(table_name, con=connection, if_exists=if_exists, index=False)
                    print(f"Data successfully inserted into {table_name}")
                else:
                    print(f"Skipping {table_name}: Not a valid DataFrame")
    finally:
        # Release pooled connections held by the engine.
        engine.dispose()

    # return logger.info("Completed uploading all data..")

In [None]:
# Entry point: create the table structures. The data load below is left
# disabled; dfs_dict is not defined in the visible cells of this notebook.
if __name__ == "__main__":
    create_table()
    # write_to_database(dfs_dict)