# Importing data from a sample csv


In [44]:
# not sure why the instructor suggests using this csv method, when pandas can import an xls file directly
import csv

# Demonstrate reading a CSV file with the standard-library csv module:
# every parsed row is printed as a list of strings.
# The file is opened with newline="" because the csv documentation requires
# it for file objects passed to csv.reader; without it, line breaks embedded
# inside quoted fields are not interpreted correctly.
with open("sample-data.csv", "r", newline="") as file:
    reader = csv.reader(file, delimiter=",")
    for row in reader:
        print(row)

['first_name', 'last_name', 'email']
['John', 'Doe', 'john@gmail.com']
['Kelly', 'Smith', 'ks@gmail.com']
['Roger', 'Moore', 'moore@gmail.com']


# DDL: create the database and tables


In [45]:
# first install the mysql-connector-python package using the following command
#   pip install mysql-connector-python

import mysql.connector
import yaml

# Load the connection settings from a local YAML file so that no credentials
# are hard-coded in the notebook. The DB_HOST, DB_USER and DB_PASSWORD keys
# are read from it below.
# The `with` block closes the file handle as soon as the YAML is parsed
# instead of leaving it open for the lifetime of the kernel.
with open("secrets.yaml") as secrets_file:
    db = yaml.safe_load(secrets_file)

# Establish a connection to the MySQL server. No database is selected here;
# the schema is created and selected by the statements that follow.
cnx = mysql.connector.connect(
    host=db["DB_HOST"],
    user=db["DB_USER"],  # both root and andrewcbuensalida works
    password=db["DB_PASSWORD"],
)

# Create a cursor object; every later cell executes its SQL through it
cursor = cnx.cursor()

# Rebuild the mrts database from scratch: remove any previous copy, create an
# empty one, then make it the default database for the statements that follow.
for statement in (
    "DROP DATABASE IF EXISTS mrts",
    "CREATE DATABASE mrts",
    "USE mrts",
):
    cursor.execute(statement)

# DDL for the four mrts tables, keyed by table name. The entries are listed
# (and therefore executed) in dependency order: naics_code and
# kind_of_business come before the tables whose FOREIGN KEYs reference them.
table_ddl = {
    # One row per NAICS code
    "naics_code": """
    CREATE TABLE naics_code (
        id INT AUTO_INCREMENT PRIMARY KEY,
        naics_code VARCHAR(255)
    )
""",
    # One row per kind of business
    "kind_of_business": """
    CREATE TABLE kind_of_business (
        id INT AUTO_INCREMENT PRIMARY KEY,
        kind_of_business VARCHAR(255)
    )
""",
    # One row per (kind of business, month, year) sales figure
    "sales": """
    CREATE TABLE sales (
        id INT AUTO_INCREMENT PRIMARY KEY,
        sales_amount DECIMAL(10, 2),
        month INT,
        year INT,
        kind_of_business_id INT,
        FOREIGN KEY (kind_of_business_id) REFERENCES kind_of_business(id)
    )
""",
    # Association table linking NAICS codes to kinds of business
    "naics_kind_of_business_association": """
    CREATE TABLE naics_kind_of_business_association (
        naics_code_id INT,
        kind_of_business_id INT,
        FOREIGN KEY (naics_code_id) REFERENCES naics_code(id),
        FOREIGN KEY (kind_of_business_id) REFERENCES kind_of_business(id)
    )
""",
}

for ddl in table_ddl.values():
    cursor.execute(ddl)

# Import the 2020 MRTS CSV data and save it into MySQL


In [46]:
# Populate the kind_of_business table from the 2020 MRTS sheet.
# CSV lines 6 through 70 (0-based) are the not adjusted industry categories;
# the value in column 1 of each of those lines is stored as one row.
insert_kind_sql = "INSERT INTO kind_of_business (kind_of_business) VALUES (%s)"

with open("mrtssales92-present.xls - 2020.csv", "r") as csv_file:
    for line_no, fields in enumerate(csv.reader(csv_file, delimiter=",")):
        if not 6 <= line_no <= 70:
            continue

        cursor.execute(insert_kind_sql, (fields[1],))

        # Each insert is committed immediately
        cnx.commit()

In [47]:
# Populate the naics_code table from column 0 of the 2020 MRTS sheet.
# A cell may hold several comma-separated codes; each piece is stored only if
# the table does not already contain it.
count_code_sql = "SELECT COUNT(*) FROM naics_code WHERE naics_code = %s"
insert_code_sql = "INSERT INTO naics_code (naics_code) VALUES (%s)"

with open("mrtssales92-present.xls - 2020.csv", "r") as csv_file:
    for line_no, fields in enumerate(csv.reader(csv_file, delimiter=",")):
        # Keep only the not adjusted industry categories (lines 6-70) whose
        # NAICS cell is non-empty
        if line_no < 6 or line_no > 70 or not fields[0]:
            continue

        for naics in fields[0].split(","):
            # How many rows already hold this code?
            cursor.execute(count_code_sql, (naics,))
            already_stored = cursor.fetchone()[0]
            if already_stored != 0:
                continue

            # First time this code is seen: store and commit it
            cursor.execute(insert_code_sql, (naics,))
            cnx.commit()

In [48]:
# Insert NAICS code-kind of business associations into table.
# For every not adjusted industry category (CSV lines 6-70) that has a
# non-empty NAICS cell, each comma-separated code in that cell is linked to
# the kind of business named in column 1 of the same line.
naics_id_sql = "SELECT id FROM naics_code WHERE naics_code = %s"
kind_id_sql = "SELECT id FROM kind_of_business WHERE kind_of_business = %s"
association_sql = "INSERT INTO naics_kind_of_business_association (naics_code_id, kind_of_business_id) VALUES (%s, %s)"

with open("mrtssales92-present.xls - 2020.csv", "r") as file:
    reader = csv.reader(file, delimiter=",")
    for index, row in enumerate(reader):
        if not (6 <= index <= 70 and row[0]):
            continue

        # The kind of business is the same for every code on this line, so
        # its id is looked up once per line rather than once per code.
        cursor.execute(kind_id_sql, (row[1],))
        result = cursor.fetchone()
        if result is None:
            # Fail with a readable message instead of "'NoneType' object is
            # not subscriptable"
            raise LookupError(
                f"CSV line {index}: kind of business {row[1]!r} is not in the kind_of_business table"
            )
        kind_of_business_id = result[0]

        for code in row[0].split(","):
            # Get the NAICS code ID
            cursor.execute(naics_id_sql, (code,))
            result = cursor.fetchone()
            if result is None:
                raise LookupError(
                    f"CSV line {index}: NAICS code {code!r} is not in the naics_code table"
                )
            naics_code_id = result[0]

            # Insert the association into the table
            cursor.execute(association_sql, (naics_code_id, kind_of_business_id))

# Commit once, after the whole file has been processed: a failed lookup then
# leaves the association table untouched rather than half-filled.
cnx.commit()

In [49]:
# Insert sales data into table.
# The year comes from the sheet title on the first CSV line. Each not adjusted
# industry category (CSV lines 6-70) contributes one sales row per month that
# has a usable amount.
select_kind_sql = "SELECT id FROM kind_of_business WHERE kind_of_business = %s"
insert_sales_sql = "INSERT INTO sales (kind_of_business_id, sales_amount, month, year) VALUES (%s, %s, %s, %s)"

with open("mrtssales92-present.xls - 2020.csv", "r") as file:
    reader = csv.reader(file, delimiter=",")
    for index, row in enumerate(reader):
        if index == 0:
            # The year is the text after the last ":" of the title; it is
            # parsed once here instead of once per sales value.
            title = row[0]
            year = int(title.split(":")[-1])

        if not 6 <= index <= 70:  # not adjusted industry categories
            continue

        # Get the kind of business id from sql
        cursor.execute(select_kind_sql, (row[1],))
        result = cursor.fetchone()
        if result is None:
            # Skip the row: continuing would attach its sales to the id left
            # over from the previous row (or raise NameError on the first one).
            print(
                f"No matching id found for {row[1]!r}, skipping its sales"
            )  # maybe could add a new kind of business here
            continue
        kind_of_business_id = result[0]

        # sales from january to december (the last column is left out)
        sales = row[2:-1]

        # `month` has its own name so the outer `index` is no longer shadowed
        for month, sales_amount in enumerate(sales, start=1):
            # There are some data that have a string (S) instead of a proper
            # sales amount, and some are empty. Just don't insert these sales.
            if not sales_amount or sales_amount == "(S)":
                continue

            # Amounts use "," as a thousands separator, e.g. '480,301'
            sales_amount_float = float(sales_amount.replace(",", ""))

            data = [kind_of_business_id, sales_amount_float, month, year]
            print(data)

            cursor.execute(insert_sales_sql, data)

# Commit once after every row has been inserted rather than once per value
cnx.commit()

sales ['480,301', '478,467', '478,267', '407,227', '504,607', '532,678', '549,416', '545,307', '530,987', '553,114', '543,273', '611,429']
[1, 480301.0, 1, 2020]
[1, 478467.0, 2, 2020]
[1, 478267.0, 3, 2020]
[1, 407227.0, 4, 2020]
[1, 504607.0, 5, 2020]
[1, 532678.0, 6, 2020]
[1, 549416.0, 7, 2020]
[1, 545307.0, 8, 2020]
[1, 530987.0, 9, 2020]
[1, 553114.0, 10, 2020]
[1, 543273.0, 11, 2020]
[1, 611429.0, 12, 2020]
sales ['386,934', '380,639', '395,880', '337,693', '397,245', '418,450', '433,183', '429,379', '417,929', '438,435', '440,053', '494,937']
[2, 386934.0, 1, 2020]
[2, 380639.0, 2, 2020]
[2, 395880.0, 3, 2020]
[2, 337693.0, 4, 2020]
[2, 397245.0, 5, 2020]
[2, 418450.0, 6, 2020]
[2, 433183.0, 7, 2020]
[2, 429379.0, 8, 2020]
[2, 417929.0, 9, 2020]
[2, 438435.0, 10, 2020]
[2, 440053.0, 11, 2020]
[2, 494937.0, 12, 2020]
sales ['440,605', '441,504', '444,006', '380,417', '472,562', '496,364', '510,540', '506,577', '493,756', '514,953', '508,879', '574,794']
[3, 440605.0, 1, 2020]
[3

In [None]:
# Release the database resources: the cursor first, then the connection it
# was created from.
for handle in (cursor, cnx):
    handle.close()