### Best Practices for Data Loading

In [93]:
# import modules
import sqlite3
import os

In [94]:
# demo data
laundry_mat_data = [
    {"id":1, "product": "Detergent", "dollar_price_per_unit": 4.5, "quantity": 100, "total_cost": 200},
    {"id":2, "product": "Dryer Sheets Box", "dollar_price_per_unit": 3.5, "quantity": 100, "total_cost": 350},
    {"id":3, "product": "Washing Machine", "dollar_price_per_unit": 400, "quantity": 25, "total_cost": 10000},
    {"id":4, "product": "Dryer", "dollar_price_per_unit": 400, "quantity": 25, "total_cost": 10000},
]

# demo data
laundry_mat_data_2 = [
    {"id":5, "product": "Soap", "dollar_price_per_unit": 1.5, "quantity": 100, "total_cost": 150},
    {"id":6, "product": "Dryer Sheets Bundle", "dollar_price_per_unit": 13.5, "quantity": 100, "total_cost": 1350},
    {"id":7, "product": "Wash Rag", "dollar_price_per_unit": 2, "quantity": 25, "total_cost": 50},
    {"id":8, "product": "Blow Dryer", "dollar_price_per_unit": 40, "quantity": 25, "total_cost": 1000},
]

# demo data
laundry_mat_data_dupes = [
    {"id":1, "product": "Detergent", "dollar_price_per_unit": 4.5, "quantity": 100, "total_cost": 200},
    {"id":2, "product": "Dryer Sheets Box", "dollar_price_per_unit": 3.5, "quantity": 100, "total_cost": 350},
    {"id":9, "product": "Scrubber Brush", "dollar_price_per_unit": 2, "quantity": 25, "total_cost": 50},
    {"id":10, "product": "Laundry Tabs", "dollar_price_per_unit": 40, "quantity": 25, "total_cost": 1000},
]

In [95]:
#create new sqlite3 database named laundry_mat.db and table named laundry_mat in current directory
# Define the path to the database
db_path = os.path.join(os.getcwd(), 'laundry_mat.db')

# Connect to the SQLite database. If it doesn't exist, it will be created in the current working directory.
conn = sqlite3.connect(db_path)

# Create a cursor object using the cursor method
cursor = conn.cursor()

# SQL command to create a table
create_table_sql = '''
CREATE TABLE IF NOT EXISTS laundry_mat (
    id int primary key,
    product string,
    dollar_price_per_unit TEXT decimal(10, 2),
    quantity INTEGER,
    total_cost decimal(10, 2)
);
'''
# Execute the SQL command to create the table
cursor.execute(create_table_sql)

conn.commit()
conn.close()
print(f"Database and table created at {db_path}")

Database and table created at C:\Users\john.dees\PycharmProjects\Building-ETL-Pipelines-with-Python\Chapters\chapter_06\laundry_mat.db


In [96]:
# conn = sqlite3.connect("laundry_mat.db")  # Connect to the database
# # Create a cursor object using the cursor method
# cursor = conn.cursor()
# 
# # drop table laundry_mat if it exists
# cursor.execute("DROP TABLE IF EXISTS laundry_mat")
# 
# conn.commit()
# conn.close()

In [97]:
conn = sqlite3.connect("laundry_mat.db")  # Connect to the database
cursor = conn.cursor()

# Truncate the existing data from the table "laundry_mat"
cursor.execute("DELETE FROM laundry_mat")
conn.commit()
conn.close()



In [98]:
conn = sqlite3.connect("laundry_mat.db")  # Connect to the database
cursor = conn.cursor()

# Insert new data
for record in laundry_mat_data:
    cursor.execute("INSERT INTO laundry_mat (id, product, dollar_price_per_unit, quantity, total_cost) VALUES (?, ?, ?, ?, ?)",
                   (record["id"], record["product"], record["dollar_price_per_unit"], record["quantity"], record["total_cost"]))
conn.commit()
conn.close()

In [99]:
#read from laundry_mat table
conn = sqlite3.connect("laundry_mat.db")  # Connect to the database
cursor = conn.cursor()

# Select all records from the table
cursor.execute("SELECT * FROM laundry_mat")

# Fetch all records
records = cursor.fetchall()


if not records:
    print("No records found in the table")
else:
    # Print the records
    for record in records:
        print(record)
        #print 'no records' if no records
    
conn.close()


(1, 'Detergent', '4.5', 100, 200)
(2, 'Dryer Sheets Box', '3.5', 100, 350)
(3, 'Washing Machine', '400', 25, 10000)
(4, 'Dryer', '400', 25, 10000)


Full Data Load

In [100]:
def perform_full_data_load(laundry_mat_data):
    conn = sqlite3.connect("laundry_mat.db")  # Connect to the database
    cursor = conn.cursor()

    # Truncate the existing data
    cursor.execute("DELETE FROM laundry_mat")

    # Insert new data
    for record in laundry_mat_data:
        cursor.execute("INSERT INTO laundry_mat (id, product, dollar_price_per_unit, quantity, total_cost) VALUES (?, ?, ?, ?, ?)",
                   (record["id"], record["product"], record["dollar_price_per_unit"], record["quantity"], record["total_cost"]))

    conn.commit()
    conn.close()

In [101]:
perform_full_data_load(laundry_mat_data)

Incremental Data Load

In [102]:
def perform_incremental_data_load(laundry_mat_data):
    conn = sqlite3.connect("laundry_mat.db")  # Connect to the database
    cursor = conn.cursor()

    # Insert new data (if the product doesn't already exist ... the "or ignore" part will skip dupes that would violate a unique constraint)
    for record in laundry_mat_data_2:
        cursor.execute("INSERT OR IGNORE INTO laundry_mat (id, product, dollar_price_per_unit, quantity, total_cost) VALUES (?, ?, ?, ?, ?)",
                   (record["id"], record["product"], record["dollar_price_per_unit"], record["quantity"], record["total_cost"]))

    for record in laundry_mat_data_dupes:
        cursor.execute("INSERT OR IGNORE INTO laundry_mat (id, product, dollar_price_per_unit, quantity, total_cost) VALUES (?, ?, ?, ?, ?)",
                   (record["id"], record["product"], record["dollar_price_per_unit"], record["quantity"], record["total_cost"]))

    conn.commit()
    conn.close()

In [103]:
perform_incremental_data_load(laundry_mat_data)

In [104]:
#read from laundry_mat table
conn = sqlite3.connect("laundry_mat.db")  # Connect to the database
cursor = conn.cursor()

# Select all records from the table
cursor.execute("SELECT * FROM laundry_mat")

# Fetch all records
records = cursor.fetchall()


if not records:
    print("No records found in the table")
else:
    # Print the records
    for record in records:
        print(record)
        #print 'no records' if no records
    
conn.close()

(1, 'Detergent', '4.5', 100, 200)
(2, 'Dryer Sheets Box', '3.5', 100, 350)
(3, 'Washing Machine', '400', 25, 10000)
(4, 'Dryer', '400', 25, 10000)
(5, 'Soap', '1.5', 100, 150)
(6, 'Dryer Sheets Bundle', '13.5', 100, 1350)
(7, 'Wash Rag', '2', 25, 50)
(8, 'Blow Dryer', '40', 25, 1000)
(9, 'Scrubber Brush', '2', 25, 50)
(10, 'Laundry Tabs', '40', 25, 1000)
