# SQL connection

Once the data has been processed (see the TablePrep file), the SQL tables are created in the database and the data is loaded into them through the SQLite connector.  

In [None]:
# relevant modules

import pandas as pd
import os
import sqlite3
from sqlite3 import Error

In [2]:
# Folder that holds the CSV files; later cells list it and read the table
# files from it.
path = "C:\\Users\\EWF\\OneDrive\\Documentos\\CABerlin\\Project 6\\Project 6_v2\\"

# Fail fast with a clear message. isdir() (rather than exists()) also rejects
# a path that points at a regular file, which os.listdir() could not use.
if not os.path.isdir(path):
    raise FileNotFoundError(f"Directory '{path}' does not exist.")

## Step 1 - SQL Table creation

In [None]:
# NOTE: everything in this cell is commented out and is not used by the rest
# of the notebook. As written, the helper would open a connection to
# `db_file`, print sqlite3.version, print any sqlite3 Error that occurs, and
# always close the connection again in its `finally` clause.
# def create_connection(db_file):
#     """ create a database connection to a SQLite database """
#     conn = None
#     try:
#         conn = sqlite3.connect(db_file)
#         print(sqlite3.version)
#     except Error as e:
#         print(e)
#     finally:
#         if conn:
#             conn.close()


# if __name__ == '__main__':
#     create_connection(r"C:\sqlite\db\pythonsqlite.db")

In [3]:
# Open the SQLite database file that will be populated with the prepared
# dataframes; sqlite3.connect() creates the file if it does not exist yet.
# The relative name is resolved against the current working directory.
sales_db = sqlite3.connect("superstore_database.db")

# SQLite only enforces FOREIGN KEY constraints when they are switched on for
# the connection. Without this pragma the FOREIGN KEY clauses declared in the
# table definitions are accepted but never checked.
# NOTE(review): with enforcement on, loading a child row whose parent key is
# missing raises sqlite3.IntegrityError -- confirm the CSVs are consistent.
sales_db.execute("PRAGMA foreign_keys = ON")

#Alternate method
# import pyodbc
# sales_db = pyodbc.connect('Driver={SQL Server};'
#                      'Server=KILLBOT-LAPTOP\SQLEXPRESS;'
#                      'Database=test_database;'
#                      'Trusted_Connection=yes;')

In [4]:
# Create a cursor on the open connection; SQL statements are executed against
# the database through this object.
c = sales_db.cursor()

In [5]:
# Definitions of the six (initially empty) tables of the database.
# Every statement uses IF NOT EXISTS, so running this cell again leaves tables
# that already exist untouched. Tables are listed so that each one appears
# after the tables its FOREIGN KEY clauses refer to.
# NOTE(review): the price columns and Sales/Discount/Profit are declared with
# integer types -- confirm that the prepared CSVs really hold whole numbers.
table_definitions = (
    # Customers
    '''
CREATE TABLE IF NOT EXISTS "Customers" (
	"CustomerID"	CHAR(8),
	"CustomerName"	VARCHAR(30),
	"Segment"	VARCHAR(15),
	"Sex" CHAR(2),
	PRIMARY KEY("CustomerID")
);
''',
    # Address
    '''
CREATE TABLE IF NOT EXISTS "Address" (
	"CustomerID"	CHAR(8),
	"Region"	VARCHAR(7),
	"State"	VARCHAR(20),
	"City"	VARCHAR(30),
	"PostalCode"	MEDIUMINT,
	"AddressID"	CHAR(7),
	PRIMARY KEY("AddressID"),
    FOREIGN KEY (CustomerID) REFERENCES "Customers"(CustomerID)
);
''',
    # Orders
    '''
CREATE TABLE IF NOT EXISTS "Orders" (
	"OrderID"	CHAR(14),
	"CustomerID"	CHAR(8),
	"OrderDate"	DATE,
	"AddressID"	CHAR(7),
    "ShipDate"	DATE,
    "ShipMode"	VARCHAR(20),
	PRIMARY KEY("OrderID"),
    FOREIGN KEY (CustomerID) REFERENCES "Customers"(CustomerID)
);
''',
    # Category
    '''
CREATE TABLE IF NOT EXISTS "Category" (
	"Category"	VARCHAR(20),
	"SubCategory"	VARCHAR(20),
	"CategoryID"	CHAR(5),
	PRIMARY KEY("CategoryID")
);
''',
    # Products
    '''
CREATE TABLE IF NOT EXISTS "Products" (
	"ProductID"	CHAR(15),
	"ProductName"	VARCHAR(200),
	"CataloguePrice"	MEDIUMINT,
	"SupplierPrice"	MEDIUMINT,
	"CategoryID"	CHAR(5),
	PRIMARY KEY("ProductID"),
    FOREIGN KEY (CategoryID) REFERENCES "Category"(CategoryID)
);
''',
    # Order Details
    '''
CREATE TABLE IF NOT EXISTS "OrderDetails" (
	"OrderID"	CHAR(14),
	"ProductID"	CHAR(15),
	"Quantity"	TINYINT,
	"Sales"	INT,
	"Discount"	INT,
	"Profit"	INT,
	PRIMARY KEY("OrderID","ProductID"),
    FOREIGN KEY (ProductID) REFERENCES "Products"(ProductID),
    FOREIGN KEY (OrderID) REFERENCES "Orders"(OrderID)
);
''',
)

# Run the statements one by one, in the order listed, on the shared cursor.
for ddl in table_definitions:
    c.execute(ddl)

<sqlite3.Cursor at 0x1e673b697c0>

## Step 2 - Table population 

In [6]:
# Base names (extension stripped) of the CSV files found in `path`.
# Restricting the listing to "*.csv" keeps sub-folders and unrelated files out
# of the list, so every entry can be turned back into an existing
# "<name>.csv" file.
csv_list = [
    os.path.splitext(filename)[0]
    for filename in os.listdir(path)
    if filename.lower().endswith(".csv")
]

In [8]:
# An earlier attempt,
#   tables = [pd.read_csv(f'{path}{name}.csv') for name in csv_list]
# raised an error, so the table names are spelled out by hand instead.
csv_list = ["Customers", "Address", "Orders", "Category", "Products", "OrderDetails"]

# Build the full "<path>/<name>.csv" location for every table, then load each
# file into its own dataframe, keeping the order of csv_list.
csv_paths = (os.path.join(path, name + ".csv") for name in csv_list)
tables = [pd.read_csv(csv_path) for csv_path in csv_paths]


In [11]:
# Pair each table name with the dataframe loaded for it (same position in
# csv_list and tables).
table_dict = dict(zip(csv_list, tables))

In [12]:
# Insert every dataframe into the table that carries the same name.
# if_exists='append' adds the rows to the predefined table structure
# ('replace' would also remove that structure), and index=False keeps the
# dataframe index from being written as an extra column.
for table_name, frame in table_dict.items():
    frame.to_sql(table_name, sales_db, if_exists='append', index=False)

In [13]:
# Commit any pending transaction on the connection so the changes made above
# are saved; after this the database can be queried.
sales_db.commit()

In [14]:
# Close the connection once all work on the database is finished; sales_db
# and the cursor created from it cannot be used after this point.
sales_db.close()