## Loading the database into an RDBMS (SQLite)

In [30]:
# Import required libraries 

import pandas as pd
import os
import sqlite3
from sqlite3 import Error

In [31]:
# Directory where the CSVs produced in the previous step are stored.
# The location can be overridden with the SUPERSTORE_TABLES_DIR environment
# variable so the notebook also runs on machines with a different layout;
# when the variable is not set, the original location is used.
path = os.environ.get(
    'SUPERSTORE_TABLES_DIR',
    '/Users/penguin/Desktop/project/cab/ecommerce/Tables',
)


In [32]:
# Open a connection to the SQLite database file 'superstore_database.db'.
# The file name is relative, so it is resolved against the current working directory.

sales_db = sqlite3.connect('superstore_database.db')

In [33]:
# Get a cursor from the connection; it is used below to send SQL commands to the database

cursor = sales_db.cursor()

In [34]:
# Create the (empty) tables that will be included in the database.
# Every statement uses CREATE TABLE IF NOT EXISTS, so a table that is already
# present in the database file is left as it is.

# Customers: keyed by CustomerID

#Customers
cursor.execute('''
CREATE TABLE IF NOT EXISTS "Customers" (
	"CustomerID"	CHAR(8),
	"CustomerName"	VARCHAR(30),
	"Segment"	VARCHAR(15),
	PRIMARY KEY("CustomerID")
);
''')

# Address: keyed by AddressID; CustomerID references Customers(CustomerID).
# NOTE(review): PostalCode is declared with an integer type -- confirm that no
# postal code in the data relies on leading zeros.
cursor.execute('''
CREATE TABLE IF NOT EXISTS "Address" (
	"CustomerID"	CHAR(8),
	"Region"	VARCHAR(7),
	"State"	VARCHAR(20),
	"City"	VARCHAR(30),
	"PostalCode"	MEDIUMINT,
	"AddressID"	CHAR(7),
	PRIMARY KEY("AddressID"),
    FOREIGN KEY (CustomerID) REFERENCES "Customers"(CustomerID)
);
''')

# Orders: keyed by OrderID.
# CustomerID references Customers(CustomerID) and AddressID references
# Address(AddressID). The AddressID reference was previously undeclared even
# though the column mirrors the Address primary key (same name and type), so
# the relationship is now part of the schema like every other reference column.
# NOTE(review): SQLite only enforces FOREIGN KEY clauses on connections where
# PRAGMA foreign_keys is ON; nothing visible in this notebook enables it, so
# these constraints are declarative unless it is turned on elsewhere.
cursor.execute('''
CREATE TABLE IF NOT EXISTS "Orders" (
	"OrderID"	CHAR(14),
	"CustomerID"	CHAR(8),
	"OrderDate"	DATE,
	"AddressID"	CHAR(7),
    "ShipDate"	DATE,
    "ShipMode"	VARCHAR(20),
	PRIMARY KEY("OrderID"),
    FOREIGN KEY (CustomerID) REFERENCES "Customers"(CustomerID),
    FOREIGN KEY (AddressID) REFERENCES "Address"(AddressID)
);
''')

# Category: one Category/SubCategory combination per row, keyed by CategoryID
cursor.execute('''
CREATE TABLE IF NOT EXISTS "Category" (
	"Category"	VARCHAR(20),
	"SubCategory"	VARCHAR(20),
	"CategoryID"	CHAR(5),
	PRIMARY KEY("CategoryID")
);
''')


# Products: keyed by ProductID; CategoryID references Category(CategoryID).
# NOTE(review): CataloguePrice and SupplierPrice are declared MEDIUMINT --
# confirm that prices are meant to be whole numbers.
cursor.execute('''
CREATE TABLE IF NOT EXISTS "Products" (
	"ProductID"	CHAR(15),
	"ProductName"	VARCHAR(200),
	"CataloguePrice"	MEDIUMINT,
	"SupplierPrice"	MEDIUMINT,
	"CategoryID"	CHAR(5),
	PRIMARY KEY("ProductID"),
    FOREIGN KEY (CategoryID) REFERENCES "Category"(CategoryID)
);
''')


# Order Details: one row per (OrderID, ProductID) pair, which together form the
# composite primary key; ProductID references Products(ProductID) and OrderID
# references Orders(OrderID).
# NOTE(review): Sales, Discount and Profit are declared INT -- confirm these
# columns are meant to hold whole numbers.
# This is the last expression in the cell, so the cursor returned by execute()
# is what the notebook displays as the cell output.
cursor.execute('''
CREATE TABLE IF NOT EXISTS "OrderDetails" (
	"OrderID"	CHAR(14),
	"ProductID"	CHAR(15),
	"Quantity"	TINYINT,
	"Sales"	INT,
	"Discount"	INT,
	"Profit"	INT,
	PRIMARY KEY("OrderID","ProductID"),
    FOREIGN KEY (ProductID) REFERENCES "Products"(ProductID),
    FOREIGN KEY (OrderID) REFERENCES "Orders"(OrderID)
);
''')

<sqlite3.Cursor at 0x11d1134c0>

In [35]:
# Populate the empty tables. First step: collect the base names (file name
# without its extension) of the CSVs created in the previous notebook.
# os.listdir() returns every entry in the directory, so only names ending in
# '.csv' are kept -- hidden files such as .DS_Store or any other stray entry
# would otherwise be treated as a table. Sorting makes the order deterministic.
csv_list = sorted(
    stem
    for stem, extension in map(os.path.splitext, os.listdir(path))
    if extension == '.csv'
)



In [36]:
# Read every CSV into a DataFrame, in the same order as csv_list.
# os.path.join supplies the directory separator, so this works whether or not
# `path` ends with one (plain concatenation glued the directory and file name
# together when it did not).
tables = [pd.read_csv(os.path.join(path, f'{name}.csv')) for name in csv_list]

In [37]:
# Map each table name (CSV base name) to the DataFrame read from that file
table_dict = {name: frame for name, frame in zip(csv_list, tables)}

In [38]:
# Insert the data into the predefined table structure, one DataFrame per table
# of the same name.
# if_exists='append' adds the rows to the existing tables ('replace' would also
# remove the structure defined earlier); index=False because the DataFrame
# index is not wanted as a column.
for table_name, frame in table_dict.items():
    frame.to_sql(table_name, sales_db, if_exists='append', index=False)

In [39]:
# Commit the changes made through this connection; after this, queries can be run on the database.
sales_db.commit()

In [40]:
# It is best practice to close the connection once you are done with it.
sales_db.close()