<h2><center>STAGE 5 - LOADING FACTS</center></h2>

---

In [1]:
import os
import pandas as pd
import psycopg2 as pg

In [2]:
from utils.constants import host, user, password, database_name

In [3]:
from utils.constants import fact_tables_folder, fact_order_line_file, fact_order_header_file

<h3>1. DATABASE CONNECTION</h3>

In [4]:
# PGGSSENCMODE is read by libpq; "disable" switches off GSSAPI encryption
# negotiation, so it has to be set before the connection is opened.
os.environ.update({"PGGSSENCMODE": "disable"})

# Open the PostgreSQL session and the cursor that the following cells reuse.
connection_settings = {
    "host": host,
    "database": database_name,
    "user": user,
    "password": password,
}
conn = pg.connect(**connection_settings)
cursor = conn.cursor()

<h3>2. READING THE FACT TABLE FILES FROM STAGE 3</h3>

In [5]:
# Load both fact CSV files from the shared fact-table folder, in the same
# order as before: order lines first, then order headers.
fact_order_line, fact_order_header = (
    pd.read_csv(os.path.join(fact_tables_folder, csv_name))
    for csv_name in (fact_order_line_file, fact_order_header_file)
)

<h3>3. CREATING THE TABLES FOR THE FACTS IN POSTGRESQL</h3>

In [6]:
# Remove any previous version of the fact tables so the CREATE TABLE
# statements below can be re-run without "already exists" errors.
for stale_table in ("fact_order_line", "fact_order_header"):
    cursor.execute(f"DROP TABLE IF EXISTS {stale_table};")

<h4>Fact Table <sup>factOrderLineAccumulating</sup></h4>

In [7]:
# Display the pandas dtype of every column in the order-line fact frame.
fact_order_line.dtypes

ORDER_NUMBER                  int64
ORDER_LINE_NUMBER             int64
CUSTOMER_KEY                  int64
CUSTOMER_GEOLOCATION_KEY      int64
SELLER_KEY                    int64
SELLER_GEOLOCATION_KEY        int64
PRODUCT_KEY                   int64
PURCHASE_DATE_KEY             int64
APPROVAL_DATE_KEY             int64
DELIVERY_DATE_KEY             int64
ORDER_LINE_QUANTITY           int64
UNIT_PRICE                  float64
FREIGHT_COST                float64
DELIVERY_TIME                 int64
dtype: object

In [8]:
# Preview the first rows of the order-line fact frame.
fact_order_line.head()

Unnamed: 0,ORDER_NUMBER,ORDER_LINE_NUMBER,CUSTOMER_KEY,CUSTOMER_GEOLOCATION_KEY,SELLER_KEY,SELLER_GEOLOCATION_KEY,PRODUCT_KEY,PURCHASE_DATE_KEY,APPROVAL_DATE_KEY,DELIVERY_DATE_KEY,ORDER_LINE_QUANTITY,UNIT_PRICE,FREIGHT_COST,DELIVERY_TIME
0,1,1,70297,1048,560,4130,2350,641,641,649,1,29.99,8.72,8
1,2,1,77028,10823,550,8235,20551,936,938,950,1,118.7,22.76,12
2,3,1,555,15301,2618,5454,12229,951,951,960,1,159.9,19.22,9
3,4,1,61082,12334,2990,8269,29926,688,688,702,1,45.0,27.2,13
4,5,1,67264,4098,1497,4022,11901,775,775,778,1,19.9,8.72,2


In [9]:
# DDL for the order-line fact table.
#   * Column names are the lower-cased headers of the order-line CSV.
#   * Every column is NOT NULL, including order_number: a CHECK constraint
#     evaluates to "unknown" for NULL and therefore does not reject it, so
#     CHECK (order_number > 0) alone would let NULL order numbers through.
#   * The primary key is the combination of all dimension keys; each of those
#     keys also carries a foreign key to its dimension table, so the dimension
#     tables must already exist when this statement runs.
sql_fact_order_line = """

CREATE TABLE fact_order_line (
  order_number                 INTEGER         NOT NULL,
  order_line_number            INTEGER         NOT NULL,
  customer_key                 INTEGER         NOT NULL,
  customer_geolocation_key     INTEGER         NOT NULL,
  seller_key                   INTEGER         NOT NULL,
  seller_geolocation_key       INTEGER         NOT NULL,
  product_key                  INTEGER         NOT NULL,
  purchase_date_key            INTEGER         NOT NULL,
  approval_date_key            INTEGER         NOT NULL,
  delivery_date_key            INTEGER         NOT NULL,
--
  order_line_quantity          INTEGER         NOT NULL,
  unit_price                   NUMERIC(10,2)   NOT NULL,
  freight_cost                 NUMERIC(8,2)    NOT NULL,
  delivery_time                INTEGER         NOT NULL,
--
  CONSTRAINT pk_fact_order_line
    PRIMARY KEY (customer_key, customer_geolocation_key, seller_key, seller_geolocation_key,
                 product_key, purchase_date_key, approval_date_key, delivery_date_key),
--
  CONSTRAINT ck_fact_order_line_order_number
    CHECK (order_number > 0),
--
  CONSTRAINT fk_fact_order_line_customer_key
    FOREIGN KEY (customer_key)
    REFERENCES dim_customer (customer_key),
--
  CONSTRAINT fk_fact_order_line_customer_geolocation_key
    FOREIGN KEY (customer_geolocation_key)
    REFERENCES dim_geolocation (geolocation_key),
--
  CONSTRAINT fk_fact_order_line_seller_key
    FOREIGN KEY (seller_key)
    REFERENCES dim_seller (seller_key),
--
  CONSTRAINT fk_fact_order_line_seller_geolocation_key
    FOREIGN KEY (seller_geolocation_key)
    REFERENCES dim_geolocation (geolocation_key),
--
  CONSTRAINT fk_fact_order_line_product_key
    FOREIGN KEY (product_key)
    REFERENCES dim_product (product_key),
--
  CONSTRAINT fk_fact_order_line_purchase_date_key
    FOREIGN KEY (purchase_date_key)
    REFERENCES dim_date (date_key),
--
  CONSTRAINT fk_fact_order_line_approval_date_key
    FOREIGN KEY (approval_date_key)
    REFERENCES dim_date (date_key),
--
  CONSTRAINT fk_fact_order_line_delivery_date_key
    FOREIGN KEY (delivery_date_key)
    REFERENCES dim_date (date_key),
--
  CONSTRAINT ck_fact_order_line_quantity
    CHECK (order_line_quantity > 0),
--
  CONSTRAINT ck_fact_order_line_unit_price
    CHECK (unit_price > 0),
--
  CONSTRAINT ck_fact_order_line_freight_cost
    CHECK (freight_cost >= 0)
);
"""

cursor.execute(sql_fact_order_line)

<h4>Fact Table <sup>factOrderHeaderAccumulating</sup></h4>

In [10]:
# Display the pandas dtype of every column in the order-header fact frame.
fact_order_header.dtypes

ORDER_NUMBER                  int64
CUSTOMER_KEY                  int64
CUSTOMER_GEOLOCATION_KEY      int64
PURCHASE_DATE_KEY             int64
APPROVAL_DATE_KEY             int64
DELIVERY_DATE_KEY             int64
ORDER_INDICATOR_KEY           int64
ORDER_AMOUNT                float64
ORDER_ITEM_QUANTITY           int64
REVIEW_SCORE                float64
DELIVERY_TIME                 int64
dtype: object

In [11]:
# Preview the first rows of the order-header fact frame.
fact_order_header.head()

Unnamed: 0,ORDER_NUMBER,CUSTOMER_KEY,CUSTOMER_GEOLOCATION_KEY,PURCHASE_DATE_KEY,APPROVAL_DATE_KEY,DELIVERY_DATE_KEY,ORDER_INDICATOR_KEY,ORDER_AMOUNT,ORDER_ITEM_QUANTITY,REVIEW_SCORE,DELIVERY_TIME
0,1,70297,1048,641,641,649,5,18.59,1,4.0,8
1,2,77028,10823,936,938,950,3,141.46,1,4.0,12
2,3,555,15301,951,951,960,1,179.12,1,5.0,9
3,4,61082,12334,688,688,702,1,72.2,1,5.0,13
4,5,67264,4098,775,775,778,1,28.62,1,5.0,2


In [12]:
# DDL for the order-header fact table.
#   * Column names are the lower-cased headers of the order-header CSV.
#   * Every column is NOT NULL, including order_number, which was previously
#     the only nullable column and would have accepted rows without an order.
#   * review_score is stored as INTEGER and restricted to 0..5, although the
#     CSV column is read as float64.
#   * The primary key is the combination of all dimension keys; each of those
#     keys also carries a foreign key to its dimension table, so the dimension
#     tables must already exist when this statement runs.
sql_fact_order_header = """

CREATE TABLE fact_order_header (
  order_number                 INTEGER         NOT NULL,
  customer_key                 INTEGER         NOT NULL,
  customer_geolocation_key     INTEGER         NOT NULL,
  purchase_date_key            INTEGER         NOT NULL,
  approval_date_key            INTEGER         NOT NULL,
  delivery_date_key            INTEGER         NOT NULL,
  order_indicator_key          INTEGER         NOT NULL,
--
  order_amount                 NUMERIC(10,2)   NOT NULL,
  order_item_quantity          INTEGER         NOT NULL,
  review_score                 INTEGER         NOT NULL,
  delivery_time                INTEGER         NOT NULL,
--
  CONSTRAINT pk_fact_order_header
    PRIMARY KEY (customer_key, customer_geolocation_key, purchase_date_key, approval_date_key, delivery_date_key, order_indicator_key),
--
  CONSTRAINT fk_fact_order_header_customer_key
    FOREIGN KEY (customer_key)
    REFERENCES dim_customer(customer_key),
--
  CONSTRAINT fk_fact_order_header_customer_geolocation_key
    FOREIGN KEY (customer_geolocation_key)
    REFERENCES dim_geolocation(geolocation_key),
--
  CONSTRAINT fk_fact_order_header_purchase_date_key
    FOREIGN KEY (purchase_date_key)
    REFERENCES dim_date(date_key),
--
  CONSTRAINT fk_fact_order_header_approval_date_key
    FOREIGN KEY (approval_date_key)
    REFERENCES dim_date(date_key),
--
  CONSTRAINT fk_fact_order_header_delivery_date_key
    FOREIGN KEY (delivery_date_key)
    REFERENCES dim_date(date_key),
--
  CONSTRAINT fk_fact_order_header_order_indicator_key
    FOREIGN KEY (order_indicator_key)
    REFERENCES dim_order_indicator(order_indicator_key),
--
  CONSTRAINT ck_fact_order_header_order_amount
    CHECK (order_amount > 0),
--
  CONSTRAINT ck_fact_order_header_order_item_quantity
    CHECK (order_item_quantity > 0),
--
  CONSTRAINT ck_fact_order_header_review_score
    CHECK (review_score IN (0, 1, 2, 3, 4, 5))
);
"""

cursor.execute(sql_fact_order_header)

<h3>4. INSERTING THE DATA INTO THE FACT TABLES IN POSTGRESQL</h3>

In [13]:
# Build the rows column by column so every value keeps its own type.
# DataFrame.to_numpy() on this mixed int64/float64 frame produces a single
# float64 array, which would send the integer keys to PostgreSQL as floats.
fact_order_line_list = list(fact_order_line.itertuples(index=False, name=None))

# Name the target columns explicitly (the table columns are the lower-cased
# CSV headers) so the load does not depend on the table's positional column
# order, and derive the placeholder count from the same list.
fact_order_line_columns = [column.lower() for column in fact_order_line.columns]
sql = "INSERT INTO fact_order_line ({}) VALUES ({})".format(
    ", ".join(fact_order_line_columns),
    ", ".join(["%s"] * len(fact_order_line_columns)),
)

cursor.executemany(sql, fact_order_line_list)

In [14]:
# Build the rows column by column so every value keeps its own type.
# DataFrame.to_numpy() on this mixed int64/float64 frame produces a single
# float64 array, which would send the integer keys to PostgreSQL as floats.
fact_order_header_list = list(fact_order_header.itertuples(index=False, name=None))

# Name the target columns explicitly (the table columns are the lower-cased
# CSV headers) so the load does not depend on the table's positional column
# order, and derive the placeholder count from the same list.
fact_order_header_columns = [column.lower() for column in fact_order_header.columns]
sql = "INSERT INTO fact_order_header ({}) VALUES ({})".format(
    ", ".join(fact_order_header_columns),
    ", ".join(["%s"] * len(fact_order_header_columns)),
)

cursor.executemany(sql, fact_order_header_list)

In [15]:
# Commit the work done through the cursor above (DROP/CREATE TABLE and the
# two bulk inserts); nothing is committed before this call.
conn.commit()

In [16]:
# Release the database resources: close the cursor first, then the connection.
cursor.close()
conn.close()