#### Imports

In [None]:
import psycopg2
import configparser
import csv

from pathlib import Path

#### Carregar parâmetros da conexão com o banco de dados a partir do arquivo de configuração

In [None]:
# Parse the connection settings from dwh.cfg.
# The file is opened in a `with` block so its handle is closed as soon as
# parsing finishes. read_file() is kept (instead of read()) because read()
# silently ignores a missing file, whereas open() raises.
config = configparser.ConfigParser()
with open('dwh.cfg') as config_file:
    config.read_file(config_file)

# All options live in the [PGSQL] section; config.get() returns them as strings.
PGSQL_HOST        = config.get("PGSQL","PGSQL_HOST")
PGSQL_PORT        = config.get("PGSQL","PGSQL_PORT")
PGSQL_DBNAME      = config.get("PGSQL","PGSQL_DBNAME")
PGSQL_USER        = config.get("PGSQL","PGSQL_USER")
PGSQL_PASSWORD    = config.get("PGSQL","PGSQL_PASSWORD")

#### Criar a conexão com o banco PostgreSQL

In [None]:
# Create a connection
# Keyword arguments are used instead of a hand-built DSN string so that
# values containing spaces or quotes (e.g. the password) are quoted by the
# driver, and so that the configured PGSQL_PORT is actually used.
try:
    conn = psycopg2.connect(
        host=PGSQL_HOST,
        port=PGSQL_PORT,
        dbname=PGSQL_DBNAME,
        user=PGSQL_USER,
        password=PGSQL_PASSWORD,
    )
except psycopg2.Error as e:
    print("Error: Could not make connection to the Postgres database")
    print(e)
    # Without a connection the statements below would only fail with a
    # misleading NameError on `conn`, so propagate the real error.
    raise

# get a cursor
try:
    cur = conn.cursor()
except psycopg2.Error as e:
    print("Error: Could not get curser to the Database")
    print(e)
    # Same reasoning: every later cell depends on `cur`.
    raise

# set the autocommit to true
# With autocommit each statement is committed on its own, so a failed
# statement does not leave the session inside an aborted transaction.
conn.set_session(autocommit=True)

### CREATE TABLE Queries

In [None]:
# DDL for the Person table: one row per BusinessEntityID (primary key).
person_table = """
    CREATE TABLE IF NOT EXISTS Person
    (
        BusinessEntityID INTEGER NOT NULL,
        PersonType VARCHAR(10),
        NameStyle INTEGER,
        Title VARCHAR(10),
        FirstName VARCHAR(50),
        MiddleName VARCHAR(50),
        LastName VARCHAR(50),
        Suffix VARCHAR(10),
        EmailPromotion INTEGER,
        AdditionalContactInfo VARCHAR(2000),
        Demographics VARCHAR(1000),
        rowguid VARCHAR(36),
        ModifiedDate TIMESTAMP,
        PRIMARY KEY(BusinessEntityID)
    )
"""

# DDL for the Product table, keyed by ProductID.
# NOTE(review): StandardCost and ListPrice are stored as FLOAT; if they are
# monetary amounts NUMERIC may be the better type — confirm before changing.
product_table = """
    CREATE TABLE IF NOT EXISTS Product
    (
        ProductID INTEGER NOT NULL,
        Name VARCHAR(50),
        ProductNumber VARCHAR(10),
        MakeFlag INTEGER,
        FinishedGoodsFlag INTEGER,
        Color VARCHAR(20),
        SafetyStockLevel INTEGER,
        ReorderPoint INTEGER,
        StandardCost FLOAT,
        ListPrice FLOAT,
        Size VARCHAR(10),
        SizeUnitMeasureCode VARCHAR(10),
        WeightUnitMeasureCode VARCHAR(10),
        Weight FLOAT,
        DaysToManufacture INTEGER,
        ProductLine VARCHAR(10),
        Class VARCHAR(10),
        Style VARCHAR(10),
        ProductSubcategoryID INTEGER,
        ProductModelID INTEGER,
        SellStartDate TIMESTAMP,
        SellEndDate TIMESTAMP,
        DiscontinuedDate TIMESTAMP,
        rowguid VARCHAR(36),
        ModifiedDate TIMESTAMP,
        PRIMARY KEY (ProductID)
    )
"""

# DDL for the Customer table, keyed by CustomerID.
# PersonID is a nullable foreign key to Person(BusinessEntityID).
customer_table = """
    CREATE TABLE IF NOT EXISTS Customer
    (
        CustomerID INTEGER NOT NULL,
        PersonID INTEGER,
        StoreID INTEGER,
        TerritoryID INTEGER,
        AccountNumber VARCHAR(10),
        rowguid VARCHAR(36),
        ModifiedDate TIMESTAMP,
        PRIMARY KEY (CustomerID),
        FOREIGN KEY (PersonID) REFERENCES Person(BusinessEntityID)
    )
"""

# DDL for the SalesOrderHeader table, keyed by SalesOrderID.
# CustomerID is a foreign key to Customer(CustomerID).
salesorderheader_table = """
    CREATE TABLE IF NOT EXISTS SalesOrderHeader
    (
        SalesOrderID INTEGER NOT NULL,
        RevisionNumber INTEGER,
        OrderDate TIMESTAMP,
        DueDate TIMESTAMP,
        ShipDate TIMESTAMP,
        Status INTEGER,
        OnlineOrderFlag INTEGER,
        SalesOrderNumber VARCHAR(10),
        PurchaseOrderNumber VARCHAR(20),
        AccountNumber VARCHAR(20),
        CustomerID INTEGER,
        SalesPersonID INTEGER,
        TerritoryID INTEGER,
        BillToAddressID INTEGER,
        ShipToAddressID INTEGER,
        ShipMethodID INTEGER,
        CreditCardID INTEGER,
        CreditCardApprovalCode VARCHAR(20),
        CurrencyRateID INTEGER,
        SubTotal FLOAT,
        TaxAmt FLOAT,
        Freight FLOAT,
        TotalDue FLOAT,
        Comment VARCHAR(10),
        rowguid VARCHAR(36),
        ModifiedDate TIMESTAMP,
        PRIMARY KEY (SalesOrderID),
        FOREIGN KEY (CustomerID) REFERENCES Customer(CustomerID)
    )
"""

# DDL for the SpecialOfferProduct table.
# The primary key is the (SpecialOfferID, ProductID) pair; ProductID is also
# a foreign key to Product(ProductID).
specialofferproduct_table = """
    CREATE TABLE IF NOT EXISTS SpecialOfferProduct
    (
        SpecialOfferID INTEGER NOT NULL,
        ProductID INTEGER NOT NULL,
        rowguid VARCHAR(36),
        ModifiedDate TIMESTAMP,
        PRIMARY KEY (SpecialOfferID, ProductID),
        FOREIGN KEY (ProductID) REFERENCES Product(ProductID)
    )
"""

# DDL for the SalesOrderDetail table.
# The primary key is (SalesOrderID, SalesOrderDetailID). Foreign keys point at
# SalesOrderHeader, SpecialOfferProduct (composite key) and Product.
salesorderdetail_table = """
    CREATE TABLE IF NOT EXISTS SalesOrderDetail
    (
        SalesOrderID INTEGER NOT NULL,
        SalesOrderDetailID INTEGER NOT NULL,
        CarrierTrackingNumber VARCHAR(20),
        OrderQty INTEGER,
        ProductID INTEGER,
        SpecialOfferID INTEGER,
        UnitPrice FLOAT,
        UnitPriceDiscount FLOAT,
        LineTotal FLOAT,
        rowguid VARCHAR(36),
        ModifiedDate TIMESTAMP,
        PRIMARY KEY (SalesOrderID, SalesOrderDetailID),
        FOREIGN KEY (SalesOrderID) 
            REFERENCES SalesOrderHeader(SalesOrderID),
        FOREIGN KEY (SpecialOfferID, ProductID) 
            REFERENCES SpecialOfferProduct(SpecialOfferID, ProductID),
        FOREIGN KEY (ProductID) 
            REFERENCES Product(ProductID)
    )
"""

# The statements are listed so that every table comes after the tables its
# foreign keys reference.
create_queries = [ 
    person_table, 
    product_table, 
    customer_table, 
    salesorderheader_table, 
    specialofferproduct_table,
    salesorderdetail_table
]

#### Criar Tabelas

In [None]:
# Run the DDL statements one at a time; a failing statement is reported on
# stdout and the loop carries on with the next one.
for query in create_queries:
    try:
        cur.execute(query)
    except psycopg2.Error as err:
        print("Error: Issue creating table", err, sep="\n")

### Importação dos dados para as tabelas criadas

#### Person

In [None]:
# Load data/Person.Person.csv (';'-separated, first row is a header) into
# the Person table, one INSERT per CSV row.
# The whole loop sits inside the try, so the first database error stops the
# import; rows inserted before it stay in the table.
try:
    # newline='' is what the csv module asks for, so that line breaks
    # embedded in quoted fields reach the reader unmodified.
    with open('data/Person.Person.csv', 'r', newline='') as f:
        reader = csv.reader(f, delimiter=';')
        next(reader, None)  # Skip the header row (tolerates an empty file).
        for row in reader:
            # The literal text NULL is sent to the database as SQL NULL.
            cur.execute(
                "INSERT INTO Person VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)",
                [None if x == 'NULL' else x for x in row])
except psycopg2.Error as e:
    print("Error: Inserting Rows")
    print(e)

#### Product

In [None]:
# Load data/Production.Product.csv (';'-separated, first row is a header)
# into the Product table, one INSERT per CSV row.
# The whole loop sits inside the try, so the first database error stops the
# import; rows inserted before it stay in the table.
try:
    # newline='' is what the csv module asks for, so that line breaks
    # embedded in quoted fields reach the reader unmodified.
    with open('data/Production.Product.csv', 'r', newline='') as f:
        reader = csv.reader(f, delimiter=';')
        next(reader, None)  # Skip the header row (tolerates an empty file).
        for row in reader:
            # Positions 8 and 9 map to StandardCost and ListPrice; the
            # export presumably writes them with a decimal comma, which
            # PostgreSQL does not accept for FLOAT.
            for idx in (8, 9):
                row[idx] = row[idx].replace(',', '.')
            # The literal text NULL is sent to the database as SQL NULL.
            cur.execute(
                "INSERT INTO Product VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)",
                [None if x == 'NULL' else x for x in row])
except psycopg2.Error as e:
    print("Error: Inserting Rows")
    print(e)

#### Customer

In [None]:
# Load data/Sales.Customer.csv (';'-separated, first row is a header) into
# the Customer table, one INSERT per CSV row.
# The whole loop sits inside the try, so the first database error stops the
# import; rows inserted before it stay in the table.
try:
    # newline='' is what the csv module asks for, so that line breaks
    # embedded in quoted fields reach the reader unmodified.
    with open('data/Sales.Customer.csv', 'r', newline='') as f:
        reader = csv.reader(f, delimiter=';')
        next(reader, None)  # Skip the header row (tolerates an empty file).
        for row in reader:
            # The literal text NULL is sent to the database as SQL NULL.
            cur.execute(
                "INSERT INTO Customer VALUES (%s, %s, %s, %s, %s, %s, %s)",
                [None if x == 'NULL' else x for x in row])
except psycopg2.Error as e:
    print("Error: Inserting Rows")
    print(e)

#### SalesOrderHeader

In [None]:
# Load data/Sales.SalesOrderHeader.csv (';'-separated, first row is a
# header) into the SalesOrderHeader table, one INSERT per CSV row.
# The whole loop sits inside the try, so the first database error stops the
# import; rows inserted before it stay in the table.
try:
    # newline='' is what the csv module asks for, so that line breaks
    # embedded in quoted fields reach the reader unmodified.
    with open('data/Sales.SalesOrderHeader.csv', 'r', newline='') as f:
        reader = csv.reader(f, delimiter=';')
        next(reader, None)  # Skip the header row (tolerates an empty file).
        for row in reader:
            # Positions 19-22 map to SubTotal, TaxAmt, Freight and TotalDue;
            # the export presumably writes them with a decimal comma, which
            # PostgreSQL does not accept for FLOAT.
            for idx in (19, 20, 21, 22):
                row[idx] = row[idx].replace(',', '.')
            # The literal text NULL is sent to the database as SQL NULL.
            cur.execute(
                "INSERT INTO SalesOrderHeader VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)",
                [None if x == 'NULL' else x for x in row])
except psycopg2.Error as e:
    print("Error: Inserting Rows")
    print(e)

#### SpecialOfferProduct

In [None]:
# Load data/Sales.SpecialOfferProduct.csv (';'-separated, first row is a
# header) into the SpecialOfferProduct table, one INSERT per CSV row.
# The whole loop sits inside the try, so the first database error stops the
# import; rows inserted before it stay in the table.
try:
    # newline='' is what the csv module asks for, so that line breaks
    # embedded in quoted fields reach the reader unmodified.
    with open('data/Sales.SpecialOfferProduct.csv', 'r', newline='') as f:
        reader = csv.reader(f, delimiter=';')
        next(reader, None)  # Skip the header row (tolerates an empty file).
        for row in reader:
            # The literal text NULL is sent to the database as SQL NULL.
            cur.execute(
                "INSERT INTO SpecialOfferProduct VALUES (%s, %s, %s, %s)",
                [None if x == 'NULL' else x for x in row])
except psycopg2.Error as e:
    print("Error: Inserting Rows")
    print(e)

#### SalesOrderDetail

In [None]:
# Load data/Sales.SalesOrderDetail.csv (';'-separated, first row is a
# header) into the SalesOrderDetail table, one INSERT per CSV row.
# The whole loop sits inside the try, so the first database error stops the
# import; rows inserted before it stay in the table.
try:
    # newline='' is what the csv module asks for, so that line breaks
    # embedded in quoted fields reach the reader unmodified.
    with open('data/Sales.SalesOrderDetail.csv', 'r', newline='') as f:
        reader = csv.reader(f, delimiter=';')
        next(reader, None)  # Skip the header row (tolerates an empty file).
        for row in reader:
            # Positions 6-8 map to UnitPrice, UnitPriceDiscount and
            # LineTotal; the export presumably writes them with a decimal
            # comma, which PostgreSQL does not accept for FLOAT.
            for idx in (6, 7, 8):
                row[idx] = row[idx].replace(',', '.')
            # The literal text NULL is sent to the database as SQL NULL.
            cur.execute(
                "INSERT INTO SalesOrderDetail VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)",
                [None if x == 'NULL' else x for x in row])
except psycopg2.Error as e:
    print("Error: Inserting Rows")
    print(e)

### Checando Dados Importados

In [None]:
# Names of the tables to verify after the import.
table_names = [
    'Person', 'Product', 'Customer',
    'SalesOrderHeader', 'SpecialOfferProduct', 'SalesOrderDetail',
]

In [None]:
def check_row(table_name):
    """Print one sample row of *table_name*.

    The table name is interpolated directly into the SQL text, so it must be
    a trusted identifier (the notebook only passes its own hard-coded names).

    Uses the module-level cursor ``cur``. On a database error the error is
    printed and nothing is fetched, because the cursor holds no result set
    for the failed statement.
    """
    try:
        cur.execute(f"SELECT * FROM {table_name} LIMIT 1;")
    except psycopg2.Error as e:
        print("Error: select *")
        print(e)
        return

    print(cur.fetchall())
    
    
def check_qty(table_name):
    """Print the number of rows stored in *table_name*.

    The table name is interpolated directly into the SQL text, so it must be
    a trusted identifier (the notebook only passes its own hard-coded names).

    Uses the module-level cursor ``cur``. On a database error the error is
    printed and nothing is fetched, because the cursor holds no result set
    for the failed statement.
    """
    try:
        cur.execute(f"SELECT count(*) FROM {table_name};")
    except psycopg2.Error as e:
        # The message names the statement that actually failed (it used to
        # be a copy of the "select *" message from check_row).
        print("Error: select count(*)")
        print(e)
        return

    print(cur.fetchall())

In [None]:
# For every table print a heading, its row count, one sample row and a
# separator line.
for name in table_names:
    print(f"Checking table {name}")
    for check in (check_qty, check_row):
        check(name)
    print("+------------------------+")
    

Checking table Person
[(19972,)]
[(1, 'EM', 0, None, 'Ken', 'J', 'Sánchez', None, 0, None, '<IndividualSurvey xmlns="http://schemas.microsoft.com/sqlserver/2004/07/adventure-works/IndividualSurvey"><TotalPurchaseYTD>0</TotalPurchaseYTD></IndividualSurvey>', '92C4279F-1207-48A3-8448-4636514EB7E2', datetime.datetime(2009, 1, 7, 0, 0))]
+------------------------+
Checking table Product
[(504,)]
[(1, 'Adjustable Race', 'AR-5381', 0, 0, None, 1000, 750, 0.0, 0.0, None, None, None, None, 0, None, None, None, None, None, datetime.datetime(2008, 4, 30, 0, 0), None, None, '694215B7-08F7-4C0D-ACB1-D734BA44C0C8', datetime.datetime(2014, 2, 8, 10, 1, 36, 827000))]
+------------------------+
Checking table Customer
[(19820,)]
[(1, None, 934, 1, 'AW00000001', '3F5AE95E-B87D-4AED-95B4-C3797AFCB74F', datetime.datetime(2014, 9, 12, 11, 15, 7, 263000))]
+------------------------+
Checking table SalesOrderHeader
[(31465,)]
[(43659, 8, datetime.datetime(2011, 5, 31, 0, 0), datetime.datetime(2011, 6, 12, 0

In [None]:
# Counts the sales orders that have more than two detail rows: the inner
# query yields one row per SalesOrderID with its number of detail rows (Qty),
# and the outer query counts those with Qty > 2.
query = """
    SELECT COUNT(SalesOrderID) AS OrderQty
    FROM (
        SELECT SalesOrderID, COUNT(SalesOrderID) AS Qty
        FROM SalesOrderDetail
        GROUP BY SalesOrderID
    ) AS sub
    WHERE Qty > 2
"""

In [None]:
# Execute the query currently held in `query` and print its full result set.
cur.execute(query)
rows = cur.fetchall()
print(rows)

[(12757,)]


In [None]:
# The three products with the highest total ordered quantity, together with
# their DaysToManufacture. Detail rows are joined to Product by ProductID and
# to SpecialOfferProduct by the (SpecialOfferID, ProductID) pair.
query = """
    SELECT Name, SUM(OrderQty) AS OrderQtyTotal, DaysToManufacture
    FROM Product AS P
    JOIN SalesOrderDetail AS D ON D.ProductID = P.ProductID
    JOIN SpecialOfferProduct AS S ON S.ProductID = P.ProductID 
        AND S.SpecialOfferID = D.SpecialOfferID
    GROUP BY Name, DaysToManufacture
    ORDER BY OrderQtyTotal DESC
    LIMIT 3
"""

In [None]:
# Execute the query and print the result rows one at a time until the
# cursor has no more rows to return.
cur.execute(query)

while True:
    row = cur.fetchone()
    if not row:
        break
    print(row)

('AWC Logo Cap', 8311, 0)
('Water Bottle - 30 oz.', 6815, 0)
('Sport-100 Helmet, Blue', 6743, 0)


### Apagar Tabelas (USE APENAS APÓS FINALIZAR A ANÁLISE!!!)

In [None]:
# Drop every table created by this notebook.
# The tables are listed with the referencing tables first. IF EXISTS keeps
# the cleanup usable when some table is missing (for example after a failed
# CREATE); without it one missing table makes the whole statement fail and
# nothing is dropped.
try:
    cur.execute("""
                DROP TABLE IF EXISTS
                    SalesOrderDetail, 
                    SpecialOfferProduct, 
                    SalesOrderHeader, 
                    Customer,
                    Product,
                    Person;
                """)
except psycopg2.Error as e:
    print("Error: Dropping table")
    print(e)