In [157]:
import pandas as pd
from connection import connect

In [158]:
#connection
SCHEMA='sales'
co_oltp,etl_conn,etl_conn_or=connect()

## Extract

In [159]:
fact_internet_sales=pd.read_sql("""
SELECT  p.product_number, 
sh.order_date, 
sh.due_date, 
sh.ship_date,
sh.territory_id,
so.sales_order_detail_id,
sh.sales_order_number,
cu.account_number,
COALESCE(cr.to_currency_code, 'USD') AS to_currency_code,
ROW_NUMBER() OVER (PARTITION BY sh.sales_order_number ORDER BY so.sales_order_detail_id) AS sales_order_line_number,
so.order_qty,
so.unit_price,
(so.unit_price * so.order_qty) AS extended_amount,
so.unit_price_discount,
(so.unit_price * so.unit_price_discount * so.order_qty) AS discount_amount,
p.standard_cost AS product_standard_cost,
(so.order_qty * p.standard_cost) AS total_product_cost,
(so.unit_price *(1-so.unit_price_discount)*so.order_qty) AS sales_amount,
sh.tax_amt,
sh.freight,
so.carrier_tracking_number,
sh.purchase_order_number AS customer_po_number
                                
 



FROM sales.sales_order_header as sh
JOIN sales.sales_order_detail as so
ON sh.sales_order_id=so.sales_order_id
JOIN production.product as p
ON so.product_id=p.product_id
LEFT JOIN sales.currency_rate as cr
ON sh.currency_rate_id=cr.currency_rate_id
JOIN sales.customer as cu
ON sh.customer_id=cu.customer_id
WHERE sh.online_order_flag=true
                                
 """,co_oltp)

t_special_offer = pd.read_sql("""
    SELECT p.product_number, sod.special_offer_id
    FROM sales.sales_order_detail AS sod
    JOIN production.product AS p
      ON sod.product_id = p.product_id
    JOIN sales.sales_order_header AS soh
      ON sod.sales_order_id = soh.sales_order_id
    WHERE soh.online_order_flag = true
""", co_oltp)



dim_product=pd.read_sql_table('dim_product',etl_conn)

dim_customer=pd.read_sql_table('dim_customer',etl_conn_or)

dim_promotion=pd.read_sql_table('dim_promotion',etl_conn)

dim_sales_territory=pd.read_sql_table('dim_sales_territory',etl_conn)


## Transform

In [160]:
#delete duplicates in t_special_offer and dim_product

dim_product=dim_product.drop_duplicates(subset=['product_alternate_key'])

t_special_offer.drop_duplicates(subset=['product_number'],inplace=True)

In [161]:
# join dim_product

fact_internet_sales=fact_internet_sales.merge(
    dim_product[['product_key','product_alternate_key']],
    left_on='product_number',
    right_on='product_alternate_key',
    how='left'
    
)

fact_internet_sales

Unnamed: 0,product_number,order_date,due_date,ship_date,territory_id,sales_order_detail_id,sales_order_number,account_number,to_currency_code,sales_order_line_number,...,discount_amount,product_standard_cost,total_product_cost,sales_amount,tax_amt,freight,carrier_tracking_number,customer_po_number,product_key,product_alternate_key
0,BK-R93R-62,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,6,353,SO43697,AW00021768,CAD,1,...,0.0,2171.2942,2171.2942,3578.2700,286.2616,89.4568,,,1824,BK-R93R-62
1,BK-M82S-44,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,7,354,SO43698,AW00028389,FRF,1,...,0.0,1912.1544,1912.1544,3399.9900,271.9992,84.9998,,,1860,BK-M82S-44
2,BK-M82S-44,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,1,355,SO43699,AW00025863,USD,1,...,0.0,1912.1544,1912.1544,3399.9900,271.9992,84.9998,,,1860,BK-M82S-44
3,BK-R50B-62,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,4,356,SO43700,AW00014501,USD,1,...,0.0,486.7066,486.7066,699.0982,55.9279,17.4775,,,1850,BK-R50B-62
4,BK-M82S-44,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,9,357,SO43701,AW00011003,AUD,1,...,0.0,1912.1544,1912.1544,3399.9900,271.9992,84.9998,,,1860,BK-M82S-44
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60393,FE-6654,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,6,121313,SO75122,AW00015868,USD,1,...,0.0,8.2205,8.2205,21.9800,2.4776,0.7743,,,1999,FE-6654
60394,CA-1098,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,6,121314,SO75122,AW00015868,USD,2,...,0.0,6.9223,6.9223,8.9900,2.4776,0.7743,,,1737,CA-1098
60395,FE-6654,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,6,121315,SO75123,AW00018759,USD,1,...,0.0,8.2205,8.2205,21.9800,15.1976,4.7493,,,1999,FE-6654
60396,ST-1401,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,6,121316,SO75123,AW00018759,USD,2,...,0.0,59.4660,59.4660,159.0000,15.1976,4.7493,,,2000,ST-1401


In [162]:
#join whit dim_product

fact_internet_sales=fact_internet_sales.merge(
    t_special_offer,
    on='product_number',
    how='left'
).drop(['product_number'],axis=1)

fact_internet_sales

Unnamed: 0,order_date,due_date,ship_date,territory_id,sales_order_detail_id,sales_order_number,account_number,to_currency_code,sales_order_line_number,order_qty,...,product_standard_cost,total_product_cost,sales_amount,tax_amt,freight,carrier_tracking_number,customer_po_number,product_key,product_alternate_key,special_offer_id
0,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,6,353,SO43697,AW00021768,CAD,1,1,...,2171.2942,2171.2942,3578.2700,286.2616,89.4568,,,1824,BK-R93R-62,1
1,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,7,354,SO43698,AW00028389,FRF,1,1,...,1912.1544,1912.1544,3399.9900,271.9992,84.9998,,,1860,BK-M82S-44,1
2,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,1,355,SO43699,AW00025863,USD,1,1,...,1912.1544,1912.1544,3399.9900,271.9992,84.9998,,,1860,BK-M82S-44,1
3,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,4,356,SO43700,AW00014501,USD,1,1,...,486.7066,486.7066,699.0982,55.9279,17.4775,,,1850,BK-R50B-62,1
4,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,9,357,SO43701,AW00011003,AUD,1,1,...,1912.1544,1912.1544,3399.9900,271.9992,84.9998,,,1860,BK-M82S-44,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60393,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,6,121313,SO75122,AW00015868,USD,1,1,...,8.2205,8.2205,21.9800,2.4776,0.7743,,,1999,FE-6654,1
60394,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,6,121314,SO75122,AW00015868,USD,2,1,...,6.9223,6.9223,8.9900,2.4776,0.7743,,,1737,CA-1098,1
60395,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,6,121315,SO75123,AW00018759,USD,1,1,...,8.2205,8.2205,21.9800,15.1976,4.7493,,,1999,FE-6654,1
60396,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,6,121316,SO75123,AW00018759,USD,2,1,...,59.4660,59.4660,159.0000,15.1976,4.7493,,,2000,ST-1401,1


In [163]:
#join whit dim_promotion

fact_internet_sales=fact_internet_sales.merge(
    dim_promotion[['promotion_alternate_key','promotion_key']],
    left_on='special_offer_id',
    right_on='promotion_alternate_key',
    how='left'
).drop(['special_offer_id','promotion_alternate_key'],axis=1)

fact_internet_sales



Unnamed: 0,order_date,due_date,ship_date,territory_id,sales_order_detail_id,sales_order_number,account_number,to_currency_code,sales_order_line_number,order_qty,...,product_standard_cost,total_product_cost,sales_amount,tax_amt,freight,carrier_tracking_number,customer_po_number,product_key,product_alternate_key,promotion_key
0,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,6,353,SO43697,AW00021768,CAD,1,1,...,2171.2942,2171.2942,3578.2700,286.2616,89.4568,,,1824,BK-R93R-62,1
1,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,7,354,SO43698,AW00028389,FRF,1,1,...,1912.1544,1912.1544,3399.9900,271.9992,84.9998,,,1860,BK-M82S-44,1
2,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,1,355,SO43699,AW00025863,USD,1,1,...,1912.1544,1912.1544,3399.9900,271.9992,84.9998,,,1860,BK-M82S-44,1
3,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,4,356,SO43700,AW00014501,USD,1,1,...,486.7066,486.7066,699.0982,55.9279,17.4775,,,1850,BK-R50B-62,1
4,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,9,357,SO43701,AW00011003,AUD,1,1,...,1912.1544,1912.1544,3399.9900,271.9992,84.9998,,,1860,BK-M82S-44,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60393,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,6,121313,SO75122,AW00015868,USD,1,1,...,8.2205,8.2205,21.9800,2.4776,0.7743,,,1999,FE-6654,1
60394,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,6,121314,SO75122,AW00015868,USD,2,1,...,6.9223,6.9223,8.9900,2.4776,0.7743,,,1737,CA-1098,1
60395,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,6,121315,SO75123,AW00018759,USD,1,1,...,8.2205,8.2205,21.9800,15.1976,4.7493,,,1999,FE-6654,1
60396,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,6,121316,SO75123,AW00018759,USD,2,1,...,59.4660,59.4660,159.0000,15.1976,4.7493,,,2000,ST-1401,1


In [164]:
#rename the columns to propper name in olap database

# fact_internet_sales.rename(columns={'order_date':'order_date_key',
#                                     'due_date':'due_date_key',
#                                     'ship_date':'ship_date_key',    
#                                     },inplace=True)

fact_internet_sales

Unnamed: 0,order_date,due_date,ship_date,territory_id,sales_order_detail_id,sales_order_number,account_number,to_currency_code,sales_order_line_number,order_qty,...,product_standard_cost,total_product_cost,sales_amount,tax_amt,freight,carrier_tracking_number,customer_po_number,product_key,product_alternate_key,promotion_key
0,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,6,353,SO43697,AW00021768,CAD,1,1,...,2171.2942,2171.2942,3578.2700,286.2616,89.4568,,,1824,BK-R93R-62,1
1,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,7,354,SO43698,AW00028389,FRF,1,1,...,1912.1544,1912.1544,3399.9900,271.9992,84.9998,,,1860,BK-M82S-44,1
2,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,1,355,SO43699,AW00025863,USD,1,1,...,1912.1544,1912.1544,3399.9900,271.9992,84.9998,,,1860,BK-M82S-44,1
3,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,4,356,SO43700,AW00014501,USD,1,1,...,486.7066,486.7066,699.0982,55.9279,17.4775,,,1850,BK-R50B-62,1
4,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,9,357,SO43701,AW00011003,AUD,1,1,...,1912.1544,1912.1544,3399.9900,271.9992,84.9998,,,1860,BK-M82S-44,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60393,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,6,121313,SO75122,AW00015868,USD,1,1,...,8.2205,8.2205,21.9800,2.4776,0.7743,,,1999,FE-6654,1
60394,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,6,121314,SO75122,AW00015868,USD,2,1,...,6.9223,6.9223,8.9900,2.4776,0.7743,,,1737,CA-1098,1
60395,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,6,121315,SO75123,AW00018759,USD,1,1,...,8.2205,8.2205,21.9800,15.1976,4.7493,,,1999,FE-6654,1
60396,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,6,121316,SO75123,AW00018759,USD,2,1,...,59.4660,59.4660,159.0000,15.1976,4.7493,,,2000,ST-1401,1


In [165]:
#join currency

dim_currency=pd.read_sql_table('dim_currency',etl_conn)

fact_internet_sales=fact_internet_sales.merge(
    dim_currency[['currency_key','currency_alternate_key']],
    left_on='to_currency_code',
    right_on='currency_alternate_key',
    how='left'
).drop(['currency_alternate_key','to_currency_code'],axis=1)

fact_internet_sales


Unnamed: 0,order_date,due_date,ship_date,territory_id,sales_order_detail_id,sales_order_number,account_number,sales_order_line_number,order_qty,unit_price,...,total_product_cost,sales_amount,tax_amt,freight,carrier_tracking_number,customer_po_number,product_key,product_alternate_key,promotion_key,currency_key
0,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,6,353,SO43697,AW00021768,1,1,3578.2700,...,2171.2942,3578.2700,286.2616,89.4568,,,1824,BK-R93R-62,1,19
1,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,7,354,SO43698,AW00028389,1,1,3399.9900,...,1912.1544,3399.9900,271.9992,84.9998,,,1860,BK-M82S-44,1,39
2,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,1,355,SO43699,AW00025863,1,1,3399.9900,...,1912.1544,3399.9900,271.9992,84.9998,,,1860,BK-M82S-44,1,100
3,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,4,356,SO43700,AW00014501,1,1,699.0982,...,486.7066,699.0982,55.9279,17.4775,,,1850,BK-R50B-62,1,100
4,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,9,357,SO43701,AW00011003,1,1,3399.9900,...,1912.1544,3399.9900,271.9992,84.9998,,,1860,BK-M82S-44,1,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60393,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,6,121313,SO75122,AW00015868,1,1,21.9800,...,8.2205,21.9800,2.4776,0.7743,,,1999,FE-6654,1,100
60394,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,6,121314,SO75122,AW00015868,2,1,8.9900,...,6.9223,8.9900,2.4776,0.7743,,,1737,CA-1098,1,100
60395,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,6,121315,SO75123,AW00018759,1,1,21.9800,...,8.2205,21.9800,15.1976,4.7493,,,1999,FE-6654,1,100
60396,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,6,121316,SO75123,AW00018759,2,1,159.0000,...,59.4660,159.0000,15.1976,4.7493,,,2000,ST-1401,1,100


In [166]:
# join sales territoryKey

fact_internet_sales=fact_internet_sales.merge(
    dim_sales_territory[['sales_territory_key','sales_territory_alternate_key']],
    left_on='territory_id',
    right_on='sales_territory_alternate_key',
    how='left'
).drop(['territory_id','sales_territory_alternate_key'],axis=1)

fact_internet_sales





Unnamed: 0,order_date,due_date,ship_date,sales_order_detail_id,sales_order_number,account_number,sales_order_line_number,order_qty,unit_price,extended_amount,...,sales_amount,tax_amt,freight,carrier_tracking_number,customer_po_number,product_key,product_alternate_key,promotion_key,currency_key,sales_territory_key
0,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,353,SO43697,AW00021768,1,1,3578.2700,3578.2700,...,3578.2700,286.2616,89.4568,,,1824,BK-R93R-62,1,19,6
1,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,354,SO43698,AW00028389,1,1,3399.9900,3399.9900,...,3399.9900,271.9992,84.9998,,,1860,BK-M82S-44,1,39,7
2,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,355,SO43699,AW00025863,1,1,3399.9900,3399.9900,...,3399.9900,271.9992,84.9998,,,1860,BK-M82S-44,1,100,1
3,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,356,SO43700,AW00014501,1,1,699.0982,699.0982,...,699.0982,55.9279,17.4775,,,1850,BK-R50B-62,1,100,4
4,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,357,SO43701,AW00011003,1,1,3399.9900,3399.9900,...,3399.9900,271.9992,84.9998,,,1860,BK-M82S-44,1,6,9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60393,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,121313,SO75122,AW00015868,1,1,21.9800,21.9800,...,21.9800,2.4776,0.7743,,,1999,FE-6654,1,100,6
60394,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,121314,SO75122,AW00015868,2,1,8.9900,8.9900,...,8.9900,2.4776,0.7743,,,1737,CA-1098,1,100,6
60395,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,121315,SO75123,AW00018759,1,1,21.9800,21.9800,...,21.9800,15.1976,4.7493,,,1999,FE-6654,1,100,6
60396,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,121316,SO75123,AW00018759,2,1,159.0000,159.0000,...,159.0000,15.1976,4.7493,,,2000,ST-1401,1,100,6


In [167]:
#RevisionNumber

# fact_internet_sales['revision_number']=1



In [168]:
#drop unusefull variables

fact_internet_sales.drop(['product_alternate_key'],axis=1,inplace=True)

fact_internet_sales

Unnamed: 0,order_date,due_date,ship_date,sales_order_detail_id,sales_order_number,account_number,sales_order_line_number,order_qty,unit_price,extended_amount,...,total_product_cost,sales_amount,tax_amt,freight,carrier_tracking_number,customer_po_number,product_key,promotion_key,currency_key,sales_territory_key
0,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,353,SO43697,AW00021768,1,1,3578.2700,3578.2700,...,2171.2942,3578.2700,286.2616,89.4568,,,1824,1,19,6
1,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,354,SO43698,AW00028389,1,1,3399.9900,3399.9900,...,1912.1544,3399.9900,271.9992,84.9998,,,1860,1,39,7
2,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,355,SO43699,AW00025863,1,1,3399.9900,3399.9900,...,1912.1544,3399.9900,271.9992,84.9998,,,1860,1,100,1
3,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,356,SO43700,AW00014501,1,1,699.0982,699.0982,...,486.7066,699.0982,55.9279,17.4775,,,1850,1,100,4
4,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,357,SO43701,AW00011003,1,1,3399.9900,3399.9900,...,1912.1544,3399.9900,271.9992,84.9998,,,1860,1,6,9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60393,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,121313,SO75122,AW00015868,1,1,21.9800,21.9800,...,8.2205,21.9800,2.4776,0.7743,,,1999,1,100,6
60394,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,121314,SO75122,AW00015868,2,1,8.9900,8.9900,...,6.9223,8.9900,2.4776,0.7743,,,1737,1,100,6
60395,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,121315,SO75123,AW00018759,1,1,21.9800,21.9800,...,8.2205,21.9800,15.1976,4.7493,,,1999,1,100,6
60396,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,121316,SO75123,AW00018759,2,1,159.0000,159.0000,...,59.4660,159.0000,15.1976,4.7493,,,2000,1,100,6


In [169]:
# convert dates to integers

fact_internet_sales['order_date_key']=fact_internet_sales['order_date'].dt.strftime('%Y%m%d').astype(int)
fact_internet_sales['due_date_key']=fact_internet_sales['due_date'].dt.strftime('%Y%m%d').astype(int)
fact_internet_sales['ship_date_key']=fact_internet_sales['ship_date'].dt.strftime('%Y%m%d').astype(int)

In [170]:
#customer_key

fact_internet_sales=fact_internet_sales.merge(
    dim_customer[['customer_key','customer_alternate_key']],
    left_on='account_number',
    right_on='customer_alternate_key',
    how='left' 
)

fact_internet_sales=fact_internet_sales.drop(columns=['customer_alternate_key','account_number'])

fact_internet_sales

Unnamed: 0,order_date,due_date,ship_date,sales_order_detail_id,sales_order_number,sales_order_line_number,order_qty,unit_price,extended_amount,unit_price_discount,...,carrier_tracking_number,customer_po_number,product_key,promotion_key,currency_key,sales_territory_key,order_date_key,due_date_key,ship_date_key,customer_key
0,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,353,SO43697,1,1,3578.2700,3578.2700,0.0,...,,,1824,1,19,6,20110531,20110612,20110607,21768
1,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,354,SO43698,1,1,3399.9900,3399.9900,0.0,...,,,1860,1,39,7,20110531,20110612,20110607,28389
2,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,355,SO43699,1,1,3399.9900,3399.9900,0.0,...,,,1860,1,100,1,20110531,20110612,20110607,25863
3,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,356,SO43700,1,1,699.0982,699.0982,0.0,...,,,1850,1,100,4,20110531,20110612,20110607,14501
4,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,357,SO43701,1,1,3399.9900,3399.9900,0.0,...,,,1860,1,6,9,20110531,20110612,20110607,11003
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60393,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,121313,SO75122,1,1,21.9800,21.9800,0.0,...,,,1999,1,100,6,20140630,20140712,20140707,15868
60394,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,121314,SO75122,2,1,8.9900,8.9900,0.0,...,,,1737,1,100,6,20140630,20140712,20140707,15868
60395,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,121315,SO75123,1,1,21.9800,21.9800,0.0,...,,,1999,1,100,6,20140630,20140712,20140707,18759
60396,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,121316,SO75123,2,1,159.0000,159.0000,0.0,...,,,2000,1,100,6,20140630,20140712,20140707,18759


In [None]:
# fact_internet_sales_or=pd.read_sql_table('fact_internet_sales',etl_conn_or)

fact_internet_sales=fact_internet_sales.rename(columns={'order_qty':'order_quantity','customer_po_number':'customer_ponumber'})

missing_columns=set(fact_internet_sales_or.columns)-set(fact_internet_sales.columns)

missing_columns


{'customer_ponumber',
 'order_quantity',
 'revision_number',
 'unit_price_discount_pct'}

In [180]:
fact_internet_sales.columns

Index(['order_date', 'due_date', 'ship_date', 'sales_order_detail_id',
       'sales_order_number', 'sales_order_line_number', 'order_qty',
       'unit_price', 'extended_amount', 'unit_price_discount',
       'discount_amount', 'product_standard_cost', 'total_product_cost',
       'sales_amount', 'tax_amt', 'freight', 'carrier_tracking_number',
       'customer_po_number', 'product_key', 'promotion_key', 'currency_key',
       'sales_territory_key', 'order_date_key', 'due_date_key',
       'ship_date_key', 'customer_key'],
      dtype='object')

## Load

In [176]:
fact_internet_sales.to_sql('fact_internet_sales',etl_conn_or,if_exists='append',index=False)

ProgrammingError: (psycopg2.errors.UndefinedColumn) column "sales_order_detail_id" of relation "fact_internet_sales" does not exist
LINE 1: ..._internet_sales (order_date, due_date, ship_date, sales_orde...
                                                             ^

[SQL: INSERT INTO fact_internet_sales (order_date, due_date, ship_date, sales_order_detail_id, sales_order_number, sales_order_line_number, order_qty, unit_price, extended_amount, unit_price_discount, discount_amount, product_standard_cost, total_product_c ... 654249 characters truncated ... 99)s, %(order_date_key__999)s, %(due_date_key__999)s, %(ship_date_key__999)s, %(customer_key__999)s)]
[parameters: {'extended_amount__0': 3578.27, 'freight__0': 89.4568, 'promotion_key__0': 1, 'sales_order_number__0': 'SO43697', 'tax_amt__0': 286.2616, 'due_date__0': datetime.datetime(2011, 6, 12, 0, 0, tzinfo=datetime.timezone.utc), 'order_date_key__0': 20110531, 'product_key__0': 1824, 'ship_date__0': datetime.datetime(2011, 6, 7, 0, 0, tzinfo=datetime.timezone.utc), 'unit_price_discount__0': 0.0, 'currency_key__0': 19, 'unit_price__0': 3578.27, 'order_date__0': datetime.datetime(2011, 5, 31, 0, 0, tzinfo=datetime.timezone.utc), 'order_qty__0': 1, 'carrier_tracking_number__0': None, 'sales_territory_key__0': 6, 'due_date_key__0': 20110612, 'sales_amount__0': 3578.27, 'discount_amount__0': 0.0, 'sales_order_detail_id__0': 353, 'customer_po_number__0': None, 'sales_order_line_number__0': 1, 'ship_date_key__0': 20110607, 'total_product_cost__0': 2171.2942, 'customer_key__0': 21768, 'product_standard_cost__0': 2171.2942, 'extended_amount__1': 3399.99, 'freight__1': 84.9998, 'promotion_key__1': 1, 'sales_order_number__1': 'SO43698', 'tax_amt__1': 271.9992, 'due_date__1': datetime.datetime(2011, 6, 12, 0, 0, tzinfo=datetime.timezone.utc), 'order_date_key__1': 20110531, 'product_key__1': 1860, 'ship_date__1': datetime.datetime(2011, 6, 7, 0, 0, tzinfo=datetime.timezone.utc), 'unit_price_discount__1': 0.0, 'currency_key__1': 39, 'unit_price__1': 3399.99, 'order_date__1': datetime.datetime(2011, 5, 31, 0, 0, tzinfo=datetime.timezone.utc), 'order_qty__1': 1, 'carrier_tracking_number__1': None, 'sales_territory_key__1': 7, 'due_date_key__1': 20110612, 'sales_amount__1': 3399.99, 'discount_amount__1': 0.0, 'sales_order_detail_id__1': 354, 'customer_po_number__1': None, 'sales_order_line_number__1': 1, 'ship_date_key__1': 20110607, 'total_product_cost__1': 1912.1544 ... 25900 parameters truncated ... 'promotion_key__998': 1, 'sales_order_number__998': 'SO45023', 'tax_amt__998': 286.2616, 'due_date__998': datetime.datetime(2011, 12, 10, 0, 0, tzinfo=datetime.timezone.utc), 'order_date_key__998': 20111128, 'product_key__998': 1824, 'ship_date__998': datetime.datetime(2011, 12, 5, 0, 0, tzinfo=datetime.timezone.utc), 'unit_price_discount__998': 0.0, 'currency_key__998': 29, 'unit_price__998': 3578.27, 'order_date__998': datetime.datetime(2011, 11, 28, 0, 0, tzinfo=datetime.timezone.utc), 'order_qty__998': 1, 'carrier_tracking_number__998': None, 'sales_territory_key__998': 8, 'due_date_key__998': 20111210, 'sales_amount__998': 3578.27, 'discount_amount__998': 0.0, 'sales_order_detail_id__998': 5137, 'customer_po_number__998': None, 'sales_order_line_number__998': 1, 'ship_date_key__998': 20111205, 'total_product_cost__998': 2171.2942, 'customer_key__998': 13807, 'product_standard_cost__998': 2171.2942, 'extended_amount__999': 3399.99, 'freight__999': 84.9998, 'promotion_key__999': 1, 'sales_order_number__999': 'SO45024', 'tax_amt__999': 271.9992, 'due_date__999': datetime.datetime(2011, 12, 11, 0, 0, tzinfo=datetime.timezone.utc), 'order_date_key__999': 20111129, 'product_key__999': 1858, 'ship_date__999': datetime.datetime(2011, 12, 6, 0, 0, tzinfo=datetime.timezone.utc), 'unit_price_discount__999': 0.0, 'currency_key__999': 29, 'unit_price__999': 3399.99, 'order_date__999': datetime.datetime(2011, 11, 29, 0, 0, tzinfo=datetime.timezone.utc), 'order_qty__999': 1, 'carrier_tracking_number__999': None, 'sales_territory_key__999': 8, 'due_date_key__999': 20111211, 'sales_amount__999': 3399.99, 'discount_amount__999': 0.0, 'sales_order_detail_id__999': 5138, 'customer_po_number__999': None, 'sales_order_line_number__999': 1, 'ship_date_key__999': 20111206, 'total_product_cost__999': 1912.1544, 'customer_key__999': 29475, 'product_standard_cost__999': 1912.1544}]
(Background on this error at: https://sqlalche.me/e/20/f405)