In [1]:
import pandas as pd
from connection import connect

In [2]:
#connection
co_oltp,etl_conn,_=connect()

## Extract

In [20]:
fact_internet_sales=pd.read_sql("""
SELECT  p.product_number, 
sh.order_date, 
sh.due_date, 
sh.ship_date,
sh.territory_id,
sh.sales_order_number,
sh.revision_number,
cu.account_number,
COALESCE(cr.to_currency_code, 'USD') AS to_currency_code,
ROW_NUMBER() OVER (PARTITION BY sh.sales_order_number ORDER BY so.sales_order_detail_id) AS sales_order_line_number,
so.order_qty AS order_quantity,
so.unit_price,
so.unit_price_discount AS unit_price_discount_pct,
(so.unit_price * so.order_qty) AS extended_amount,
(so.unit_price * so.unit_price_discount * so.order_qty) AS discount_amount,
p.standard_cost AS product_standard_cost,
(so.order_qty * p.standard_cost) AS total_product_cost,
(so.unit_price *(1-so.unit_price_discount)*so.order_qty) AS sales_amount,
sh.tax_amt,
sh.freight,
so.carrier_tracking_number,
sh.purchase_order_number AS customer_po_number
                                
 



FROM sales.sales_order_header as sh
JOIN sales.sales_order_detail as so
ON sh.sales_order_id=so.sales_order_id
JOIN production.product as p
ON so.product_id=p.product_id
LEFT JOIN sales.currency_rate as cr
ON sh.currency_rate_id=cr.currency_rate_id
JOIN sales.customer as cu
ON sh.customer_id=cu.customer_id
WHERE sh.online_order_flag=true
                                
 """,co_oltp)

t_special_offer = pd.read_sql("""
    SELECT p.product_number, sod.special_offer_id
    FROM sales.sales_order_detail AS sod
    JOIN production.product AS p
      ON sod.product_id = p.product_id
    JOIN sales.sales_order_header AS soh
      ON sod.sales_order_id = soh.sales_order_id
    WHERE soh.online_order_flag = true
""", co_oltp)



dim_product=pd.read_sql_table('dim_product',etl_conn)

dim_customer=pd.read_sql_table('dim_customer',etl_conn)

dim_promotion=pd.read_sql_table('dim_promotion',etl_conn)

dim_sales_territory=pd.read_sql_table('dim_sales_territory',etl_conn)


## Transform

In [21]:
#delete duplicates in t_special_offer and dim_product

dim_product=dim_product.drop_duplicates(subset=['product_alternate_key'])

t_special_offer.drop_duplicates(subset=['product_number'],inplace=True)

In [22]:
# join dim_product

fact_internet_sales=fact_internet_sales.merge(
    dim_product[['product_key','product_alternate_key']],
    left_on='product_number',
    right_on='product_alternate_key',
    how='left'
    
)

fact_internet_sales

Unnamed: 0,product_number,order_date,due_date,ship_date,territory_id,sales_order_number,revision_number,account_number,to_currency_code,sales_order_line_number,...,discount_amount,product_standard_cost,total_product_cost,sales_amount,tax_amt,freight,carrier_tracking_number,customer_po_number,product_key,product_alternate_key
0,BK-R93R-62,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,6,SO43697,8,AW00021768,CAD,1,...,0.0,2171.2942,2171.2942,3578.2700,286.2616,89.4568,,,310,BK-R93R-62
1,BK-M82S-44,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,7,SO43698,8,AW00028389,FRF,1,...,0.0,1912.1544,1912.1544,3399.9900,271.9992,84.9998,,,346,BK-M82S-44
2,BK-M82S-44,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,1,SO43699,8,AW00025863,USD,1,...,0.0,1912.1544,1912.1544,3399.9900,271.9992,84.9998,,,346,BK-M82S-44
3,BK-R50B-62,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,4,SO43700,8,AW00014501,USD,1,...,0.0,486.7066,486.7066,699.0982,55.9279,17.4775,,,336,BK-R50B-62
4,BK-M82S-44,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,9,SO43701,8,AW00011003,AUD,1,...,0.0,1912.1544,1912.1544,3399.9900,271.9992,84.9998,,,346,BK-M82S-44
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60393,FE-6654,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,6,SO75122,8,AW00015868,USD,1,...,0.0,8.2205,8.2205,21.9800,2.4776,0.7743,,,485,FE-6654
60394,CA-1098,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,6,SO75122,8,AW00015868,USD,2,...,0.0,6.9223,6.9223,8.9900,2.4776,0.7743,,,223,CA-1098
60395,FE-6654,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,6,SO75123,8,AW00018759,USD,1,...,0.0,8.2205,8.2205,21.9800,15.1976,4.7493,,,485,FE-6654
60396,ST-1401,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,6,SO75123,8,AW00018759,USD,2,...,0.0,59.4660,59.4660,159.0000,15.1976,4.7493,,,486,ST-1401


In [23]:
#join whit dim_product

fact_internet_sales=fact_internet_sales.merge(
    t_special_offer,
    on='product_number',
    how='left'
).drop(['product_number'],axis=1)

fact_internet_sales

Unnamed: 0,order_date,due_date,ship_date,territory_id,sales_order_number,revision_number,account_number,to_currency_code,sales_order_line_number,order_quantity,...,product_standard_cost,total_product_cost,sales_amount,tax_amt,freight,carrier_tracking_number,customer_po_number,product_key,product_alternate_key,special_offer_id
0,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,6,SO43697,8,AW00021768,CAD,1,1,...,2171.2942,2171.2942,3578.2700,286.2616,89.4568,,,310,BK-R93R-62,1
1,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,7,SO43698,8,AW00028389,FRF,1,1,...,1912.1544,1912.1544,3399.9900,271.9992,84.9998,,,346,BK-M82S-44,1
2,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,1,SO43699,8,AW00025863,USD,1,1,...,1912.1544,1912.1544,3399.9900,271.9992,84.9998,,,346,BK-M82S-44,1
3,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,4,SO43700,8,AW00014501,USD,1,1,...,486.7066,486.7066,699.0982,55.9279,17.4775,,,336,BK-R50B-62,1
4,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,9,SO43701,8,AW00011003,AUD,1,1,...,1912.1544,1912.1544,3399.9900,271.9992,84.9998,,,346,BK-M82S-44,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60393,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,6,SO75122,8,AW00015868,USD,1,1,...,8.2205,8.2205,21.9800,2.4776,0.7743,,,485,FE-6654,1
60394,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,6,SO75122,8,AW00015868,USD,2,1,...,6.9223,6.9223,8.9900,2.4776,0.7743,,,223,CA-1098,1
60395,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,6,SO75123,8,AW00018759,USD,1,1,...,8.2205,8.2205,21.9800,15.1976,4.7493,,,485,FE-6654,1
60396,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,6,SO75123,8,AW00018759,USD,2,1,...,59.4660,59.4660,159.0000,15.1976,4.7493,,,486,ST-1401,1


In [24]:
#join whit dim_promotion

fact_internet_sales=fact_internet_sales.merge(
    dim_promotion[['promotion_alternate_key','promotion_key']],
    left_on='special_offer_id',
    right_on='promotion_alternate_key',
    how='left'
).drop(['special_offer_id','promotion_alternate_key'],axis=1)

fact_internet_sales



Unnamed: 0,order_date,due_date,ship_date,territory_id,sales_order_number,revision_number,account_number,to_currency_code,sales_order_line_number,order_quantity,...,product_standard_cost,total_product_cost,sales_amount,tax_amt,freight,carrier_tracking_number,customer_po_number,product_key,product_alternate_key,promotion_key
0,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,6,SO43697,8,AW00021768,CAD,1,1,...,2171.2942,2171.2942,3578.2700,286.2616,89.4568,,,310,BK-R93R-62,1
1,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,7,SO43698,8,AW00028389,FRF,1,1,...,1912.1544,1912.1544,3399.9900,271.9992,84.9998,,,346,BK-M82S-44,1
2,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,1,SO43699,8,AW00025863,USD,1,1,...,1912.1544,1912.1544,3399.9900,271.9992,84.9998,,,346,BK-M82S-44,1
3,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,4,SO43700,8,AW00014501,USD,1,1,...,486.7066,486.7066,699.0982,55.9279,17.4775,,,336,BK-R50B-62,1
4,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,9,SO43701,8,AW00011003,AUD,1,1,...,1912.1544,1912.1544,3399.9900,271.9992,84.9998,,,346,BK-M82S-44,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60393,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,6,SO75122,8,AW00015868,USD,1,1,...,8.2205,8.2205,21.9800,2.4776,0.7743,,,485,FE-6654,1
60394,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,6,SO75122,8,AW00015868,USD,2,1,...,6.9223,6.9223,8.9900,2.4776,0.7743,,,223,CA-1098,1
60395,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,6,SO75123,8,AW00018759,USD,1,1,...,8.2205,8.2205,21.9800,15.1976,4.7493,,,485,FE-6654,1
60396,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,6,SO75123,8,AW00018759,USD,2,1,...,59.4660,59.4660,159.0000,15.1976,4.7493,,,486,ST-1401,1


In [25]:
#join currency

dim_currency=pd.read_sql_table('dim_currency',etl_conn)

fact_internet_sales=fact_internet_sales.merge(
    dim_currency[['currency_key','currency_alternate_key']],
    left_on='to_currency_code',
    right_on='currency_alternate_key',
    how='left'
).drop(['currency_alternate_key','to_currency_code'],axis=1)

fact_internet_sales


Unnamed: 0,order_date,due_date,ship_date,territory_id,sales_order_number,revision_number,account_number,sales_order_line_number,order_quantity,unit_price,...,total_product_cost,sales_amount,tax_amt,freight,carrier_tracking_number,customer_po_number,product_key,product_alternate_key,promotion_key,currency_key
0,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,6,SO43697,8,AW00021768,1,1,3578.2700,...,2171.2942,3578.2700,286.2616,89.4568,,,310,BK-R93R-62,1,19
1,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,7,SO43698,8,AW00028389,1,1,3399.9900,...,1912.1544,3399.9900,271.9992,84.9998,,,346,BK-M82S-44,1,39
2,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,1,SO43699,8,AW00025863,1,1,3399.9900,...,1912.1544,3399.9900,271.9992,84.9998,,,346,BK-M82S-44,1,100
3,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,4,SO43700,8,AW00014501,1,1,699.0982,...,486.7066,699.0982,55.9279,17.4775,,,336,BK-R50B-62,1,100
4,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,9,SO43701,8,AW00011003,1,1,3399.9900,...,1912.1544,3399.9900,271.9992,84.9998,,,346,BK-M82S-44,1,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60393,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,6,SO75122,8,AW00015868,1,1,21.9800,...,8.2205,21.9800,2.4776,0.7743,,,485,FE-6654,1,100
60394,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,6,SO75122,8,AW00015868,2,1,8.9900,...,6.9223,8.9900,2.4776,0.7743,,,223,CA-1098,1,100
60395,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,6,SO75123,8,AW00018759,1,1,21.9800,...,8.2205,21.9800,15.1976,4.7493,,,485,FE-6654,1,100
60396,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,6,SO75123,8,AW00018759,2,1,159.0000,...,59.4660,159.0000,15.1976,4.7493,,,486,ST-1401,1,100


In [26]:
# join sales territoryKey

fact_internet_sales=fact_internet_sales.merge(
    dim_sales_territory[['sales_territory_key','sales_territory_alternate_key']],
    left_on='territory_id',
    right_on='sales_territory_alternate_key',
    how='left'
).drop(['territory_id','sales_territory_alternate_key'],axis=1)

fact_internet_sales

Unnamed: 0,order_date,due_date,ship_date,sales_order_number,revision_number,account_number,sales_order_line_number,order_quantity,unit_price,unit_price_discount_pct,...,sales_amount,tax_amt,freight,carrier_tracking_number,customer_po_number,product_key,product_alternate_key,promotion_key,currency_key,sales_territory_key
0,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,SO43697,8,AW00021768,1,1,3578.2700,0.0,...,3578.2700,286.2616,89.4568,,,310,BK-R93R-62,1,19,6
1,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,SO43698,8,AW00028389,1,1,3399.9900,0.0,...,3399.9900,271.9992,84.9998,,,346,BK-M82S-44,1,39,7
2,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,SO43699,8,AW00025863,1,1,3399.9900,0.0,...,3399.9900,271.9992,84.9998,,,346,BK-M82S-44,1,100,1
3,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,SO43700,8,AW00014501,1,1,699.0982,0.0,...,699.0982,55.9279,17.4775,,,336,BK-R50B-62,1,100,4
4,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,SO43701,8,AW00011003,1,1,3399.9900,0.0,...,3399.9900,271.9992,84.9998,,,346,BK-M82S-44,1,6,9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60393,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,SO75122,8,AW00015868,1,1,21.9800,0.0,...,21.9800,2.4776,0.7743,,,485,FE-6654,1,100,6
60394,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,SO75122,8,AW00015868,2,1,8.9900,0.0,...,8.9900,2.4776,0.7743,,,223,CA-1098,1,100,6
60395,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,SO75123,8,AW00018759,1,1,21.9800,0.0,...,21.9800,15.1976,4.7493,,,485,FE-6654,1,100,6
60396,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,SO75123,8,AW00018759,2,1,159.0000,0.0,...,159.0000,15.1976,4.7493,,,486,ST-1401,1,100,6


In [27]:
#RevisionNumber

fact_internet_sales['revision_number']=fact_internet_sales['revision_number'].apply(lambda x: 1 if x==8 else 2)

fact_internet_sales

Unnamed: 0,order_date,due_date,ship_date,sales_order_number,revision_number,account_number,sales_order_line_number,order_quantity,unit_price,unit_price_discount_pct,...,sales_amount,tax_amt,freight,carrier_tracking_number,customer_po_number,product_key,product_alternate_key,promotion_key,currency_key,sales_territory_key
0,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,SO43697,1,AW00021768,1,1,3578.2700,0.0,...,3578.2700,286.2616,89.4568,,,310,BK-R93R-62,1,19,6
1,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,SO43698,1,AW00028389,1,1,3399.9900,0.0,...,3399.9900,271.9992,84.9998,,,346,BK-M82S-44,1,39,7
2,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,SO43699,1,AW00025863,1,1,3399.9900,0.0,...,3399.9900,271.9992,84.9998,,,346,BK-M82S-44,1,100,1
3,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,SO43700,1,AW00014501,1,1,699.0982,0.0,...,699.0982,55.9279,17.4775,,,336,BK-R50B-62,1,100,4
4,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,SO43701,1,AW00011003,1,1,3399.9900,0.0,...,3399.9900,271.9992,84.9998,,,346,BK-M82S-44,1,6,9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60393,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,SO75122,1,AW00015868,1,1,21.9800,0.0,...,21.9800,2.4776,0.7743,,,485,FE-6654,1,100,6
60394,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,SO75122,1,AW00015868,2,1,8.9900,0.0,...,8.9900,2.4776,0.7743,,,223,CA-1098,1,100,6
60395,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,SO75123,1,AW00018759,1,1,21.9800,0.0,...,21.9800,15.1976,4.7493,,,485,FE-6654,1,100,6
60396,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,SO75123,1,AW00018759,2,1,159.0000,0.0,...,159.0000,15.1976,4.7493,,,486,ST-1401,1,100,6


In [28]:
#drop unusefull variables

fact_internet_sales.drop(['product_alternate_key'],axis=1,inplace=True)

fact_internet_sales

Unnamed: 0,order_date,due_date,ship_date,sales_order_number,revision_number,account_number,sales_order_line_number,order_quantity,unit_price,unit_price_discount_pct,...,total_product_cost,sales_amount,tax_amt,freight,carrier_tracking_number,customer_po_number,product_key,promotion_key,currency_key,sales_territory_key
0,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,SO43697,1,AW00021768,1,1,3578.2700,0.0,...,2171.2942,3578.2700,286.2616,89.4568,,,310,1,19,6
1,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,SO43698,1,AW00028389,1,1,3399.9900,0.0,...,1912.1544,3399.9900,271.9992,84.9998,,,346,1,39,7
2,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,SO43699,1,AW00025863,1,1,3399.9900,0.0,...,1912.1544,3399.9900,271.9992,84.9998,,,346,1,100,1
3,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,SO43700,1,AW00014501,1,1,699.0982,0.0,...,486.7066,699.0982,55.9279,17.4775,,,336,1,100,4
4,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,SO43701,1,AW00011003,1,1,3399.9900,0.0,...,1912.1544,3399.9900,271.9992,84.9998,,,346,1,6,9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60393,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,SO75122,1,AW00015868,1,1,21.9800,0.0,...,8.2205,21.9800,2.4776,0.7743,,,485,1,100,6
60394,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,SO75122,1,AW00015868,2,1,8.9900,0.0,...,6.9223,8.9900,2.4776,0.7743,,,223,1,100,6
60395,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,SO75123,1,AW00018759,1,1,21.9800,0.0,...,8.2205,21.9800,15.1976,4.7493,,,485,1,100,6
60396,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,SO75123,1,AW00018759,2,1,159.0000,0.0,...,59.4660,159.0000,15.1976,4.7493,,,486,1,100,6


In [29]:
# convert dates to integers

fact_internet_sales['order_date_key']=fact_internet_sales['order_date'].dt.strftime('%Y%m%d').astype(int)
fact_internet_sales['due_date_key']=fact_internet_sales['due_date'].dt.strftime('%Y%m%d').astype(int)
fact_internet_sales['ship_date_key']=fact_internet_sales['ship_date'].dt.strftime('%Y%m%d').astype(int)

In [30]:
#customer_key

fact_internet_sales=fact_internet_sales.merge(
    dim_customer[['customer_key','customer_alternate_key']],
    left_on='account_number',
    right_on='customer_alternate_key',
    how='left' 
)

fact_internet_sales=fact_internet_sales.drop(columns=['customer_alternate_key','account_number'])

fact_internet_sales

Unnamed: 0,order_date,due_date,ship_date,sales_order_number,revision_number,sales_order_line_number,order_quantity,unit_price,unit_price_discount_pct,extended_amount,...,carrier_tracking_number,customer_po_number,product_key,promotion_key,currency_key,sales_territory_key,order_date_key,due_date_key,ship_date_key,customer_key
0,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,SO43697,1,1,1,3578.2700,0.0,3578.2700,...,,,310,1,19,6,20110531,20110612,20110607,11785
1,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,SO43698,1,1,1,3399.9900,0.0,3399.9900,...,,,346,1,39,7,20110531,20110612,20110607,13226
2,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,SO43699,1,1,1,3399.9900,0.0,3399.9900,...,,,346,1,100,1,20110531,20110612,20110607,8260
3,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,SO43700,1,1,1,699.0982,0.0,699.0982,...,,,336,1,100,4,20110531,20110612,20110607,8918
4,2011-05-31 00:00:00+00:00,2011-06-12 00:00:00+00:00,2011-06-07 00:00:00+00:00,SO43701,1,1,1,3399.9900,0.0,3399.9900,...,,,346,1,6,9,20110531,20110612,20110607,9065
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60393,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,SO75122,1,1,1,21.9800,0.0,21.9800,...,,,485,1,100,6,20140630,20140712,20140707,3288
60394,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,SO75122,1,2,1,8.9900,0.0,8.9900,...,,,223,1,100,6,20140630,20140712,20140707,3288
60395,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,SO75123,1,1,1,21.9800,0.0,21.9800,...,,,485,1,100,6,20140630,20140712,20140707,2696
60396,2014-06-30 00:00:00+00:00,2014-07-12 00:00:00+00:00,2014-07-07 00:00:00+00:00,SO75123,1,2,1,159.0000,0.0,159.0000,...,,,486,1,100,6,20140630,20140712,20140707,2696


In [31]:
len(fact_internet_sales.columns)

26

In [32]:
fact_internet_sales.columns

Index(['order_date', 'due_date', 'ship_date', 'sales_order_number',
       'revision_number', 'sales_order_line_number', 'order_quantity',
       'unit_price', 'unit_price_discount_pct', 'extended_amount',
       'discount_amount', 'product_standard_cost', 'total_product_cost',
       'sales_amount', 'tax_amt', 'freight', 'carrier_tracking_number',
       'customer_po_number', 'product_key', 'promotion_key', 'currency_key',
       'sales_territory_key', 'order_date_key', 'due_date_key',
       'ship_date_key', 'customer_key'],
      dtype='object')

## Load

In [33]:
fact_internet_sales.to_sql('fact_internet_sales',etl_conn, schema='dw', if_exists='append',index=False)

398