#### Getting Query Results into the Table
- `CTAS`, `INSERT`
- `MERGE`: supported only with the Delta format

In [0]:
%sql
-- Switch the session's current database to itversity_retail_db.
USE itversity_retail_db;

In [0]:
%sql
-- List the tables in the current database.
SHOW TABLES;

database,tableName,isTemporary
itversity_retail_db,daily_revenue,False
itversity_retail_db,daily_revenue_stg,False
itversity_retail_db,order_items,False
itversity_retail_db,orders,False


In [0]:
%sql
-- Preview up to 10 rows of orders (no ORDER BY, so which rows come back is not guaranteed).
SELECT *
FROM orders
LIMIT 10

order_id,order_date,order_customer_id,order_status
1,2013-07-25 00:00:00.0,11599,CLOSED
2,2013-07-25 00:00:00.0,256,PENDING_PAYMENT
3,2013-07-25 00:00:00.0,12111,COMPLETE
4,2013-07-25 00:00:00.0,8827,CLOSED
5,2013-07-25 00:00:00.0,11318,COMPLETE
6,2013-07-25 00:00:00.0,7130,COMPLETE
7,2013-07-25 00:00:00.0,4530,COMPLETE
8,2013-07-25 00:00:00.0,2911,PROCESSING
9,2013-07-25 00:00:00.0,5657,PENDING_PAYMENT
10,2013-07-25 00:00:00.0,5648,PENDING_PAYMENT


In [0]:
%sql
-- Preview up to 10 rows of order_items (no ORDER BY, so which rows come back is not guaranteed).
SELECT *
FROM order_items
LIMIT 10

order_item,order_item_order_id,order_item_product_id,order_item_quantity,order_item_subtotal,order_item_product_price
1,1,957,1,299.98,299.98
2,2,1073,1,199.99,199.99
3,2,502,5,250.0,50.0
4,2,403,1,129.99,129.99
5,4,897,2,49.98,24.99
6,4,365,5,299.95,59.99
7,4,502,3,150.0,50.0
8,4,1014,4,199.92,49.98
9,5,957,1,299.98,299.98
10,5,365,5,299.95,59.99


In [0]:
%sql
-- TASK: Compute daily revenue.
-- Revenue per order_date is the sum of order_item_subtotal over the items of
-- orders whose status is COMPLETE or CLOSED, rounded to 2 decimal places.
-- Only the first 10 dates (ascending) are returned as a preview.
SELECT
    o.order_date,
    ROUND(SUM(oi.order_item_subtotal), 2) AS order_revenue
FROM orders AS o
INNER JOIN order_items AS oi
    ON o.order_id = oi.order_item_order_id
-- Column name written in its actual lower-case form so the query does not
-- depend on case-insensitive identifier resolution.
WHERE o.order_status IN ('COMPLETE', 'CLOSED')
GROUP BY o.order_date
ORDER BY o.order_date
LIMIT 10;

order_date,order_revenue
2013-07-25 00:00:00.0,31547.23
2013-07-26 00:00:00.0,54713.23
2013-07-27 00:00:00.0,48411.48
2013-07-28 00:00:00.0,35672.03
2013-07-29 00:00:00.0,54579.7
2013-07-30 00:00:00.0,49329.29
2013-07-31 00:00:00.0,59212.49
2013-08-01 00:00:00.0,49160.08
2013-08-02 00:00:00.0,50688.58
2013-08-03 00:00:00.0,43416.74


In [0]:
%sql
-- Case 1: Save the daily revenue query result into a table using CTAS.
-- The table is dropped first so the cell can be re-run; CTAS takes the
-- column names and types from the SELECT.
DROP TABLE IF EXISTS daily_revenue;

CREATE TABLE daily_revenue AS
SELECT
    o.order_date,
    ROUND(SUM(oi.order_item_subtotal), 2) AS order_revenue
FROM orders AS o
INNER JOIN order_items AS oi
    ON o.order_id = oi.order_item_order_id
-- Column name written in its actual lower-case form so the query does not
-- depend on case-insensitive identifier resolution.
WHERE o.order_status IN ('COMPLETE', 'CLOSED')
GROUP BY o.order_date;

num_affected_rows,num_inserted_rows


In [0]:
%sql
-- Note: CTAS is typically used to create 'stage' tables, which are dropped and
-- re-created every time the pipeline runs.
-- A target table, by contrast, is created up front and then populated with data.
SELECT *
FROM daily_revenue
LIMIT 10;

order_date,order_revenue
2013-08-13 00:00:00.0,17956.88
2013-10-12 00:00:00.0,35698.85
2013-11-15 00:00:00.0,34443.22
2013-09-16 00:00:00.0,29117.35
2013-09-20 00:00:00.0,29575.36
2013-09-06 00:00:00.0,61976.1
2013-10-14 00:00:00.0,24086.61
2013-11-11 00:00:00.0,57917.7
2013-09-07 00:00:00.0,45235.53
2013-09-11 00:00:00.0,22322.13


In [0]:
%sql
-- Case 2: Save the query result into a table using an INSERT statement.
-- INSERT needs an existing table, so (re)create the target with an explicit schema.
DROP TABLE IF EXISTS daily_revenue;

-- order_revenue is a monetary amount rounded to 2 decimals, so it is stored as
-- an exact DECIMAL; single-precision FLOAT cannot represent every cent value exactly.
CREATE TABLE daily_revenue (
    order_date DATE,
    order_revenue DECIMAL(18, 2)
);

In [0]:
%sql
-- Approach 1: INSERT INTO
-- Appends the daily revenue rows to whatever daily_revenue already contains.
INSERT INTO daily_revenue
SELECT
    -- The target column is a DATE: convert explicitly and group on the converted
    -- value so each calendar day yields exactly one row.
    CAST(o.order_date AS DATE) AS order_date,
    ROUND(SUM(oi.order_item_subtotal), 2) AS order_revenue
FROM orders AS o
INNER JOIN order_items AS oi
    ON o.order_id = oi.order_item_order_id
WHERE o.order_status IN ('COMPLETE', 'CLOSED')
GROUP BY CAST(o.order_date AS DATE)

num_affected_rows,num_inserted_rows
364,364


In [0]:
%sql
-- Approach 2: INSERT OVERWRITE
-- Replaces the existing contents of daily_revenue with the query result.
INSERT OVERWRITE daily_revenue
SELECT
    -- The target column is a DATE: convert explicitly and group on the converted
    -- value so each calendar day yields exactly one row.
    CAST(o.order_date AS DATE) AS order_date,
    ROUND(SUM(oi.order_item_subtotal), 2) AS order_revenue
FROM orders AS o
INNER JOIN order_items AS oi
    ON o.order_id = oi.order_item_order_id
WHERE o.order_status IN ('COMPLETE', 'CLOSED')
GROUP BY CAST(o.order_date AS DATE)

num_affected_rows,num_inserted_rows
364,364


#### Design Pipeline using CTAS and INSERT in Spark SQL

In [0]:
%sql
-- Pipeline flow: main tables -> stage table (DROP and CREATE using CTAS, or INSERT OVERWRITE) -> final reporting table (INSERT INTO)

In [0]:
%sql
-- Design pipeline using INSERT

-- INSERT requires the table to exist, so (re)create the stage table first.
-- order_revenue is a monetary amount, hence an exact DECIMAL rather than FLOAT.
DROP TABLE IF EXISTS daily_revenue_stg;

CREATE TABLE daily_revenue_stg (
    order_date DATE,
    order_revenue DECIMAL(18, 2)
);

-- Populate the stage table with daily revenue for August 2013 only.
INSERT OVERWRITE daily_revenue_stg
SELECT
    -- The stage column is a DATE: convert explicitly and group on the converted
    -- value so each calendar day yields exactly one row.
    CAST(o.order_date AS DATE) AS order_date,
    ROUND(SUM(oi.order_item_subtotal), 2) AS order_revenue
FROM orders AS o
INNER JOIN order_items AS oi
    ON o.order_id = oi.order_item_order_id
WHERE o.order_status IN ('COMPLETE', 'CLOSED')
    -- date_format returns a string, so compare against a string literal
    -- instead of relying on an implicit string-to-integer cast.
    AND DATE_FORMAT(o.order_date, 'yyyyMM') = '201308'
GROUP BY CAST(o.order_date AS DATE);

-- Populate the final table from the stage table.
-- INSERT INTO appends: dates already present in daily_revenue end up
-- duplicated, so make sure this month has not been loaded before.
INSERT INTO daily_revenue
SELECT
    stg.order_date,
    stg.order_revenue
FROM daily_revenue_stg AS stg;
 

num_affected_rows,num_inserted_rows
31,31


In [0]:
%sql
-- Design pipeline using CTAS

-- The stage table is dropped and re-created from the query on every run.
DROP TABLE IF EXISTS daily_revenue_stg;

-- Stage table: daily revenue for September 2013 only.
CREATE TABLE daily_revenue_stg AS
SELECT
    -- Cast so the stage column is a DATE, matching the daily_revenue target,
    -- and group on the converted value so each calendar day yields one row.
    CAST(o.order_date AS DATE) AS order_date,
    ROUND(SUM(oi.order_item_subtotal), 2) AS order_revenue
FROM orders AS o
INNER JOIN order_items AS oi
    ON o.order_id = oi.order_item_order_id
WHERE o.order_status IN ('COMPLETE', 'CLOSED')
    -- date_format returns a string, so compare against a string literal
    -- instead of relying on an implicit string-to-integer cast.
    AND DATE_FORMAT(o.order_date, 'yyyyMM') = '201309'
GROUP BY CAST(o.order_date AS DATE);

-- Populate the final table from the stage table.
-- INSERT INTO appends: dates already present in daily_revenue end up
-- duplicated, so make sure this month has not been loaded before.
INSERT INTO daily_revenue
SELECT
    stg.order_date,
    stg.order_revenue
FROM daily_revenue_stg AS stg;

num_affected_rows,num_inserted_rows
30,30


In [0]:
%sql
-- Show the 10 rows of daily_revenue with the earliest order_date values.
SELECT *
FROM daily_revenue
ORDER BY order_date
LIMIT 10;

order_date,order_revenue
2013-07-25,31547.23
2013-07-26,54713.23
2013-07-27,48411.48
2013-07-28,35672.03
2013-07-29,54579.7
2013-07-30,49329.29
2013-07-31,59212.49
2013-08-01,49160.08
2013-08-01,49160.08
2013-08-02,50688.58


In [0]:
%sql
-- List the tables in the current database.
SHOW TABLES;

database,tableName,isTemporary
itversity_retail_db,daily_revenue,False
itversity_retail_db,daily_revenue_stg,False
itversity_retail_db,order_items,False
itversity_retail_db,orders,False
