### 1. 일/주/월/분기별 매출액 및 주문 건수

In [1]:
import pandas as pd 
from sqlalchemy import create_engine
import plotly.express as px

In [2]:
import os

# Connection URL for the PostgreSQL database queried in the cells below.
# The URL embeds a password, so it can be supplied through the NW_DB_URL
# environment variable instead of being edited into the notebook. The
# original local-development URL remains the fallback, so the notebook
# behaves exactly as before when the variable is not set.
conn_string = os.environ.get(
    'NW_DB_URL',
    'postgresql://postgres:admin1234@localhost:5432/postgres',
)

# Engine handed to the pd.read_sql_query calls further down.
postgres_engine = create_engine(conn_string)

#### 일별

In [3]:
# Daily revenue and daily order count.
# Revenue is the sum of `amount` over the joined rows; orders are counted with
# DISTINCT so an order is counted once even if the join repeats it
# (presumably once per order item -- the order_items schema is not shown here).
query = """
    SELECT  
        date_trunc('day', order_date)::date as day
        , sum(amount) as sum_amount
        , count(distinct o.order_id) as daily_order_cnt
    FROM nw.orders o
    JOIN nw.order_items oi 
    ON o.order_id = oi.order_id
    GROUP BY date_trunc('day', order_date)::date
    ORDER BY date_trunc('day', order_date)::date
"""

# Execute on the shared engine and preview the first five rows.
df = pd.read_sql_query(query, postgres_engine)
df.head(5)

Unnamed: 0,day,sum_amount,daily_order_cnt
0,1996-07-04,440.0,1
1,1996-07-05,1863.4,1
2,1996-07-08,2206.66,2
3,1996-07-09,3597.9,1
4,1996-07-10,1444.8,1


In [4]:
# Line chart of daily revenue (sum_amount) against the order day.
fig = px.line(df, x='day', y='sum_amount')
fig.show()

In [5]:
# Preview up to the first 20 rows of the daily result.
df.head(n=20)

Unnamed: 0,day,sum_amount,daily_order_cnt
0,1996-07-04,440.0,1
1,1996-07-05,1863.4,1
2,1996-07-08,2206.66,2
3,1996-07-09,3597.9,1
4,1996-07-10,1444.8,1
5,1996-07-11,556.62,1
6,1996-07-12,2490.5,1
7,1996-07-15,517.8,1
8,1996-07-16,1119.9,1
9,1996-07-17,1614.88,1


#### 주별

In [6]:
# Weekly revenue and weekly order count.
# Same aggregation as the daily query, but order_date is truncated to the
# week; the resulting date identifies the week each order falls in.
query = """
    SELECT  
        date_trunc('week', order_date)::date as week
        , sum(amount) as sum_amount
        , count(distinct o.order_id) as weekly_order_cnt
    FROM nw.orders o
    JOIN nw.order_items oi 
    ON o.order_id = oi.order_id
    GROUP BY date_trunc('week', order_date)::date
    ORDER BY date_trunc('week', order_date)::date
"""

# Execute on the shared engine and preview the first five weeks.
df = pd.read_sql_query(query, postgres_engine)
df.head(5)

Unnamed: 0,week,sum_amount,weekly_order_cnt
0,1996-07-01,2303.4,2
1,1996-07-08,10296.48,6
2,1996-07-15,5306.03,6
3,1996-07-22,4675.98,5
4,1996-07-29,8160.0,6


In [7]:
# Line chart of weekly revenue (sum_amount) against the week column.
fig = px.line(df, x='week', y='sum_amount')
fig.show()

#### 월별

In [8]:
# Monthly revenue and monthly order count.
# order_date is truncated to the month, so each row is keyed by the first day
# of its month; ord_cnt counts distinct orders in that month.
query = """
    SELECT  
        date_trunc('month', order_date)::date as month
        , sum(amount) as sum_amount
        , count(distinct o.order_id) as ord_cnt
    FROM nw.orders o
    JOIN nw.order_items oi 
    ON o.order_id = oi.order_id
    GROUP BY date_trunc('month', order_date)::date
    ORDER BY date_trunc('month', order_date)::date
"""

# Execute on the shared engine and preview the first five months.
df = pd.read_sql_query(query, postgres_engine)
df.head(5)

Unnamed: 0,month,sum_amount,ord_cnt
0,1996-07-01,27861.89,22
1,1996-08-01,25485.27,25
2,1996-09-01,26381.4,23
3,1996-10-01,37515.72,26
4,1996-11-01,45600.04,25


In [9]:
# Line chart of monthly revenue (sum_amount) against the month column.
fig = px.line(df, x='month', y='sum_amount')
fig.show()

In [10]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Single subplot with a secondary y-axis so that revenue and order count,
# which live on very different scales, can share the same month axis.
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Monthly revenue as bars (primary axis), monthly order count as a line
# (secondary axis).
amount_bars = go.Bar(name="monthly_amount", x=df['month'], y=df['sum_amount'])
count_line = go.Scatter(name="monthly_count", x=df['month'], y=df['ord_cnt'])

fig.add_trace(amount_bars, secondary_y=False)
# Deliberately left as the cell's final expression: add_trace returns the
# figure, which is what makes the notebook render it without fig.show().
fig.add_trace(count_line, secondary_y=True)

#### 분기별 매출

In [11]:
# Quarterly revenue and quarterly order count.
# NOTE(review): unlike the daily/weekly/monthly queries, the first two select
# expressions carry no alias, so the result columns are named by the database
# (`date_trunc` and `sum` in the recorded output). Consider aliasing them,
# e.g. `as quarter` / `as sum_amount`, and re-running the cell.
query = """
    SELECT  
        date_trunc('quarter', order_date)::date
        , sum(amount)
        , count(distinct o.order_id) as quarterly_order_cnt
    FROM nw.orders o
    JOIN nw.order_items oi 
    ON o.order_id = oi.order_id
    GROUP BY date_trunc('quarter', order_date)::date
    ORDER BY date_trunc('quarter', order_date)::date
"""

# Execute on the shared engine and preview the first five quarters.
df = pd.read_sql_query(query, postgres_engine)
df.head(5)

Unnamed: 0,date_trunc,sum,quarterly_order_cnt
0,1996-07-01,79728.56,70
1,1996-10-01,128355.39,82
2,1997-01-01,138288.9,92
3,1997-04-01,143177.03,93
4,1997-07-01,153937.74,103


### 2. 월별 상품카테고리별 매출액 및 주문건수, 월 전체 매출액 대비 비율

1. 상품 카테고리별 월별 매출액 추출
2. 1번의 집합에서 전체 매출액을 analytic으로 구한 뒤 매출액 비율 계산

In [14]:
# Revenue and order count per product category per month, plus each
# category's share of that month's total revenue.
#   temp_01 : one row per (category_name, month) with summed amount and the
#             distinct order count; month is a 'yyyymm' string.
#   final   : adds the month total via a window partitioned by month and
#             divides each category's revenue by it.
# The final SELECT is explicitly ordered by month: without ORDER BY the row
# order is unspecified, and the charts built from this frame rely on rows
# arriving in chronological order ('yyyymm' strings sort chronologically).
query = """
WITH temp_01 as (
    SELECT
        c.category_name
        , to_char(date_trunc('month', o.order_date), 'yyyymm') as month
        , sum(amount) as sum_amount
        , count(distinct o.order_id) as monthly_ord_cnt
    FROM nw.orders o
    JOIN nw.order_items oi
        ON o.order_id = oi.order_id
    JOIN nw.products p 
        ON oi.product_id = p.product_id
    JOIN nw.categories c
        ON p.category_id = c.category_id
    GROUP BY c.category_name, to_char(date_trunc('month', o.order_date), 'yyyymm')
)
SELECT *
    , sum(sum_amount) over (PARTITION BY month) as month_tot_amount
    , sum_amount / sum(sum_amount) over (PARTITION BY month) as monthly_ratio
FROM temp_01
ORDER BY month
"""

# Run the category/month query on the shared engine and preview five rows.
df = pd.read_sql_query(query, postgres_engine)
df.head(5)

Unnamed: 0,category_name,month,sum_amount,monthly_ord_cnt,month_tot_amount,monthly_ratio
0,Confections,199607,5775.15,8,27861.89,0.207278
1,Condiments,199607,1878.2,6,27861.89,0.067411
2,Beverages,199607,3182.5,11,27861.89,0.114224
3,Dairy Products,199607,6838.34,9,27861.89,0.245437
4,Grains/Cereals,199607,1256.86,4,27861.89,0.04511


In [17]:
# Monthly revenue as one line per product category, with a marker on every
# data point.
fig = px.line(
    df,
    x='month',
    y='sum_amount',
    color='category_name',
    markers=True,
)
fig.show()

In [19]:
import plotly.express as px

# Monthly revenue as bars coloured by product category; each bar segment is
# labelled with that category's share of the month (monthly_ratio).
fig = px.bar(
    df,
    x='month',
    y='sum_amount',
    color='category_name',
    text='monthly_ratio',
)
fig.show()

### 3. 상품별 매출액과 상품 카테고리 매출액 대비 비율, 해당 상품 카테고리에서 상품의 매출순위 SQL로 구하기

In [20]:
# Per-product revenue, its share of the product's category revenue, and the
# product's sales rank inside that category.
#   temp_01 : one row per product_id with summed amount; max() is used only to
#             carry the product and category names through the GROUP BY.
#   final   : category total via a window partitioned by category_name, the
#             product/category ratio rounded to 3 decimals, and row_number()
#             ordered by revenue descending as the in-category rank.
query = """
WITH temp_01 as (
    SELECT
        oi.product_id
        , max(p.product_name) as product_name
        , max(c.category_name) as category_name
        , sum(amount) as sum_amount
    FROM nw.order_items oi
    JOIN nw.products p 
        ON oi.product_id = p.product_id
    JOIN nw.categories c
        ON p.category_id = c.category_id
    GROUP BY oi.product_id
)

SELECT 
    product_name
    , sum_amount as product_sales
    , category_name
    , sum(sum_amount) OVER (PARTITION BY category_name) as category_sales
    , round(sum_amount / sum(sum_amount) OVER (PARTITION BY category_name), 3) as product_category_ratio
    , row_number() OVER(PARTITION BY category_name ORDER BY sum_amount DESC) as product_rn
FROM temp_01
ORDER BY category_name, product_sales DESC
"""

# Execute on the shared engine and preview the first five rows.
df = pd.read_sql_query(query, postgres_engine)
df.head(5)

Unnamed: 0,product_name,product_sales,category_name,category_sales,product_category_ratio,product_rn
0,Côte de Blaye,141396.73,Beverages,267868.16,0.528,1
1,Ipoh Coffee,23526.7,Beverages,267868.16,0.088,2
2,Chang,16355.96,Beverages,267868.16,0.061,3
3,Lakkalikööri,15760.44,Beverages,267868.16,0.059,4
4,Steeleye Stout,13644.0,Beverages,267868.16,0.051,5


In [21]:
import plotly.express as px
import numpy as np

# Treemap hierarchy: a constant root labelled 'total', then category, then
# product. Tile size and tile colour are both driven by product_sales.
treemap_levels = [px.Constant('total'), 'category_name', 'product_name']
fig = px.treemap(
    df,
    path=treemap_levels,
    values='product_sales',
    color='product_sales',
)
fig.show()

### 4. 동년도 월별 누적 매출 및 동일 분기 월별 누적 매출

1. 월별 매출액을 구한다.
2. 월별 매출액 집합에서 동일 연도의 월별 누적 매출과 동일 분기의 월별 누적 매출을 구한다.

In [37]:
# Monthly revenue with running totals within the same year and within the
# same quarter.
#   temp_01 : revenue per month, keyed by the first day of the month
#             (month_day).
#   final   : for each month, emits the start date of its year (aliased
#             year_month) and of its quarter (quarter_month), then two
#             cumulative sums ordered by month_day -- one partitioned by the
#             year, one by the quarter -- so each running total restarts when
#             a new year / quarter begins.
# The query is only defined here; it is executed in the next cell.
query = """
WITH temp_01 AS (
    SELECT 
        date_trunc('month', order_date)::date as month_day
        , sum(amount) as sum_amount
    FROM nw.orders a
    JOIN nw.order_items b
    ON a.order_id = b.order_id
    GROUP BY date_trunc('month', order_date)::date
)

SELECT 
    month_day
    , date_trunc('year', month_day)::date as year_month
    , date_trunc('quarter', month_day)::date as quarter_month    
    , sum_amount
    , sum(sum_amount) OVER (PARTITION BY date_trunc('year', month_day)::date ORDER BY month_day) as yearly_cumulative_sales
    , sum(sum_amount) OVER (PARTITION BY date_trunc('quarter', month_day)::date ORDER BY month_day) as quarterly_cumulative_sales
FROM temp_01
ORDER BY month_day
"""

In [38]:
# Run the cumulative-sales query on the shared engine and preview ten months.
df = pd.read_sql_query(query, postgres_engine)
df.head(n=10)

Unnamed: 0,month_day,year_month,quarter_month,sum_amount,yearly_cumulative_sales,quarterly_cumulative_sales
0,1996-07-01,1996-01-01,1996-07-01,27861.89,27861.89,27861.89
1,1996-08-01,1996-01-01,1996-07-01,25485.27,53347.16,53347.16
2,1996-09-01,1996-01-01,1996-07-01,26381.4,79728.56,79728.56
3,1996-10-01,1996-01-01,1996-10-01,37515.72,117244.28,37515.72
4,1996-11-01,1996-01-01,1996-10-01,45600.04,162844.32,83115.76
5,1996-12-01,1996-01-01,1996-10-01,45239.63,208083.95,128355.39
6,1997-01-01,1997-01-01,1997-01-01,61258.06,61258.06,61258.06
7,1997-02-01,1997-01-01,1997-01-01,38483.63,99741.69,99741.69
8,1997-03-01,1997-01-01,1997-01-01,38547.21,138288.9,138288.9
9,1997-04-01,1997-01-01,1997-04-01,53032.95,191321.85,53032.95


In [39]:
# Year-to-date cumulative revenue per month, with a marker on each month.
fig = px.line(
    df,
    x='month_day',
    y='yearly_cumulative_sales',
    markers=True,
)
fig.show()

### 5. 이동평균, 가중이동평균

In [45]:
# Trailing 5-row moving average of daily revenue.
#   temp_01 : daily revenue for orders dated 1996-07-08 or later.
#   temp_02 : adds the average over the current row and the 4 preceding rows
#             (ordered by day) plus a running row number.
#   final   : blanks the average for the first four rows, where the window
#             does not yet hold five days of data.
# The window is row-based, so it covers the five most recent days that have
# orders, not five calendar days.
# The final SELECT is explicitly ordered by d_day: without ORDER BY the row
# order of a result set is unspecified, and this series is previewed and read
# as a time series.
query = """
WITH temp_01 AS (
    SELECT 
        date_trunc('day', order_date)::date as d_day
        , sum(amount) as sum_amount
    FROM nw.orders a 
    JOIN nw.order_items b 
    ON a.order_id = b.order_id
    WHERE order_date >= to_date('1996-07-08', 'yyyy-mm-dd')
    GROUP BY date_trunc('day', order_date)::date
), temp_02 AS (
    SELECT 
        d_day
        , sum_amount
        , avg(sum_amount) OVER (ORDER BY d_day rows between 4 preceding and current row) as m_avg_5days
        , row_number() OVER (ORDER BY d_day) as rnum
    FROM temp_01
)

SELECT 
    d_day
    , sum_amount
    , rnum
    , CASE 
        WHEN rnum < 5 THEN null
        ELSE m_avg_5days END AS m_avg_5days
FROM temp_02
ORDER BY d_day;
"""

In [46]:
# Run the moving-average query on the shared engine and preview ten days.
df = pd.read_sql_query(query, postgres_engine)
df.head(n=10)

Unnamed: 0,d_day,sum_amount,rnum,m_avg_5days
0,1996-07-08,2206.66,1,
1,1996-07-09,3597.9,2,
2,1996-07-10,1444.8,3,
3,1996-07-11,556.62,4,
4,1996-07-12,2490.5,5,2059.296
5,1996-07-15,517.8,6,1721.524
6,1996-07-16,1119.9,7,1225.924
7,1996-07-17,1614.88,8,1259.94
8,1996-07-18,100.8,9,1168.776
9,1996-07-19,1952.65,10,1061.206
