### Postgresql 연동 라이브러리 로딩 및 DB 접속

In [11]:
import pandas as pd
from sqlalchemy import create_engine

In [12]:
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go

In [13]:
conn_string = 'postgresql://postgres:postgres@localhost:5432/postgres'
postgres_engine = create_engine(conn_string)

### 작년 대비 동월 매출액 비교(aka 작대비) 시각화

In [14]:
query = """
with 
temp_01 as (
select date_trunc('month', order_date)::date as month_day
	, sum(amount) as sum_amount
from nw.orders a
	join nw.order_items b on a.order_id = b.order_id
group by date_trunc('month', order_date)::date
),
temp_02 as (
select month_day, sum_amount as curr_amount 
	, lag(month_day, 12) over (order by month_day) as prev_month_1year
	, lag(sum_amount, 12) over (order by month_day) as prev_amount_1year
from temp_01
) 
select *
	, curr_amount - prev_amount_1year as diff_amount
	, 100.0 * curr_amount / prev_amount_1year as prev_pct
	, 100.0 * (curr_amount - prev_amount_1year) / prev_amount_1year as prev_growth_pct
from temp_02 
where prev_month_1year is not null;
"""
df = pd.read_sql_query(sql=query, con=postgres_engine)

In [15]:
df.head(10)

Unnamed: 0,month_day,curr_amount,prev_month_1year,prev_amount_1year,diff_amount,prev_pct,prev_growth_pct
0,1997-07-01,51020.84,1996-07-01,27861.89,23158.95,183.120528,83.120528
1,1997-08-01,47287.67,1996-08-01,25485.27,21802.4,185.549025,85.549025
2,1997-09-01,55629.23,1996-09-01,26381.4,29247.83,210.865345,110.865345
3,1997-10-01,66749.24,1996-10-01,37515.72,29233.52,177.923388,77.923388
4,1997-11-01,43533.79,1996-11-01,45600.04,-2066.25,95.468754,-4.531246
5,1997-12-01,71398.41,1996-12-01,45239.63,26158.78,157.82271,57.82271
6,1998-01-01,94222.12,1997-01-01,61258.06,32964.06,153.811792,53.811792
7,1998-02-01,99415.29,1997-02-01,38483.63,60931.66,258.331374,158.331374
8,1998-03-01,104854.15,1997-03-01,38547.21,66306.94,272.014888,172.014888
9,1998-04-01,123798.69,1997-04-01,53032.95,70765.74,233.437306,133.437306


In [16]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# plotly express는 서로 다른 그래프를 하나의 group으로 묶기가 어려움. graph_object를 적용. 
fig = make_subplots(specs=[[{"secondary_y": True}]])

fig.add_trace(go.Bar(
    x=df['month_day'],
    y=df['prev_amount_1year'],
    name='amount 1 year ago',
    marker_color='lightsalmon'), secondary_y=False)
fig.add_trace(go.Bar(
    x=df['month_day'],
    y=df['curr_amount'],
    name='current month amount',
    marker_color='indianred'), secondary_y=False)

# 서로 다른 바차트를 하나의 group으로 묶음. 
fig.update_layout(barmode='group', xaxis_tickangle=-45)

# 작년 대비 비율
fig.add_trace(go.Scatter(
    x=df['month_day'],
    y=df['prev_pct'],
    name='vs previous percent', 
    ), secondary_y=True)
fig.update_xaxes(type='category')
fig.show()

In [17]:
query="""
with temp06
as(
select date_trunc('month',a.order_date)::date date, sum(b.amount)
from nw.orders a
join nw.order_items b on a.order_id = b.order_id 
group by date_trunc('month',a.order_date)::date
order by date
)
select a.date prev_month, a.sum prev_amount, b.date curr_month, b.sum curr_amount,
b.sum-a.sum diff_amount, ((b.sum-a.sum)/a.sum)*100 growth_percent
from temp06 a
join temp06 b on to_char(date_trunc('year',a.date),'yyyy') < to_char(date_trunc('year',b.date),'yyyy')
and to_char(date_trunc('month',a.date),'mm') = to_char(date_trunc('month',b.date),'mm')
order by a.date"""

df = pd.read_sql_query(sql = query, con = postgres_engine)
df.head(5)

Unnamed: 0,prev_month,prev_amount,curr_month,curr_amount,diff_amount,growth_percent
0,1996-07-01,27861.89,1997-07-01,51020.84,23158.95,83.120528
1,1996-08-01,25485.27,1997-08-01,47287.67,21802.4,85.549025
2,1996-09-01,26381.4,1997-09-01,55629.23,29247.83,110.865345
3,1996-10-01,37515.72,1997-10-01,66749.24,29233.52,77.923388
4,1996-11-01,45600.04,1997-11-01,43533.79,-2066.25,-4.531246


In [18]:
df['curr_month']

0     1997-07-01
1     1997-08-01
2     1997-09-01
3     1997-10-01
4     1997-11-01
5     1997-12-01
6     1998-01-01
7     1998-02-01
8     1998-03-01
9     1998-04-01
10    1998-05-01
Name: curr_month, dtype: object

In [19]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

fig = make_subplots(specs=[[{"secondary_y": True}]])

fig.add_trace(go.Bar(
x=df['curr_month'],y=df['prev_amount'],name='prev_month_amount'),secondary_y=False)

fig.add_trace(go.Bar(
x=df['curr_month'],y=df['curr_amount'],name='curr_month_amount'),secondary_y=False)
fig.update_layout(barmode='group', xaxis_tickangle=-45)

fig.add_trace(go.Scatter(
x=df['curr_month'],y=df['growth_percent'],name='growth_percentage'),secondary_y=True)
fig.update_xaxes(type='category')
fig.show()

### 카테고리 별 기준 월 대비 매출 비율 추이(aka 매출 팬 차트)

In [20]:
query = """
with 
temp_01 as (
select d.category_name, to_char(date_trunc('month', order_date), 'yyyymm') as month_day
	, sum(amount) as sum_amount
from nw.orders a
	join nw.order_items b on a.order_id = b.order_id
	join nw.products c on b.product_id = c.product_id 
    join nw.categories d on c.category_id = d.category_id
where order_date between to_date('1996-07-01', 'yyyy-mm-dd') and to_date('1997-06-30', 'yyyy-mm-dd')
group by d.category_name, to_char(date_trunc('month', order_date), 'yyyymm')
)
select category_name, month_day, sum_amount
	, first_value(sum_amount) over (partition by category_name order by month_day) as base_amount 
	, round(100.0 * sum_amount/first_value(sum_amount) over (partition by category_name order by month_day), 2) as base_ratio 
from temp_01
"""
df = pd.read_sql_query(sql=query, con=postgres_engine)

In [21]:
df.head(10)

Unnamed: 0,category_name,month_day,sum_amount,base_amount,base_ratio
0,Beverages,199607,3182.5,3182.5,100.0
1,Beverages,199608,4866.88,3182.5,152.93
2,Beverages,199609,5088.4,3182.5,159.89
3,Beverages,199610,8187.36,3182.5,257.26
4,Beverages,199611,17162.06,3182.5,539.26
5,Beverages,199612,9431.8,3182.5,296.36
6,Beverages,199701,21904.16,3182.5,688.27
7,Beverages,199702,2845.84,3182.5,89.42
8,Beverages,199703,10636.88,3182.5,334.23
9,Beverages,199704,7074.35,3182.5,222.29


In [22]:
import plotly.express as px

fig = px.line(data_frame=df,  x='month_day', y='base_ratio', color='category_name', markers=True)
fig.show()

### 매출 Z 차트 시각화

In [23]:
query = """
with 
temp_01 as (
	select to_char(a.order_date, 'yyyymm') as year_month
		, sum(b.amount) as sum_amount
	from nw.orders a
		join nw.order_items b
			on a.order_id = b.order_id
	group by to_char(a.order_date, 'yyyymm')
), 
temp_02 as (
select year_month, substring(year_month, 1, 4) as year
	, sum_amount
	, sum(sum_amount) over (partition by substring(year_month, 1, 4) order by year_month) as acc_amount
	, sum(sum_amount) over (order by year_month rows between 11 preceding and current row) as year_ma_amount
from temp_01 -- where year_month between '199708' and '199805' 와 같이 사용하면 안됨. where절이 먼저 수행되므로 sum() analytics가 제대로 동작하지 않음.
)
select * from temp_02 where year='1997' --where year_month >= '199801' and year_month <= '199805';
"""
df = pd.read_sql_query(sql=query, con=postgres_engine)
df.head(10)

Unnamed: 0,year_month,year,sum_amount,acc_amount,year_ma_amount
0,199701,1997,61258.06,61258.06,269342.01
1,199702,1997,38483.63,99741.69,307825.64
2,199703,1997,38547.21,138288.9,346372.85
3,199704,1997,53032.95,191321.85,399405.8
4,199705,1997,53781.29,245103.14,453187.09
5,199706,1997,36362.79,281465.93,489549.88
6,199707,1997,51020.84,332486.77,512708.83
7,199708,1997,47287.67,379774.44,534511.23
8,199709,1997,55629.23,435403.67,563759.06
9,199710,1997,66749.24,502152.91,592992.58


In [24]:
import plotly.graph_objects as go

fig = go.Figure()
fig.add_trace(go.Scatter(
    x=df['year_month'],
    y=df['sum_amount'],
    name='월별 매출')
    )
fig.add_trace(go.Scatter(
    x=df['year_month'],
    y=df['acc_amount'],
    name='누적 매출')
    )
fig.add_trace(go.Scatter(
    x=df['year_month'],
    y=df['year_ma_amount'],
    name='년간 이동 매출')
    )
fig.show()

In [25]:
query="""
with temp10 as(
select a.country,e.category_name,sum(c.amount) sum_amount
from nw.customers a
join nw.orders b on a.customer_id = b.customer_id 
join nw.order_items c on b.order_id = c.order_id 
join nw.products d on d.product_id = c.product_id 
join nw.categories e on d.category_id = e.category_id 
group by a.country,e.category_name
order by a.country
)
select *,sum_amount/sum(sum_amount) over(partition by country) ratio from temp10"""

df = pd.read_sql_query(sql=query,con=postgres_engine)
df.head(5)

Unnamed: 0,country,category_name,sum_amount,ratio
0,Argentina,Confections,2135.1,0.262972
1,Argentina,Seafood,606.5,0.0747
2,Argentina,Grains/Cereals,390.0,0.048035
3,Argentina,Condiments,907.0,0.111712
4,Argentina,Produce,1139.0,0.140286


In [26]:
import plotly.express as px
px.bar(df,x='country',y='ratio',color='category_name',hover_data=['sum_amount'])

In [27]:
import pandas as pd
query = """
with temp08_01 as
(
select date_trunc('month',a.order_date)::date date, sum(amount) monthly_sum,
row_number() over(order by date_trunc('month',a.order_date)::date) r_num
from nw.orders a
join nw.order_items b on a.order_id = b.order_id 
group by date_trunc('month',a.order_date)::date
order by date
),
temp08_02 as(
select date, monthly_sum,sum(monthly_sum) over(rows between 12 preceding and current row) ma12_sum
from temp08_01
)
select a.date,a.monthly_sum, sum(b.monthly_sum) over(order by a.date) accum_sum,b.ma12_sum
from temp08_01 a
join temp08_02 b on a.date = b.date
where a.r_num>10
"""
df = pd.read_sql_query(sql = query, con = postgres_engine)
df.head()

Unnamed: 0,date,monthly_sum,accum_sum,ma12_sum
0,1997-05-01,53781.29,53781.29,453187.09
1,1997-06-01,36362.79,90144.08,489549.88
2,1997-07-01,51020.84,141164.92,540570.72
3,1997-08-01,47287.67,188452.59,559996.5
4,1997-09-01,55629.23,244081.82,590140.46


In [28]:
fig = fig = make_subplots(specs=[[{"secondary_y": False}]])
fig.add_trace(go.Scatter(
x=df['date'],y=df['monthly_sum'],name='monthly_sum'))

fig.add_trace(go.Scatter(
x=df['date'],y=df['accum_sum'],name='accumulative_sum'))

fig.add_trace(go.Scatter(
x=df['date'],y=df['ma12_sum'],name = 'moving_average'))

fig.update_xaxes(type='category')
fig.show()