## 월별 매출 시각화

In [43]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sqlalchemy import create_engine

In [2]:
conn_string = 'postgresql://postgres:postgres@localhost:5432/online_sales'
postgres_engine = create_engine(conn_string)

In [10]:
query = """
select to_char(date_trunc('month', invoice_date), 'yyyy-mm') as month,
	   sum(quantity * unit_price) as sum_amount,
	   count(distinct invoice_num) as order_cnt
from os.sales s 
group by 1
order by 1
"""

df = pd.read_sql_query(sql=query, con=postgres_engine)

In [11]:
fig = px.line(data_frame=df, x='month', y='sum_amount')
fig.update_xaxes(type='category')
fig.show()

---

## 월별 매출 및 주문건수 시각화

In [7]:
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(go.Bar(name='monthly_amount', x=df['month'], y=df['sum_amount']), secondary_y=False)
fig.add_trace(go.Scatter(name='monthly_count', x=df['month'], y=df['order_cnt']), secondary_y=True)

---

In [12]:
query = """
select country, to_char(date_trunc('month', invoice_date), 'yyyy-mm') as year_month,
	   sum(quantity * unit_price) as sum_amount,
	   count(distinct invoice_num) as daily_order_cnt
from os.sales s 
group by 1,2
"""
df = pd.read_sql_query(sql=query, con=postgres_engine)
df.head(10)

Unnamed: 0,country,year_month,sum_amount,daily_order_cnt
0,Australia,2010-12,1032.85,3
1,Australia,2011-01,9017.71,9
2,Australia,2011-02,14695.42,5
3,Australia,2011-03,17223.99,3
4,Australia,2011-04,771.6,2
5,Australia,2011-05,13638.41,4
6,Australia,2011-06,25187.77,4
7,Australia,2011-07,4964.38,7
8,Australia,2011-08,22489.2,2
9,Australia,2011-09,5106.73,8


In [30]:
df['country'].unique()

array(['Australia', 'Austria', 'Bahrain', 'Belgium', 'Channel Islands',
       'Cyprus', 'Czech Republic', 'Denmark', 'EIRE',
       'European Community', 'Finland', 'France', 'Germany', 'Greece',
       'Iceland', 'Israel', 'Italy', 'Japan', 'Lebanon', 'Lithuania',
       'Malta', 'Netherlands', 'Norway', 'Poland', 'Portugal', 'RSA',
       'Saudi Arabia', 'Singapore', 'Spain', 'Sweden', 'Switzerland',
       'USA', 'United Arab Emirates', 'United Kingdom', 'Unspecified'],
      dtype=object)

In [29]:
fig = px.line(data_frame=df, x='year_month', y='sum_amount', color='country', markers=True)
fig.show()

---

## 국가별 월별 매출액 추이 - 매출순위 TOP3에 대하여

In [37]:
query = """
with 
amount_ranking as (
	select country, 
		   sum(quantity * unit_price) as total_amount,
		   rank() over (order by sum(quantity * unit_price) desc) as amount_rank
	from os.sales
	group by 1
	order by 2 desc
),
ranking_top3 as (
	select *
	from amount_ranking
	where amount_rank <= 3
),
monthly_sales as (
	select country, to_char(date_trunc('month', invoice_date)::date, 'yyyy-mm') as year_month, 
		   count(distinct invoice_num) as monthly_order_count,
		   sum(unit_price * quantity) as sum_amount
	from os.sales
	group by 1,2
),
monthly_total as (
	select year_month, sum(sum_amount) as monthly_total_amount
	from monthly_sales
	group by 1
	order by 1
)
select rt.country, ms.year_month, ms.sum_amount, mt.monthly_total_amount, 
	   round(ms.sum_amount / mt.monthly_total_amount * 100, 2) as monthly_amount_ratio
from monthly_total mt
	join monthly_sales ms on ms.year_month = mt.year_month
	join ranking_top3 rt on rt.country = ms.country
order by mt.year_month
"""
df = pd.read_sql_query(sql=query, con=postgres_engine)
df.head(10)

Unnamed: 0,country,year_month,sum_amount,monthly_total_amount,monthly_amount_ratio
0,United Kingdom,2010-12,498661.85,572713.89,87.07
1,EIRE,2010-12,8813.88,572713.89,1.54
2,Netherlands,2010-12,8784.48,572713.89,1.53
3,United Kingdom,2011-01,442190.06,569445.04,77.65
4,Netherlands,2011-01,26611.16,569445.04,4.67
5,EIRE,2011-01,21904.19,569445.04,3.85
6,Netherlands,2011-02,23011.91,447137.35,5.15
7,EIRE,2011-02,10126.52,447137.35,2.26
8,United Kingdom,2011-02,355655.63,447137.35,79.54
9,EIRE,2011-03,21674.36,595500.76,3.64


In [38]:
fig = px.bar(data_frame=df,  x='year_month', y='sum_amount', text='monthly_amount_ratio', color='country')
fig.show()

---

## Treemap을 통한 매출 상위 10개국 시장 규모 시각화

In [47]:
query = """
with 
amount_ranking as (
	select country, 
		   sum(quantity * unit_price) as total_amount,
		   rank() over (order by sum(quantity * unit_price) desc) as amount_rank
	from os.sales
	group by 1
	order by 2 desc
),
ranking_top10 as (
	select *
	from amount_ranking
	where amount_rank <= 10
),
monthly_sales as (
	select country, to_char(date_trunc('month', invoice_date)::date, 'yyyy-mm') as year_month, 
		   count(distinct invoice_num) as monthly_order_count,
		   sum(unit_price * quantity) as sum_amount
	from os.sales
	group by 1,2
),
monthly_total as (
	select year_month, sum(sum_amount) as monthly_total_amount
	from monthly_sales
	group by 1
	order by 1
)
select rt.country, ms.year_month, ms.sum_amount, mt.monthly_total_amount, 
	   round(ms.sum_amount / mt.monthly_total_amount * 100, 2) as monthly_amount_ratio
from monthly_total mt
	join monthly_sales ms on ms.year_month = mt.year_month
	join ranking_top10 rt on rt.country = ms.country
order by mt.year_month
"""
df = pd.read_sql_query(sql=query, con=postgres_engine)
df.head(10)

Unnamed: 0,country,year_month,sum_amount,monthly_total_amount,monthly_amount_ratio
0,Switzerland,2010-12,1304.92,572713.89,0.23
1,Germany,2010-12,15241.14,572713.89,2.66
2,Belgium,2010-12,1809.91,572713.89,0.32
3,France,2010-12,9616.31,572713.89,1.68
4,Australia,2010-12,1032.85,572713.89,0.18
5,Spain,2010-12,1843.73,572713.89,0.32
6,EIRE,2010-12,8813.88,572713.89,1.54
7,Netherlands,2010-12,8784.48,572713.89,1.53
8,United Kingdom,2010-12,498661.85,572713.89,87.07
9,Sweden,2010-12,3834.3,572713.89,0.67


In [48]:
fig = px.treemap(df, path=[px.Constant('total'), 'country'], values='sum_amount', color='sum_amount')
fig.show()

### 1위를 제외한 나머지 TOP10의 시장 규모 시각화

In [51]:
query = """
with 
amount_ranking as (
	select country, 
		   sum(quantity * unit_price) as total_amount,
		   rank() over (order by sum(quantity * unit_price) desc) as amount_rank
	from os.sales
	group by 1
	order by 2 desc
),
ranking_top10 as (
	select *
	from amount_ranking
	where amount_rank > 1 and amount_rank <= 10
),
monthly_sales as (
	select country, to_char(date_trunc('month', invoice_date)::date, 'yyyy-mm') as year_month, 
		   count(distinct invoice_num) as monthly_order_count,
		   sum(unit_price * quantity) as sum_amount
	from os.sales
	group by 1,2
),
monthly_total as (
	select year_month, sum(sum_amount) as monthly_total_amount
	from monthly_sales
	group by 1
	order by 1
)
select rt.country, ms.year_month, ms.sum_amount, mt.monthly_total_amount, 
	   round(ms.sum_amount / mt.monthly_total_amount * 100, 2) as monthly_amount_ratio
from monthly_total mt
	join monthly_sales ms on ms.year_month = mt.year_month
	join ranking_top10 rt on rt.country = ms.country
order by mt.year_month
"""
df = pd.read_sql_query(sql=query, con=postgres_engine)
df.head(10)

Unnamed: 0,country,year_month,sum_amount,monthly_total_amount,monthly_amount_ratio
0,France,2010-12,9616.31,572713.89,1.68
1,Switzerland,2010-12,1304.92,572713.89,0.23
2,Germany,2010-12,15241.14,572713.89,2.66
3,Belgium,2010-12,1809.91,572713.89,0.32
4,Sweden,2010-12,3834.3,572713.89,0.67
5,Spain,2010-12,1843.73,572713.89,0.32
6,Netherlands,2010-12,8784.48,572713.89,1.53
7,EIRE,2010-12,8813.88,572713.89,1.54
8,Australia,2010-12,1032.85,572713.89,0.18
9,EIRE,2011-01,21904.19,569445.04,3.85


In [52]:
fig = px.treemap(df, path=[px.Constant('total'), 'country'], values='sum_amount', color='sum_amount')
fig.show()