In [40]:
import mysql.connector
import pandas as pd
import os
from dotenv import load_dotenv
%load_ext sql

pd.set_option('display.max_rows', 20)
pd.set_option('display.max_columns', 10) 

The sql extension is already loaded. To reload it, use:
  %reload_ext sql


In [4]:
dotenv_path = "../.env"
load_dotenv(dotenv_path)

db_user = os.getenv("DB_USER")
db_password = os.getenv("DB_PASSWORD")
db_host = os.getenv("DB_HOST")
db_port = os.getenv("DB_PORT")
db_name = os.getenv("DB_NAME")

In [15]:
conn = mysql.connector.connect(
    host = db_host,
    port = db_port,
    database = db_name,
    user = db_user,
    password = db_password
)

mycursor = conn.cursor()

In [11]:
mycursor.execute('show tables;')

for x in mycursor:
    print(x)

('customer',)
('discount',)
('marketing',)
('product',)
('sales',)
('tax',)


In [41]:
sql = '''
SELECT s.CustomerID, 
       c.SubscriptionPeriod,
       MIN(s.TransactionDate) AS 최초거래일, 
       MAX(s.TransactionDate) AS 최근거래일
FROM sales s
JOIN customer c ON s.CustomerID = c.CustomerID
GROUP BY s.CustomerID, c.SubscriptionPeriod;
'''
def show_sql_result(sql):
       mycursor.execute(sql)
       rows = mycursor.fetchall()
       columns = [column[0] for column in mycursor.description]
       table_data = pd.DataFrame(rows, columns=columns)
       return table_data

show_sql_result(sql)

Unnamed: 0,CustomerID,SubscriptionPeriod,최초거래일,최근거래일
0,USER_0000,31,2019-09-15,2019-09-15
1,USER_0001,20,2019-03-24,2019-11-02
2,USER_0002,39,2019-06-22,2019-10-19
3,USER_0003,25,2019-12-14,2019-12-14
4,USER_0004,31,2019-09-15,2019-09-15
...,...,...,...,...
1463,USER_1463,5,2019-04-05,2019-04-05
1464,USER_1464,43,2019-06-22,2019-10-05
1465,USER_1465,25,2019-04-05,2019-06-20
1466,USER_1466,47,2019-10-23,2019-10-23


In [43]:
sql = '''
SELECT s.CustomerID, 
       c.SubscriptionPeriod,
       MIN(s.TransactionDate) AS 최초거래일, 
       MAX(s.TransactionDate) AS 최근거래일,
       TIMESTAMPDIFF(MONTH, MIN(s.TransactionDate), MAX(s.TransactionDate)) + 1 AS 거래기간
FROM sales s
JOIN customer c ON s.CustomerID = c.CustomerID
GROUP BY s.CustomerID
HAVING 거래기간 > c.SubscriptionPeriod;
'''
show_sql_result(sql)

Unnamed: 0,CustomerID,SubscriptionPeriod,최초거래일,최근거래일,거래기간
0,USER_0034,2,2019-01-11,2019-10-30,10
1,USER_0147,8,2019-01-09,2019-11-12,11
2,USER_0200,5,2019-02-09,2019-08-31,7
3,USER_0203,4,2019-01-16,2019-09-30,9
4,USER_0270,2,2019-03-27,2019-07-08,4
...,...,...,...,...,...
34,USER_1379,7,2019-04-25,2019-12-13,8
35,USER_1391,10,2019-01-31,2019-12-13,11
36,USER_1393,7,2019-03-02,2019-11-19,9
37,USER_1397,6,2019-03-29,2019-12-12,9


- 거래기간보다 가입기간이 짧은 고객 : 39명(2.7%)
    - 단순 오류?
- 가입 기간 산정 방식
    - 19년 12월 기준 : 잘 안 맞는 데이터가 많음
    - 최근 거래일 기준 : 최초 가입일로부터 최근 거래일까지의 기간
    - 별도 서비스 가입 : 홍보 메일 수신, 유료 구독 서비스 등 별도 서비스 가입 기간을 계산

In [47]:
sql = '''
select * from sales;
'''
show_sql_result(sql)

Unnamed: 0,CustomerID,TransactionID,TransactionDate,ProductID,ProductCategory,Quantity,AverageAmount,ShippingFee,CouponStatus
0,USER_1358,Transaction_0000,2019-01-01,Product_0981,Nest-USA,1,153.71,6.50,Used
1,USER_1358,Transaction_0001,2019-01-01,Product_0981,Nest-USA,1,153.71,6.50,Used
2,USER_1358,Transaction_0002,2019-01-01,Product_0904,Office,1,2.05,6.50,Used
3,USER_1358,Transaction_0003,2019-01-01,Product_0203,Apparel,5,17.53,6.50,Not Used
4,USER_1358,Transaction_0003,2019-01-01,Product_0848,Bags,1,16.50,6.50,Used
...,...,...,...,...,...,...,...,...,...
52919,USER_1144,Transaction_9999,2019-06-07,Product_0224,Apparel,1,15.99,6.00,Clicked
52920,USER_1144,Transaction_9999,2019-06-07,Product_0337,Apparel,1,59.99,6.00,Clicked
52921,USER_1144,Transaction_9999,2019-06-07,Product_0418,Apparel,1,15.19,6.00,Used
52922,USER_1144,Transaction_9999,2019-06-07,Product_0634,Apparel,1,67.19,6.00,Used


In [48]:
sql = '''
Select CustomerID, TransactionID, ProductID, ProductCategory, Quantity, AverageAmount, (Quantity * AverageAmount) as 매출 
From sales
GROUP BY CustomerID, TransactionID
;
'''
show_sql_result(sql)

Unnamed: 0,CustomerID,TransactionID,ProductID,ProductCategory,Quantity,AverageAmount,매출
0,USER_0000,Transaction_16900,Product_0048,Apparel,1,19.99,19.99
1,USER_0001,Transaction_20178,Product_0905,Office,1,2.39,2.39
2,USER_0001,Transaction_20179,Product_0195,Lifestyle,3,3.19,9.57
3,USER_0001,Transaction_20180,Product_0989,Nest-USA,1,149.00,149.00
4,USER_0001,Transaction_20181,Product_0989,Nest-USA,1,149.00,149.00
...,...,...,...,...,...,...,...
26626,USER_1467,Transaction_18634,Product_0992,Nest,1,99.00,99.00
26627,USER_1467,Transaction_18635,Product_0880,Drinkware,4,2.99,11.96
26628,USER_1467,Transaction_18636,Product_0837,Apparel,1,15.19,15.19
26629,USER_1467,Transaction_18637,Product_0095,Apparel,1,15.00,15.00


In [49]:
sql = '''
SELECT 
    SUM(매출) AS 총매출, COUNT(DISTINCT CustomerID) AS 총고객수, SUM(매출) / COUNT(DISTINCT CustomerID) AS ARPU
FROM (
    SELECT 
        CustomerID, 
        TransactionID, 
        SUM(Quantity * AverageAmount) AS 매출
    FROM sales
    GROUP BY CustomerID, TransactionID
) AS 매출_합계
;
'''
show_sql_result(sql)

Unnamed: 0,총매출,총고객수,ARPU
0,4670794.62,1468,3181.740204
