In [6]:
import pandas as pd
import platform
import re
import plotly.graph_objects as go
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
import plotly.express as px
import numpy as np

## 導入數據集

In [7]:
def load_dataset(path='../data/ta_feng_all_months_merged.csv'):
    """Read the merged Ta Feng transaction CSV into a DataFrame.

    Parameters
    ----------
    path : str or path-like, optional
        Location of the CSV file. Defaults to the merged file under
        ``../data`` so existing zero-argument calls keep working.

    Returns
    -------
    pandas.DataFrame
        The file contents exactly as parsed by ``pandas.read_csv`` with
        default options.
    """
    # The previous OS / encoding detection was never used by the read below,
    # so it has been removed; pandas accepts forward-slash paths as given.
    return pd.read_csv(path)
# Load the transactions into the module-level frame `df`; the bare `df` on the
# last line makes the notebook render the frame as this cell's output.
df = load_dataset()
df

Unnamed: 0,TRANSACTION_DT,CUSTOMER_ID,AGE_GROUP,PIN_CODE,PRODUCT_SUBCLASS,PRODUCT_ID,AMOUNT,ASSET,SALES_PRICE
0,11/1/2000,1104905,45-49,115,110411,4710199010372,2,24,30
1,11/1/2000,418683,45-49,115,120107,4710857472535,1,48,46
2,11/1/2000,1057331,35-39,115,100407,4710043654103,2,142,166
3,11/1/2000,1849332,45-49,Others,120108,4710126092129,1,32,38
4,11/1/2000,1981995,50-54,115,100205,4710176021445,1,14,18
...,...,...,...,...,...,...,...,...,...
817736,2/28/2001,312790,35-39,114,530501,4713317035042,2,80,118
817737,2/28/2001,57486,40-44,115,530209,4710731060124,1,40,55
817738,2/28/2001,733526,>65,Unknown,510539,4716340052307,1,78,115
817739,2/28/2001,173704,45-49,115,520457,4714276145315,1,90,96


## 生成月份欄位與計數用的欄位

In [8]:
def gen_month_count(df):
    """Add ``month`` and ``count`` helper columns to ``df``.

    ``month`` is the first run of digits in ``TRANSACTION_DT`` kept as a
    string (e.g. ``'11/1/2000'`` -> ``'11'``). ``count`` is the constant 1,
    so that summing it per group yields the number of rows in the group.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a string column ``TRANSACTION_DT``.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in; the columns are added in place.
    """
    # Vectorised extraction works on the values regardless of the index
    # labels; the previous per-row `df[col][i]` lookup was label-based and
    # failed on any frame without a default 0..n-1 index.
    df['month'] = df['TRANSACTION_DT'].str.extract(r'(\d+)', expand=False)
    df['count'] = 1
    return df
# Add the `month` and `count` columns to the module-level frame, then display
# it as this cell's output.
df = gen_month_count(df)
df

Unnamed: 0,TRANSACTION_DT,CUSTOMER_ID,AGE_GROUP,PIN_CODE,PRODUCT_SUBCLASS,PRODUCT_ID,AMOUNT,ASSET,SALES_PRICE,month,count
0,11/1/2000,1104905,45-49,115,110411,4710199010372,2,24,30,11,1
1,11/1/2000,418683,45-49,115,120107,4710857472535,1,48,46,11,1
2,11/1/2000,1057331,35-39,115,100407,4710043654103,2,142,166,11,1
3,11/1/2000,1849332,45-49,Others,120108,4710126092129,1,32,38,11,1
4,11/1/2000,1981995,50-54,115,100205,4710176021445,1,14,18,11,1
...,...,...,...,...,...,...,...,...,...,...,...
817736,2/28/2001,312790,35-39,114,530501,4713317035042,2,80,118,2,1
817737,2/28/2001,57486,40-44,115,530209,4710731060124,1,40,55,2,1
817738,2/28/2001,733526,>65,Unknown,510539,4716340052307,1,78,115,2,1
817739,2/28/2001,173704,45-49,115,520457,4714276145315,1,90,96,2,1


## 視覺化工具

In [14]:
# Dropdown dimensions for which only the ten largest groups are plotted
# instead of every group.
_TOP_TEN_DIMENSIONS = ('PRODUCT_SUBCLASS', 'CUSTOMER_ID')

# X-axis title for each dropdown dimension (shared by all three charts).
_X_AXIS_TITLES = {
    'PIN_CODE': '區域',
    'AGE_GROUP': '年齡',
    'month': '月份',
    'PRODUCT_SUBCLASS': '商品種類',
    'CUSTOMER_ID': '顧客',
}

# One entry per chart: which column is aggregated, how, and how it is labelled.
# `label` is used both as the trace name and as the y-axis title.
_CHART_SPECS = (
    {
        'graph_id': 'bar-chart',
        'value_column': 'SALES_PRICE',
        'aggregation': 'mean',
        'label': '平均花費金額',
        'color': '#EA0000',
        'decimals': 0,
        'titles': {
            'PIN_CODE': '各區域平均花費金額',
            'AGE_GROUP': '各年齡層平均花費金額',
            'month': '各月份平均花費金額',
            'PRODUCT_SUBCLASS': '前十名商品種類的平均花費金額',
            'CUSTOMER_ID': '顧客平均花費金額前十名',
        },
    },
    {
        'graph_id': 'bar-chart2',
        'value_column': 'AMOUNT',
        'aggregation': 'mean',
        'label': '平均購買數量',
        'color': 'indianred',
        'decimals': 2,
        'titles': {
            'PIN_CODE': '各區域平均購買商品數量(單次)',
            'AGE_GROUP': '各年齡層平均購買商品數量(單次)',
            'month': '各月份平均購買商品數量(單次)',
            # Fixed: this chart plots average quantity, but the title used to
            # say "average spending amount" (copied from the first chart).
            'PRODUCT_SUBCLASS': '前十名商品種類的平均購買數量(單次)',
            'CUSTOMER_ID': '顧客平均購買數量前十名(單次)',
        },
    },
    {
        'graph_id': 'bar-chart3',
        'value_column': 'count',
        'aggregation': 'sum',
        'label': '消費次數',
        'color': '#EAC100',
        'decimals': 0,
        'titles': {
            'PIN_CODE': '各區域總消費次數',
            'AGE_GROUP': '各年齡層總消費次數',
            'month': '各月份總消費次數',
            'PRODUCT_SUBCLASS': '總消費次數前十名的商品',
            'CUSTOMER_ID': '總消費頻率前十名的顧客',
        },
    },
)


def _build_bar_figure(df, day, spec):
    """Aggregate ``spec['value_column']`` grouped by ``day`` into a bar figure.

    Parameters
    ----------
    df : pandas.DataFrame
        Transactions containing ``day`` and ``spec['value_column']``.
    day : str
        Name of the grouping column chosen in the dropdown.
    spec : dict
        One entry of ``_CHART_SPECS``.

    Returns
    -------
    plotly.graph_objects.Figure
    """
    grouped = df.groupby(day)[spec['value_column']].agg(spec['aggregation'])
    if day in _TOP_TEN_DIMENSIONS:
        # Keep only the ten groups with the largest aggregated value.
        grouped = grouped.sort_values(ascending=False).head(10)

    fig = go.Figure()
    fig.add_trace(go.Bar(
        # Group keys are cast to str so numeric ids are drawn as categories.
        x=grouped.index.astype(str),
        y=grouped.values,
        name=spec['label'],
        marker_color=spec['color'],
        text=np.around(grouped.values, decimals=spec['decimals']),
    ))
    fig.update_layout(
        # Fall back to the raw column name instead of failing on a dimension
        # that has no configured title.
        title_text=spec['titles'].get(day, day),
        barmode='group',
        xaxis_tickangle=-45,
        width=1200,
        xaxis=dict(title=_X_AXIS_TITLES.get(day, day)),
        yaxis=dict(title=spec['label']),
    )
    return fig


def visualization_tool(df):
    """Build and run a Dash app with a dropdown driving three bar charts.

    The dropdown selects the grouping column; the charts show, per group,
    the mean ``SALES_PRICE``, the mean ``AMOUNT`` and the sum of ``count``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns listed in ``selection`` below as well as
        ``SALES_PRICE``, ``AMOUNT`` and ``count``.

    Returns
    -------
    None
        The call ends with ``app.run_server()``, which starts the web server.
    """
    selection = ['AGE_GROUP', 'PIN_CODE', 'month', 'PRODUCT_SUBCLASS', 'CUSTOMER_ID']
    app = dash.Dash(__name__)

    dropdown = dcc.Dropdown(
        id="dropdown",
        options=[{"label": x, "value": x} for x in selection],
        value='AGE_GROUP',
        clearable=False,
        style=dict(width=500),
    )
    graphs = [
        dcc.Graph(id=spec['graph_id'], style=dict(width=1200))
        for spec in _CHART_SPECS
    ]
    app.layout = html.Div([dropdown] + graphs)

    def register_callback(spec):
        # A factory function binds `spec` per chart; defining the callback
        # directly inside the loop would make every callback see the last spec.
        @app.callback(
            Output(spec['graph_id'], "figure"),
            [Input("dropdown", "value")])
        def update_figure(day):
            return _build_bar_figure(df, day, spec)

    for spec in _CHART_SPECS:
        register_callback(spec)

    app.run_server()

## 主程式

In [None]:
if __name__ == '__main__':
    # Entry point: load the transactions, add the helper columns, then
    # start the dashboard on the prepared frame.
    df = gen_month_count(load_dataset())
    visualization_tool(df)

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


 * Running on http://127.0.0.1:8050/ (Press CTRL+C to quit)
127.0.0.1 - - [23/Jan/2022 11:00:41] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [23/Jan/2022 11:00:41] "GET /_dash-component-suites/dash/dcc/async-dropdown.js HTTP/1.1" 304 -
127.0.0.1 - - [23/Jan/2022 11:00:41] "GET /_dash-component-suites/dash/dcc/async-graph.js HTTP/1.1" 304 -
127.0.0.1 - - [23/Jan/2022 11:00:41] "GET /_dash-component-suites/dash/dcc/async-plotlyjs.js HTTP/1.1" 304 -
127.0.0.1 - - [23/Jan/2022 11:00:41] "GET /_dash-layout HTTP/1.1" 200 -
127.0.0.1 - - [23/Jan/2022 11:00:41] "GET /_dash-dependencies HTTP/1.1" 200 -
127.0.0.1 - - [23/Jan/2022 11:00:41] "GET /_dash-component-suites/dash/dcc/async-dropdown.js HTTP/1.1" 304 -
127.0.0.1 - - [23/Jan/2022 11:00:41] "GET /_dash-component-suites/dash/dcc/async-plotlyjs.js HTTP/1.1" 304 -
127.0.0.1 - - [23/Jan/2022 11:00:41] "GET /_dash-component-suites/dash/dcc/async-graph.js HTTP/1.1" 304 -
127.0.0.1 - - [23/Jan/2022 11:00:41] "POST /_dash-update-component HTTP/1.1" 200 -