In [None]:
import pandas as pd
import platform
import re
import plotly.graph_objects as go
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
import plotly.express as px
import numpy as np

## 導入數據集

In [None]:
def load_dataset(path='../data/ta_feng_all_months_merged.csv'):
    """Read the merged transaction CSV into a DataFrame.

    Parameters
    ----------
    path : str or path-like, optional
        Location of the CSV file. Defaults to the merged Ta-Feng file
        located relative to the notebook's working directory.

    Returns
    -------
    pandas.DataFrame
        The file contents exactly as parsed by ``pandas.read_csv`` with its
        default options.
    """
    # The previous version derived a path separator and an encoding from
    # platform.uname() but never passed either to read_csv; that dead code
    # has been removed, so parsing behaviour is unchanged.
    return pd.read_csv(path)
# Load the transactions once for the cells below; the bare `df` on the last
# line makes the notebook render the frame as this cell's output.
df = load_dataset()
df

## 生成月份欄位與計數用的欄位

In [None]:
def gen_month_count(df):
    """Add the ``month`` and ``count`` helper columns to ``df``.

    ``month`` holds, as a string, the first run of digits found in each
    ``TRANSACTION_DT`` value (presumably the month of a month-first date
    string -- confirm against the data). Rows whose value contains no
    digits, or is missing, get a missing value instead of raising.

    ``count`` is the constant 1 on every row, so summing it per group yields
    the number of rows in that group.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a string-valued ``TRANSACTION_DT`` column.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in; the columns are added in place.
    """
    # Vectorised extraction replaces the per-row loop. Unlike df[col][i], it
    # does not depend on the index labels being 0..n-1, so it also works on
    # filtered or re-indexed frames. The raw string avoids the invalid "\d"
    # escape of the previous pattern literal.
    df['month'] = df['TRANSACTION_DT'].str.extract(r'(\d+)', expand=False)
    df['count'] = 1
    return df
# Rebind `df` to the frame returned by gen_month_count (which adds the
# `month` and `count` columns) and display it as this cell's output.
df = gen_month_count(df)
df

## 視覺化工具

In [None]:
# Columns offered in the dropdown; the selected one is the group-by key
# shared by all three charts.
_GROUP_COLUMNS = ['AGE_GROUP', 'PIN_CODE', 'month', 'PRODUCT_SUBCLASS', 'CUSTOMER_ID']

# For these columns only the ten largest groups are plotted.
_TOP_TEN_COLUMNS = ('PRODUCT_SUBCLASS', 'CUSTOMER_ID')

# X-axis title per group-by column (identical across the three charts).
_X_AXIS_TITLES = {
    'PIN_CODE': '區域',
    'AGE_GROUP': '年齡',
    'month': '月份',
    'PRODUCT_SUBCLASS': '商品種類',
    'CUSTOMER_ID': '顧客',
}

# One entry per chart: which column is aggregated, how, and how it is labelled.
# `label` is used both as the trace name and as the y-axis title.
_CHART_SPECS = [
    {
        'graph_id': 'bar-chart',
        'value_column': 'SALES_PRICE',
        'aggregation': 'mean',
        'label': '平均花費金額',
        'color': '#EA0000',
        'titles': {
            'PIN_CODE': '各區域平均花費金額',
            'AGE_GROUP': '各年齡層平均花費金額',
            'month': '各月份平均花費金額',
            'PRODUCT_SUBCLASS': '前十名商品種類的平均花費金額',
            'CUSTOMER_ID': '顧客平均花費金額前十名',
        },
    },
    {
        'graph_id': 'bar-chart2',
        'value_column': 'AMOUNT',
        'aggregation': 'mean',
        'label': '平均購買數量',
        'color': 'indianred',
        'titles': {
            'PIN_CODE': '各區域平均購買商品數量(單次)',
            'AGE_GROUP': '各年齡層平均購買商品數量(單次)',
            'month': '各月份平均購買商品數量(單次)',
            # Previously read "平均花費金額" (copied from the spend chart)
            # although this chart plots the mean of AMOUNT.
            'PRODUCT_SUBCLASS': '前十名商品種類的平均購買數量(單次)',
            'CUSTOMER_ID': '顧客平均購買數量前十名(單次)',
        },
    },
    {
        'graph_id': 'bar-chart3',
        'value_column': 'count',
        'aggregation': 'sum',
        'label': '消費次數',
        'color': '#EAC100',
        'titles': {
            'PIN_CODE': '各區域總消費次數',
            'AGE_GROUP': '各年齡層總消費次數',
            'month': '各月份總消費次數',
            'PRODUCT_SUBCLASS': '總消費次數前十名的商品',
            'CUSTOMER_ID': '總消費頻率前十名的顧客',
        },
    },
]


def _aggregate_by_group(df, group_column, value_column, aggregation):
    """Aggregate ``value_column`` per ``group_column``; top ten only for the high-cardinality columns."""
    aggregated = df.groupby(group_column)[value_column].agg(aggregation)
    if group_column in _TOP_TEN_COLUMNS:
        aggregated = aggregated.sort_values(ascending=False).head(10)
    return aggregated


def _build_bar_figure(df, group_column, spec):
    """Build the bar figure described by ``spec`` for the selected group-by column."""
    aggregated = _aggregate_by_group(
        df, group_column, spec['value_column'], spec['aggregation']
    )

    fig = go.Figure()
    fig.add_trace(go.Bar(
        # Group keys are stringified before being used as bar labels.
        x=aggregated.index.astype(str),
        y=aggregated.values,
        name=spec['label'],
        marker_color=spec['color'],
        text=np.around(aggregated.values),
    ))
    fig.update_layout(
        # Fall back to generic text instead of failing on an unbound name
        # if a column without a dedicated title is ever selected.
        title_text=spec['titles'].get(group_column, spec['label']),
        barmode='group',
        xaxis_tickangle=-45,
        width=1200,
        xaxis=dict(title=_X_AXIS_TITLES.get(group_column, group_column)),
        yaxis=dict(title=spec['label']),
    )
    return fig


def _register_chart_callback(app, df, spec):
    """Register the dropdown -> figure callback for one chart on ``app``."""
    # A dedicated function per chart gives each callback its own `spec`
    # binding (no late-binding surprises from registering inside a loop).
    @app.callback(
        Output(spec['graph_id'], "figure"),
        [Input("dropdown", "value")])
    def update_chart(group_column):
        return _build_bar_figure(df, group_column, spec)


def visualization_tool(df):
    """Build and serve a Dash app with three bar charts driven by one dropdown.

    The dropdown selects the group-by column. For that column the charts show
    the mean ``SALES_PRICE``, the mean ``AMOUNT`` and the sum of ``count``.
    For ``PRODUCT_SUBCLASS`` and ``CUSTOMER_ID`` only the ten largest groups
    are shown.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the dropdown columns plus ``SALES_PRICE``, ``AMOUNT``
        and ``count`` (``month`` and ``count`` are the columns added by
        ``gen_month_count``).

    Returns
    -------
    None
        The function starts the Dash server via ``app.run_server()`` and
        does not return while the server is running.
    """
    app = dash.Dash(__name__)
    app.layout = html.Div([
        dcc.Dropdown(
            id="dropdown",
            options=[{"label": column, "value": column} for column in _GROUP_COLUMNS],
            value='AGE_GROUP',
            clearable=False,
            style=dict(width=500),
        ),
        dcc.Graph(id="bar-chart", style=dict(width=1200)),
        dcc.Graph(id="bar-chart2", style=dict(width=1200)),
        dcc.Graph(id="bar-chart3", style=dict(width=1200)),
    ])

    for spec in _CHART_SPECS:
        _register_chart_callback(app, df, spec)

    app.run_server()

## 主程式

In [None]:
if __name__ == '__main__':
    # End-to-end run: read the CSV, add the helper columns, then hand the
    # prepared frame to the dashboard.
    df = gen_month_count(load_dataset())
    visualization_tool(df)