# ３章　可視化の仕組みを構築する１０本ノック

### ノック２１：店舗を絞り込んで可視化できるようにしてみよう

In [None]:
import pandas as pd
from IPython.display import display, clear_output

# Load the store and area masters plus the April 2020 orders, then attach the
# master attributes to each order row with left joins.
m_store = pd.read_csv("m_store.csv")
m_area = pd.read_csv("m_area.csv")
order_data = (
    pd.read_csv("tbl_order_202004.csv")
    .merge(m_store, on="store_id", how="left")
    .merge(m_area, on="area_cd", how="left")
)

# These codes have no master table, so their display names are set here.
takeout_labels = {0: "デリバリー", 1: "お持ち帰り"}
for code, label in takeout_labels.items():
    order_data.loc[order_data["takeout_flag"] == code, "takeout_name"] = label

status_labels = {0: "受付", 1: "お支払済", 2: "お渡し済", 9: "キャンセル"}
for code, label in status_labels.items():
    order_data.loc[order_data["status"] == code, "status_name"] = label

order_data.head()

In [None]:
from ipywidgets import Dropdown

def order_by_store(val):
    """Show the first completed orders of the store picked in ``dropdown``.

    ``val`` is the change notification delivered by ``dropdown.observe``;
    ``val["new"]`` is compared against the ``store_name`` column.
    """
    # Redraw from scratch: wipe the cell output and put the widget back.
    clear_output()
    display(dropdown)
    is_target_store = order_data["store_name"] == val["new"]
    is_completed = order_data["status"].isin([1, 2])  # paid or handed over
    display(order_data.loc[is_target_store & is_completed].head())


# Store names offered by the store selection widgets.
store_list = m_store["store_name"].to_list()

dropdown = Dropdown(options=store_list)
dropdown.observe(order_by_store, names="value")
display(dropdown)

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import japanize_matplotlib

def graph_by_store(val):
    """Plot the daily sales total of the store picked in ``dropdown2``.

    ``val`` is the change notification delivered by ``dropdown2.observe``;
    ``val["new"]`` is compared against the ``store_name`` column. Only
    orders with status 1 or 2 are included.
    """
    clear_output()
    display(dropdown2)
    pick_data = order_data.loc[(order_data["store_name"] == val["new"]) & (order_data["status"].isin([1, 2]))]
    temp = pick_data[["order_accept_date", "total_amount"]].copy()
    # Replace the column outright instead of writing through ``.loc[:, col]``:
    # on pandas 2.x the latter may set the values in place and keep the
    # object dtype, and ``resample`` needs a real DatetimeIndex.
    temp["order_accept_date"] = pd.to_datetime(temp["order_accept_date"])
    temp.set_index("order_accept_date", inplace=True)
    temp.resample("D").sum().plot()


dropdown2 = Dropdown(options=store_list)
dropdown2.observe(graph_by_store, names="value")
display(dropdown2)


### ノック２２：複数店舗の詳細を可視化できるようにしてみよう

In [None]:
from ipywidgets import SelectMultiple

def order_by_multi(val):
    """Show the first completed orders of every store selected in ``select``.

    ``val["new"]`` is used as the collection of selected store names.
    """
    clear_output()
    display(select)
    chosen_stores = order_data["store_name"].isin(val["new"])
    completed = order_data["status"].isin([1, 2])
    display(order_data.loc[chosen_stores & completed].head())


select = SelectMultiple(options=store_list)
select.observe(order_by_multi, names="value")
display(select)

### ノック２３：スライドバーを用いてオーダー件数を調べてみよう

In [None]:
from ipywidgets import IntSlider

def store_lower(val):
    """Print the stores whose order count is below the slider value.

    ``val["new"]`` is the threshold reported by ``slider``; every row of
    ``order_data`` counts as one order, whatever its status.
    """
    clear_output()
    display(slider)
    # Count once and reuse the result for both the mask and the output.
    order_counts = order_data.groupby("store_name").size()
    print(order_counts[order_counts < val["new"]])


# ``value`` (previously misspelled ``valune``) sets the initial position.
slider = IntSlider(value=1100, min=1000, max=2000, step=100, description="件数：")
slider.observe(store_lower, names="value")
display(slider)

In [None]:
def store_upper(val):
    """Print the stores whose order count is above the slider value.

    ``val["new"]`` is the threshold reported by ``slider2``; every row of
    ``order_data`` counts as one order, whatever its status.
    """
    clear_output()
    display(slider2)
    # Count once and reuse the result for both the mask and the output.
    order_counts = order_data.groupby("store_name").size()
    print(order_counts[order_counts > val["new"]])


# ``value`` (previously misspelled ``valune``) sets the initial position.
slider2 = IntSlider(value=1600, min=1000, max=2000, step=100, description="件数：")
slider2.observe(store_upper, names="value")
display(slider2)

### ノック２４：トグルボタンで地域データを抽出しよう

In [None]:
from ipywidgets import ToggleButtons

# Distinct wide-area names from the area master, used as toggle options.
area_list = m_area["wide_area"].unique()


def order_by_area(val):
    """Show the first completed orders of the area picked in ``toggle``.

    ``val["new"]`` is compared against the ``wide_area`` column.
    """
    clear_output()
    display(toggle)
    in_area = order_data["wide_area"] == val["new"]
    completed = order_data["status"].isin([1, 2])
    display(order_data.loc[in_area & completed].head())


toggle = ToggleButtons(options=area_list)
toggle.observe(order_by_area, names="value")
display(toggle)

In [None]:
def graph_by_area(val):
    """Plot the daily sales total of the area picked in ``toggle2``.

    ``val["new"]`` is compared against the ``wide_area`` column. Only
    orders with status 1 or 2 are included.
    """
    clear_output()
    display(toggle2)
    in_area = order_data["wide_area"] == val["new"]
    completed = order_data["status"].isin([1, 2])
    daily_sales = (
        order_data.loc[in_area & completed, ["order_accept_date", "total_amount"]]
        .assign(order_accept_date=lambda df: pd.to_datetime(df["order_accept_date"]))
        .set_index("order_accept_date")
        .resample("D")  # one bucket per calendar day
        .sum()
    )
    daily_sales.plot()


toggle2 = ToggleButtons(options=area_list)
toggle2.observe(graph_by_area, names="value")
display(toggle2)


### ノック２５：日付を指定してデータを抽出してみよう

In [None]:
from ipywidgets import DatePicker
import datetime

# Calendar date (time of day dropped) of each order, for day-level filtering.
order_data.loc[:, "order_date"] = pd.to_datetime(order_data["order_accept_date"]).dt.date


def order_by_date(val):
    """Print the number of completed orders on the picked day and show a preview.

    ``val["new"]`` is the value reported by ``date_picker`` and is compared
    against the ``order_date`` column.
    """
    clear_output()
    display(date_picker)
    on_picked_day = order_data["order_date"] == val["new"]
    completed = order_data["status"].isin([1, 2])
    pick_data = order_data.loc[on_picked_day & completed]
    print(len(pick_data))
    display(pick_data.head())


date_picker = DatePicker(value=datetime.datetime(2020, 4, 1))
date_picker.observe(order_by_date, names="value")
display(date_picker)

In [None]:
# Current bounds of the date filter; the picker callbacks overwrite them.
min_date = datetime.date(2020, 4, 1)
max_date = datetime.date(2020, 4, 30)


def order_between_data():
    """Print the count of completed orders inside the date range and preview them.

    Reads the module-level ``min_date`` and ``max_date``; both bounds are
    inclusive.
    """
    clear_output()
    display(date_picker_min)
    display(date_picker_max)
    on_or_after_min = order_data["order_date"] >= min_date
    on_or_before_max = order_data["order_date"] <= max_date
    completed = order_data["status"].isin([1, 2])
    pick_data = order_data.loc[on_or_after_min & on_or_before_max & completed]
    print(len(pick_data))
    display(pick_data.head())


def set_min_date(val):
    """Store the newly picked lower bound and refresh the listing."""
    global min_date
    picked = val["new"]
    min_date = picked
    order_between_data()


def set_max_date(val):
    """Store the newly picked upper bound and refresh the listing."""
    global max_date
    picked = val["new"]
    max_date = picked
    order_between_data()


# Lower-bound picker
date_picker_min = DatePicker(value=min_date)
date_picker_min.observe(set_min_date, names="value")
print("最小日付")
display(date_picker_min)

# Upper-bound picker
date_picker_max = DatePicker(value=max_date)
date_picker_max.observe(set_max_date, names="value")
print("最大日付")
display(date_picker_max)


### ノック２６：ストーリーを考えてデータを構築しよう

In [None]:
# ライブラリのインポート
import pandas as pd
import os
import glob

# Load every monthly order file (tbl_order_*.csv) from the working directory.
current_dir = os.getcwd()
tbl_order_file = os.path.join(current_dir, "tbl_order_*.csv")
# Sorted so the files are always concatenated in the same order; glob does
# not promise any particular ordering.
tbl_order_files = sorted(glob.glob(tbl_order_file))
if not tbl_order_files:
    raise FileNotFoundError(f"no order files match {tbl_order_file}")

order_frames = []
for file in tbl_order_files:
    order_tmp = pd.read_csv(file)
    # Report the row count of the file that was just read.
    print(f"{file}:{len(order_tmp)}")
    order_frames.append(order_tmp)
# Concatenate once at the end instead of growing a DataFrame inside the loop.
order_all = pd.concat(order_frames, ignore_index=True)

# Drop unneeded rows (store_id 999).
order_all = order_all[order_all["store_id"] != 999]

# Join the store and area master data onto the orders.
order_all = pd.merge(order_all, m_store, on="store_id", how="left")
order_all = pd.merge(order_all, m_area, on="area_cd", how="left")

# Display name for the hand-over method.
order_all.loc[order_all["takeout_flag"] == 0, "takeout_name"] = "デリバリー"
order_all.loc[order_all["takeout_flag"] == 1, "takeout_name"] = "お持ち帰り"

# Display name for the order status.
order_all.loc[order_all["status"] == 0, "status_name"] = "受付"
order_all.loc[order_all["status"] == 1, "status_name"] = "お支払済"
order_all.loc[order_all["status"] == 2, "status_name"] = "お渡し済"
order_all.loc[order_all["status"] == 9, "status_name"] = "キャンセル"

# Calendar date of each order, then summary statistics of the order amount
# per store and customer.
order_all.loc[:, "order_date"] = pd.to_datetime(order_all["order_accept_date"]).dt.date
order_all.groupby(["store_id", "customer_id"])["total_amount"].describe()

In [None]:
# Total sales per store, counting completed orders only (status 1 or 2),
# with the store master columns attached for display.
summary_df = order_all[order_all["status"].isin([1, 2])]
store_summary_df = pd.merge(
    summary_df.groupby("store_id")["total_amount"].sum(),
    m_store,
    on="store_id",
    how="left",
)

print("売上上位")
display(store_summary_df.sort_values("total_amount", ascending=False).head(10))
print("売上下位")
display(store_summary_df.sort_values("total_amount").head(10))

In [None]:
# Cancelled orders per store.
cancel_cnt = order_all.loc[order_all["status"] == 9].groupby(["store_id"])["store_id"].count()
# Paid, handed-over and cancelled orders per store: the denominator.
order_cnt = order_all.loc[order_all["status"].isin([1, 2, 9])].groupby(["store_id"])["store_id"].count()
# A store with orders but no cancellation is absent from cancel_cnt; align it
# to order_cnt with a zero so its rate is 0 % rather than NaN.
cancel_cnt = cancel_cnt.reindex(order_cnt.index, fill_value=0)
cancel_rate = (cancel_cnt / order_cnt) * 100  # percent

cancel_df = pd.DataFrame({"cancel_rate": cancel_rate})
cancel_df = pd.merge(cancel_df, m_store, on="store_id", how="left")
print("キャンセル率が低い")
display(cancel_df.sort_values("cancel_rate", ascending=True).head(10))
print("キャンセル率が高い")
display(cancel_df.sort_values("cancel_rate", ascending=False).head(10))

### ノック２７：キャンセルの理由を分析してみよう

In [None]:
def calc_delta(t):
    """Return the time from the first to the second element of *t*, in minutes.

    *t* must unpack into exactly two values whose difference provides
    ``total_seconds()`` (for example two datetimes). The result is negative
    when the second value is earlier than the first.
    """
    start, end = t
    return (end - start).total_seconds() / 60

# Parse the acceptance and delivery timestamps into datetime columns.
order_all["order_accept_datetime"] = pd.to_datetime(order_all["order_accept_date"])
order_all["delivered_datetime"] = pd.to_datetime(order_all["delivered_date"])
# Minutes from acceptance to delivery, computed on whole columns rather than
# with a row-by-row apply; the values are the same.
order_all["delta"] = (
    order_all["delivered_datetime"] - order_all["order_accept_datetime"]
).dt.total_seconds() / 60

# Mean delivery time of handed-over orders (status 2) for two stores.
# NOTE(review): store ids 8 and 122 are hard-coded; presumably chosen from the
# cancellation-rate ranking -- confirm.
delivery_df = order_all.loc[(order_all["status"] == 2) & (order_all["store_id"].isin([8, 122]))]
delivery_df.groupby(["store_id"])["delta"].mean()

### ノック２８：仮説を検証してみよう

In [None]:
# Pair each store's cancellation rate with the mean ``delta`` of its
# handed-over orders (status 2).
delivered_orders = order_all[order_all["status"] == 2]
temp_delivery = delivered_orders.groupby("store_id")["delta"].mean()
temp_cancel = cancel_df.copy()
check_df = temp_cancel.merge(temp_delivery, on="store_id", how="left")
check_df.head()

In [None]:
# All stores: correlation between cancellation rate and mean delta.
temp_chk = check_df.loc[:, ["cancel_rate", "delta"]]
display(temp_chk.corr())

In [None]:
# Stores with a high cancellation rate (at or above the third quartile).
th_high = check_df["cancel_rate"].quantile(0.75)
temp_chk = check_df.loc[check_df["cancel_rate"] >= th_high, ["cancel_rate", "delta"]]
display(temp_chk.corr())

# Stores with a low cancellation rate (at or below the first quartile).
# The comparison must be ``<=``: ``>= th_low`` would select the upper 75 %.
th_low = check_df["cancel_rate"].quantile(0.25)
temp_chk = check_df.loc[check_df["cancel_rate"] <= th_low, ["cancel_rate", "delta"]]
display(temp_chk.corr())

### ノック２９：ストーリーをもとにパーツやデータを組み合わせてダッシュボードを作ろう

In [None]:
import seaborn as sns

# Dashboard settings held as module-level variables (not OS environment
# variables): the store name to show and the bounds of the date range.
target_store = ""
min_date, max_date = datetime.date(2020, 4, 1), datetime.date(2020, 4, 30)

In [None]:
def make_board():
    """Render the dashboard for ``target_store`` between ``min_date`` and ``max_date``.

    Reads the module-level ``order_all``, ``target_store``, ``min_date`` and
    ``max_date`` (both bounds inclusive). Clears the cell output, shows
    ``toggle_db`` again, prints the text summary and draws three charts.
    When the selection contains neither completed nor cancelled orders, a
    short notice is printed and nothing else is drawn. Returns nothing.
    """
    clear_output()
    display(toggle_db)

    # Data preparation: build the shared masks once.
    in_period = (order_all["order_date"] >= min_date) & (order_all["order_date"] <= max_date)
    in_store = order_all["store_name"] == target_store
    pick_order_data = order_all.loc[in_store & in_period & order_all["status"].isin([1, 2])]
    pick_cancel_data = order_all.loc[in_store & in_period & (order_all["status"] == 9)]

    settled_cnt = len(pick_order_data) + len(pick_cancel_data)
    if settled_cnt == 0:
        # Avoid dividing by zero below (e.g. min_date later than max_date).
        print("対象データがありません")
        return

    store_o_cnt = len(pick_order_data)
    store_c_cnt = len(pick_order_data["customer_id"].unique())
    store_cancel_rate = (len(pick_cancel_data) / settled_cnt) * 100

    # Daily sales. The column is replaced outright (not written through
    # ``.loc[:, col]``) so that it really becomes datetime64 and ``resample``
    # gets a DatetimeIndex.
    temp = pick_order_data[["order_date", "total_amount"]].copy()
    temp["order_date"] = pd.to_datetime(temp["order_date"])
    temp.set_index("order_date", inplace=True)
    daily_sales = temp.resample("D").sum()

    # Text summary
    rule = "-------------------------------------------------------------------------------------------------------------------"
    print("===================================================================================================================")
    str_out = f"◼️◼️{target_store}◼️◼️【対象期間】：{min_date}〜{max_date}"
    str_out = str_out + f"【オーダー件数】：{store_o_cnt}件 【利用顧客数】：{store_c_cnt}"
    print(str_out)
    print(rule)
    print("◼️◼️◼️◼️◼️◼️ 日毎の売上 ◼️◼️◼️◼️◼️◼️◼️◼️")
    display(daily_sales)
    print(rule)
    str_out = f"【期間売上総額】：{temp['total_amount'].sum():,} "
    str_out = str_out + f"【キャンセル総額】：{pick_cancel_data['total_amount'].sum():,} "
    str_out = str_out + f"【キャンセル率】：{round(store_cancel_rate, 2)} % "
    print(str_out)
    print(rule)

    # Charts
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))
    # NOTE(review): distplot is reported as deprecated by recent seaborn
    # releases; kept here so the required seaborn version does not change.
    sns.distplot(daily_sales, ax=ax1, kde=False)
    ax1.set_title("売上（日単位）ヒストグラム")

    sns.countplot(x="order_date", data=pick_cancel_data, ax=ax2)
    ax2.set_title("キャンセル数（日単位）")

    fig, ax3 = plt.subplots(1, 1, figsize=(20, 5))
    # Bind the plot to ax3 explicitly instead of relying on the current axes.
    sns.boxplot(x="order_date", y="total_amount", data=pick_order_data, ax=ax3)
    ax3.set_title("オーダー状況箱ひげ図")
    plt.show()

In [None]:
# Handlers for changes of the calendar widgets
def change_date_min(val):
    """Store the newly picked start date and redraw the dashboard."""
    global min_date
    picked = val["new"]
    min_date = picked
    make_board()


def change_date_max(val):
    """Store the newly picked end date and redraw the dashboard."""
    global max_date
    picked = val["new"]
    max_date = picked
    make_board()

In [None]:
# Handler for changes of the store dropdown
def change_dropdown(val):
    """Remember the selected store and show the two period pickers.

    ``val["new"]`` is stored in the module-level ``target_store``. The
    pickers start at the current ``min_date`` / ``max_date`` and call
    ``change_date_min`` / ``change_date_max`` when their value changes.
    """
    global target_store
    target_store = val["new"]

    # Period selection
    print("期間")
    picker_from = DatePicker(value=min_date)
    picker_from.observe(change_date_min, names="value")
    picker_to = DatePicker(value=max_date)
    picker_to.observe(change_date_max, names="value")
    display(picker_from, picker_to)

In [None]:
# Handler for the area toggle buttons
def order_by_area(val):
    """Offer a dropdown of the stores that belong to the selected area.

    ``val["new"]`` is compared against the ``wide_area`` column of
    ``order_all``; choosing a store in the dropdown calls ``change_dropdown``.
    """
    clear_output()
    display(toggle_db)
    # Stores located in the selected area
    in_area = order_all["wide_area"] == val["new"]
    area_stores = order_all.loc[in_area, "store_name"].unique()
    # Dropdown limited to those stores
    store_dropdown = Dropdown(options=area_stores)
    store_dropdown.observe(change_dropdown, names="value")
    display(store_dropdown)

In [None]:
# Entry point of the dashboard: area toggle buttons whose value changes are
# routed to order_by_area.
toggle_db = ToggleButtons(options=area_list)
toggle_db.observe(handler=order_by_area, names="value")
display(toggle_db)

### ノック３０：ダッシュボードを改善しよう