# Bokeh

Plotlyと同様にインタラクティブなグラフを作成できる。Plotlyより多くのコードが必要かつ煩雑で入力補完が効かない欠点があるが、非常に汎用性が高い。

## グラフの種類

- 散布図
- バブルチャート
- 棒グラフ
- 積み上げ棒グラフ
- 折れ線グラフ
- 面グラフ
- ツリーマップ
- 100%積み上げグラフ
- 円グラフ
- ヒストグラム
- 箱ひげ図
- バイオリンプロット
- ヒートマップ

ライブラリのインポート

In [1]:
# ライブラリのインポート
from bokeh import palettes
from bokeh.transform import transform
from bokeh.layouts import column
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import (
    DatetimeTickFormatter,
    ColorBar,
    LinearColorMapper,
    ColumnDataSource,
)
import pandas as pd
import numpy as np

output_notebook()

## 散布図（`plot.scatter()`）

### 主な引数とその説明

[`figure.scatter()`のAPIリファレンス](https://docs.bokeh.org/en/latest/docs/reference/plotting/figure.html#bokeh.plotting.figure.scatter)

|引数|説明|
|---|---|
|x|配列、もしくは`ColumnDataSource`のキーを渡す。X軸の値。|
|y|配列、もしくは`ColumnDataSource`のキーを渡す。Y軸の値。|
|color|`str`型で色を渡すとマーカー全体の色を変更する。`LinearColorMapper`を用いた`transform()`を渡すと、値に応じた色に変更する。|
|size|数値を渡すとマーカー全体のサイズを変更する。`ColumnDataSource`のキーを渡すと、マーカーを値に応じた大きさに変更する。|
|marker|[MarkerType](https://docs.bokeh.org/en/latest/docs/reference/core/enums.html#bokeh.core.enums.MarkerType)に応じたシンボルを与えるとマーカーの形状が変化する。'circle'または'o'で通常のプロット|
|line_color|マーカーの縁の色|
|line_width|マーカーの縁の幅|
|fill_alpha|マーカーの塗りつぶしの不透明度（0：透明 〜 1：不透明）|
|source|データソースオブジェクト`ColumnDataSource`|

In [2]:
# Load the diabetes dataset from disk.
diabetes_csv = "../data/diabetes.csv"
scatter_df = pd.read_csv(filepath_or_buffer=diabetes_csv)

# Preview the first rows to check the available columns.
scatter_df.head()

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,y
0,59,2,32.1,101.0,157,93.2,38.0,4.0,4.8598,87,151
1,48,1,21.6,87.0,183,103.2,70.0,3.0,3.8918,69,75
2,72,2,30.5,93.0,156,93.6,41.0,4.0,4.6728,85,141
3,24,1,25.3,84.0,198,131.4,40.0,5.0,4.8903,89,206
4,50,1,23.0,101.0,192,125.4,52.0,4.0,4.2905,80,135


In [3]:
# Expose the BMI and blood-pressure columns to Bokeh under short keys.
src = ColumnDataSource(
    {
        "bmi": scatter_df["bmi"],
        "bp": scatter_df["bp"],
    }
)

# Canvas size, labels, frame outline and minor-tick counts for the figure.
figure_options = {
    "title": "BMIと血圧の関係",
    "x_axis_label": "BMI",
    "y_axis_label": "血圧 [mmHg]",
    "width": 700,
    "height": 500,
    "x_minor_ticks": 5,
    "y_minor_ticks": 2,
    "outline_line_color": "black",
    "outline_line_width": 2,
}
plot = figure(**figure_options)

# Semi-transparent blue circles with a black edge, one per row of the source.
plot.scatter(
    x="bmi",
    y="bp",
    source=src,
    marker="o",
    size=8,
    color="blue",
    line_color="black",
    fill_alpha=0.5,
)

# Text sizes: title, then the same label sizes on both axes.
plot.title.text_font_size = "24px"
for axes in (plot.xaxis, plot.yaxis):
    axes.axis_label_text_font_size = "18px"
    axes.major_label_text_font_size = "14px"

# Hide the grid lines.
plot.grid.visible = False

show(plot)

## バブルチャート（`plot.scatter()`）

In [4]:
# The "y" column drives both the marker colour ("c") and the marker size ("s").
target = scatter_df["y"]

src = ColumnDataSource(
    {
        "x": scatter_df["bmi"],
        "y": scatter_df["bp"],
        "c": target,
        "s": target / 20,
    }
)

# Reversed 256-step "Reds" palette spanning the observed range of the "y" column.
color_mapper = LinearColorMapper(
    palette=palettes.Reds256[::-1],
    low=target.min(),
    high=target.max(),
)

# Colour bar that shares the mapper used for the markers.
color_bar = ColorBar(color_mapper=color_mapper)

figure_options = {
    "title": "BMIと血圧の関係",
    "x_axis_label": "BMI",
    "y_axis_label": "血圧 [mmHg]",
    "width": 700,
    "height": 500,
    "x_minor_ticks": 5,
    "y_minor_ticks": 2,
    "outline_line_color": "black",
    "outline_line_width": 2,
}
plot = figure(**figure_options)

# Marker size comes from column "s"; marker colour is column "c" passed through the mapper.
plot.scatter(
    x="x",
    y="y",
    source=src,
    marker="circle",
    size="s",
    color=transform("c", color_mapper),
    line_color="black",
)

# Attach the colour bar to the right of the plot area and hide the grid.
plot.add_layout(color_bar, place="right")
plot.grid.visible = False

plot.title.text_font_size = "24px"
for axes in (plot.xaxis, plot.yaxis):
    axes.axis_label_text_font_size = "18px"
    axes.major_label_text_font_size = "14px"

show(plot)

## 棒グラフ（`plot.vbar()`, `plot.hbar()`）

In [5]:
# Load the daily city temperatures and index them by date.
bar_df = pd.read_csv(
    "../data/city_temperature.csv",
    sep="\t",  # the file is tab-separated
    index_col=None,
    parse_dates=["date"],  # column to convert from string to datetime
    date_format="%Y%m%d",  # e.g. 20111001 -> 2011-10-01
)

bar_df = bar_df.set_index("date")

# Resample once to month-end bins and reuse the result for every city.
monthly = bar_df[["New York", "San Francisco", "Austin"]].resample("ME")
monthly_mean = monthly.mean()
monthly_std = monthly.std()

ny_temp = monthly_mean["New York"]
ny_temp_std = monthly_std["New York"]
sf_temp = monthly_mean["San Francisco"]
sf_temp_std = monthly_std["San Francisco"]
austin_temp = monthly_mean["Austin"]
austin_temp_std = monthly_std["Austin"]

# Build the labels from the resampled index so that there is exactly one
# label per aggregated row, in the same order as the values above.
month = [f"{m}月" for m in monthly_mean.index.month]

src = ColumnDataSource(
    {
        "month": month,
        "NY_mean": ny_temp,
        "NY_std": ny_temp_std,
        "SF_mean": sf_temp,
        "SF_std": sf_temp_std,
        "Austin_mean": austin_temp,
        "Austin_std": austin_temp_std,
    }
)

In [6]:
# Vertical bar chart of the New York monthly means on a categorical month axis.
figure_options = {
    "title": "NYの気温（月別平均）",
    "x_axis_label": "Month",
    "y_axis_label": "Temperature [F]",
    "x_range": month,
    "y_range": [0, 85],
    "width": 700,
    "height": 500,
    "x_minor_ticks": 5,
    "y_minor_ticks": 5,
    "outline_line_color": "black",
    "outline_line_width": 2,
}
plot = figure(**figure_options)

plot.vbar(x="month", top="NY_mean", width=0.8, source=src)

# Error bars: one standard deviation either side of each monthly mean.
error_low = ny_temp - ny_temp_std
error_high = ny_temp + ny_temp_std
plot.segment(
    x0=month,
    y0=error_low,
    x1=month,
    y1=error_high,
    line_width=2,
    line_color="black",
)

# Dotted black horizontal grid lines only.
plot.xgrid.grid_line_color = None
plot.ygrid.grid_line_dash = "dotted"
plot.ygrid.grid_line_color = "black"

plot.title.text_font_size = "24px"
for axes in (plot.xaxis, plot.yaxis):
    axes.axis_label_text_font_size = "18px"
    axes.major_label_text_font_size = "14px"

show(plot)

In [7]:
# Horizontal variant: months on the categorical y axis, temperature on x.
figure_options = {
    "title": "NYの気温（月別平均）",
    "x_axis_label": "Temperature [F]",
    "y_axis_label": "Month",
    "x_range": [0, 85],
    "y_range": month,
    "width": 700,
    "height": 500,
    "outline_line_color": "black",
    "outline_line_width": 2,
}
plot = figure(**figure_options)

plot.hbar(y="month", right="NY_mean", height=0.8, source=src)

# Error bars: one standard deviation either side of each monthly mean.
error_low = ny_temp - ny_temp_std
error_high = ny_temp + ny_temp_std
plot.segment(
    x0=error_low,
    y0=month,
    x1=error_high,
    y1=month,
    line_width=2,
    line_color="black",
)

# Dotted black vertical grid lines only.
plot.ygrid.grid_line_color = None
plot.xgrid.grid_line_dash = "dotted"
plot.xgrid.grid_line_color = "black"

plot.title.text_font_size = "24px"
for axes in (plot.xaxis, plot.yaxis):
    axes.axis_label_text_font_size = "18px"
    axes.major_label_text_font_size = "14px"

show(plot)

In [8]:
from bokeh.transform import dodge

# Grouped bar chart: the monthly means of the three cities side by side.
plot = figure(
    title="3都市の気温（月別平均）",  # the chart shows NY, SF and Austin, not NY alone
    x_range=month,
    y_range=[0, 100],
    x_axis_label="Month",
    y_axis_label="Temperature [F]",
    width=700,
    height=500,
    outline_line_color="black",
    outline_line_width=2,
)

color_palette = palettes.Set1[3]

# Shift each city's bars around the category centre (-0.25, 0, +0.25) so the
# three bars of one month sit next to each other instead of overlapping.
for i, city in enumerate(["NY", "SF", "Austin"]):
    plot.vbar(
        source=src,
        x=dodge(field_name="month", value=-0.25 + 0.25 * i, range=plot.x_range),
        top=f"{city}_mean",
        width=0.2,
        color=color_palette[i],
        legend_label=city,
    )

# Dotted black horizontal grid lines only.
plot.xgrid.grid_line_color = None
plot.ygrid.grid_line_color = "black"
plot.ygrid.grid_line_dash = "dotted"

plot.title.text_font_size = "24px"
plot.xaxis.axis_label_text_font_size = "18px"
plot.xaxis.major_label_text_font_size = "14px"
plot.yaxis.axis_label_text_font_size = "18px"
plot.yaxis.major_label_text_font_size = "14px"

# Frame the legend and move it outside the plot area, to the right.
plot.legend.border_line_width = 2
plot.legend.border_line_color = "black"
plot.add_layout(plot.legend[0], place="right")

show(plot)

## 積み上げ棒グラフ

In [9]:
# Load the yearly energy-supply table and preview its first rows.
energy_csv = "../data/japan_energy.csv"
stacked_bar_df = pd.read_csv(filepath_or_buffer=energy_csv)
stacked_bar_df.head()

Unnamed: 0,年度,石油,石炭,天然ガス,原子力,水力,新エネルギー・地熱等
0,1965,3.56,1.87,0.08,0.0,0.75,0.11
1,1966,4.13,1.92,0.09,0.01,0.77,0.11
2,1967,5.11,2.29,0.09,0.01,0.67,0.11
3,1968,5.95,2.42,0.1,0.01,0.72,0.12
4,1969,7.19,2.59,0.12,0.01,0.73,0.13


In [10]:
# Stacked bar chart: one bar per fiscal year, one segment per energy source.
src = ColumnDataSource(stacked_bar_df)

plot = figure(
    # The x range below starts at 1965, so the title states the same first year.
    title="日本国内のエネルギー供給量の推移(1965年～2012年)",
    y_range=[0, 30],
    x_range=[1965 - 0.5, 2012 + 0.5],  # half a year of padding so the end bars are not clipped
    y_axis_label="国内供給量[10^18 J]",
    x_axis_label="年度",
    width=800,
    height=500,
    outline_line_color="black",
    outline_line_width=2,
)

# Every column after the first ("年度") is stacked; one palette colour per stacked column.
column_list = list(stacked_bar_df.columns[1:])
color_palette = palettes.RdYlGn[len(column_list)]

plot.vbar_stack(
    source=src,
    stackers=column_list,
    x="年度",
    width=0.6,
    color=color_palette,
    legend_label=column_list,
)

# Dotted black horizontal grid lines only.
plot.xgrid.grid_line_color = None
plot.ygrid.grid_line_color = "black"
plot.ygrid.grid_line_dash = "dotted"  # "solid", "dashed", "dotted", "dotdash", "dashdot"

plot.title.text_font_size = "24px"
plot.xaxis.axis_label_text_font_size = "18px"
plot.xaxis.major_label_text_font_size = "14px"
plot.yaxis.axis_label_text_font_size = "18px"
plot.yaxis.major_label_text_font_size = "14px"

# Frame the legend and move it outside the plot area, to the right.
plot.legend.border_line_width = 2
plot.legend.border_line_color = "black"
plot.add_layout(plot.legend[0], place="right")

show(plot)

## 折れ線グラフ

In [11]:
# Parsing options for the tab-separated temperature file.
read_options = {
    "sep": "\t",  # tab-separated values
    "index_col": None,
    "parse_dates": ["date"],  # column to convert from string to datetime
    "date_format": "%Y%m%d",  # e.g. 20111001 -> 2011-10-01
}

# Load the data to plot.
plot_df = pd.read_csv("../data/city_temperature.csv", **read_options)

# Check the loaded data.
plot_df.head()

Unnamed: 0,date,New York,San Francisco,Austin
0,2011-10-01,63.4,62.7,72.2
1,2011-10-02,58.0,59.9,67.7
2,2011-10-03,53.3,59.1,69.4
3,2011-10-04,55.7,58.8,68.0
4,2011-10-05,64.2,58.7,72.4


In [12]:
# Line chart: one temperature line per city on a shared datetime axis.
plot = figure(
    width=700,
    height=500,
    title="アメリカ3都市の気温の変化",  # New York, San Francisco and Austin are cities
    x_axis_label="Date",
    x_axis_type="datetime",
    y_axis_label="Temperature [F]",
    y_minor_ticks=2,
    outline_line_color="black",
    outline_line_width=2,
)

color_palette = palettes.Category10[3]
src = ColumnDataSource(plot_df)

# Every column after the first ("date") holds one city's temperatures.
for i, city in enumerate(plot_df.columns[1:]):
    plot.line(
        source=src,
        x="date",
        y=city,
        line_width=1,  # the line-thickness property is `line_width`, not `width`
        color=color_palette[i],
        legend_label=city,
    )

# Month-scale ticks are rendered as e.g. "Oct, 2011".
plot.xaxis.formatter = DatetimeTickFormatter(months="%b, %Y")
plot.xgrid.grid_line_color = None
plot.ygrid.grid_line_color = None

plot.title.text_font_size = "24px"
plot.xaxis.axis_label_text_font_size = "18px"
plot.xaxis.major_label_text_font_size = "14px"
plot.yaxis.axis_label_text_font_size = "18px"
plot.yaxis.major_label_text_font_size = "14px"

# Frame the legend and move it outside the plot area, to the right.
plot.legend.border_line_width = 2
plot.legend.border_line_color = "black"
plot.add_layout(plot.legend[0], place="right")
show(plot)

In [13]:
# One figure per city, collected so they can be shown stacked in a column.
plot_list = []

# Options shared by every panel; only the title differs.
panel_options = {
    "width": 700,
    "height": 300,
    "x_axis_label": "Date",
    "x_axis_type": "datetime",
    "y_axis_label": "Temperature [F]",
    "y_minor_ticks": 2,
    "outline_line_color": "black",
    "outline_line_width": 2,
}

for i, city in enumerate(plot_df.columns[1:]):
    plot = figure(title=city, **panel_options)
    plot.line(x="date", y=city, source=src, line_color=color_palette[i])

    # Month-scale ticks are rendered as e.g. "Oct, 2011".
    plot.xaxis.formatter = DatetimeTickFormatter(months="%b, %Y")

    # No grid lines in either direction.
    for grid in (plot.xgrid, plot.ygrid):
        grid.grid_line_color = None

    plot.title.text_font_size = "24px"
    for axes in (plot.xaxis, plot.yaxis):
        axes.axis_label_text_font_size = "18px"
        axes.major_label_text_font_size = "14px"

    plot_list.append(plot)

show(column(plot_list))

## 面グラフ

In [14]:
# Load the yearly energy-supply table and preview its first rows.
energy_csv = "../data/japan_energy.csv"
stacked_area_df = pd.read_csv(filepath_or_buffer=energy_csv)
stacked_area_df.head()

Unnamed: 0,年度,石油,石炭,天然ガス,原子力,水力,新エネルギー・地熱等
0,1965,3.56,1.87,0.08,0.0,0.75,0.11
1,1966,4.13,1.92,0.09,0.01,0.77,0.11
2,1967,5.11,2.29,0.09,0.01,0.67,0.11
3,1968,5.95,2.42,0.1,0.01,0.72,0.12
4,1969,7.19,2.59,0.12,0.01,0.73,0.13


In [15]:
# Stacked area chart: one hatched band per energy source.
src = ColumnDataSource(stacked_area_df)

# One hatch pattern code per stacked column.
# Pattern codes: http://ja.dochub.org/bokeh/docs/user_guide/styling.html
hatch_patterns = [".", "o", "-", "v", ">", "/"]

plot = figure(
    width=800,
    height=600,
    # The x range below starts at 1965, so the title states the same first year.
    title="日本国内のエネルギー供給量の推移(1965年～2012年)",
    x_axis_label="年度",
    y_axis_label="国内供給量[10^18 J]",
    x_range=[1965, 2012],
    y_range=[0, 30],
    outline_line_color="black",
    outline_line_width=2,
    x_minor_ticks=5,
    y_minor_ticks=5,
)

# Every column after the first ("年度") is stacked; one palette colour per stacked column.
column_list = list(stacked_area_df.columns[1:])
color_palette = palettes.RdYlGn[len(column_list)]

plot.varea_stack(
    source=src,
    stackers=column_list,
    x="年度",
    color=color_palette,
    hatch_pattern=hatch_patterns,
    hatch_color="black",
    hatch_scale=12,
    legend_label=column_list,
)

# Dotted black horizontal grid lines only.
plot.xgrid.grid_line_color = None
plot.ygrid.grid_line_color = "black"
plot.ygrid.grid_line_dash = "dotted"

plot.title.text_font_size = "24px"
plot.xaxis.axis_label_text_font_size = "18px"
plot.xaxis.major_label_text_font_size = "14px"
plot.yaxis.axis_label_text_font_size = "18px"
plot.yaxis.major_label_text_font_size = "14px"

# Opaque, framed legend inside the plot area at the top left.
plot.legend.border_line_width = 2
plot.legend.border_line_color = "black"
plot.legend.location = "top_left"
plot.legend.background_fill_color = "white"
plot.legend.background_fill_alpha = 1
plot.legend.label_text_font_size = "14px"

show(plot)

## 100%積み上げ棒グラフ

In [16]:
# Load the energy table with its first column as the index, so that only the
# energy-source columns take part in the row totals.
energy_by_year = pd.read_csv("../data/japan_energy.csv", index_col=0)

# Convert every row to percentages of that row's total.
row_totals = energy_by_year.sum(axis=1)
stacked_bar_100_df = energy_by_year.div(row_totals, axis=0) * 100

# Turn the index back into an ordinary column.
stacked_bar_100_df = stacked_bar_100_df.reset_index()
stacked_bar_100_df.head()

Unnamed: 0,年度,石油,石炭,天然ガス,原子力,水力,新エネルギー・地熱等
0,1965,55.88697,29.356358,1.255887,0.0,11.77394,1.726845
1,1966,58.748222,27.311522,1.280228,0.142248,10.953058,1.564723
2,1967,61.714976,27.657005,1.086957,0.120773,8.091787,1.328502
3,1968,63.841202,25.965665,1.072961,0.107296,7.725322,1.287554
4,1969,66.759517,24.048282,1.114206,0.092851,6.778087,1.207057


In [17]:
# 100% stacked bar chart: each year's bar is split by percentage share.
src = ColumnDataSource(stacked_bar_100_df)

plot = figure(
    width=1000,
    height=500,
    # The x range below starts at 1965, so the title states the same first year.
    title="日本国内のエネルギー供給量の推移(1965年～2012年)",
    x_axis_label="年度",
    y_axis_label="国内供給量[%]",
    x_range=[1965 - 0.5, 2012 + 0.5],  # half a year of padding so the end bars are not clipped
    y_range=[0, 100],
    outline_line_color="black",
    outline_line_width=2,
)

# Every column after the first ("年度") is stacked; one palette colour per stacked column.
column_list = list(stacked_bar_100_df.columns[1:])
color_palette = palettes.RdYlGn[len(column_list)]

plot.vbar_stack(
    source=src,
    stackers=column_list,
    x="年度",
    width=0.8,
    color=color_palette,
    legend_label=column_list,
)

plot.xgrid.grid_line_color = None
plot.ygrid.grid_line_color = None

plot.title.text_font_size = "24px"
plot.xaxis.axis_label_text_font_size = "18px"
plot.xaxis.major_label_text_font_size = "14px"
plot.yaxis.axis_label_text_font_size = "18px"
plot.yaxis.major_label_text_font_size = "14px"

# Frame the legend and move it outside the plot area, to the right.
plot.legend.border_line_width = 2
plot.legend.border_line_color = "black"
plot.add_layout(plot.legend[0], place="right")

show(plot)

## 円グラフ

In [18]:
def _build_pie_data(values):
    """Build the column dictionary for one pie chart.

    Args:
        values: pandas Series with one non-negative value per category; the
            index supplies the category names.

    Returns:
        dict with the keys ``name``, ``angle`` (wedge size in radians),
        ``cumulative_angle`` (angle of each wedge's centre line), ``percent``,
        ``color``, ``sin`` / ``cos`` (label position at radius 0.4 along the
        centre line) and ``label`` (percentage formatted with one decimal).
    """
    share = values / values.sum()
    angle = share * 2 * np.pi
    # Centre of a wedge = cumulative angle up to and including it, minus half its own angle.
    mid_angle = (angle.cumsum() - angle / 2).to_numpy()
    percent = share * 100
    return {
        "name": values.index.to_list(),
        "angle": angle.to_list(),
        "cumulative_angle": mid_angle.tolist(),
        "percent": percent.to_list(),
        # One colour per category; Category10 provides palettes for 3 to 10 entries.
        "color": palettes.Category10[len(values)],
        "sin": np.sin(mid_angle) * 0.4,
        "cos": np.cos(mid_angle) * 0.4,
        "label": [f"{p:.1f}%" for p in percent],
    }


# The first CSV column becomes the index, so rows can be selected by year.
pie_df = pd.read_csv("../data/japan_energy.csv", index_col=0)

pie_1965_df = pie_df.loc[1965, :]
pie_1965_data = _build_pie_data(pie_1965_df)

pie_2012_df = pie_df.loc[2012, :]
pie_2012_data = _build_pie_data(pie_2012_df)

pie_df.head()

Unnamed: 0_level_0,石油,石炭,天然ガス,原子力,水力,新エネルギー・地熱等
年度,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1965,3.56,1.87,0.08,0.0,0.75,0.11
1966,4.13,1.92,0.09,0.01,0.77,0.11
1967,5.11,2.29,0.09,0.01,0.67,0.11
1968,5.95,2.42,0.1,0.01,0.72,0.12
1969,7.19,2.59,0.12,0.01,0.73,0.13


In [19]:
from bokeh.transform import cumsum
from bokeh.models import LabelSet


def _make_pie_figure(pie_data, title):
    """Return a 600x600 pie chart figure for one prepared data dictionary.

    Args:
        pie_data: dict with the columns "name", "angle", "color", "cos",
            "sin" and "label".
        title: figure title.

    Returns:
        The figure, with wedges, percentage labels and a per-category legend;
        axes and grid lines are hidden.
    """
    source = ColumnDataSource(pie_data)
    fig = figure(height=600, width=600, title=title)

    fig.wedge(
        source=source,
        x=0,
        y=0,
        radius=0.7,
        # Each wedge spans from the running total before it to the running total after it.
        start_angle=cumsum("angle", include_zero=True),
        end_angle=cumsum("angle"),
        line_color="white",
        fill_color="color",
        # legend_field creates one legend entry per distinct value of the "name"
        # column; legend_label would show the literal text "name" once.
        legend_field="name",
    )

    # Percentage labels placed at the precomputed ("cos", "sin") positions.
    labels = LabelSet(
        source=source,
        x="cos",
        y="sin",
        text="label",
        text_font_size="18pt",
        text_color="black",
        text_align="center",
    )
    fig.add_layout(labels)

    # Hide the axes and the grid.
    fig.axis.axis_label = None
    fig.axis.visible = False
    fig.grid.grid_line_color = None
    return fig


for pie_title, pie_data in (("1965年", pie_1965_data), ("2012年", pie_2012_data)):
    plot = _make_pie_figure(pie_data, pie_title)
    show(plot)

## ヒストグラム

In [20]:
hist_df = pd.read_csv("../data/diamond.csv")

# Distinct values of the "cut" column, in order of first appearance.
cut_types = hist_df["cut"].unique()
print("cutの種類", len(cut_types))

# Per-cut price histograms, stored under "<cut>_price_hist" (bin counts)
# and "<cut>_price_edge" (bin edges).
hist_data = {"cut": list(cut_types)}

for cut in cut_types:
    is_this_cut = hist_df["cut"] == cut
    price_per_cut_type = hist_df.loc[is_this_cut, "price"].to_numpy()
    price_hist, price_edge = np.histogram(price_per_cut_type, bins=250)
    hist_data.update(
        {
            f"{cut}_price_hist": price_hist,
            f"{cut}_price_edge": price_edge,
        }
    )

hist_df.head()

cutの種類 5


Unnamed: 0,carat,cut,color,clarity,depth,table,price,x,y,z
0,0.23,Ideal,E,SI2,61.5,55.0,326,3.95,3.98,2.43
1,0.21,Premium,E,SI1,59.8,61.0,326,3.89,3.84,2.31
2,0.23,Good,E,VS1,56.9,65.0,327,4.05,4.07,2.31
3,0.29,Premium,I,VS2,62.4,58.0,334,4.2,4.23,2.63
4,0.31,Good,J,SI2,63.3,58.0,335,4.34,4.35,2.75


In [21]:
# One histogram figure per cut, collected so they can be shown stacked in a column.
plot_list = []

# Palette owned by this cell, so the outline colours do not depend on whichever
# chart cell assigned `color_palette` last. Ten colours: enough for up to ten cuts.
hist_palette = palettes.Category10[10]

for i, cut in enumerate(hist_df["cut"].unique()):
    plot = figure(
        width=700,
        height=300,
        title=f"cut = {cut}",
        x_axis_label="price",
        y_axis_label="count",
        y_minor_ticks=2,
        outline_line_color="black",
        outline_line_width=2,
    )

    # One rectangle per bin: consecutive edges give left/right, the count gives the height.
    bin_edges = hist_data[f"{cut}_price_edge"]
    plot.quad(
        top=hist_data[f"{cut}_price_hist"],
        bottom=0,
        left=bin_edges[:-1],
        right=bin_edges[1:],
        line_color=hist_palette[i],
    )

    # Same fixed y range on every panel so the counts are directly comparable.
    plot.y_range.start = 0
    plot.y_range.end = 1200

    plot.xgrid.grid_line_color = None
    plot.ygrid.grid_line_color = None
    plot.title.text_font_size = "24px"
    plot.xaxis.axis_label_text_font_size = "18px"
    plot.xaxis.major_label_text_font_size = "14px"
    plot.yaxis.axis_label_text_font_size = "18px"
    plot.yaxis.major_label_text_font_size = "14px"
    plot_list.append(plot)

show(column(plot_list))

## 箱ひげ図

In [22]:
box_plot_df = pd.read_csv("../data/iris.csv")
display(box_plot_df.head())

# Quartiles of every column except the last one.
feature_df = box_plot_df.iloc[:, :-1]
quartiles = feature_df.quantile([0.25, 0.5, 0.75])

# One row per feature, with the quartiles as columns q1 / q2 / q3.
box_plot_data = (
    quartiles.T
    .reset_index()
    .rename(columns={0.25: "q1", 0.5: "q2", 0.75: "q3"})
)
display(box_plot_data)

# Whisker ends sit 1.5 x IQR beyond the box; they are not clipped to the observed data.
iqr = box_plot_data["q3"] - box_plot_data["q1"]
whisker_reach = 1.5 * iqr
box_plot_data["upper"] = box_plot_data["q3"] + whisker_reach
box_plot_data["lower"] = box_plot_data["q1"] - whisker_reach
box_plot_data

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


Unnamed: 0,index,q1,q2,q3
0,sepal_length,5.1,5.8,6.4
1,sepal_width,2.8,3.0,3.3
2,petal_length,1.6,4.35,5.1
3,petal_width,0.3,1.3,1.8


Unnamed: 0,index,q1,q2,q3,upper,lower
0,sepal_length,5.1,5.8,6.4,8.35,3.15
1,sepal_width,2.8,3.0,3.3,4.05,2.05
2,petal_length,1.6,4.35,5.1,10.35,-3.65
3,petal_width,0.3,1.3,1.8,4.05,-1.95


In [23]:
from bokeh.models import Whisker

# One shared data source feeds the whiskers and both halves of every box.
box_src = ColumnDataSource(box_plot_data)

plot = figure(
    width=800,
    height=500,
    title="Iris Dataset 特徴量",
    y_axis_label="[cm]",
    x_axis_label="特徴量",
    x_range=box_plot_data["index"].to_list(),  # categorical axis: one factor per feature
    y_range=[0, 10],
    outline_line_color="black",
    outline_line_width=2,
)

# Whiskers spanning the "lower" .. "upper" columns at each feature.
whisker = Whisker(
    source=box_src,
    base="index",
    upper="upper",
    lower="lower",
)
plot.add_layout(whisker)

# Each box is drawn as two bars: median..Q3 on top of Q1..median. The black
# outline makes the shared edge (the median) visible; without it both halves
# have the same colour and merge into a single bar.
for bottom_key, top_key in (("q2", "q3"), ("q1", "q2")):
    plot.vbar(
        source=box_src,
        x="index",
        width=0.7,
        bottom=bottom_key,
        top=top_key,
        line_color="black",
    )

plot.xgrid.grid_line_color = None
plot.ygrid.grid_line_color = None
plot.title.text_font_size = "24px"
plot.xaxis.axis_label_text_font_size = "18px"
plot.xaxis.major_label_text_font_size = "14px"
plot.yaxis.axis_label_text_font_size = "18px"
plot.yaxis.major_label_text_font_size = "14px"

show(plot)