# 第13章　時間を表現するグラフ

## 13.1 エラーバー付き折れ線グラフとリッジラインプロット

In [1]:
from datetime import date, timedelta
import json
import seaborn as sns

from plotly import graph_objects as go
from plotly import express as px
from plotly.colors import n_colors
from plotly.graph_objs.layout import Template

# Load the seaborn "taxis" dataset, keep only the pickup timestamp and the
# numeric trip columns, then order the rows chronologically by pickup time
# and renumber the row index from 0.
df = (
    sns.load_dataset('taxis')[
        ['pickup', 'passengers', 'distance', 'fare', 'tip', 'total']
    ]
    .sort_values('pickup')
    .reset_index(drop=True)
)

df

Unnamed: 0,pickup,passengers,distance,fare,tip,total
0,2019-02-28 23:29:03,1,0.90,5.0,0.00,6.30
1,2019-03-01 00:03:29,3,2.16,10.0,2.00,15.80
2,2019-03-01 00:08:32,3,7.35,22.5,1.00,27.30
3,2019-03-01 00:15:53,1,7.00,25.5,7.30,36.60
4,2019-03-01 00:29:22,4,0.74,4.5,1.00,9.30
...,...,...,...,...,...,...
6428,2019-03-31 22:13:37,1,1.00,7.5,0.70,12.00
6429,2019-03-31 22:32:27,1,0.40,3.5,1.45,8.75
6430,2019-03-31 22:51:53,1,0.67,4.5,1.66,9.96
6431,2019-03-31 23:15:03,1,3.03,11.5,3.82,19.12


In [2]:
# Print a concise summary of df: index range, column dtypes, non-null counts and memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6433 entries, 0 to 6432
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   pickup      6433 non-null   datetime64[ns]
 1   passengers  6433 non-null   int64         
 2   distance    6433 non-null   float64       
 3   fare        6433 non-null   float64       
 4   tip         6433 non-null   float64       
 5   total       6433 non-null   float64       
dtypes: datetime64[ns](1), float64(4), int64(1)
memory usage: 301.7 KB


In [3]:
# Use the 'pickup' column as the row index, so the resampling and
# date-based slicing in later cells operate on the pickup timestamps
df = df.set_index('pickup')

df

Unnamed: 0_level_0,passengers,distance,fare,tip,total
pickup,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019-02-28 23:29:03,1,0.90,5.0,0.00,6.30
2019-03-01 00:03:29,3,2.16,10.0,2.00,15.80
2019-03-01 00:08:32,3,7.35,22.5,1.00,27.30
2019-03-01 00:15:53,1,7.00,25.5,7.30,36.60
2019-03-01 00:29:22,4,0.74,4.5,1.00,9.30
...,...,...,...,...,...
2019-03-31 22:13:37,1,1.00,7.5,0.70,12.00
2019-03-31 22:32:27,1,0.40,3.5,1.45,8.75
2019-03-31 22:51:53,1,0.67,4.5,1.66,9.96
2019-03-31 23:15:03,1,3.03,11.5,3.82,19.12


In [4]:
# Build the trace: trip distance drawn as a line over pickup time
trace = go.Scatter(
    x=df.index,         # x-axis variable (the pickup timestamps in the row index)
    y=df['distance'],   # y-axis variable
    mode='lines'        # graph mode (line chart)
)   # line chart of trip distance

# Load the custom template.
# The encoding is passed explicitly so that reading the JSON file does not
# depend on the platform's locale default (JSON text is UTF-8).
with open('custom_white.json', encoding='utf-8') as f:
    custom_white_dict = json.load(f)
# The file is only needed for parsing; build the Template once it is closed
template = Template(custom_white_dict)

# Build the Layout (the template is reused by the later figures as well)
layout = go.Layout(
    template=template,
    title='Taxis dataset',
    xaxis={'title': 'Pickup'},
    yaxis={'title': 'Distance'}
)

# Build the Figure
figure = go.Figure(trace, layout)

figure

In [5]:
# Group the rows into daily bins on the pickup index ('D' = calendar-day frequency).
# The resampler is kept in df_day so later cells can reuse it for other statistics.
df_day = df.resample('D')
# DataFrame of the per-day mean of every column
df_day_mean = df_day.mean()

df_day_mean.head(10)

Unnamed: 0_level_0,passengers,distance,fare,tip,total
pickup,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019-02-28,1.0,0.9,5.0,0.0,6.3
2019-03-01,1.53527,2.656805,12.228091,1.835975,17.484772
2019-03-02,1.565657,2.771212,11.909091,1.686717,16.762727
2019-03-03,1.56213,3.278343,12.946095,1.819349,17.913136
2019-03-04,1.561404,3.414094,13.659298,1.958947,19.117427
2019-03-05,1.675439,3.143114,13.763026,1.89114,19.199035
2019-03-06,1.51751,2.789339,12.722568,2.059767,18.185253
2019-03-07,1.518349,2.922064,13.296514,2.005688,18.879358
2019-03-08,1.412766,3.099532,13.482979,2.049277,19.251957
2019-03-09,1.593137,3.249951,13.43652,1.8225,18.557843


In [6]:
# DataFrame of the per-day standard deviation of every column.
# 2019-02-28 contains a single trip, so its standard deviation is NaN.
df_day_std = df_day.std()

df_day_std.head(10)

Unnamed: 0_level_0,passengers,distance,fare,tip,total
pickup,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019-02-28,,,,,
2019-03-01,1.221253,3.237925,10.100446,2.139902,12.09898
2019-03-02,1.243555,3.391074,9.408756,1.93712,10.698145
2019-03-03,1.1992,3.71714,9.922348,2.139158,11.24269
2019-03-04,1.310827,4.177944,12.706265,2.779238,15.315941
2019-03-05,1.337337,4.072125,12.827817,2.117964,14.346349
2019-03-06,1.139098,3.82758,11.001655,2.357078,13.4513
2019-03-07,1.160857,3.500161,10.683593,2.126592,12.065092
2019-03-08,1.023154,4.328706,13.240612,2.643876,16.178493
2019-03-09,1.234221,4.174954,11.004069,2.255442,12.969398


In [7]:
# Layout shared settings: custom template, chart title and axis titles
layout = go.Layout(
    template=template,
    title='Taxis dataset',
    xaxis=dict(title='Pickup'),
    yaxis=dict(title='Fare'),
)

# Line chart of the daily mean fare with error bars of +/- 1 standard deviation
trace = go.Scatter(
    x=df_day_mean.index,
    y=df_day_mean['fare'],
    mode='lines',
    error_y=dict(
        type='data',                # bar lengths are given as data values
        array=df_day_std['fare'],   # variable used for the error bars
        visible=True,
    ),
)

# Assemble the Figure from the trace and the layout
figure = go.Figure(data=trace, layout=layout)

figure

In [8]:
# DataFrame of the per-day maximum of every column (reuses the daily resampler df_day)
df_day_max = df_day.max()

df_day_max.head(10)

Unnamed: 0_level_0,passengers,distance,fare,tip,total
pickup,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019-02-28,1,0.9,5.0,0.0,6.3
2019-03-01,6,21.27,65.59,13.66,81.96
2019-03-02,6,25.28,70.0,12.21,73.27
2019-03-03,6,20.39,52.0,15.26,76.32
2019-03-04,6,21.3,79.5,15.26,80.8
2019-03-05,6,22.81,86.14,13.52,92.4
2019-03-06,6,23.61,59.5,13.1,78.66
2019-03-07,6,25.51,93.5,12.21,94.8
2019-03-08,6,36.66,100.0,20.56,123.36
2019-03-09,6,28.3,67.5,13.0,76.7


In [9]:
# DataFrame of the per-day minimum of every column (reuses the daily resampler df_day)
df_day_min = df_day.min()

df_day_min.head(10)

Unnamed: 0_level_0,passengers,distance,fare,tip,total
pickup,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019-02-28,1,0.9,5.0,0.0,6.3
2019-03-01,0,0.0,2.5,0.0,3.3
2019-03-02,0,0.0,2.5,0.0,3.3
2019-03-03,0,0.1,3.0,0.0,3.8
2019-03-04,0,0.0,2.5,0.0,3.3
2019-03-05,0,0.0,2.5,0.0,3.3
2019-03-06,0,0.0,2.5,0.0,3.8
2019-03-07,1,0.1,2.5,0.0,3.3
2019-03-08,0,0.0,1.0,0.0,1.3
2019-03-09,0,0.0,2.5,0.0,3.3


In [10]:
# Error bar lengths measured from the daily mean of the total payment
error_positive = df_day_max['total'].sub(df_day_mean['total'])  # upward: max - mean
error_negative = df_day_mean['total'].sub(df_day_min['total'])  # downward: mean - min

# Layout: custom template, chart title and axis titles
layout = go.Layout(
    template=template,
    title='Taxis dataset',
    xaxis=dict(title='Pickup'),
    yaxis=dict(title='Total'),
)

# Line chart of the daily mean total with error bars spanning min to max
trace = go.Scatter(
    x=df_day_mean.index,
    y=df_day_mean['total'],
    mode='lines',
    error_y=dict(
        type='data',
        symmetric=False,            # upper and lower bars have different lengths
        array=error_positive,       # variable used for the upper error bar
        arrayminus=error_negative,  # variable used for the lower error bar
        visible=True,
    ),
)

# Assemble the Figure from the trace and the layout
figure = go.Figure(data=trace, layout=layout)

figure

In [11]:
# Example: select the rows from 2/28 through 3/7.
# With date strings as slice bounds, the whole end day (3/7) is included.
df.loc['2019-02-28':'2019-03-07']

Unnamed: 0_level_0,passengers,distance,fare,tip,total
pickup,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019-02-28 23:29:03,1,0.90,5.0,0.00,6.30
2019-03-01 00:03:29,3,2.16,10.0,2.00,15.80
2019-03-01 00:08:32,3,7.35,22.5,1.00,27.30
2019-03-01 00:15:53,1,7.00,25.5,7.30,36.60
2019-03-01 00:29:22,4,0.74,4.5,1.00,9.30
...,...,...,...,...,...
2019-03-07 23:42:53,1,1.81,9.0,0.00,12.80
2019-03-07 23:43:16,1,0.54,4.5,1.20,7.00
2019-03-07 23:46:25,1,1.09,6.0,1.96,11.76
2019-03-07 23:47:25,3,2.30,10.5,2.86,17.16


In [12]:
# First day of each of the five consecutive 7-day windows, beginning on 2019-02-28
start_days = [
    date(2019, 2, 28) + timedelta(days=7 * offset)
    for offset in range(5)
]

start_days

[datetime.date(2019, 2, 28),
 datetime.date(2019, 3, 7),
 datetime.date(2019, 3, 14),
 datetime.date(2019, 3, 21),
 datetime.date(2019, 3, 28)]

In [13]:
# Legend label for each weekly window
names = ['1st week', '2nd week', '3rd week', '4th week', '5th week']

# Build the list of traces, one per week
traces = []
for start_day, name in zip(start_days, names):
    end_day = start_day + timedelta(6)  # last calendar day of the 7-day window
    # Slice with ISO date strings rather than datetime.date objects: pandas
    # treats a date object as a timestamp at midnight, which would leave out
    # the trips made during end_day. A date string covers the whole day, so
    # end_day really is included.
    df_subset = df.loc[start_day.isoformat():end_day.isoformat()]

    trace = go.Violin(
        x=df_subset['total'],
        orientation='h',
        side='positive',
        width=3,
        points=False,
        name=name
    )   # ridgeline plot of the total payment
    traces.append(trace)

# Build the Layout
layout = go.Layout(
    template=template,
    title='Taxis dataset',
    xaxis={
        'title': 'Total',
        'showline': False
    },
    yaxis={'title': 'Pickup'}
)

# Build the Figure
figure = go.Figure(traces, layout)

figure

In [14]:
# The Aggrnyl sequential color scale, as a list of 'rgb(r, g, b)' strings
aggrnyl = px.colors.sequential.Aggrnyl

aggrnyl

['rgb(36, 86, 104)',
 'rgb(15, 114, 121)',
 'rgb(13, 143, 129)',
 'rgb(57, 171, 126)',
 'rgb(110, 197, 116)',
 'rgb(169, 220, 103)',
 'rgb(237, 239, 93)']

In [15]:
# Build the list of colors for the five weeks: one color per entry of start_days,
# stepping from the first Aggrnyl color to the last
colors = n_colors(aggrnyl[0], aggrnyl[-1], len(start_days), colortype='rgb')

colors

['rgb(36.0, 86.0, 104.0)',
 'rgb(86.25, 124.25, 101.25)',
 'rgb(136.5, 162.5, 98.5)',
 'rgb(186.75, 200.75, 95.75)',
 'rgb(237.0, 239.0, 93.0)']

In [16]:
# Build the list of traces, one per week, each with its own line color
traces = []
for start_day, name, color in zip(start_days, names, colors):
    end_day = start_day + timedelta(6)  # last calendar day of the 7-day window
    # Slice with ISO date strings rather than datetime.date objects: pandas
    # treats a date object as a timestamp at midnight, which would leave out
    # the trips made during end_day. A date string covers the whole day, so
    # end_day really is included.
    df_subset = df.loc[start_day.isoformat():end_day.isoformat()]

    trace = go.Violin(
        x=df_subset['total'],
        orientation='h',
        side='positive',
        width=3,
        points=False,
        name=name,
        line_color=color
    )   # ridgeline plot of the total payment
    traces.append(trace)

# Build the Figure (reuses the layout defined for the previous ridgeline plot)
figure = go.Figure(traces, layout)

figure