<a href="https://colab.research.google.com/github/ishi23/deep-learning-with-pytorch-ja/blob/main/p1ch4/4_time_series_bikes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Mount Google Drive into the Colab runtime at /content/drive so the
# repository files stored there can be accessed from this notebook.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Switch the working directory to this chapter's folder on the mounted drive;
# the relative CSV path used below is resolved from here.
%cd /content/drive/MyDrive/repos/deep-learning-with-pytorch-ja/p1ch4/

/content/drive/MyDrive/repos/deep-learning-with-pytorch-ja/p1ch4


In [5]:
import numpy as np
import torch
import pandas as pd
# Keep tensor reprs compact: summarize tensors with more than 50 elements,
# show 2 items at each edge of a summarized dimension, wrap at 75 characters.
torch.set_printoptions(
    threshold=50,
    edgeitems=2,
    linewidth=75,
)

In [14]:
# Path to the hourly bike-sharing CSV, relative to the current working directory.
csvfile = "../data/p1ch4/bike-sharing-dataset/hour-fixed.csv"
# Data check: load the bike-rental data into a DataFrame for a quick visual inspection.
df_view = pd.read_csv(csvfile)
df_view

Unnamed: 0,instant,dteday,season,yr,mnth,hr,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
0,1,2011-01-01,1,0,1,0,0,6,0,1,0.24,0.2879,0.81,0.0000,3,13,16
1,2,2011-01-01,1,0,1,1,0,6,0,1,0.22,0.2727,0.80,0.0000,8,32,40
2,3,2011-01-01,1,0,1,2,0,6,0,1,0.22,0.2727,0.80,0.0000,5,27,32
3,4,2011-01-01,1,0,1,3,0,6,0,1,0.24,0.2879,0.75,0.0000,3,10,13
4,5,2011-01-01,1,0,1,4,0,6,0,1,0.24,0.2879,0.75,0.0000,0,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17515,17375,2012-12-31,1,1,12,19,0,1,1,2,0.26,0.2576,0.60,0.1642,11,108,119
17516,17376,2012-12-31,1,1,12,20,0,1,1,2,0.26,0.2576,0.60,0.1642,8,81,89
17517,17377,2012-12-31,1,1,12,21,0,1,1,1,0.26,0.2576,0.60,0.1642,7,83,90
17518,17378,2012-12-31,1,1,12,22,0,1,1,1,0.26,0.2727,0.56,0.1343,13,48,61


- レコードのインデックス(行番号):instant 
- 日付:dteday 
- 季節:season(1:春、2:夏、3:秋、4:冬) 
- 年:yr(0:2011 年、1:2012 年) 
- 月:mnth(1~12 月)
- 時間:hr(0~23)
- 休日かどうか:holiday
- 曜日:weekday
- 平日かどうか:workingday 
- 気象状況:weathersit(1:晴れ、2:霧、3:小雨 / 雪、4:本降り / 雪 )
- 温度(°C): temp
- 体感温度(°C): atemp
- 湿度:hum
- 風速:windspeed
- カジュアルユーザ数:casual
- 登録ユーザ数:registered 
- 使用中レンタル自転車の台数:cnt

In [11]:
def _day_of_month(date_field):
    """Return the day-of-month part of a 'YYYY-MM-DD' field as a float."""
    return float(date_field[8:10])


# Load the whole CSV as float32, skipping the header row. Column 1 holds the
# date string, so it is replaced by its day-of-month number.
bikes_numpy = np.loadtxt(
    csvfile,
    dtype=np.float32,
    delimiter=",",
    skiprows=1,
    converters={1: _day_of_month},
)
# Wrap the NumPy array as a tensor (from_numpy shares the underlying buffer).
bikes = torch.from_numpy(bikes_numpy)
bikes

tensor([[1.0000e+00, 1.0000e+00,  ..., 1.3000e+01, 1.6000e+01],
        [2.0000e+00, 1.0000e+00,  ..., 3.2000e+01, 4.0000e+01],
        ...,
        [1.7378e+04, 3.1000e+01,  ..., 4.8000e+01, 6.1000e+01],
        [1.7379e+04, 3.1000e+01,  ..., 3.7000e+01, 4.9000e+01]])

In [21]:
# Sanity check: lay out the day-of-month column (index 1) as rows of 24 hourly
# entries; each row should then hold one repeated day value.
bikes[:,1].reshape(-1,24)

tensor([[ 1.,  1.,  ...,  1.,  1.],
        [ 2.,  2.,  ...,  2.,  2.],
        ...,
        [30., 30.,  ..., 30., 30.],
        [31., 31.,  ..., 31., 31.]])

In [22]:
# Inspect the shape and strides of the flat hourly table (rows x columns).
bikes.shape, bikes.stride()

(torch.Size([17520, 17]), (17, 1))

In [27]:
# Regroup the hourly rows into days: 17520 x 17 -> 730 x 24 x 17.
# The last dimension keeps every column of the flat table.
daily_bikes = bikes.view(-1, 24, bikes.size(1))
# Strides: 408 (= 17 x 24) elements between days, 17 between hours within a
# day, 1 between columns.
daily_bikes.shape, daily_bikes.stride()

(torch.Size([730, 24, 17]), (408, 17, 1))

In [26]:
# Check how stride() behaves on a simple 1-D integer tensor holding 0..9.
tmp = torch.arange(10)
tmp

tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [25]:
# Display the stride tuple of the 1-D example tensor.
tmp.stride()

(1,)

In [28]:
# Rearrange to N samples x C channels x L length (the daily view is N x L x C).
# The result is rebuilt from `bikes` instead of transposing `daily_bikes` onto
# itself, so re-running this cell always yields N x C x L rather than swapping
# the two axes back on every second execution.
daily_bikes = bikes.view(-1, 24, bikes.shape[1]).transpose(1, 2)
daily_bikes.shape, daily_bikes.stride()

(torch.Size([730, 17, 24]), (408, 1, 17))

In [29]:
# Inspect the first day's data.

# Take the first 24 hourly rows and cast them to int64 (torch.long).
first_day = bikes[:24].to(torch.long)
# Weather has 4 levels, so prepare one length-4 zero row for each of the 24 hours.
weather_onehot = torch.zeros(first_day.size(0), 4)
# Weather level for each hour of the first day (column 9).
first_day[:, 9]

tensor([1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 2, 2,
        2, 2])

In [30]:
# onehotの作成
weather_onehot.scatter_(
    dim=1, 
    index=first_day[:,9].unsqueeze(1).long() - 1, # 1-4なので、0-3にするために1を引く
    value=1.0)

tensor([[1., 0., 0., 0.],
        [1., 0., 0., 0.],
        ...,
        [0., 1., 0., 0.],
        [0., 1., 0., 0.]])

In [31]:
# Concatenate along the column axis: 17 original columns + 4 one-hot columns.
# Only the first row of the result is displayed.
torch.cat([bikes[:24], weather_onehot], dim=1)[:1]

tensor([[ 1.0000,  1.0000,  1.0000,  0.0000,  1.0000,  0.0000,  0.0000,
          6.0000,  0.0000,  1.0000,  0.2400,  0.2879,  0.8100,  0.0000,
          3.0000, 13.0000, 16.0000,  1.0000,  0.0000,  0.0000,  0.0000]])

In [32]:
# Shape of the first concatenated row (original columns plus the one-hot columns).
torch.cat((bikes[:24], weather_onehot), 1)[:1].shape

torch.Size([1, 21])

In [35]:
# 全データに対して同じことをする
daily_weather_onehot = torch.zeros(daily_bikes.shape[0], 4,
                                   daily_bikes.shape[2])
daily_weather_onehot.shape

torch.Size([730, 4, 24])

In [36]:
# Weather channel (index 9) as 0-based int64 indices, with a singleton channel
# dimension inserted so the index lines up with the one-hot channel axis.
daily_weather_index = daily_bikes[:, 9, :].long().unsqueeze(1) - 1
# Write 1.0 into the channel selected by each index (in place).
daily_weather_onehot.scatter_(dim=1, index=daily_weather_index, value=1.0)
daily_weather_onehot.shape

torch.Size([730, 4, 24])

In [37]:
# Append the 4 one-hot weather channels after the original feature channels
# (dim=1 is the channel axis).
# Slicing to the first bikes.shape[1] channels is a no-op on the first run and
# keeps the cell idempotent: re-running it no longer stacks another 4 channels
# onto an already-extended tensor.
daily_bikes = torch.cat(
    (daily_bikes[:, :bikes.shape[1]], daily_weather_onehot), dim=1)
daily_bikes.shape

torch.Size([730, 21, 24])

In [38]:
# Alternative to the one-hot encoding: treat the weather column as a continuous
# variable and rescale its 1-4 levels to the range 0-1.
# The raw levels are read from `bikes` rather than from `daily_bikes` itself, so
# re-running this cell does not rescale values that were already rescaled.
# NOTE(review): this relies on `daily_bikes` being the concatenated copy, not a
# view of `bikes`; otherwise the in-place write would also alter `bikes`.
raw_weather = bikes[:, 9].reshape(-1, daily_bikes.shape[2])
daily_bikes[:, 9, :] = (raw_weather - 1.0) / 3.0

In [40]:
# Example of normalizing temperature (channel 10): min-max scaling.
temp = daily_bikes[:, 10, :]
# Global extremes over every day and hour.
temp_min = temp.min()
temp_max = temp.max()
# The right-hand side is fully evaluated before being written back into the channel.
daily_bikes[:, 10, :] = (temp - temp_min) / (temp_max - temp_min)

In [41]:
# Example of standardizing temperature (channel 10): zero mean, unit standard deviation.
temp = daily_bikes[:, 10, :]
# Statistics are computed over every day and hour before the channel is overwritten.
temp_mean, temp_std = temp.mean(), temp.std()
daily_bikes[:, 10, :] = (temp - temp_mean) / temp_std