In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
import pandas as pd

# 演習1

## データのロード

In [None]:
train = pd.read_csv("data/train_enshu1.csv")
test = pd.read_csv("data/test_enshu1.csv")

In [None]:
train.head()

In [None]:
test.head()

In [None]:
train = pd.pivot_table(train, index="Date/Time", columns="Base", values="count")

In [None]:
test = pd.pivot_table(test, index="Date/Time", columns="Base", values="count")

In [None]:
train.head()

In [None]:
train.shape

In [None]:
test.shape

## データ正規化

In [None]:
from sklearn.preprocessing import MinMaxScaler

In [None]:
diff_ma_train = train - train.rolling(7).mean()
diff_ma_test = test - test.rolling(7).mean()

In [None]:
diff_ma_train.dropna(inplace=True)
diff_ma_test.dropna(inplace=True)

In [None]:
scaler = MinMaxScaler(feature_range=(-1, 1))

In [None]:
train_f = scaler.fit_transform(diff_ma_train.values.astype(np.float32))

In [None]:
test_f = scaler.transform(diff_ma_test.values.astype(np.float32))

In [None]:
train_f.shape

In [None]:
test_f.shape

## データをLSTMが読み込める形式に変換

In [None]:
train_X = train_f[:-1].T.reshape((5, 146, 1))

In [None]:
train_Y = train_f[1:].T.reshape((5, 146, 1))

In [None]:
test_X = test_f[:-1].T.reshape((5, 23, 1))

In [None]:
test_Y = test_f[1:].T.reshape((5, 23, 1))

In [None]:
train_X.shape

## モデルの作成

In [None]:
import keras

In [None]:
from keras.models import Sequential
from keras.layers import LSTM, TimeDistributed, Dense

In [None]:
# Sequence-to-sequence regressor: an LSTM with 10 outputs per step that returns
# the whole output sequence, followed by a Dense(1) applied independently at
# every time step, trained with mean squared error and the Adam optimizer.
# NOTE(review): input_dim/output_dim look like the Keras 1 argument spellings
# (Keras 2 uses units=) -- confirm against the installed Keras version.
model = Sequential([
    LSTM(input_dim=1, output_dim=10, return_sequences=True),
    TimeDistributed(Dense(1)),
])
model.compile(optimizer="adam", loss="mean_squared_error")

## 学習

In [None]:
# Train for 50 epochs on the next-step prediction task (inputs train_X, targets train_Y).
model.fit(x=train_X, y=train_Y, nb_epoch=50)

## 評価

### 学習データに対するFitを確認

In [None]:
pred_Y_train = model.predict(train_Y)

In [None]:
pred_Y_train.shape

In [None]:
train_Y.shape

In [None]:
pred_train = pred_Y_train.reshape((5, 146))
ref_train = train_Y.reshape((5, 146))

In [None]:
for i in range(5):
    pd.DataFrame(dict(pred=pred_train[i], ref=ref_train[i])).plot()

### テストデータに対するFitを確認

In [None]:
pred_Y = model.predict(test_X)

In [None]:
pred_Y.shape

In [None]:
test_Y.shape

In [None]:
pred = pred_Y.reshape((5, 23))
ref = test_Y.reshape((5, 23))

In [None]:
for i in range(5):
    pd.DataFrame(dict(pred=pred[i], ref=ref[i])).plot()

## たくさん反復した時の性能を見てみましょう

10000反復したモデルを付属していますので、その時の性能を確認しましょう。

In [None]:
hiden = keras.models.load_model("data/hiden_no_tare_enshu1.h5")
pred_Y_hiden_train = hiden.predict(train_Y)
pred_hiden_train = pred_Y_hiden_train.reshape((5, 146))
for i in range(5):
    pd.DataFrame(dict(pred=pred_hiden_train[i], ref=ref_train[i])).plot()

In [None]:
pred_Y_hiden = hiden.predict(test_Y)
pred_hiden = pred_Y_hiden.reshape((5, 23))
for i in range(5):
    pd.DataFrame(dict(pred=pred_hiden[i], ref=ref[i])).plot()

## モデル出力から予測値に変換

In [None]:
test.rolling(7).mean().iloc[6:].shape, scaler.inverse_transform(pred_hiden.T).shape

In [None]:
predicted_final = test.rolling(7).mean().iloc[7:] + scaler.inverse_transform(pred_hiden.T)

In [None]:
predicted_final.shape, test.values.shape

In [None]:
for i in range(5):
    pd.DataFrame(dict(pred=predicted_final.values[:, i], ref=test.values[7:, i])).plot()