In [None]:
%matplotlib inline

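# Bivariate input:
#
#   the values of a sine curve at 100 time steps
#   the values of a cosine curve at 100 time steps
# are received as the input, and
#
# a univariate output one step ahead:
#
#   the value of (sin + cos) at the 101st time step
# is produced.
#
# This notebook trains a bivariate bi-LSTM regression model
# (predicting one time step ahead) on that task.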

In [None]:
import pandas as pd
import numpy as np
import math
import random

# Seed the stdlib generator so the noisy sine series is reproducible.
random.seed(123)
# Scale applied to the uniform(-1, 1) jitter that is added to the phase.
random_factor = 0.05
# Number of time steps in one full period.
steps_per_cycle = 80
# Number of periods to generate.
number_of_cycles = 50

# One row per time step t = 0 .. steps_per_cycle * number_of_cycles (inclusive).
df = pd.DataFrame({"t": np.arange(steps_per_cycle * number_of_cycles + 1)})


def _noisy_sin(t):
    """Return sin of the phase at step t, with a small random phase jitter."""
    phase = t * (2 * math.pi / steps_per_cycle)
    jitter = random.uniform(-1.0, +1.0) * random_factor
    return math.sin(phase + jitter)


df["sin_t"] = df["t"].apply(_noisy_sin)
# Preview the first two periods of the noisy sine series.
df[["sin_t"]].iloc[:steps_per_cycle * 2].plot()

In [None]:
# Noisy cosine series: cos(2*pi*t / steps_per_cycle + jitter), using the same
# jitter scale (random_factor) as sin_t.
#
# The jitter is drawn from a dedicated, seeded stdlib generator instead of
# `from numpy.random import *`. The wildcard import rebinds the name `random`
# (the stdlib module imported earlier in this notebook) to numpy's `random`
# function, which breaks re-running the sin_t cell, and numpy's global
# generator was never seeded, so cos_t differed on every run.
# A separate seed keeps the cos jitter independent of the sin jitter, and a
# private generator makes this cell give the same result each time it is run.
cos_noise_rng = random.Random(456)
df["cos_t"] = df.t.apply(
    lambda x: math.cos(
        x * (2 * math.pi / steps_per_cycle)
        + cos_noise_rng.uniform(-1.0, +1.0) * random_factor
    )
)
# Preview the first two periods of the noisy cosine series.
df[["cos_t"]].head(steps_per_cycle * 2).plot()

In [None]:
# Prediction target: element-wise sum of the two noisy series.
df["sin+cos"] = df["sin_t"].add(df["cos_t"])
# Preview the first two periods of the target series.
df[["sin+cos"]].iloc[:steps_per_cycle * 2].plot()

In [None]:
def _load_data(data, n_prev = 100):
    """
    Cut a time-ordered frame into sliding windows for one-step-ahead prediction.

    Parameters
    ----------
    data : pd.DataFrame
        Rows are consecutive time steps, columns are variables.
    n_prev : int
        Number of consecutive rows in each input window.

    Returns
    -------
    alsX : np.ndarray
        Input windows, shape (len(data) - n_prev, n_prev, n_columns).
    alsY : np.ndarray
        The row immediately following each window,
        shape (len(data) - n_prev, n_columns).

    Both arrays are empty (shape (0,)) when len(data) <= n_prev.
    """
    # DataFrame.as_matrix() was removed in pandas 1.0; `.values` returns the
    # same ndarray on both old and new pandas. Converting once up front also
    # avoids a DataFrame -> ndarray conversion for every window.
    values = data.values
    n_windows = len(data) - n_prev

    alsX = np.array([values[i:i + n_prev] for i in range(n_windows)])
    alsY = np.array([values[i + n_prev] for i in range(n_windows)])

    return alsX, alsY

def train_test_split(df, test_size=0.1, n_prev = 100):
    """
    Split a time-ordered frame chronologically and window each part.

    The first round(len(df) * (1 - test_size)) rows form the training part and
    the remaining rows the test part; each part is windowed independently by
    `_load_data`, so no window straddles the split point.

    Parameters
    ----------
    df : pd.DataFrame
        Rows are consecutive time steps.
    test_size : float
        Fraction of rows held out at the end for testing.
    n_prev : int
        Window length passed through to `_load_data`.

    Returns
    -------
    ((X_train, y_train), (X_test, y_test)) : tuple of tuples of np.ndarray
    """
    # int(round(...)) keeps the boundary an integer on Python 2 as well,
    # where round() returns a float.
    ntrn = int(round(len(df) * (1 - test_size)))
    X_train, y_train = _load_data(df.iloc[0:ntrn], n_prev)
    X_test, y_test = _load_data(df.iloc[ntrn:], n_prev)

    return (X_train, y_train), (X_test, y_test)

In [None]:
# Window each series with 100-step inputs. Only the input windows (X) are kept
# for the sin and cos series, and only the next-step targets (y) are kept for
# the sin+cos series; `_` discards the half that is not needed.
(X_sin_train, _), (X_sin_test, _) = train_test_split(df[["sin_t"]], n_prev =100)  
(X_cos_train, _), (X_cos_test, _) = train_test_split(df[["cos_t"]], n_prev =100) 
(_, y_sincos_train), (_, y_sincos_test) = train_test_split(df[["sin+cos"]], n_prev =100)

In [None]:
# Shape of the sin training windows: (n_train_windows, 100, 1).
X_sin_train.shape

In [None]:
# Sanity check: approximate number of rows held out for testing, before
# windowing. test_size repeats the default used by train_test_split.
test_size=0.1
steps_per_cycle * number_of_cycles * test_size

In [None]:
# Sanity check: approximate number of rows used for training, before windowing.
steps_per_cycle * number_of_cycles * (1 - test_size)

In [None]:
# Shape of the sin test windows: (n_test_windows, 100, 1).
X_sin_test.shape

In [None]:
# Pair the sin and cos training windows so every time step holds [sin, cos].
# X_sin_train and X_cos_train both have shape (n_windows, n_prev, 1), so
# joining them along the last axis yields (n_windows, n_prev, 2) in a single
# vectorized call instead of building n_windows * n_prev small Python lists.
data = np.concatenate([X_sin_train, X_cos_train], axis=-1)

In [None]:
# Make sure `data` is an ndarray and display its shape.
data = np.array(data)
data.shape

In [None]:
# Bring the training inputs to (n_windows, n_time_steps, 2), the layout the
# model's Input layer expects. The leading sizes are read from the array
# itself rather than hard-coded (3501, 100), so the cell keeps working when
# the series length, test_size or n_prev change.
data = data.reshape(data.shape[0], data.shape[1], 2)
data.shape

In [None]:
# Check that each time step stores a (sin, cos) pair.
print(data[0][0])

In [None]:
# First time step of the first window: the combined pair should match the
# values taken separately from the sin and cos windows.
print(data[0][0])
print(X_sin_train[0][0], X_cos_train[0][0])

In [None]:
# Last time step of the last window: the combined pair should match the values
# taken separately from the sin and cos windows. Negative indices replace the
# hard-coded 3500 / 99 so the check does not depend on the dataset size.
print(data[-1][-1])
print(X_sin_train[-1][-1], X_cos_train[-1][-1])

In [None]:
# Each time step now holds a (sin, cos) pair.

In [None]:
# Build the validation (test) input dataset in the same way as the training
# inputs: X_sin_test and X_cos_test both have shape (n_windows, n_prev, 1), so
# joining them along the last axis gives [sin, cos] per time step in a single
# vectorized call instead of a nested Python loop.
test_data = np.concatenate([X_sin_test, X_cos_test], axis=-1)

In [None]:
# Make sure the test inputs are an ndarray and display their shape.
test_data_array = np.array(test_data)
test_data_array.shape

In [None]:
# Bring the test inputs to (n_windows, n_time_steps, 2). The leading sizes are
# read from the array itself rather than hard-coded (300, 100), so the cell
# keeps working when the series length, test_size or n_prev change.
test_data_array = test_data_array.reshape(
    test_data_array.shape[0], test_data_array.shape[1], 2
)
test_data_array.shape

In [None]:
# Shape of the training targets: (n_train_windows, 1).
y_sincos_train.shape

In [None]:
# Original note: ground-truth values of the validation (test) dataset, used
# when overlaying the predictions made on the validation data and the observed
# (true) values on a graph to inspect the prediction error.
# NOTE(review): the code in this cell inspects the *training* targets, not the
# test targets that note describes - confirm which one was intended.
#
# ndarray.reshape returns a new array, so the former bare
# `y_sincos_train.reshape(3501)` call changed nothing and only tied the cell to
# a hard-coded dataset size; it has been dropped. The targets stay 2-D,
# (n_windows, 1), which matches the model's single-unit output layer.
y_sincos_train.shape

In [None]:
# Peek at the first ten training targets.
y_sincos_train[0:10]

In [None]:
# (Reference) http://qiita.com/HirofumiYashima/items/3767a321b4cb544a5581

from keras.models import Model
from keras.layers.core import Dense, Activation  
from keras.layers import Input,Dense, Dropout, Embedding, LSTM, Bidirectional

# Bivariate model, hence batch_shape=(None, 100, 2):
# any batch size, 100 time steps, 2 variables (sin, cos) per step.
input_layer = Input(batch_shape=(None, 100, 2))
# For a univariate model the input would instead be batch_shape=(None, 100, 1):
# input_layer = Input(batch_shape=(None, 100, 1))

# Bidirectional LSTM with 300 units per direction, followed by a single linear
# output unit for the one-step-ahead regression target.
bilstm_output_layer = Bidirectional(LSTM(300))(input_layer)
dense_output_layer = Dense(1)(bilstm_output_layer)
prediction_layer = Activation("linear")(dense_output_layer)

# The tensors are passed positionally: the keyword names are `input`/`output`
# in Keras 1 but `inputs`/`outputs` in Keras 2 and later, while the positional
# order (inputs first, outputs second) is the same in both.
model = Model(input_layer, prediction_layer)

In [None]:
# Author's note: the bi-LSTM layer dimension was mistakenly made larger than
# the input data dimension.

model.summary()

In [None]:
# Regression setup: mean squared error loss, Adam optimizer.
model.compile(loss="mean_squared_error",  optimizer="adam")

In [None]:
from keras.callbacks import EarlyStopping
# Early stopping watches the validation loss with patience=0.
# NOTE(review): presumably this ends training at the first epoch whose
# val_loss does not improve - confirm against the installed Keras version.
early_stopping = EarlyStopping(monitor='val_loss', patience=0)

# Train the first model (300-unit bi-LSTM) on the paired (sin, cos) windows,
# holding out 5% of the training samples for validation.
# NOTE(review): `nb_epoch` looks like the Keras 1 spelling of the epoch count;
# newer Keras releases may expect `epochs` - confirm before upgrading.
model_history_log = model.fit(data, y_sincos_train, 
                              batch_size=600, 
                              nb_epoch=50, 
                              validation_split=0.05,
                              callbacks=[early_stopping])

In [None]:
from matplotlib import pyplot as plt
import seaborn as sns

# Training and validation loss curves of the first model.
loss, val_loss = (model_history_log.history[_k] for _k in ('loss', 'val_loss'))

# Number of epochs that were actually run.
nb_epoch = len(loss)

for _curve, _name in zip((loss, val_loss), ('loss', 'val_loss')):
    plt.plot(range(nb_epoch), _curve, marker='.', label=_name)

plt.legend(loc='best', fontsize=10)
plt.grid()

plt.xlabel('epoch')
plt.ylabel('loss')
plt.show()

In [None]:
# Predict the next-step (sin + cos) value for every test window with the first
# model, then show how many predictions were produced.
predicted = model.predict(test_data_array)  
len(predicted)

In [None]:
# Put the first model's predictions next to the observed test targets and
# plot both series on one chart.
dataf = pd.DataFrame(predicted, columns=["predicted"])
dataf["true_value(observed_value)"] = y_sincos_test
dataf.plot()

In [None]:
# Compute the prediction error (predicted minus observed) and plot it.
dataf["difference"] = dataf["predicted"].sub(dataf["true_value(observed_value)"])
dataf["difference"].plot()

In [None]:
# Plot predictions, observed values and the error together (first model).
dataf.plot()

In [None]:
# Make the bi-LSTM layer dimension smaller than the input data dimension.

from keras.models import Model
from keras.layers.core import Dense, Activation  
from keras.layers import Input,Dense, Dropout, Embedding, LSTM, Bidirectional

# Bivariate model, hence batch_shape=(None, 100, 2):
# any batch size, 100 time steps, 2 variables (sin, cos) per step.
input_layer = Input(batch_shape=(None, 100, 2))

# Second variant: a much smaller bidirectional LSTM (50 units per direction),
# followed by a single linear output unit.
bilstm_output_layer = Bidirectional(LSTM(50))(input_layer)
dense_output_layer = Dense(1)(bilstm_output_layer)
prediction_layer = Activation("linear")(dense_output_layer)

# The tensors are passed positionally: the keyword names are `input`/`output`
# in Keras 1 but `inputs`/`outputs` in Keras 2 and later, while the positional
# order (inputs first, outputs second) is the same in both.
model_2 = Model(input_layer, prediction_layer)

In [None]:
# Layer-by-layer overview of the 50-unit variant.
model_2.summary()

In [None]:
# Regression setup: mean squared error loss, Adam optimizer.
model_2.compile(loss="mean_squared_error",  optimizer="adam")

In [None]:
from keras.callbacks import EarlyStopping
# Early stopping watches the validation loss with patience=0.
# NOTE(review): presumably this ends training at the first epoch whose
# val_loss does not improve - confirm against the installed Keras version.
early_stopping = EarlyStopping(monitor='val_loss', patience=0)

# Train the second model (50-unit bi-LSTM) with the same data and settings as
# the first one, holding out 5% of the training samples for validation.
# NOTE(review): `nb_epoch` looks like the Keras 1 spelling of the epoch count;
# newer Keras releases may expect `epochs` - confirm before upgrading.
model_2_history_log = model_2.fit(data, y_sincos_train, 
                                  batch_size=600, 
                                  nb_epoch=50, 
                                  validation_split=0.05,
                                  callbacks=[early_stopping]) 

In [None]:
from matplotlib import pyplot as plt
import seaborn as sns

# Training and validation loss curves of the second model.
loss, val_loss = (model_2_history_log.history[_k] for _k in ('loss', 'val_loss'))

# Number of epochs that were actually run.
nb_epoch = len(loss)

for _curve, _name in zip((loss, val_loss), ('loss', 'val_loss')):
    plt.plot(range(nb_epoch), _curve, marker='.', label=_name)

plt.legend(loc='best', fontsize=10)
plt.grid()

plt.xlabel('epoch')
plt.ylabel('loss')
plt.show()

In [None]:
# Predict the next-step (sin + cos) value for every test window with the
# second model, then show how many predictions were produced.
predicted_2 = model_2.predict(test_data_array)  
len(predicted_2)

In [None]:
# Put the second model's predictions next to the observed test targets and
# plot both series on one chart. This rebinds `dataf`, replacing the frame
# built for the first model.
dataf = pd.DataFrame(predicted_2, columns=["predicted_2"])
dataf["true_value(observed_value)"] = y_sincos_test
dataf.plot()

In [None]:
# Compute the prediction error (predicted minus observed) and plot it.
dataf["difference_2"] = dataf["predicted_2"].sub(dataf["true_value(observed_value)"])
dataf["difference_2"].plot()

In [None]:
# Plot predictions, observed values and the error together (second model).
dataf.plot()

In [None]:
# The prediction error became larger.

In [None]:
# Make the bi-LSTM layer dimension larger than the input data dimension, and larger than in the first model.

from keras.models import Model
from keras.layers.core import Dense, Activation  
from keras.layers import Input,Dense, Dropout, Embedding, LSTM, Bidirectional

# Bivariate model, hence batch_shape=(None, 100, 2):
# any batch size, 100 time steps, 2 variables (sin, cos) per step.
input_layer = Input(batch_shape=(None, 100, 2))

# Third variant: use 800 units per direction in the bidirectional LSTM,
# followed by a single linear output unit.
bilstm_output_layer = Bidirectional(LSTM(800))(input_layer)
dense_output_layer = Dense(1)(bilstm_output_layer)
prediction_layer = Activation("linear")(dense_output_layer)

# The tensors are passed positionally: the keyword names are `input`/`output`
# in Keras 1 but `inputs`/`outputs` in Keras 2 and later, while the positional
# order (inputs first, outputs second) is the same in both.
model_3 = Model(input_layer, prediction_layer)

In [None]:
# Layer-by-layer overview of the 800-unit variant.
model_3.summary()

In [None]:
# Regression setup: mean squared error loss, Adam optimizer.
model_3.compile(loss="mean_squared_error",  optimizer="adam")

In [None]:
from keras.callbacks import EarlyStopping
# Early stopping watches the validation loss with patience=0.
# NOTE(review): presumably this ends training at the first epoch whose
# val_loss does not improve - confirm against the installed Keras version.
early_stopping = EarlyStopping(monitor='val_loss', patience=0)

# Train the third model (800-unit bi-LSTM) with the same data and settings as
# the other two, holding out 5% of the training samples for validation.
# NOTE(review): `nb_epoch` looks like the Keras 1 spelling of the epoch count;
# newer Keras releases may expect `epochs` - confirm before upgrading.
model_3_history_log = model_3.fit(data, y_sincos_train, 
                                  batch_size=600, 
                                  nb_epoch=50, 
                                  validation_split=0.05,
                                  callbacks=[early_stopping])

In [None]:
from matplotlib import pyplot as plt
import seaborn as sns

# Training and validation loss curves of the third model.
loss, val_loss = (model_3_history_log.history[_k] for _k in ('loss', 'val_loss'))

# Number of epochs that were actually run.
nb_epoch = len(loss)

for _curve, _name in zip((loss, val_loss), ('loss', 'val_loss')):
    plt.plot(range(nb_epoch), _curve, marker='.', label=_name)

plt.legend(loc='best', fontsize=10)
plt.grid()

plt.xlabel('epoch')
plt.ylabel('loss')
plt.show()

In [None]:
# Predict the next-step (sin + cos) value for every test window with the third
# model, then show how many predictions were produced.
predicted_3 = model_3.predict(test_data_array)  
len(predicted_3)

In [None]:
# Put the third model's predictions next to the observed test targets and
# plot both series on one chart. This rebinds `dataf`, replacing the frame
# built for the second model.
dataf = pd.DataFrame(predicted_3, columns=["predicted_3"])
dataf["true_value(observed_value)"] = y_sincos_test
dataf.plot()

In [None]:
# Compute the prediction error (predicted minus observed) and plot it.
dataf["difference_3"] = dataf["predicted_3"].sub(dataf["true_value(observed_value)"])
dataf["difference_3"].plot()

In [None]:
# Plot predictions, observed values and the error together (third model).
dataf.plot()

In [None]:
# Summary statistics of the third model's predictions, the observed values and
# the error (dataf was last rebuilt from predicted_3).
dataf.describe()

In [None]:
# First rows of the third model's comparison table.
dataf.head()

In [None]:
# Last rows of the third model's comparison table.
dataf.tail()