In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import csv

from __future__ import print_function
from keras.layers.core import Activation
from keras.layers.core import Dense
from keras.layers.core import Dropout
from keras.models import Sequential
from keras.utils import np_utils
from keras.utils import plot_model

from keras.layers.recurrent import LSTM
from keras.callbacks import EarlyStopping
from keras.initializers import glorot_uniform
from keras.initializers import orthogonal
from keras.initializers import TruncatedNormal

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


### CSVファイル読み込み

In [2]:
# Training data.
# Read every row of the CSV as a list of strings. The file is opened in a
# `with` block so the handle is closed deterministically, and with
# newline='' as the csv module documentation recommends.
with open('marketdata.csv', 'r', encoding="utf-8", newline='') as f:
  df1 = csv.reader(f)
  data1 = [ v for v in df1]

mat = np.array(data1)
mat2 = mat[1:]                        # drop the header row
# Take the 2nd column onward and convert to float. The builtin `float` is
# used because the `np.float` alias was removed in NumPy 1.24.
x_data = mat2[:, 1:].astype(float)
print('x_data.shape=', x_data.shape)

# Label data.
# Per the original note, the classes are:
#   >= 1% / >= 0% / >= -1% / < -1%
with open('marketdata_onehot.csv', 'r', encoding="utf-8", newline='') as f:
  df2 = csv.reader(f)
  data2 = [ v for v in df2]
mat3 = np.array(data2)
mat4 = mat3[1:]                       # drop the header row
t_data = mat4[:, 1:].astype(float)    # 2nd column onward, converted to float
print('t_data.shape=', t_data.shape)

x_data.shape= (4211, 1)
t_data.shape= (4211, 4)


In [3]:
# --- Sliding-window configuration ---------------------------------------
maxlen = 80                # number of time steps in each input window
n_in = x_data.shape[1]     # number of columns in the input data
print("n_in:", n_in)
n_out = t_data.shape[1]    # number of columns in the label data
print("n_out:", n_out)
len_seq = x_data.shape[0] - maxlen + 1   # count of full-length windows
print("x_data.shape[0]:", x_data.shape[0])
print("len_seq:", len_seq)

# Each window starts one row after the previous one and spans `maxlen` rows.
data = [x_data[start:start + maxlen, :] for start in range(len_seq)]
# The target for a window is the label row aligned with the window's last row.
target = [t_data[start + maxlen - 1, :] for start in range(len_seq)]

print("len(data):", len(data))
x = np.array(data).reshape(len(data), maxlen, n_in)
t = np.array(target).reshape(len(data), n_out)

print(x.shape, t.shape)

# --- Train / test split (second half of the original source) ------------
# The first 90% of the windows, in order, are used for training and the
# remainder for testing.
n_train = int(len(data) * 0.9)
x_train, x_test = x[:n_train], x[n_train:]
t_train, t_test = t[:n_train], t[n_train:]

print(x_train.shape, x_test.shape, t_train.shape, t_test.shape)

n_in: 1
n_out: 4
x_data.shape[0]: 4211
len_seq: 4132
len(data): 4132
(4132, 80, 1) (4132, 4)
(3718, 80, 1) (414, 80, 1) (3718, 4) (414, 4)


In [4]:
class Prediction:
  """Builds and trains a single-layer LSTM classifier with a softmax output.

  Attributes:
    maxlen: number of time steps in each input sequence.
    n_hidden: number of units in the LSTM layer.
    n_in: number of features per time step.
    n_out: number of units in the final Dense layer (output classes).
  """

  def __init__(self, maxlen, n_hidden, n_in, n_out):
    """Store the network dimensions; no model is built here."""
    self.maxlen, self.n_hidden = maxlen, n_hidden
    self.n_in, self.n_out = n_in, n_out

  def create_model(self):
    """Assemble and compile the network.

    The stack is LSTM -> Dropout -> Dense -> softmax, compiled with
    categorical cross-entropy loss, the RMSprop optimizer and the
    categorical-accuracy metric.

    Returns:
      The compiled Sequential model.
    """
    seed = 20170719  # fixed seed shared by every weight initializer
    layers = [
      LSTM(
        self.n_hidden,
        batch_input_shape=(None, self.maxlen, self.n_in),
        kernel_initializer=glorot_uniform(seed=seed),
        recurrent_initializer=orthogonal(gain=1.0, seed=seed),
        dropout=0.5,
        recurrent_dropout=0.5,
      ),
      Dropout(0.5),
      Dense(self.n_out, kernel_initializer=glorot_uniform(seed=seed)),
      Activation("softmax"),
    ]

    model = Sequential()
    for layer in layers:
      model.add(layer)

    model.compile(
      loss="categorical_crossentropy",
      optimizer="RMSprop",
      metrics=['categorical_accuracy'],
    )
    return model

  # Training
  def train(self, x_train, t_train, batch_size, epochs):
    """Create a fresh model and fit it to the given data.

    10% of the supplied data is held out for validation
    (validation_split=0.1), and an EarlyStopping callback with patience=0
    is attached.

    Args:
      x_train: input sequences passed to `model.fit`.
      t_train: target labels passed to `model.fit`.
      batch_size: mini-batch size.
      epochs: maximum number of epochs.

    Returns:
      The fitted model.
    """
    model = self.create_model()
    stopper = EarlyStopping(patience=0, verbose=1)
    model.fit(
      x_train,
      t_train,
      batch_size=batch_size,
      epochs=epochs,
      verbose=1,
      shuffle=True,
      callbacks=[stopper],
      validation_split=0.1,
    )
    return model

In [5]:
n_hidden = 80     # number of LSTM units
epochs = 100      # maximum number of epochs
batch_size = 10   # mini-batch size

# Define the model wrapper, then train.
prediction = Prediction(maxlen, n_hidden, n_in, n_out)
model = prediction.train(x_train, t_train, batch_size, epochs)

# Evaluate on the held-out test windows.
score = model.evaluate(x_test, t_test, batch_size = batch_size, verbose = 1)
print("score:", score)

# Tally the accuracy and the "semi" accuracy.
preds = model.predict(x_test)
pred_classes = np.argmax(preds, axis=1)
true_classes = np.argmax(t_test, axis=1)

exact_hits = pred_classes == true_classes
# A miss counts as semi-correct when the two class indices sum to 1 or 5,
# i.e. the pair is (0, 1) or (2, 3).
# NOTE(review): presumably classes 0/1 are "up" and 2/3 are "down", so this
# measures same-direction misses; confirm against the label file's columns.
class_sums = pred_classes + true_classes
direction_hits = ~exact_hits & ((class_sums == 1) | (class_sums == 5))

# Cast to plain ints so the ratios below are ordinary Python floats.
correct = int(exact_hits.sum())
semi_correct = int(direction_hits.sum())

print("正答率:", 1.0 * correct / len(preds))
print("準正答率（騰落）:", 1.0 * (correct+semi_correct) / len(preds))

Train on 3346 samples, validate on 372 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 00005: early stopping
正答率: 0.8188405797101449
準正答率（騰落）: 0.9734299516908212
