In [None]:
%matplotlib inline
import chainer
from chainer import optimizers, cuda, serializers
import chainer.functions as F
import numpy as np
import csv
import math
import matplotlib.pyplot as plt
import time
import datetime
from net import Net, MLP

In [None]:
# --- Training hyper-parameters -------------------------------------------
# Number of epochs to train for.
n_epoch = 4000000
# Number of sequences processed in parallel in each mini-batch.
batchsize = 5000
# Print a progress line (error, speed, ETA) once every this many epochs.
printout_interval = 100000

# Number of iterations between backward passes, i.e. the truncation length
# of back-propagation through time.
bprop_len = 100
# Number of units in the hidden layer.
n_units = 20
# Threshold handed to the GradientClipping optimizer hook in the training cell.
grad_clip = 5
# Whether to run on the GPU (arrays are then created through cupy).
use_gpu = True

In [None]:
# Pick the array module once: cupy when running on the GPU, NumPy otherwise.
# The `is True` identity check is kept on purpose so that only the literal
# True selects the GPU backend.
if use_gpu is True:
    xp = cuda.cupy
else:
    xp = np

In [None]:
# Load the 15-minute Nikkei CSV and build the train/test arrays.
# Columns used (0-based), per the original author's note:
#   7  -> moving-average deviation rate (input)
#   10 -> tweet count (input)
#   9  -> "ceiling degree" (regression target)
raw_rows = []
# The context manager guarantees the file handle is closed, even if a row
# fails to parse.
with open('../data/result_nikkei15min.csv', 'r') as f:
    for row in csv.reader(f, delimiter=','):
        # Skip rows where the target or the deviation rate is missing.
        # NOTE(review): an empty tweet-count field (column 10) is not filtered
        # and would make the float conversion below fail - confirm the data
        # never contains one.
        if row[9] == "" or row[7] == "":
            continue
        raw_rows.append([row[7], row[10], row[9]])
raw_data = np.asarray(raw_rows, dtype=np.float32)

data_x = raw_data[:, 0:2]  # the two input features
data_y = raw_data[:, 2:3]  # the target, kept 2-D (one column)

# Chronological 80/20 split around a single index. Slicing both halves from
# the same index means no row is dropped between them, and the test slice can
# never degenerate to `[-0:]` (i.e. the whole array) on very small inputs.
n_train = int(len(data_x) * 0.8)
train_data_x, test_data_x = data_x[:n_train], data_x[n_train:]
train_data_y, test_data_y = data_y[:n_train], data_y[n_train:]

# Quick visual sanity check on the first 500 training rows.
plt.plot(train_data_x[:500])
plt.plot(train_data_y[:500])

print(train_data_x.shape)
print(train_data_y.shape)
print(test_data_x.shape)
print(test_data_y.shape)

In [None]:
# Plot the held-out inputs and target with a legend so the three curves can be
# told apart. Column meanings follow the note in the data-loading cell
# (column 0: moving-average deviation rate, column 1: tweet count).
plt.plot(test_data_x[:, 0], label='moving-average deviation')
plt.plot(test_data_x[:, 1], label='tweet count')
plt.plot(test_data_y[:, 0], label='target')
plt.legend()

In [None]:
# Helper that runs the network on a data set and reports its error.
def evaluate(model, x, y):
    """Run `model` on `x` and measure the mean squared error against `y`.

    The prediction is made on a copy of the model whose recurrent state is
    reset first, presumably so that evaluating in the middle of training does
    not disturb the state of the model being trained.

    Args:
        model: network exposing `copy()`, `reset_state()` and `predict(x)`.
        x: input variable passed to `predict`.
        y: target variable compared against the prediction.

    Returns:
        A tuple `(output, error)`, both moved to host memory: the raw
        prediction array and the mean-squared-error value.
    """
    evaluator = model.copy()
    evaluator.reset_state()
    out = evaluator.predict(x)
    error = F.mean_squared_error(out, y)
    # Move the error to the CPU as well as the output: callers collect it in
    # plain Python lists and hand it to matplotlib, which expects host data.
    return chainer.cuda.to_cpu(out.data), chainer.cuda.to_cpu(error.data)

In [None]:
# --- Model and optimizer ---------------------------------------------------
# Per the original author's note the arguments are the input size and the
# hidden size; the trailing 1 is presumably the output size (Net lives in
# net.py - confirm there).
model = Net(2, n_units, 1)
if use_gpu is True:
    model.to_gpu()
optimizer = optimizers.Adam()
optimizer.setup(model)
optimizer.add_hook(chainer.optimizer.GradientClipping(grad_clip))

# --- Bookkeeping -----------------------------------------------------------
loss = []       # per-epoch error on the full training set
test_loss = []  # per-epoch error on the test set
length = len(train_data_x)
jump = length // batchsize  # iterations per epoch
if jump == 0:
    # With fewer rows than batchsize the loop below would run zero times and
    # silently leave the model untrained.
    raise ValueError(
        'training set has {} rows but batchsize is {}; nothing would be trained'
        .format(length, batchsize))
batch_idxs = list(range(batchsize))
accum_loss = 0  # loss summed since the last backward pass
epoch = 0
loss_data = 0   # per-step losses summed over the current epoch (for logging)
# Mini-batches are identical in every epoch, so each one is built once and
# reused afterwards.
x_cache = []
y_cache = []
x_chainer_variable = chainer.Variable(xp.asarray(train_data_x, dtype=np.float32))
y_chainer_variable = chainer.Variable(xp.asarray(train_data_y, dtype=np.float32))
test_data_x_chainer_variable = chainer.Variable(xp.asarray(test_data_x, dtype=np.float32))
test_data_y_chainer_variable = chainer.Variable(xp.asarray(test_data_y, dtype=np.float32))
prev_time = time.time()

# --- Training loop ---------------------------------------------------------
for i in range(jump * n_epoch):
    pos = i % jump  # position inside the current epoch
    if len(x_cache) <= pos:
        # Row j of the batch reads sample (jump * j + i) of the training data,
        # so consecutive iterations advance every row by one time step.
        x_cache.append(chainer.Variable(xp.asarray([train_data_x[(jump * j + i) % length] for j in batch_idxs])))
        y_cache.append(chainer.Variable(xp.asarray([train_data_y[(jump * j + i) % length] for j in batch_idxs])))
    x = x_cache[pos]
    y = y_cache[pos]

    loss_i = model(x, y)
    accum_loss += loss_i
    # Log this step's loss only. Adding accum_loss.data here would count every
    # earlier step of the current truncation window again.
    loss_data += loss_i.data

    if (i + 1) % jump == 0:
        # End of an epoch: report progress, record both errors, reset state.
        epoch += 1
        if epoch % printout_interval == 0:
            now = time.time()
            elapsed_time = now - prev_time
            prev_time = now
            speed = printout_interval / elapsed_time
            # Truncate to whole seconds. timedelta.seconds must not be used
            # for this: it holds only the sub-day part and drops whole days.
            eta = datetime.timedelta(seconds=int((n_epoch - epoch) / speed))
            print('epoch {0}, error {1}, {2:.2f} epoch/s, eta {3}'.format(epoch, loss_data * len(x) / length, speed, eta))
        loss.append(evaluate(model, x_chainer_variable, y_chainer_variable)[1])
        test_loss.append(evaluate(model, test_data_x_chainer_variable, test_data_y_chainer_variable)[1])
        loss_data = 0
        model.reset_state()

    if (i + 1) % bprop_len == 0:
        # Truncated back-propagation: back-propagate the accumulated loss, cut
        # the graph so older steps are released, then update the parameters.
        model.zerograds()
        accum_loss.backward()
        accum_loss.unchain_backward()
        accum_loss = 0
        optimizer.update()

In [None]:
# Save the model: serialize it to the file 'my.model' with
# chainer.serializers.save_npz.
serializers.save_npz('my.model', model)

In [None]:
# Learning curves on a linear scale: per-epoch error on the training set and
# on the test set, one labeled line each.
for series, series_label in ((loss, 'error'), (test_loss, 'test_error')):
    plt.plot(series, label=series_label)
plt.legend()

In [None]:
# Same learning curves on a logarithmic y-axis, labeled like the linear-scale
# plot so the two figures read the same way.
plt.plot(loss, label='error')
plt.plot(test_loss, label='test_error')
plt.yscale('log')
plt.legend()

In [None]:
# Run the trained model over the whole training set; `output` is reused by
# the plotting cell that follows.
output, error = evaluate(
    model,
    x_chainer_variable,
    y_chainer_variable,
)

print(error)

In [None]:
# Plot the model output on the training set next to the first input feature.
# NOTE(review): the second curve is the input column train_data_x[:, 0], not
# the target train_data_y - confirm this comparison is the intended one.
plt.plot(output, label='prediction')
plt.plot(train_data_x[:, 0], label='train_data_x[:, 0]')
plt.legend()
print(len(output))
print(train_data_x.shape)

In [None]:
# Run the trained model over the held-out test set and report its error;
# `output` is reused by the plotting cell that follows.
output, error = evaluate(
    model,
    test_data_x_chainer_variable,
    test_data_y_chainer_variable,
)
print('test error = {}'.format(error))

In [None]:
# Compare prediction and target on one 300-sample window of the test set.
window = slice(300, 600)
plt.plot(output[window, 0], label='prediction')
plt.plot(test_data_y[window, 0], label='target')
plt.legend()