In [1]:
import time, warnings
from tensorflow.keras.layers import Dense, Activation, Dropout, LSTM, Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import SGD, Adam
from numpy import newaxis # 차원을 분해한 후 한 단계 추가

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np


In [2]:
# Number of time steps in each input window (the model input shape is (seq_len, 1)).
seq_len = 50
# Number of training epochs configured for this experiment.
epoch = 3

In [3]:
# The file has no header row, so pandas labels the single column 0.
df = pd.read_csv(filepath_or_buffer='sp500.csv', header=None)
df

Unnamed: 0,0
0,1455.219971
1,1399.420044
2,1402.109985
3,1403.449951
4,1441.469971
...,...
4166,2169.179932
4167,2166.580078
4168,2170.060059
4169,2173.600098


In [4]:
def load_data(filename, seq_len, normalize_window):
    """Build sliding-window train/test arrays from a one-value-per-line file.

    Every window holds ``seq_len + 1`` consecutive values: the first
    ``seq_len`` values are the model input and the final value is the target.

    Args:
        filename: Path to a text file containing one numeric value per line.
            Blank lines (e.g. a trailing newline) are ignored.
        seq_len: Number of input time steps per sample.
        normalize_window: If truthy, each window is rescaled with
            ``normalise_windows`` before being split.

    Returns:
        A list ``[x_train, y_train, x_test, y_test]``. The x arrays have shape
        ``(samples, seq_len, 1)`` and the y arrays have shape ``(samples,)``.
        The first 90% of the windows (rounded) form the training set, which is
        shuffled with NumPy's global RNG; the remaining windows form the test
        set and keep their original order.

    Raises:
        ValueError: If the file holds fewer than ``seq_len + 1`` values, or a
            non-blank line cannot be parsed as a float.
    """
    # The context manager closes the handle; parsing to float up front keeps
    # the output numeric even when normalisation is disabled.
    with open(filename, 'r') as f:
        data = [float(line) for line in f.read().splitlines() if line.strip()]

    sequence_length = seq_len + 1
    if len(data) < sequence_length:
        raise ValueError(
            "need at least %d values to build one window, got %d"
            % (sequence_length, len(data))
        )

    # "+ 1" so the window ending on the very last value is included.
    result = [
        data[index: index + sequence_length]
        for index in range(len(data) - sequence_length + 1)
    ]

    if normalize_window:
        result = normalise_windows(result)

    result = np.array(result)  # (n_windows, seq_len + 1), e.g. (4121, 51) in the original run
    row = round(0.9 * result.shape[0])  # index where the test portion starts
    train = result[:row, :]

    # Shuffle only the training rows; `train` is a view, so the test rows
    # stored after `row` are untouched.
    np.random.shuffle(train)
    x_train = train[:, :-1]
    y_train = train[:, -1]
    x_test = result[row:, :-1]
    y_test = result[row:, -1]

    # Add a trailing feature axis: (samples, seq_len) -> (samples, seq_len, 1).
    x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
    x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))

    return [x_train, y_train, x_test, y_test]

def normalise_windows(window_data):
    """Rescale each window to its relative change from the window's first value.

    Args:
        window_data: Iterable of windows; each window is an indexable
            sequence of values accepted by ``float()``.

    Returns:
        A list of lists where every element ``p`` of a window has been
        replaced by ``p / window[0] - 1``, so the first element of each
        non-empty window becomes 0.0.
    """
    def _relative_change(window):
        # window[0] is looked up inside the comprehension on purpose, so an
        # empty window simply yields an empty list.
        return [float(price) / float(window[0]) - 1 for price in window]

    return [_relative_change(window) for window in window_data]

In [5]:
# Build normalised sliding windows and split them into train/test arrays.
X_train, y_train, X_test, y_test = load_data(
    filename='sp500.csv',
    seq_len=seq_len,
    normalize_window=True,
)

## Create model

In [6]:
# Functional-API model: two stacked LSTM layers, each followed by dropout,
# ending in a single-unit Dense layer that outputs one value per sample.
i = Input(shape=(seq_len, 1))
# return_sequences=True so the second LSTM receives the output at every time step.
x = LSTM(50, return_sequences=True)(i)
x = Dropout(0.2)(x)
# return_sequences=False: only the output of the last time step is passed on.
x = LSTM(100, return_sequences=False)(x)
x = Dropout(0.2)(x)
x = Dense(1)(x)
model = Model(i, x)
model.summary()

# Time the compile step; the printed label is Japanese for "execution time".
start = time.time()
model.compile(loss="mse", optimizer='adam')
print(" 実行時間：　", time.time() - start)

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 50, 1)]           0         
_________________________________________________________________
lstm (LSTM)                  (None, 50, 50)            10400     
_________________________________________________________________
dropout (Dropout)            (None, 50, 50)            0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 100)               60400     
_________________________________________________________________
dropout_1 (Dropout)          (None, 100)               0         
_________________________________________________________________
dense (Dense)                (None, 1)                 101       
Total params: 70,901
Trainable params: 70,901
Non-trainable params: 0
_________________________________________________________

In [7]:
# Use the `epoch` constant from the configuration cell rather than repeating the
# literal, so the epoch count is changed in one place. 5% of the training
# samples are held out for validation.
model.fit(X_train, y_train, batch_size=512, epochs=epoch, validation_split=0.05)

Train on 3523 samples, validate on 186 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x645f0ded0>

In [8]:
def predict_sequences_multiple(model, data, window_size, prediction_len):
    """Produce several multi-step forecasts by feeding predictions back in.

    A forecast run starts at every ``prediction_len``-th window of ``data``.
    Within a run the model predicts one step, the oldest step of the window is
    dropped, and the prediction is inserted at index ``window_size - 1`` so it
    is part of the next input.

    Args:
        model: Object with a ``predict`` method; it is called with a batch of
            one window and its result is indexed with ``[0, 0]``.
        data: Indexable collection of 2-D windows (e.g. ``X_test``, where each
            window has shape ``(window_size, 1)``).
        window_size: Length of each window along its first axis.
        prediction_len: Number of steps predicted per run, and the stride
            between the starting windows of consecutive runs.

    Returns:
        A list with ``int(len(data) / prediction_len)`` entries, each a list
        of ``prediction_len`` predicted values.
    """
    run_count = int(len(data) / prediction_len)
    all_runs = []
    for run in range(run_count):
        frame = data[run * prediction_len]
        forecast = []
        for _ in range(prediction_len):
            next_value = model.predict(frame[np.newaxis, :, :])[0, 0]
            forecast.append(next_value)
            # Slide the window: drop the oldest step, then insert the newest
            # prediction at index window_size - 1.
            frame = np.insert(frame[1:], [window_size - 1], next_value, axis=0)
        all_runs.append(forecast)
    return all_runs

In [9]:
# Each test window, e.g. X_test[0], has shape (50, 1).
predictions = predict_sequences_multiple(
    model=model,
    data=X_test,
    window_size=seq_len,
    prediction_len=50,
)

In [10]:
def plot_results_multiple(predicted_data, true_data, prediction_len):
    """Plot the true series with each forecast run drawn at its own offset.

    Args:
        predicted_data: Sequence of forecast runs; run ``i`` is drawn starting
            at x position ``i * prediction_len``.
        true_data: Series plotted as the reference curve.
        prediction_len: Horizontal offset between consecutive forecast runs.
    """
    fig = plt.figure(facecolor='white', figsize=(12, 12))
    ax = fig.add_subplot(1, 1, 1)  # the only subplot of a 1 x 1 grid
    ax.plot(true_data, label='True Data')
    for i, data in enumerate(predicted_data):
        # Leading None entries shift the run right so it lines up with the
        # part of the true series it forecasts.
        padding = [None] * (i * prediction_len)
        # Each run is drawn once, at its offset. Only the first run is
        # labelled, so the legend has a single 'Prediction' entry.
        ax.plot(padding + list(data), label='Prediction' if i == 0 else None)
    # Build the legend once, after all curves have been added.
    ax.legend()
    plt.show()

In [None]:
# Overlay the forecast runs on the true test targets.
plot_results_multiple(
    predicted_data=predictions,
    true_data=y_test,
    prediction_len=50,
)