In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import quandl

# Prefer a key supplied through the QUANDL_API_KEY environment variable so the
# real credential does not have to be written into the notebook; the original
# placeholder literal is kept as the fallback.
quandl.ApiConfig.api_key = os.environ.get('QUANDL_API_KEY', 'API_KEY')
TICKERS = ['AMZN', 'MSFT', 'DIS', 'KO']  # AMAZON, MICROSOFT, DISNEY, COCA-COLA
ticker = 'DIS'

try:
    stock = quandl.get('%s/%s' % ('WIKI', ticker))
except Exception as e:
    print('Error Retrieving Data.')
    print(e)
    # Re-raise: every later statement needs `stock`, so continuing would only
    # replace the real error with a NameError on the next line.
    raise

# Move the date index into a regular 'Date' column.
stock = stock.reset_index()

min_date = stock['Date'].min()
max_date = stock['Date'].max()
print('{} Stocker initialized. Data covers {} to {}'.format(ticker, min_date.date(), max_date.date()))
stock.head()


In [None]:
# 1. Keep only the date and the adjusted close price columns of the stock data.
stock = stock.loc[:, ['Date', 'Adj. Close']]
stock.head()

In [None]:
##### Exploratory Visualization
### Draw the whole stock series once.
fig, ax = plt.subplots(nrows=1, ncols=1)
ax.plot(
    np.array(stock)[:, 0],  # dates (first column)
    np.array(stock)[:, 1],  # prices (second column)
    'c',
    linewidth=1.4,
    label='whole data',
)
ax.legend(loc=2, prop={'size': 10})
ax.set_xlabel('Date')
ax.set_ylabel('Price $')
ax.grid(linewidth=0.6, alpha=0.6)
ax.set_title('{} stock price'.format(ticker))
plt.show()

In [None]:
# 2. Build the dataset.
# Number of consecutive prices fed to the model for each sample.
WINDOW_SIZE = 245
# Date boundaries of the training, validation and test periods.
TRAINING_START_DATE = '2013-01-01'
TRAINING_END_DATE = '2016-12-31'
VALIDATION_START_DATE = '2017-01-01'
VALIDATION_END_DATE = '2017-12-31'
TEST_START_DATE = '2018-01-01'

# Rows strictly between the training start and end dates (both bounds exclusive).
train_mask = (TRAINING_START_DATE < stock['Date']) & (TRAINING_END_DATE > stock['Date'])
train = np.array(stock[train_mask])

def makeDataset(data, window_size):
    """Build sliding-window samples of prices normalized to each window's first price.

    For every start position ``i`` the sample input is the ``window_size``
    values ``data[i][1] ... data[i + window_size - 1][1]`` and the target is
    ``data[i + window_size][1]``; each value ``p`` is expressed as
    ``p / data[i][1] - 1``.

    Args:
        data: Sequence of rows (list of lists or 2-D array) whose element at
            index 1 is the numeric value to use; other columns are ignored.
        window_size: Number of consecutive values in each input sample.

    Returns:
        Tuple ``(dataset_x, dataset_y)`` of float arrays with shapes
        ``(n_samples, window_size)`` and ``(n_samples, 1)``, where
        ``n_samples = max(len(data) - window_size, 0)``. When there are not
        enough rows for a single sample the arrays are empty but keep those
        2-D shapes, so a caller reading ``shape[1]`` does not fail.

    Raises:
        ZeroDivisionError: If the first value of any window is zero.
    """
    prices = np.asarray([row[1] for row in data], dtype=float)
    n_samples = len(prices) - window_size

    if n_samples <= 0:
        # Too few rows for even one (window, target) pair.
        return np.empty((0, window_size)), np.empty((0, 1))

    dataset_x = np.empty((n_samples, window_size))
    dataset_y = np.empty((n_samples, 1))

    for i in range(n_samples):
        base = prices[i]
        if base == 0:
            # Plain float division raises here; numpy would silently yield inf/nan.
            raise ZeroDivisionError('float division by zero')
        dataset_x[i] = prices[i:i + window_size] / base - 1
        dataset_y[i, 0] = prices[i + window_size] / base - 1

    return dataset_x, dataset_y

# Turn the training rows into (window, target) samples and report their shapes.
x_train, y_train = makeDataset(data=train, window_size=WINDOW_SIZE)

print(x_train.shape, y_train.shape, sep='\n')


In [None]:
# 3. Build the model (LSTM)
from keras.models import Sequential
from keras.layers import Dense, LSTM

# Number of features per time step (a single price value).
FEATURE_LENGTH = 1

# Stateful LSTM with a fixed batch of one window, followed by a single linear output.
model = Sequential([
    LSTM(128, batch_input_shape=(1, WINDOW_SIZE, FEATURE_LENGTH), stateful=True),
    Dense(1, activation='linear'),
])

In [None]:
# 4. Configure the training process: mean squared error loss, Adam optimizer.
model.compile(optimizer='adam', loss='mse')

In [None]:
# Append a trailing feature axis of length 1: (samples, window) -> (samples, window, 1).
x_train = x_train.reshape(x_train.shape[:2] + (1,))
x_train.shape

In [None]:
# 5. Train the model
import keras
from keras.callbacks import EarlyStopping

# Number of training passes; the fit loop iterates over range(num_epochs).
num_epochs = 8


class LossHistory(keras.callbacks.Callback):
    """Keras callback that collects the 'loss' value reported at each epoch end."""

    def __init__(self):
        super().__init__()
        # One entry per on_epoch_end call, in call order.
        self.losses = []

    def on_epoch_end(self, batch, logs=None):
        """Append ``logs['loss']`` to ``self.losses`` (``None`` when it is missing).

        The first positional argument is not used by this callback.
        """
        epoch_logs = {} if logs is None else logs
        self.losses.append(epoch_logs.get('loss'))


history = LossHistory()
# Keras' EarlyStopping is deliberately not used: each fit() call below runs a
# single epoch and restarts the callback's patience counter, so it could never
# trigger. Patience is tracked across the loop instead.
callbacks = [history]

early_stopping_patience = 4  # epochs without a lower training loss before stopping
best_loss = float('inf')
epochs_without_improvement = 0

for epoch_idx in range(num_epochs):
    print('epochs: {}/{}'.format(str(epoch_idx + 1), str(num_epochs)))
    # shuffle=False: a stateful LSTM carries its state from one batch to the
    # next, so batches must be presented in their original order.
    model.fit(x_train, y_train, epochs=1, batch_size=1, verbose=2, shuffle=False, callbacks=callbacks)
    # Start the next pass over the data from a clean recurrent state.
    model.reset_states()

    epoch_loss = history.losses[-1]
    if epoch_loss is not None and epoch_loss < best_loss:
        best_loss = epoch_loss
        epochs_without_improvement = 0
    else:
        epochs_without_improvement += 1
        if epochs_without_improvement >= early_stopping_patience:
            print('early stopping: loss did not improve for {} epochs'.format(early_stopping_patience))
            break

In [None]:
# 6. 학습 과정 살펴보기 - matplotlib
%matplotlib inline
plt.plot(history.losses)
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train'], loc='upper right')
plt.show()

In [None]:
# 7. Evaluate the model
# Rows strictly between the validation start and end dates (both bounds exclusive).
validation_mask = (VALIDATION_START_DATE < stock['Date']) & (VALIDATION_END_DATE > stock['Date'])
validation = np.array(stock[validation_mask])
x_validation, y_validation = makeDataset(validation, WINDOW_SIZE)
# Add the trailing feature axis expected by the LSTM input.
x_validation = x_validation.reshape(x_validation.shape[0], x_validation.shape[1], 1)
validation_score = model.evaluate(x_validation, y_validation, batch_size=1)
print('validation score : {} MSE'.format(validation_score))
# Clear the recurrent state accumulated during evaluation.
model.reset_states()

In [None]:
# 7.5 The model has to be saved at this point.
model.save(filepath='{}_lstm_model.h5'.format(ticker))


In [None]:
# Test on this year's data (test_set: 2018-01-01 ~ ).
test_mask = (stock['Date'] > TEST_START_DATE)
test = np.array(stock[test_mask])

before_test_mask = (stock['Date'] < TEST_START_DATE)
before_test = np.array(stock[before_test_mask])

# Seed window: the last WINDOW_SIZE prices before the test period, stored as a
# list of one-element lists so predictions can be appended in the same form.
test_seq_in = np.array(before_test[-WINDOW_SIZE:])[:, [1]].tolist()
test_start_day = test_seq_in[-1][0]

# One prediction per test row; each prediction is fed back into the window.
test_seq_out = []
for i in range(len(test)):
    first_test_seq_in_value = test_seq_in[0][0]
    # Same normalization as the training samples: relative to the window's first price.
    normalized_test_seq_in = [
        entry[0] / float(first_test_seq_in_value) - 1 for entry in test_seq_in
    ]
    test_sample_in = np.array(normalized_test_seq_in).reshape(1, WINDOW_SIZE, FEATURE_LENGTH)
    test_pred_out_normalized = model.predict(test_sample_in)
    # Undo the normalization to get back a price.
    test_pred_out = first_test_seq_in_value * (test_pred_out_normalized[0][0] + 1)
    test_seq_out.append(test_pred_out)
    # Slide the window: drop the oldest value, append the new prediction.
    test_seq_in = test_seq_in[1:] + [[test_pred_out]]
model.reset_states()


In [None]:
# `sklearn.cross_validation` was removed from scikit-learn; train_test_split
# lives in `sklearn.model_selection`.
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

### Linear regression forecast
# Forecast horizon: one predicted price per row of the test period.
offset = test.shape[0]

# Use every row from the training start up to (not including) the test period.
# The last `offset` rows then forecast exactly the test dates they are plotted
# against; ending at the training period would forecast early 2017 instead.
mask = (stock['Date'] > TRAINING_START_DATE) & (stock['Date'] < TEST_START_DATE)
# Copy so that adding a column does not write into a slice of `stock`.
df = stock.loc[mask].copy()
# Target: the adjusted close `offset` rows later (NaN for the final `offset` rows).
df['Prediction'] = df['Adj. Close'].shift(-offset)
X = np.array(df.drop(columns=['Prediction', 'Date']))

# The final `offset` rows have no known target; they are the forecast inputs.
X_forecast = X[-offset:]
X = X[:-offset]

y = np.array(df['Prediction'])
y = y[:-offset]

# Prefixed names keep this split from overwriting the LSTM's `y_train`.
lr_X_train, lr_X_test, lr_y_train, lr_y_test = train_test_split(X, y, test_size=0.2)

# Training
clf = LinearRegression()
clf.fit(lr_X_train, lr_y_train)

# Testing
confidence = clf.score(lr_X_test, lr_y_test)
print("confidence: ", confidence)
test_pred = clf.predict(X_forecast)

In [None]:

#### Show the test-period predictions next to the actual prices.
# Compare against a Timestamp rather than a datetime.date: ordering a
# datetime64 column against a plain date raises on current pandas. The stored
# dates carry no time of day, so the selected rows are unchanged.
one_year_ago = stock['Date'].max() - pd.DateOffset(years=1)
recent_year_data = np.array(stock[stock['Date'] > one_year_ago])

fig, ax = plt.subplots(1, 1)
ax.plot(recent_year_data[:, 0], recent_year_data[:, 1], 'c', linewidth=1.4, label='Last 1 year')

# Actual test prices and the two forecasts, all on the test dates.
ax.plot(test[:, 0], test[:, 1], 'c', linewidth=2.4, label='Real value')
ax.plot(test[:, 0], test_seq_out, 'm', linewidth=2.4, label='LSTM Prediction')
ax.plot(test[:, 0], test_pred, 'y', linewidth=2.4, label='Linear Regression Prediction')
plt.legend(loc=2, prop={'size': 10})
plt.xlabel('Date')
plt.ylabel('Price $')
plt.grid(linewidth=0.6, alpha=0.6)
plt.title('Predict test set Stock Price of {}'.format(ticker))
plt.show()

In [None]:
# 8. Predict
future_days = 20
# Seed window: the most recent WINDOW_SIZE prices, as a list of one-element lists.
seq_in = np.array(stock[-WINDOW_SIZE:])[:, [1]].tolist()
today = seq_in[-1][0]

# Each prediction is appended to the window and used for the following step.
seq_out = []
for i in range(future_days):
    first_seq_in_value = seq_in[0][0]
    # Normalize relative to the first price in the window, as in training.
    normalized_seq_in = [
        entry[0] / float(first_seq_in_value) - 1 for entry in seq_in
    ]
    sample_in = np.array(normalized_seq_in).reshape(1, WINDOW_SIZE, FEATURE_LENGTH)
    pred_out_normalized = model.predict(sample_in)
    # Convert the normalized prediction back into a price.
    pred_out = first_seq_in_value * (pred_out_normalized[0][0] + 1)
    seq_out.append(pred_out)
    # Slide the window forward by one step.
    seq_in = seq_in[1:] + [[pred_out]]

model.reset_states()

print("today's price: {:0.2f}".format(today))
print('full prediction is : ', seq_out)
print('{} days after today price is {:0.2f}'.format(future_days, seq_out[-1].item()))

In [None]:
# Compare against a Timestamp rather than a datetime.date: ordering a
# datetime64 column against a plain date raises on current pandas.
one_year_ago = stock['Date'].max() - pd.DateOffset(years=1)
recent_year_data = np.array(stock[stock['Date'] > one_year_ago])

fig, ax = plt.subplots(1, 1)
ax.plot(recent_year_data[:, 0], recent_year_data[:, 1], 'c', linewidth=1.4, label='Recent')

last_date = recent_year_data[-1, 0]
periods = future_days
# Each forecast step is one row of price data ahead, so place the predictions
# on business days instead of calendar days. Starting at the next business day
# yields exactly `periods` dates, matching len(seq_out). Exchange holidays are
# not excluded.
dates = pd.bdate_range(start=last_date + pd.offsets.BDay(1), periods=periods)
future_data_frame = pd.DataFrame({'date': dates})
ax.plot(future_data_frame['date'], seq_out, 'm', linewidth=2.4, label='LSTM Predict')
plt.legend(loc=2, prop={'size': 10})
plt.xlabel('Date')
plt.ylabel('Price $')
plt.grid(linewidth=0.6, alpha=0.6)
plt.title('Historical and Predicted Stock Price of {}'.format(ticker))
plt.show()
