In [3]:
import numpy as np
import matplotlib.pyplot as plt
from keras import backend
import capstone_support as cs
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt
import datetime
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import MinMaxScaler

# The stock we are interested in
stock_name = "QCOM"
# Data set includes stock prices beginning at start_date
stock_start_date = '2006-01-01'
# Number of historical days (M) to use for the prediction
# In baseline model, this is used to generate the prediction
# In lstm, this is an input sequence
M_historical_days = 10
# Predict for N days ahead
N_predict_days = 14
# Splits to use for TimeSeriesSplit
timeSeriesSplitCount = 10

def rmse_score(predicted, true_label):
    return math.sqrt(mean_squared_error(true_label, predicted))

def rmse_metric(y, y_pred):
    return backend.sqrt(backend.mean(backend.square(y_pred - y), axis=-1))

def prepareLSTMModelData(stockdf, timesteps):
    num_samples = stockdf.shape[0] - (timesteps)
    num_features = stockdf.shape[1]
    X = np.zeros((num_samples, timesteps, num_features))
    y = np.zeros((num_samples))
    
    for i in range(num_samples):
        y_pos = i + timesteps
        X[i] = stockdf[i:y_pos]
        y[i] = stockdf['Adj. Close'][y_pos]

    return (X, y)

def validateLSTMModel(ax, df, m_days, n_days, splits, dates, \
                      neurons=20, num_epochs=1, mBatches=1, mStateFul=False, lossFn='mae', modelOptimizer='adam'):
    tsSplit = TimeSeriesSplit(n_splits=splits)
    lstm_scores = []
    for train_index, test_index in tsSplit.split(df):
        scaler_train = MinMaxScaler(feature_range=(-1, 1))
        scaler_test = MinMaxScaler(feature_range=(-1, 1))
        train, test = df[0:len(train_index)], df[1+len(train_index):1+len(train_index)+len(test_index)]
        train_scaled = scaler_train.fit_transform(train[df.columns])
        train_scaled = pd.DataFrame(train_scaled, columns=df.columns)
        test_scaled = scaler_test.fit_transform(test[df.columns])
        test_scaled = pd.DataFrame(test_scaled, columns=df.columns)

        X_train, y_train = prepareLSTMModelData(train_scaled, m_days)
        X_test, y_test = prepareLSTMModelData(test_scaled, m_days)
        test_dates = dates[1+len(train_index)+m_days:1+len(train_index)+len(test_index)]

        print("X train {} and test {}".format(X_train.shape, X_test.shape))
        print("Y train {} and test {}".format(y_train.shape, y_test.shape))
        print("Test dates {}".format(test_dates.shape))
        model = Sequential()
        if (mStateFul == True):
            model.add(LSTM(neurons, input_shape=(X_train.shape[1], X_train.shape[2]),\
                    return_sequences=True, \
                    stateful=True, batch_size=mBatches))
        else:
            model.add(LSTM(neurons, input_shape=(X_train.shape[1], X_train.shape[2]),\
                       return_sequences=True))
        #model.add(LSTM(20, return_sequences=True))
        model.add(LSTM(neurons))
        model.add(Dropout(0.2))
        model.add(Dense(1))
        model.compile(loss=lossFn, optimizer=modelOptimizer, metrics=[rmse_metric])
        print(model.summary())
        history = None
        for i in range(num_epochs):
            history = model.fit(X_train, y_train, #validation_data=(X_test, y_test),
                  epochs=1, batch_size=mBatches, verbose=2)
            model.reset_states()
        print(history.history.keys())
        predicted = model.predict(X_test, batch_size=1)

        y_test = y_test.reshape(-1,1)
        print("Reshaped predicted {} and y_test {}".format(predicted.shape, y_test.shape))

        y_pred = scaler_test.inverse_transform(predicted)
        y_pred_df = pd.DataFrame(y_pred, columns=["y_pred"])
        y_actual = scaler_test.inverse_transform(y_test)
        y_actual_df = pd.DataFrame(y_actual, columns=["y"])
        labels_df = pd.concat([y_pred_df, y_actual_df], axis=1)
        print("test_dates shape {} vs y_pred shape {} vs y_test shape {}" \
              .format(test_dates.shape, y_pred.shape, y_test.shape))

        print(labels_df[:5])
        print(predicted[:5])
        print(y_test[:5])

        if ax is not None:
            print("Plot graph here")
            cs.plot_graph(ax[len(lstm_scores)], test_dates, y_pred, y_actual)
            # summarize history for loss
            #ax[len(lstm_scores)].plot(history.history['rmse_metric'])
            #ax[len(lstm_scores)].plot(history.history['val_rmse_metric'])

            #ax[len(lstm_scores)].plot(history.history['mean_squared_error'])
            #ax[len(lstm_scores)].plot(history.history['val_mean_squared_error'])
            #         ax[len(lstm_scores)].title('model loss')
            #         ax[len(lstm_scores)].ylabel('loss')
            #         ax[len(lstm_scores)].xlabel('epoch')
            #         ax[len(lstm_scores)].legend(['train', 'test'], loc='upper left')
        lstm_scores.append(rmse_score(y_pred_df['y_pred'], y_actual_df['y']))
    print(lstm_scores)
    return lstm_scores

def run_lstm(stock_data, split_count, M_history, N_predict):
    plt.close('all')
    fig, ax = plt.subplots(1, split_count, figsize=(14,8))
    stock_frame = stock_data[:100]
    stock_dates = stock_frame.index.values
    validateLSTMModel(ax, stock_frame, M_history, N_predict, split_count, stock_dates, 32, 5, 20, False)
    plt.show()

def experiment_lstm_parameters(stock_data):
    stock_frame = stock_data
    stock_dates = stock_frame.index.values
    dataSplits = 2
    experiment_df = pd.DataFrame()
    historyList = [14, 30, 60]
    neuronList = [1, 5, 10, 30]
    statefulList = [False, True]
    epochList = [1, 10, 20, 30, 50]
    lossFnList = ['mae', 'mse']
    optimizerList = ['adam', 'sgd', 'rmsprop', 'adadelta']
    batchSizeList = [1, 10, 30]
    totalIterations = len(historyList)*len(neuronList)*len(statefulList)
    totalIterations *= len(epochList)*len(lossFnList)*len(optimizerList)*len(batchSizeList)
    i = 0
    for history in historyList:
        for neurons in neuronList:
            for stateful in statefulList:
                for epochs in epochList:
                    for lossFunction in lossFnList:
                        for optimizer in optimizerList:
                            for batchSize in batchSizeList:
                                if (batchSize > 1 and stateful is True):
                                    continue
                                expStr = "{}-{}-{}-{}-{}-{}-{}".format(history, stateful, neurons,\
                                                    epochs, lossFunction, optimizer, batchSize)
                                print(expStr)
                                i = i + 1
                                print("Iteration {} of {}".format(i, totalIterations))
                                result = validateLSTMModel(None, stock_frame, history, 14, dataSplits, stock_dates, \
                                                 neurons, epochs, batchSize, stateful, lossFunction)
                                experiment_df[expStr] = result
                                experiment_df.to_csv(str(i)+"_experiment.csv", index=False)
    experiment_df.to_csv('final_experiment_results.csv', index=False)

def run_experiment():
    stockdf = cs.get_stock_dataframe(stock_name, dateFrom = stock_start_date)
    stockdf = stockdf[['Adj. Close']]
    #run_lstm(stockdf, timeSeriesSplitCount, M_historical_days, N_predict_days)
    #run_lstm(stockdf, 2, 10, N_predict_days)
    experiment_lstm_parameters(stockdf)

run_experiment()

14-False-1-1-mae-adam-1
Iteration 1 of 3072
X train (1009, 14, 1) and test (1007, 14, 1)
Y train (1009,) and test (1007,)
Test dates (1007,)
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_5 (LSTM)                (None, 14, 1)             12        
_________________________________________________________________
lstm_6 (LSTM)                (None, 1)                 12        
_________________________________________________________________
dropout_3 (Dropout)          (None, 1)                 0         
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 2         
Total params: 26.0
Trainable params: 26.0
Non-trainable params: 0.0
_________________________________________________________________
None
Epoch 1/1
29s - loss: 0.2125 - rmse_metric: 0.2125
dict_keys(['rmse_metric', 'loss'])
Reshaped predicted (1007, 1) and y_test 