In [2]:
import numpy as np 
import pandas as pd

from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

from keras.models import Sequential, load_model, Model
from keras.layers import Dense, Activation, Dropout, Input, LSTM, Reshape, Lambda, RepeatVector
from sklearn.model_selection import train_test_split
from keras.utils.np_utils import to_categorical
from keras.callbacks import EarlyStopping
from keras.layers import Dropout
from keras.optimizers import Adam

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error

Using TensorFlow backend.


In [3]:
#load Data
data_path = 'Outputs/total_array.csv'
raw_data = np.loadtxt(data_path, delimiter=',', skiprows=0)

In [4]:
print(raw_data.shape)
num_sample = raw_data.shape[0]
num_time_steps = 10
x_dim = 201
y_dim = 1

(2121, 211)


In [5]:
scaler = MinMaxScaler(feature_range=(0, 1))
raw_data[:, 1: 11] = scaler.fit_transform(raw_data[:, 1: 11])

In [6]:
#Loading X and Y variables, changes for different models
X = np.zeros((num_sample, num_time_steps, x_dim))
y = []

for sample in range(num_sample):
    data_input = raw_data[sample, :]
    stock_labels = raw_data[sample, 0: num_time_steps + 1]
    
    for step in range(1, num_time_steps + 1):
        X[sample, step - 1, 0] = stock_labels[step]

        if step == num_time_steps - 1:
            y.append(stock_labels[0])
            X[sample, step, 1:] = data_input[num_time_steps + 1:]

In [7]:
scaler_y = MinMaxScaler(feature_range=(0, 1))
y = scaler_y.fit_transform(np.array(y).reshape((len(y), 1)))

In [8]:
#train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=2)

In [9]:
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_train)

(1696, 10, 201)
(425, 10, 201)
(1696, 1)
[[0.42404778]
 [0.32629655]
 [0.77691383]
 ...
 [0.46114742]
 [0.65388275]
 [0.87636792]]


In [32]:
n_a = 128 # number of dimensions for the hidden state of each LSTM cell.

In [33]:
n_values = 201 # number of input values
reshapor = Reshape((1, n_values))                       
LSTM_cell = LSTM(n_a, return_state = True)  
densor = Dense(1)

In [96]:
def stockmodel(Tx, n_a, n_values):
    """
    Implement the model
    
    Arguments:
    Tx -- length of the sequence in a corpus
    n_a -- the number of activations used in our model
    n_values -- number of unique values in the music data 
    
    Returns:
    model -- a keras model with the 
    """
    
    # Define the input of your model with a shape 
    X = Input(shape=(Tx, n_values))
    
    # Define s0, initial hidden state for the decoder LSTM
    a0 = Input(shape=(n_a,), name='a0')
    c0 = Input(shape=(n_a,), name='c0')
    a = a0
    c = c0
    
    ### START CODE HERE ### 
    # Step 1: Create empty list to append the outputs while you iterate (≈1 line)
    outputs = []
    
    # Step 2: Loop
    for t in range(Tx):
        
        # Step 2.A: select the "t"th time step vector from X. 
        x = Lambda(lambda x: x[:,t,:])(X)
        # Step 2.B: Use reshapor to reshape x to be (1, n_values) (≈1 line)
        x = reshapor(x)
        # Step 2.C: Perform one step of the LSTM_cell
        a, _, c = LSTM_cell(x, initial_state=[a, c])
        # Step 2.D: Apply densor to the hidden state output of LSTM_Cell
        out = densor(a)
        # Step 2.E: add the output to "outputs"
        outputs.append(out)
        
    # Step 3: Create model instance
    model = Model(inputs=[X, a0, c0], outputs=outputs)
    
    ### END CODE HERE ###
    
    return model

In [97]:
model = stockmodel(Tx = 10 , n_a = 128, n_values = 201)

In [10]:
model = Sequential()
model.add(LSTM(64, batch_input_shape=(1, 10, 201), stateful=True))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')







In [10]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (1, 64)                   68096     
_________________________________________________________________
dense_1 (Dense)              (1, 1)                    65        
Total params: 68,161
Trainable params: 68,161
Non-trainable params: 0
_________________________________________________________________


In [26]:
opt = Adam(lr=0.1, beta_1=0.9, beta_2=0.999, decay=0.01)

model.compile(optimizer=opt, loss='mean_squared_error')

In [14]:
m = 2121
a0 = np.zeros((m, n_a))
c0 = np.zeros((m, n_a))

In [11]:
model.fit(X_train, y_train, epochs=50, batch_size=1)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x7ffaaf162b00>

In [10]:
trainPredict = model.predict(X_train, batch_size=1)

In [11]:
trainPredict = scaler_y.inverse_transform(trainPredict)
y_train = scaler_y.inverse_transform(y_train)

In [14]:
for idx, pred in enumerate(trainPredict):
    print(pred, y_train[idx, 0])

[2477.356] 2477.31
[2395.995] 2399.24
[2755.774] 2759.13
[2275.9062] 2272.23
[2396.3948] 2397.71
[2712.5417] 2713.19
[2715.6382] 2711.51
[2270.1865] 2267.56
[2769.6936] 2771.34
[2846.2976] 2841.92
[2434.5789] 2431.52
[2499.5537] 2500.83
[2389.927] 2390.19
[2721.88] 2723.82
[2508.192] 2505.86
[2875.072] 2873.69
[2851.5393] 2844.07
[2180.9126] 2176.26
[2734.494] 2738.7
[2447.72] 2445.35
[2729.3782] 2729.17
[2650.8452] 2648.77
[2508.2878] 2506.68
[2635.586] 2633.45
[2796.3052] 2794.74
[2904.0002] 2898.5
[2862.3848] 2855.14
[2959.097] 2933.58
[2789.0889] 2783.38
[2890.866] 2886.95
[2568.4153] 2563.69
[2568.4531] 2566.42
[2563.186] 2562.64
[2723.7234] 2723.78
[2384.6294] 2382.86
[2769.5256] 2770.67
[2667.398] 2666.4
[2604.6487] 2603.48
[2860.311] 2858.59
[2821.6677] 2815.93
[2510.181] 2506.8
[2839.518] 2840.13
[2548.282] 2545.24
[2446.208] 2446.21
[2803.2336] 2801.15
[2539.5454] 2536.91
[2657.8652] 2651.75
[2505.9717] 2505.02
[2620.647] 2623.09
[2252.2903] 2251.34
[2339.1855] 2341.94
[2272.

[2732.8655] 2729.23
[2433.802] 2430.89
[2913.1843] 2914.39
[2828.3628] 2821.19
[2749.7847] 2747.4
[2433.2957] 2429.02
[2814.8513] 2815.23
[2808.5664] 2802.6
[2574.136] 2571.46
[2935.2725] 2934.06
[2875.1858] 2866.5
[2712.1902] 2706.34
[2771.6973] 2770.71
[2894.0479] 2891.29
[2815.88] 2807.33
[2737.6582] 2727.83
[2737.2378] 2735.06
[2799.4788] 2795.72
[2554.999] 2494.12
[2381.752] 2388.32
[2665.832] 2668.76
[2909.0654] 2907.26
[2193.479] 2185.62
[2459.686] 2462.4
[2806.117] 2807.19
[2845.2568] 2840.4
[2395.5576] 2393.43
[2849.6248] 2850.84
[2662.147] 2657.86
[2867.719] 2864.78
[2628.7256] 2625.13
[2443.1616] 2441.43
[2295.8508] 2296.98
[2445.5308] 2446.29
[2676.79] 2677.26
[2903.3267] 2899.78
[2514.5664] 2510.61
[2667.5818] 2668.55
[2647.2104] 2644.17
[2497.4587] 2495.74
[2909.512] 2909.96
[2458.9155] 2453.24
[2669.544] 2671.48
[2728.9006] 2727.84
[2721.154] 2716.79
[2864.5862] 2860.67
[2680.1116] 2676.29
[2811.1616] 2806.21
[2367.9722] 2361.89
[2750.6528] 2749.34
[2911.0007] 2906.67
[2

[2676.9224] 2668.58
[2553.0308] 2551.08
[2689.5125] 2688.88
[2909.7695] 2907.08
[2552.184] 2536.1
[2782.859] 2782.14
[2368.0264] 2366.19
[2340.5735] 2341.47
[2264.5928] 2265.61
[2442.554] 2445.25
[2691.2358] 2686.86
[2499.1033] 2498.79
[2888.6113] 2885.81
[2272.8708] 2265.49
[2751.7368] 2753.6
[2473.9644] 2471.17
[2497.9243] 2499.06
[2724.3093] 2724.33
[2175.1108] 2170.72
[2789.7764] 2793.55
[2338.801] 2336.22
[2592.3264] 2598.27
[2746.6443] 2743.03
[2858.7195] 2854.67
[2775.5374] 2768.04
[2440.4487] 2438.54
[2787.6511] 2788.15
[2687.301] 2681.08
[2781.4456] 2778.12
[2474.8765] 2472.48
[2817.077] 2816.14
[2254.5361] 2251.0
[2454.6973] 2457.31
[2357.585] 2358.99
[2836.7058] 2838.35
[2456.8945] 2454.97
[2553.3843] 2553.35
[2803.7183] 2800.63
[2479.779] 2477.28
[2589.3086] 2590.11
[2248.1729] 2247.09
[2804.4792] 2806.05
[2916.7678] 2911.23
[2333.2886] 2328.24
[2934.1067] 2937.3
[2580.55] 2576.16
[2400.5444] 2400.33
[2789.267] 2789.55
[2359.536] 2357.64
[2427.3586] 2430.02
[2882.0618] 2883

In [12]:
print(r2_score(y_train, trainPredict))
print(mean_squared_error(y_train, trainPredict))
print(mean_absolute_error(y_train, trainPredict))

0.9992389697768501
27.683096078317902
2.630557596458579


In [13]:
testPredict = model.predict(X_test, batch_size=1)

In [14]:
testPredict = scaler_y.inverse_transform(testPredict)
y_test = scaler_y.inverse_transform(y_test)

In [1]:
for idx, pred in enumerate(testPredict):
    print(pred, y_test[idx])
    

NameError: name 'testPredict' is not defined

In [16]:
print(r2_score(y_test, testPredict))
print(mean_squared_error(y_test, testPredict))
print(mean_absolute_error(y_test, testPredict))

0.9939761212553546
212.02207938127015
3.024827205882342
