In [4]:
import pandas as pd
import numpy as np
import keras_resnet.models
import keras
from keras.utils import np_utils
from sklearn import preprocessing

Using TensorFlow backend.


In [5]:
def dataPreprocessing(dataFile,normalize,seq_len):
    """Load a price CSV and cut it into sliding windows of ``seq_len`` rows.

    The CSV is read with its first column as the index and reduced to the
    columns adjOpen, adjHigh, adjLow, adjVolume, adjClose (in that order).
    Each sample consists of ``seq_len`` consecutive rows; its target is the
    adjClose value of the row that immediately follows the window.

    Parameters
    ----------
    dataFile : path or file-like accepted by ``pandas.read_csv``
    normalize : bool
        When true, every column is min-max scaled independently. The scaler
        is created inside this call and refit for each column, so two
        different files are scaled on their own ranges.
    seq_len : int
        Number of rows in each input window.

    Returns
    -------
    list ``[X, Y]``
        ``X`` has shape ``(n_samples, seq_len, n_features, 1)`` and ``Y`` has
        shape ``(n_samples,)``.

    Raises
    ------
    ValueError
        If the file has too few rows to build a single window.
    """
    columnsTitles = ["adjOpen", "adjHigh", "adjLow", "adjVolume", "adjClose"]
    data = pd.read_csv(dataFile, index_col=0)
    data = data.reindex(columns=columnsTitles)

    if normalize:
        min_max_scaler = preprocessing.MinMaxScaler()
        # fit_transform refits the scaler on every call, so each column is
        # scaled to its own min/max.
        for column in columnsTitles:
            data[column] = min_max_scaler.fit_transform(
                data[column].values.reshape(-1, 1)).ravel()

    amount_of_features = len(data.columns)
    # DataFrame.as_matrix() was removed in pandas 1.0; .values is available
    # on both old and new pandas versions.
    dataX = data.values

    # Each window holds seq_len input rows plus one extra row for the target.
    sequence_length = seq_len + 1
    # NOTE(review): start == len(dataX) - sequence_length would also be a
    # valid window; the loop stops one short of it. Kept as-is so sample
    # counts match earlier runs.
    n_windows = len(dataX) - sequence_length
    if n_windows <= 0:
        raise ValueError(
            "need more than {} rows to build windows of seq_len={}, got {}".format(
                sequence_length, seq_len, len(dataX)))

    result = np.array(
        [dataX[start: start + sequence_length] for start in range(n_windows)])

    X = result[:, :-1]            # first seq_len rows of every window
    Y = result[:, -1][:, -1]      # last column (adjClose) of the final row
    # Add a trailing channel axis of size 1.
    X = np.reshape(
        X, (X.shape[0], X.shape[1], amount_of_features, 1))
    return [X, Y]

In [6]:
import resnet
from keras.callbacks import ReduceLROnPlateau, CSVLogger, EarlyStopping

In [7]:
# --- Training callbacks ---------------------------------------------------
# Scale the learning rate by sqrt(0.1) when the monitored quantity plateaus.
lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1), cooldown=0, patience=5, min_lr=0.5e-6)
# Stop training once the monitored quantity stops improving by min_delta.
early_stopper = EarlyStopping(min_delta=0.001, patience=10)
# Per-epoch metrics are appended to this CSV file.
csv_logger = CSVLogger('resnet_stockpredict.csv')

# --- Optimisation settings ------------------------------------------------
batch_size = 32
nb_classes = 1
nb_epoch = 200

# --- Data / input geometry ------------------------------------------------
normalize = True
seq_len = 5
# Number of columns kept by dataPreprocessing (adjOpen, adjHigh, adjLow,
# adjVolume, adjClose). The sample width is the feature count, not the
# window length, so it gets its own constant instead of reusing seq_len.
n_features = 5
img_channels = 1
img_rows, img_cols = seq_len, n_features

In [11]:
# Build the windowed training and testing sets from their CSV files.
X_train, Y_train  = dataPreprocessing('SPY_training.csv',normalize,seq_len)
X_test, Y_test = dataPreprocessing('SPY_testing.csv',normalize,seq_len)

# Targets are left as returned by dataPreprocessing (no one-hot encoding);
# only the inputs are cast to single precision.
X_train, X_test = (inputs.astype('float32') for inputs in (X_train, X_test))

# Report the shape of every array, one per line.
print("\n".join([
    "X_train shape : {}".format(X_train.shape),
    "Y_train shape : {}".format(Y_train.shape),
    "X_test shape : {}".format(X_test.shape),
    "Y_test shape : {}".format(Y_test.shape),
]))

X_train shape : (3734, 5, 5, 1)
Y_train shape : (3734,)
X_test shape : (887, 5, 5, 1)
Y_test shape : (887,)




In [None]:
# Build a ResNet-18 whose input is (channels, rows, cols) and whose output
# has nb_classes units.
model = resnet.ResnetBuilder.build_resnet_18((img_channels, img_rows, img_cols), nb_classes)

# Y_train / Y_test hold continuous values (the adjClose column returned by
# dataPreprocessing), not integer class labels, so this is a regression
# problem: sparse_categorical_crossentropy and accuracy do not apply.
# NOTE(review): confirm the builder's final activation suits a single
# regression output (a softmax over one unit would be constant).
model.compile(loss='mean_squared_error',
              optimizer='adam',
              metrics=['mae'])

# Train with the testing set used as validation data; the callbacks handle
# learning-rate reduction, early stopping and CSV logging.
model.fit(X_train, Y_train,
          batch_size=batch_size,
          epochs=nb_epoch,
          validation_data=(X_test, Y_test),
          shuffle=True,
          callbacks=[lr_reducer, early_stopper, csv_logger])

Train on 3734 samples, validate on 887 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200

In [None]:
# Score the fitted model on the training set, then show its flattened
# predictions followed by each reported metric as "name: value".
metrics = model.evaluate(X_train, Y_train)
print('')
print(model.predict(X_train).ravel())
print('training data results: ')
for i, metric_name in enumerate(model.metrics_names):
    print(str(metric_name) + ": " + str(metrics[i]))