In [3]:
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import csv

# --- Run configuration -------------------------------------------------
timesteps = 3      # rows grouped into one LSTM input window
data_size = 900    # number of leading CSV rows taken from the dataset
data_dim = 8       # column index of the target; columns 1..data_dim-1 are the features
batchsize = 32     # number of windows per training batch
drop = 0.2         # dropout rate (not referenced by the visible part of the script)

# Number of complete windows that fit into data_size (floor division rounds
# down, e.g. 10 // 3 == 3).
data_resize = data_size // timesteps
# Row count after discarding the remainder that does not fill a whole window.
data_trunc_size = data_size - data_size % timesteps

# Width of the model's output layer: one predicted value per timestep.
num_classes = timesteps

# Expected CSV header:
#Date,Close_10year_treasury,Close_copper,Close_gold,Close_hk_index,Close_oil,Close_s&p,Value_us_sgd,Close
#%%
# load dataset
dataset = pd.read_csv("finance regression.csv")

df = pd.DataFrame(dataset)
print(df)
# summary statistics
print(df.describe())

# Pearson correlation between the numeric columns, also saved to corr.csv.
# The Date column is read as text, so restrict the frame to numeric columns
# explicitly: since pandas 2.0 DataFrame.corr() no longer drops non-numeric
# columns silently and raises on them instead.
corr = df.select_dtypes(include="number").corr(method='pearson')
print(corr)
corr.to_csv('corr.csv')

# Features: columns 1..data_dim-1 (column 0 is Date); target: column data_dim.
x_data = dataset.iloc[:data_size, 1:data_dim].values
y_data = dataset.iloc[:data_size, data_dim].values

# Reduce the size of data so that the data can be divided by time step
# split into input (X) and output (Y) variables
X = x_data[:data_trunc_size, 0:data_dim]
Y = y_data[:data_trunc_size]

X_scaler = MinMaxScaler()
Y_scaler = MinMaxScaler()

# Split data into train and test sets.
# The rows are later grouped into windows of `timesteps` consecutive rows, so
# the split must keep the original row order: a shuffled row-level split would
# build every window from unrelated days. The hold-out is the last ~30% of the
# windows, expressed as a whole number of windows so that both parts reshape
# cleanly.
test_windows = round(data_resize * 0.3)
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=test_windows * timesteps, shuffle=False
)
# Unscaled targets, kept for computing errors in original units.
y_test_copy = y_test.copy()
y_train_copy = y_train.copy()

# Fit the scalers on the training part only, then apply them to the test part.
X_train = X_scaler.fit_transform(X_train)
y_train = Y_scaler.fit_transform(y_train.reshape(-1, 1))

X_test = X_scaler.transform(X_test)
y_test = Y_scaler.transform(y_test.reshape(-1, 1))

# required format for lstm: (windows, timesteps, features)
X_train_shaped = X_train.reshape(-1, timesteps, data_dim - 1)
X_test_shaped = X_test.reshape(-1, timesteps, data_dim - 1)

# one target value per timestep of each window: (windows, timesteps)
Y_train_shaped = y_train.reshape(-1, timesteps)
Y_test_shaped = y_test.reshape(-1, timesteps)

print("X shape is : {}".format(X_train_shaped.shape))
print("Y shape is : {}".format(Y_train_shaped.shape))


# Create the model
# Input shape per sample is (timesteps, data_dim-1); the batch dimension is
# inferred by Keras. No Dropout layer is added here.
# NOTE(review): the output layer uses ReLU, so predictions can never be
# negative - confirm this is intended for the scaled regression target.
model = Sequential([
    # first two LSTM layers return the full sequence (one 64-d vector per timestep)
    LSTM(64, return_sequences=True, input_shape=(timesteps, data_dim-1)),
    LSTM(64, return_sequences=True),
    # last LSTM layer returns a single 64-d vector per window
    LSTM(64),
    Dense(216, activation='relu'),
    Dense(216, activation='relu'),
    Dense(128, activation='relu'),
    # one output per timestep of the window
    Dense(num_classes, activation='relu'),
])
model.compile(loss='mse', optimizer='adam', metrics=['mse'])

# Train the model; the test windows are used as validation data during fitting.
model.fit(
    X_train_shaped,
    Y_train_shaped,
    batch_size=batchsize,
    epochs=300,
    validation_data=(X_test_shaped, Y_test_shaped),
    verbose=0,
)

# Evaluate the model on the training windows.
# evaluate() returns the loss followed by the compiled metrics. The name `acc`
# is kept for compatibility, but it holds whatever metric
# model.metrics_names[1] names (printed below), not necessarily an accuracy.
# The batch size is passed by keyword; previously `timesteps` was passed
# positionally into the batch_size slot.
loss, acc = model.evaluate(X_train_shaped, Y_train_shaped, batch_size=batchsize)

print("Keras: {} {}".format(model.metrics_names[1], acc))

# Predictions have shape (windows, timesteps) in scaled units.
# Y_scaler was fit on a single column, so flatten to one column before
# inverse_transform and restore the prediction shape afterwards.
train_pred = model.predict(X_train_shaped)
train_pred_inv = Y_scaler.inverse_transform(train_pred.reshape(-1, 1)).reshape(train_pred.shape)
# Root-mean-square error against the unscaled training targets.
train_rsme = np.sqrt(np.mean((y_train_copy - train_pred_inv.flatten())**2))
print("Train RSME is {}".format(train_rsme))
print()

test_pred = model.predict(X_test_shaped)
test_pred_inv = Y_scaler.inverse_transform(test_pred.reshape(-1, 1)).reshape(test_pred.shape)
# Root-mean-square error against the unscaled test targets.
test_rsme = np.sqrt(np.mean((y_test_copy - test_pred_inv.flatten())**2))
print("Test RSME is {}".format(test_rsme))
print()

           Date  Close_10year_treasury  ...  Value_us_sgd    Close
0     27/3/2015                  1.948  ...        1.3690  3450.10
1     30/3/2015                  1.963  ...        1.3759  3454.26
2     31/3/2015                  1.934  ...        1.3716  3447.01
3      1/4/2015                  1.868  ...        1.3622  3447.02
4      2/4/2015                  1.904  ...        1.3567  3453.75
...         ...                    ...  ...           ...      ...
1155  12/3/2020                  0.849  ...        1.4105  2678.64
1156  16/3/2020                  0.728  ...        1.4201  2495.77
1157  17/3/2020                  0.997  ...        1.4295  2454.53
1158  18/3/2020                  1.266  ...        1.4397  2425.62
1159  19/3/2020                  1.119  ...        1.4522  2311.00

[1160 rows x 9 columns]
       Close_10year_treasury  Close_copper  ...  Value_us_sgd        Close
count            1160.000000   1160.000000  ...   1160.000000  1160.000000
mean                2