##### Import statements

In [2]:
import pandas as pd
from utils import create_win_data, normalize, denormalize, features_and_labels, train_test_split, evaluate
from models import Perceptron, LSTM, BiLSTM

##### Load dataset

In [3]:
# Read the combined price/sentiment CSV, using its "Date" column as the row index.
df = pd.read_csv("data/result/sp500-with-sentiment.csv", index_col="Date")

# Preview the first five rows (the default for head()).
df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Neg,Neu,Pos,Compound
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2020-08-03,3288.26001,3302.72998,3284.530029,3294.610107,4438940000,0.07006,0.841299,0.088701,0.061833
2020-08-04,3289.919922,3306.840088,3286.370117,3306.51001,4414380000,0.052134,0.851244,0.096622,0.175166
2020-08-05,3317.370117,3330.77002,3317.370117,3327.77002,4535060000,0.070892,0.839919,0.089257,0.079282
2020-08-06,3323.169922,3351.030029,3318.139893,3349.159912,4278930000,0.076577,0.815756,0.107628,0.156222
2020-08-07,3340.050049,3352.540039,3328.719971,3351.280029,4110030000,0.072143,0.843195,0.084597,0.079914


In [4]:
# Keep only the "Compound" and "Close" columns, in that order, as a 2-D array.
data = df.loc[:, ["Compound", "Close"]].to_numpy()

print(f"Data Shape: {data.shape}")

Data Shape: (905, 2)


##### Static variables

In [5]:
# Ratio handed to train_test_split; with 905 rows it yields 769 train / 136 test rows.
SPLIT = 0.85
# Window length handed to create_win_data.
SEQUENCE_LENGTH = 11
# Batch size used by every model's fit() call.
BATCH_SIZE = 100
# Number of feature columns in `data` (last axis).
INPUT_DIM = data.shape[-1]
# Timesteps fed to the models. Derived from SEQUENCE_LENGTH instead of being
# hard-coded so the two values cannot drift apart.
# Assumes features_and_labels reserves one step of each window for the label
# (windows of 11 -> model inputs of 10 steps) - TODO confirm in utils.
INPUT_TIMESTEPS = SEQUENCE_LENGTH - 1
# Units passed as `neurons` to the LSTM and BiLSTM constructors.
NEURONS = 50
# Training epochs used by every model's fit() call.
EPOCHS = 10
# NOTE(review): no random seed is set in the visible cells, so the reported
# metrics may differ between runs unless the helper modules seed internally.

##### Split train and test data

In [6]:
# Divide `data` into train and test portions according to SPLIT.
train_data, test_data = train_test_split(data, SPLIT)

print(f"Train Data Shape: {train_data.shape}")
print(f"Test Data Shape: {test_data.shape}")

Train Data Shape: (769, 2)
Test Data Shape: (136, 2)


##### Prepare train data

In [7]:
# Build windows of SEQUENCE_LENGTH from the training rows.
train_data_windows = create_win_data( train_data, SEQUENCE_LENGTH)
# Raw (un-normalized) features/labels; y_train is the reference passed to
# evaluate() in the train-evaluation cells. Extracted before normalize() is called.
X_train, y_train = features_and_labels( train_data_windows)
# normalize() also returns the values that denormalize() later needs to map
# predictions back to the original scale.
normalized_train_data, record_min_train, record_max_train = normalize( train_data_windows)
# Normalized features/labels actually used for fitting the models.
X_train_normalized, y_train_normalized = features_and_labels( normalized_train_data)

##### Prepare test data

In [8]:
# Build windows of SEQUENCE_LENGTH from the test rows.
test_data_windows = create_win_data( test_data, SEQUENCE_LENGTH)
# Raw (un-normalized) features/labels; y_test is the reference passed to
# evaluate() in the test-evaluation cells. Extracted before normalize() is called.
X_test, y_test = features_and_labels( test_data_windows)
# NOTE(review): the normalization values here are computed from the test windows
# themselves, not reused from training - presumably normalize() works per
# window; confirm in utils.
normalized_test_data, record_min_test, record_max_test = normalize( test_data_windows)
# Normalized features/labels fed to predict() in the test-evaluation cells.
X_test_normalized, y_test_normalized = features_and_labels( normalized_test_data)

##### Perceptron

In [9]:
# Build the perceptron for inputs of INPUT_TIMESTEPS steps x INPUT_DIM features.
perceptron = Perceptron(input_shape=(INPUT_TIMESTEPS, INPUT_DIM))

# NOTE(review): the summary reports an output shape of (None, 10, 1), i.e. one
# value per timestep rather than one per window - check that this matches the
# label shape expected by fit() and evaluate().
perceptron.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 10, 1)             3         
                                                                 
Total params: 3 (12.00 Byte)
Trainable params: 3 (12.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [10]:
# Train the perceptron on the normalized training windows.
perceptron.fit(
    input=X_train_normalized,
    label=y_train_normalized,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


##### Evaluate model prediction on train data

In [11]:
# Predict on the normalized training features.
y_train_pred_normalized_perceptron = perceptron.predict(X_train_normalized)
# Undo the normalization with the values recorded by normalize() on the training windows.
y_train_pred_perceptron = denormalize(
    y_train_pred_normalized_perceptron,
    record_min_train,
    record_max_train,
)

# Score against the raw training labels.
evaluate(y_train, y_train_pred_perceptron)

MSE: 276896.09
RMSE: 526.21
MAE: 424.42
MAPE: 10.34%


##### Evaluate model prediction on test data

In [12]:
# Predict on the normalized test features.
y_test_pred_normalized_perceptron = perceptron.predict(X_test_normalized)
# Undo the normalization with the values recorded by normalize() on the test windows.
y_test_pred_perceptron = denormalize(
    y_test_pred_normalized_perceptron,
    record_min_test,
    record_max_test,
)

# Score against the raw test labels.
evaluate(y_test, y_test_pred_perceptron)

MSE: 180242.83
RMSE: 424.55
MAE: 345.80
MAPE: 7.39%


##### LSTM

In [13]:
# Build the LSTM model with NEURONS units for inputs of
# INPUT_TIMESTEPS steps x INPUT_DIM features.
lstm = LSTM(neurons=NEURONS, input_shape=(INPUT_TIMESTEPS, INPUT_DIM))

# Print the layer/parameter overview.
lstm.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 10, 50)            10600     
                                                                 
 lstm_1 (LSTM)               (None, 50)                20200     
                                                                 
 dense_1 (Dense)             (None, 1)                 51        
                                                                 
Total params: 30851 (120.51 KB)
Trainable params: 30851 (120.51 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [14]:
# Train the LSTM on the normalized training windows.
lstm.fit(
    input=X_train_normalized,
    label=y_train_normalized,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


##### Evaluate model prediction on train data

In [15]:
# Predict on the normalized training features.
y_train_pred_normalized_lstm = lstm.predict(X_train_normalized)
# Undo the normalization with the values recorded by normalize() on the training windows.
y_train_pred_lstm = denormalize(
    y_train_pred_normalized_lstm,
    record_min_train,
    record_max_train,
)

# Score against the raw training labels.
evaluate(y_train, y_train_pred_lstm)

MSE: 2672.61
RMSE: 51.70
MAE: 40.22
MAPE: 0.99%


##### Evaluate model prediction on test data

In [16]:
# Predict on the normalized test features.
y_test_pred_normalized_lstm = lstm.predict(X_test_normalized)
# Undo the normalization with the values recorded by normalize() on the test windows.
y_test_pred_lstm = denormalize(
    y_test_pred_normalized_lstm,
    record_min_test,
    record_max_test,
)

# Score against the raw test labels.
evaluate(y_test, y_test_pred_lstm)

MSE: 1316.54
RMSE: 36.28
MAE: 30.32
MAPE: 0.66%


##### Bidirectional LSTM

In [17]:
# Build the bidirectional LSTM model with NEURONS units for inputs of
# INPUT_TIMESTEPS steps x INPUT_DIM features.
bilstm = BiLSTM(neurons=NEURONS, input_shape=(INPUT_TIMESTEPS, INPUT_DIM))

# Print the layer/parameter overview.
bilstm.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bidirectional (Bidirection  (None, 10, 100)           21200     
 al)                                                             
                                                                 
 bidirectional_1 (Bidirecti  (None, 100)               60400     
 onal)                                                           
                                                                 
 dense_2 (Dense)             (None, 1)                 101       
                                                                 
Total params: 81701 (319.14 KB)
Trainable params: 81701 (319.14 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [18]:
# Train the bidirectional LSTM on the normalized training windows.
bilstm.fit(
    input=X_train_normalized,
    label=y_train_normalized,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


##### Evaluate model prediction on train data

In [19]:
# Predict on the normalized training features.
y_train_pred_normalized_bilstm = bilstm.predict(X_train_normalized)
# Undo the normalization with the values recorded by normalize() on the training windows.
y_train_pred_bilstm = denormalize(
    y_train_pred_normalized_bilstm,
    record_min_train,
    record_max_train,
)

# Score against the raw training labels.
evaluate(y_train, y_train_pred_bilstm)

MSE: 2469.47
RMSE: 49.69
MAE: 38.00
MAPE: 0.94%


##### Evaluate model prediction on test data

In [20]:
# Predict on the normalized test features.
y_test_pred_normalized_bilstm = bilstm.predict(X_test_normalized)
# Undo the normalization with the values recorded by normalize() on the test windows.
y_test_pred_bilstm = denormalize(
    y_test_pred_normalized_bilstm,
    record_min_test,
    record_max_test,
)

# Score against the raw test labels.
evaluate(y_test, y_test_pred_bilstm)

MSE: 1075.80
RMSE: 32.80
MAE: 26.98
MAPE: 0.59%
