In [1]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from sqlalchemy import create_engine
import os
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [2]:
cnxn = create_engine('sqlite:///vn.db')

In [3]:
query = "SELECT * FROM HSX WHERE ticker = 'PVT' ORDER BY date"

In [4]:
df = pd.read_sql(query, cnxn, parse_dates=['date'])

In [5]:
df.tail()

Unnamed: 0,ticker,date,open,high,low,close,volume
1795,PVT,2020-08-26,10.9,11.4,10.9,11.2,3453880.0
1796,PVT,2020-08-27,11.2,11.3,11.1,11.2,1019160.0
1797,PVT,2020-08-28,11.4,11.5,11.3,11.35,2365230.0
1798,PVT,2020-09-01,11.7,12.0,11.65,11.9,2974670.0
1799,PVT,2020-09-03,12.0,12.1,11.75,11.85,2647810.0


In [10]:
df['mid'] = (df['high'] + df['low']) / 2

In [9]:
df['close_change'] = df['close'] - df['close'].shift(-1)

In [10]:
def change_classify(x):
    if x < 0:
        y = 0
    else:
        y = 1
    return y

In [12]:
df['close_change_classified'] = df['close_change'].apply(change_classify)

In [15]:
df = pd.get_dummies(df, columns=['close_change_classified'])

In [17]:
train_set = df.iloc[:-40,-2:]
test_set = df.iloc[-40:,-2:]

In [18]:
day_to_window = 90

In [19]:
sc = MinMaxScaler(feature_range=(0,1))
stage_train = sc.fit_transform(train_set.values.reshape(-1,1))
X_train = []
y_train = []
for i in range(day_to_window, len(stage_train)):
    X_train.append(stage_train[i-day_to_window:i, :])
    y_train.append(stage_train[i, :])
X_train, y_train = np.array(X_train), np.array(y_train)
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))

In [20]:
dataset_total = pd.concat((train_set, test_set), axis = 0)
inputs = dataset_total[len(df) - len(test_set) - day_to_window:].values
inputs = inputs.reshape(-1,1)
inputs = sc.transform(inputs)
X_test = []
y_test = []
for i in range(day_to_window, day_to_window + len(test_set)):
    X_test.append(inputs[i-day_to_window:i, :])
    y_test.append(inputs[i, :])
X_test = np.array(X_test)
y_test = np.array(y_test)
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

In [22]:
model = Sequential()

model.add(LSTM(50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Dropout(0.2))

model.add(LSTM(50, return_sequences=True))
model.add(Dropout(0.2))

model.add(LSTM(50, return_sequences=True))
model.add(Dropout(0.2))

model.add(LSTM(50))
model.add(Dropout(0.2))

model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics=['accuracy'])

early_stopping = EarlyStopping(monitor='val_loss', patience=5)

nn = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs = 200)#, batch_size = 32)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

KeyboardInterrupt: 

In [None]:
fig, ax = plt.subplots()
ax.plot(nn.history['loss'])
ax.plot(nn.history['val_loss'], color='y')

In [13]:
predicted_stock_price = model.predict(X_test)
predicted_stock_price = sc.inverse_transform(predicted_stock_price)

In [14]:
real = test_set.values

In [15]:
fig = go.Figure()

fig.add_trace(
    go.Scatter(
        name='real',
        x=df.iloc[-40:,:]['date'],
        y=real.flatten()
    )
)

fig.add_trace(
    go.Scatter(
        name='pred',
        x=df.iloc[-40:,:]['date'],
        y=predicted_stock_price.flatten()
    )
)
fig.show()

In [40]:
mean_squared_error(real, predicted_stock_price)

5.409361499134738

In [None]:
0.08086984669252982