In [48]:
from utils.utilities import get_engine
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from datetime import datetime, timedelta

from keras.models import Sequential
from keras.layers import Dense, LSTM, RepeatVector, TimeDistributed
from keras.callbacks import EarlyStopping

In [49]:
engine = get_engine(country = 'AU')

In [50]:
stock_code = 'NEC'
query = f"""
    SELECT
        date,
        open,
        high,
        low,
        close
    FROM transaction
    WHERE 
        stock_code = '{stock_code}'
    ORDER BY date
"""
print(f"Query {stock_code}")
df = pd.read_sql_query(query, engine)
df = df.dropna().reset_index(drop = True)

Query NEC


In [51]:
def compute_diagonal_means(matrix):
    m, n = matrix.shape
    diagonals = [[(0,i)] for i in range(n)] + [[(i,n - 1)] for i in range(1,m)]

    vals = []
    for loc in range(len(diagonals)):
        next_val = (diagonals[loc][0][0] + 1, diagonals[loc][0][1] - 1)
        while next_val[0] < m and next_val[1] >= 0:
            diagonals[loc].append(next_val)
            next_val = (next_val[0] + 1, next_val[1] - 1)
        # Finish with the diagonal
        axis_1 = [t for t,_ in diagonals[loc]]
        axis_2 = [t for _,t in diagonals[loc]]
        vals.append(np.mean(matrix[axis_1,axis_2]))

    return np.array(vals)

In [52]:
def generate_dates(current_date,n):
    # Initialize an empty list to store the result
    next_dates = []

    # Iterate until we collect n non-weekend dates
    while len(next_dates) < n:
        # Increment the current date by one day
        current_date += timedelta(days=1)

        # Check if the current date is a weekend (Saturday or Sunday)
        if current_date.weekday() not in [5, 6]:
            next_dates.append(current_date)

    return next_dates

In [53]:
window = 90

# for price in ['open','high','low','close']:
x = np.array([df['close'].iloc[i:i + window].to_numpy() for i in range(len(df) - window)])
y = np.array([df['close'].iloc[i + window] for i in range(len(df) - window)])

x_train,x_test = x[:int(0.8 *len(x))], x[-(len(x) - int(0.8*len(x))):]
y_train,y_test = y[:int(0.8 *len(y))], y[-(len(y) - int(0.8*len(y))):]

x_train,x_val = x_train[:int(0.8 *len(x_train))], x_train[-(len(x_train) - int(0.8*len(x_train))):]
y_train,y_val = y_train[:int(0.8 *len(y_train))], y_train[-(len(y_train) - int(0.8*len(y_train))):]

# # Fit scaler on train only
scaler_data = np.concatenate((x_train[0,:],y_train), axis = 0)
mean = np.mean(scaler_data, axis = None)
std = np.std(scaler_data, axis = None)

transformer = np.vectorize(lambda x : (x - mean) / std)
inv_transformer = np.vectorize(lambda x : x*std + mean)

# apply on all data
x_train = transformer(x_train)
y_train = transformer(y_train)
x_val = transformer(x_val)
y_val = transformer(y_val)
x_test = transformer(x_test)

In [54]:
model = Sequential()
# encoder layer
model.add(LSTM(16,return_sequences = True,input_shape = (x_train.shape[1],1)))
model.add(LSTM(8,return_sequences = False))
model.add(Dense(4))
model.add(Dense(1))

model.compile(optimizer='adam', loss='mean_squared_error')

early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

history = model.fit(x_train, y_train, epochs=10, validation_data=(x_val, y_val), callbacks=[early_stopping])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [55]:
# Test on test set
predictions = model.predict(x_test)
predictions = inv_transformer(predictions)

rmse = np.sqrt(np.mean(((predictions - y_test) ** 2)))
print(rmse, np.std(predictions), std)

0.2896838772174901 0.13843937149675617 0.32167408291458083


In [56]:
# Predict all
prediction_df = df[['date','close']]
scaled_x = transformer(x)
predictions = np.squeeze(model.predict(scaled_x), axis = -1)
predictions = inv_transformer(predictions).tolist()
prediction_df['close_pred'] = x[0,:].tolist() + predictions





A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [57]:
prediction_df

Unnamed: 0,date,close,close_pred
0,2013-12-06,1.151523,1.151523
1,2013-12-09,1.163155,1.163155
2,2013-12-10,1.122445,1.122445
3,2013-12-11,1.107905,1.107905
4,2013-12-12,1.087550,1.087550
...,...,...,...
2521,2023-12-19,2.020000,1.954226
2522,2023-12-20,2.020000,1.962812
2523,2024-01-08,1.990000,1.969908
2524,2024-01-10,1.995000,1.971884


In [58]:
DAY_RANGE = 365
day = 0
i = 0
while day < DAY_RANGE:
    last_x = np.array([prediction_df['close_pred'].iloc[-window:].to_numpy()])
    scaled_x = transformer(last_x)
    new_price = inv_transformer(model.predict(scaled_x)[0][0])

    next_date = prediction_df['date'].iloc[-1]
    while True:
        next_date += timedelta(days=1)
        # Check if the current date is a weekend (Saturday or Sunday)
        if next_date.weekday() not in [5, 6]:
            break

    prediction_df = pd.concat([prediction_df,pd.DataFrame([{'date' : next_date, 'close' : None, 'close_pred' : new_price}])], ignore_index = True)
    day += 1



In [60]:
fig = go.Figure(data=[
    go.Scatter(x = prediction_df['date'], y = prediction_df['close_pred'], name = 'Predict'),
    go.Scatter(x = prediction_df['date'], y = prediction_df['close'], name = 'True'),
])
fig.show()