In [3]:
import numpy as np
import pandas as pd

import os
import matplotlib.pyplot as plt
import pandas_datareader as web
import datetime as dt

from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

# Preprocessing

In [4]:
aat = pd.read_csv("/content/AAT.csv")
# hsg = pd.read_csv("/content/HSG.csv")
# qbs = pd.read_csv("/content/QBS.csv")
# stk = pd.read_csv("/content/STK.csv")

In [5]:
aat.head()

Unnamed: 0,code,date,modificable_price,close_price,change_price_value,change_price_percent,open_price,KL_GD_khoplenh,GT_GD_khoplenh,KL_GD_thoathuan,GT_GD_thoathuan,highest_price,lowest_price
0,AAT,28/11/2023,4.99,4.99,0.05,1.01%,4.94,241600,1193000000,0,0,4.99,4.92
1,AAT,27/11/2023,4.94,4.94,0.04,0.82%,4.9,173500,856000000,0,0,4.95,4.9
2,AAT,24/11/2023,4.9,4.9,-0.02,-0.41%,4.89,157700,776000000,0,0,5.0,4.88
3,AAT,23/11/2023,4.92,4.92,0.01,0.20%,4.95,284400,1417000000,0,0,5.1,4.91
4,AAT,22/11/2023,4.91,4.91,0.0,0.00%,4.9,174000,852000000,0,0,4.96,4.88


In [8]:
aat.shape

(673, 13)

In [6]:
aat['date'].astype

<bound method NDFrame.astype of 0      28/11/2023
1      27/11/2023
2      24/11/2023
3      23/11/2023
4      22/11/2023
          ...    
668    30/03/2021
669    29/03/2021
670    26/03/2021
671    25/03/2021
672    24/03/2021
Name: date, Length: 673, dtype: object>

In [14]:
aat.columns

Index(['code', 'date', 'modificable_price', 'close_price',
       'change_price_value', 'change_price_percent', 'open_price',
       'KL_GD_khoplenh', 'GT_GD_khoplenh', 'KL_GD_thoathuan',
       'GT_GD_thoathuan', 'highest_price', 'lowest_price'],
      dtype='object')

In [65]:
# dt = aat.loc[:, ['close_price', 'highest_price', 'lowest_price']]
dt = aat.loc[:, ['close_price']]

In [23]:
dt.head(5)

Unnamed: 0,close_price,highest_price,lowest_price
0,4.99,4.99,4.92
1,4.94,4.95,4.9
2,4.9,5.0,4.88
3,4.92,5.1,4.91
4,4.91,4.96,4.88


In [26]:
dt.shape

(673, 3)

In [80]:
dt_train = dt[:int(dt.shape[0]/5*3)]
dt_val = dt[int(dt.shape[0]/5*3):int(dt.shape[0]/5*4)]
dt_test = dt[int(dt.shape[0]/5*4):]
dt_val = dt_val.reset_index(drop=True)
dt_test = dt_test.reset_index(drop=True)

In [72]:
def create_dataset(dt, prediction_days = 10):
  x = []
  y = []
  for i in range(prediction_days, len(dt)):
      x.append(dt[i - prediction_days:i])
      y.append(dt.loc[i])

  x, y = np.array(x), np.array(y)
  x = np.reshape(x, (x.shape[0], x.shape[2], x.shape[1]))
  return x, y

In [73]:
x_train, y_train = create_dataset(dt_train)
x_val, y_val = create_dataset(dt_val)
x_test, y_test = create_dataset(dt_test)

In [82]:
x_train.shape

(393, 1, 10)

In [85]:
x_train = np.repeat(x_train, 10, axis=0)
y_train = np.repeat(y_train, 10, axis=0)

# Model

In [70]:
def LSTM_model(input_0 = 3):
    model = Sequential()

    model.add(LSTM(units=10, return_sequences=True, input_shape=(1, 10)))
    model.add(Dropout(0.2))

    model.add(LSTM(units=10, return_sequences=True))
    model.add(Dropout(0.2))

    model.add(LSTM(units=10))
    model.add(Dropout(0.2))

    model.add(Dense(units=1))

    return model

In [86]:
model = LSTM_model()
model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_9 (LSTM)               (None, 1, 10)             840       
                                                                 
 dropout_9 (Dropout)         (None, 1, 10)             0         
                                                                 
 lstm_10 (LSTM)              (None, 1, 10)             840       
                                                                 
 dropout_10 (Dropout)        (None, 1, 10)             0         
                                                                 
 lstm_11 (LSTM)              (None, 10)                840       
                                                                 
 dropout_11 (Dropout)        (None, 10)                0         
                                                                 
 dense_3 (Dense)             (None, 1)                

In [87]:
model.compile(
    optimizer='adam',
    loss='mean_squared_error'
)

# Training

In [88]:
checkpointer = ModelCheckpoint(
    filepath = 'weights_best.hdf5',
    verbose = 2,
    save_best_only = True
)

model.fit(
    x_train,
    y_train,
    epochs=25,
    batch_size = 32,
    callbacks = [checkpointer],
    validation_data=(x_val, y_val)
)

Epoch 1/25
Epoch 1: val_loss improved from inf to 246.61131, saving model to weights_best.hdf5
Epoch 2/25

  saving_api.save_model(


Epoch 2: val_loss improved from 246.61131 to 168.00456, saving model to weights_best.hdf5
Epoch 3/25
Epoch 3: val_loss improved from 168.00456 to 139.19650, saving model to weights_best.hdf5
Epoch 4/25
Epoch 4: val_loss improved from 139.19650 to 123.40822, saving model to weights_best.hdf5
Epoch 5/25
Epoch 5: val_loss improved from 123.40822 to 114.10083, saving model to weights_best.hdf5
Epoch 6/25
Epoch 6: val_loss improved from 114.10083 to 108.61443, saving model to weights_best.hdf5
Epoch 7/25
Epoch 7: val_loss improved from 108.61443 to 101.12115, saving model to weights_best.hdf5
Epoch 8/25
Epoch 8: val_loss improved from 101.12115 to 87.72378, saving model to weights_best.hdf5
Epoch 9/25
Epoch 9: val_loss improved from 87.72378 to 76.54762, saving model to weights_best.hdf5
Epoch 10/25
Epoch 10: val_loss improved from 76.54762 to 67.58178, saving model to weights_best.hdf5
Epoch 11/25
Epoch 11: val_loss improved from 67.58178 to 60.42580, saving model to weights_best.hdf5
Epoc

<keras.src.callbacks.History at 0x786ecdc4ed10>

# Test

In [None]:
# Plot the actual prices using a black line
plt.plot(actual_prices, color='black', label=f"Actual {COMPANY} price")

# Plot the predicted prices using a green line
plt.plot(predicted_prices, color='green', label=f"Predicted {COMPANY} price")

# Set the title of the plot using the company name
plt.title(f"{COMPANY} share price")

# Set the x-axis label as 'time'
plt.xlabel("time")

# Set the y-axis label using the company name
plt.ylabel(f"{COMPANY} share price")

# Display a legend to differentiate the actual and predicted prices
plt.legend()

# Show the plot on the screen
plt.show()

In [None]:
# Extract the last 'prediction_days' values from the model inputs
real_data = [model_inputs[len(model_inputs) + 1 - prediction_days:len(model_inputs + 1), 0]]

# Convert the real_data list to a numpy array
real_data = np.array(real_data)

# Reshape real_data to a 3D array with the appropriate dimensions for the LSTM model
real_data = np.reshape(real_data, (real_data.shape[0], real_data.shape[1], 1))

# Generate a prediction using the LSTM model with the real_data input
prediction = model.predict(real_data)

# Invert the scaling applied to the prediction to obtain the actual value
prediction = scaler.inverse_transform(prediction)

# Print the prediction result to the console
print(f"Prediction: {prediction[0][0]}")