In [4]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import keras_tuner as kt
from keras.models import Sequential
from keras.layers import LSTM, Dense
from keras.callbacks import EarlyStopping
import tensorflow as tf
import joblib
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller
import os
import shutil


In [5]:
acb_data = pd.read_csv("D:\\Github anyud\\final\\Data_stock\\ACB_with_MA_and_Diff.csv")
aapl_data = pd.read_csv('D:\\Github anyud\\final\\Data_stock\\AAPL_with_MA_and_Diff.csv')
bid_data = pd.read_csv('D:\\Github anyud\\final\\Data_stock\\BID_with_MA_and_Diff.csv')
fpt_data = pd.read_csv('D:\\Github anyud\\final\\Data_stock\\FPT_with_MA_and_Diff.csv')
googl_data = pd.read_csv('D:\\Github anyud\\final\\Data_stock\\GOOGL_with_MA_and_Diff.csv')

In [8]:
# Function to split dataset into 80% training and 20% test sets
def split_data(df):
    train_size = int(len(df) * 0.8)
    train_set = df.iloc[:train_size]
    test_set = df.iloc[train_size:]
    return train_set, test_set

# Function to normalize data (excluding date columns)
def normalize_data(train, test):
    scaler = MinMaxScaler()
    
    # Drop non-numerical columns (e.g., Date column) for normalization
    train_numeric = train.drop(columns=['Date'])
    test_numeric = test.drop(columns=['Date'])
    
    # Fit the scaler on the training data and transform both train and test sets
    train_scaled = scaler.fit_transform(train_numeric)
    test_scaled = scaler.transform(test_numeric)
    
    return train_scaled, test_scaled

# Split each dataset
aapl_train, aapl_test = split_data(aapl_data)
acb_train, acb_test = split_data(acb_data)
bid_train, bid_test = split_data(bid_data)
fpt_train, fpt_test = split_data(fpt_data)
googl_train, googl_test = split_data(googl_data)

# Normalize each dataset
aapl_train_scaled, aapl_test_scaled = normalize_data(aapl_train, aapl_test)
acb_train_scaled, acb_test_scaled = normalize_data(acb_train, acb_test)
bid_train_scaled, bid_test_scaled = normalize_data(bid_train, bid_test)
fpt_train_scaled, fpt_test_scaled = normalize_data(fpt_train, fpt_test)
googl_train_scaled, googl_test_scaled = normalize_data(googl_train, googl_test)

# Print the shape of the normalized datasets to verify
print(f"AAPL train data points: {aapl_train_scaled.shape}")
print(f"ACB train data points: {acb_train_scaled.shape}")
print(f"BID train data points: {bid_train_scaled.shape}")
print(f"FPT train data points: {fpt_train_scaled.shape}")
print(f"GOOGL train data points: {googl_train_scaled.shape}")

AAPL train data points: (1952, 3)
ACB train data points: (1944, 3)
BID train data points: (1934, 3)
FPT train data points: (1934, 3)
GOOGL train data points: (1952, 3)


In [15]:
# Function to prepare sequences for LSTM
def create_sequences(data, time_steps=30):
    sequences = []
    labels = []
    for i in range(len(data) - time_steps):
        # Create sequences (input data) using time_steps rows
        sequence = data[i:i + time_steps, :]
        # Label is the value of 'Price' at the next time step
        label = data[i + time_steps, 0]  # Assuming 'Price' is the first column
        sequences.append(sequence)
        labels.append(label)
    return np.array(sequences), np.array(labels)


In [19]:
# Function to build and train LSTM model and print loss/val_loss per epoch
def train_lstm_model(train_sequences, train_labels, stock_name):
    model = Sequential()
    model.add(LSTM(50, return_sequences=False, input_shape=(train_sequences.shape[1], train_sequences.shape[2])))
    model.add(Dense(1))  # Output layer

    model.compile(optimizer='adam', loss='mean_squared_error')
    
    # Train model with validation split (20% of the training data will be used for validation)
    history = model.fit(train_sequences, train_labels, epochs=50, batch_size=32, validation_split=0.2, verbose=0)  # verbose=0 to suppress default output
    
    # Print loss and val_loss for each epoch
    for i in range(len(history.history['loss'])):
        loss = history.history['loss'][i]
        val_loss = history.history['val_loss'][i]
        print(f"Epoch {i+1}: loss = {loss:.6f} - val_loss = {val_loss:.6f}")
    
    print(f"Training complete for {stock_name}")
    return model


In [20]:
# Create sequences for training and testing datasets for each stock
aapl_train_sequences, aapl_train_labels = create_sequences(aapl_train_scaled)
aapl_test_sequences, aapl_test_labels = create_sequences(aapl_test_scaled)

acb_train_sequences, acb_train_labels = create_sequences(acb_train_scaled)
acb_test_sequences, acb_test_labels = create_sequences(acb_test_scaled)

bid_train_sequences, bid_train_labels = create_sequences(bid_train_scaled)
bid_test_sequences, bid_test_labels = create_sequences(bid_test_scaled)

fpt_train_sequences, fpt_train_labels = create_sequences(fpt_train_scaled)
fpt_test_sequences, fpt_test_labels = create_sequences(fpt_test_scaled)

googl_train_sequences, googl_train_labels = create_sequences(googl_train_scaled)
googl_test_sequences, googl_test_labels = create_sequences(googl_test_scaled)


In [21]:
# Train LSTM models for each stock and display loss/val_loss per epoch
aapl_model = train_lstm_model(aapl_train_sequences, aapl_train_labels, "AAPL")
acb_model = train_lstm_model(acb_train_sequences, acb_train_labels, "ACB")
bid_model = train_lstm_model(bid_train_sequences, bid_train_labels, "BID")
fpt_model = train_lstm_model(fpt_train_sequences, fpt_train_labels, "FPT")
googl_model = train_lstm_model(googl_train_sequences, googl_train_labels, "GOOGL")


Epoch 1: loss = nan - val_loss = nan
Epoch 2: loss = nan - val_loss = nan
Epoch 3: loss = nan - val_loss = nan
Epoch 4: loss = nan - val_loss = nan
Epoch 5: loss = nan - val_loss = nan
Epoch 6: loss = nan - val_loss = nan
Epoch 7: loss = nan - val_loss = nan
Epoch 8: loss = nan - val_loss = nan
Epoch 9: loss = nan - val_loss = nan
Epoch 10: loss = nan - val_loss = nan
Epoch 11: loss = nan - val_loss = nan
Epoch 12: loss = nan - val_loss = nan
Epoch 13: loss = nan - val_loss = nan
Epoch 14: loss = nan - val_loss = nan
Epoch 15: loss = nan - val_loss = nan
Epoch 16: loss = nan - val_loss = nan
Epoch 17: loss = nan - val_loss = nan
Epoch 18: loss = nan - val_loss = nan
Epoch 19: loss = nan - val_loss = nan
Epoch 20: loss = nan - val_loss = nan
Epoch 21: loss = nan - val_loss = nan
Epoch 22: loss = nan - val_loss = nan
Epoch 23: loss = nan - val_loss = nan
Epoch 24: loss = nan - val_loss = nan
Epoch 25: loss = nan - val_loss = nan
Epoch 26: loss = nan - val_loss = nan
Epoch 27: loss = nan 