### DATA PREPROCESSING

In [119]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, OneHotEncoder


def one_hot_encoding(arr):
    """One-hot encode a 1-D sequence of hashable category values.

    Every distinct value gets its own column. Columns are ordered by first
    appearance in ``arr``, so the encoding is reproducible from run to run
    (iterating a ``set`` of strings is not, because string hashing is
    randomized per process by default).

    Args:
        arr: Sequence (list, 1-D NumPy array, ...) of hashable values.

    Returns:
        ``numpy.ndarray`` of float64 with shape ``(len(arr), n_categories)``.
        Row ``k`` holds a single 1.0 in the column assigned to ``arr[k]``.
        An empty input yields an array of shape ``(0, 0)``.
    """
    values = list(arr)

    # dicts preserve insertion order, so a category's column index is the
    # position at which it was first seen.
    column_of = {}
    for value in values:
        column_of.setdefault(value, len(column_of))

    # Explicit integer dtype keeps the fancy-indexing below valid even when
    # ``values`` is empty.
    codes = np.array([column_of[value] for value in values], dtype=np.intp)

    # Row i of the identity matrix is exactly the one-hot vector of category i.
    return np.eye(len(column_of))[codes]


def data_preprocess(filepath, ip_column=2, target_scale=1000, max_rows=176):
    """Load a spreadsheet and turn it into a feature matrix and target vector.

    Steps:
      1. Read the sheet with ``pandas.read_excel`` and convert it to NumPy.
      2. Replace the categorical column ``ip_column`` by its one-hot
         encoding, expanded in place (the columns before and after it keep
         their relative order).
      3. Multiply column 0 by ``target_scale``.
      4. Keep only the first ``max_rows`` rows.
      5. Return column 0 as the target and every other column as features.

    Args:
        filepath: Path of the Excel file to read.
        ip_column: Index of the categorical column to one-hot encode.
            Must be >= 1 because column 0 is used as the target.
        target_scale: Factor applied to column 0.
        max_rows: Number of leading rows to keep; ``None`` keeps all rows.
            NOTE(review): the default of 176 looks dataset-specific — confirm.

    Returns:
        Tuple ``(training_x, training_y)``: ``training_x`` holds all columns
        except the first, ``training_y`` holds the scaled first column.
    """
    df = pd.read_excel(filepath)
    data = df.to_numpy()

    encoded_ip = one_hot_encoding(data[:, ip_column])

    # Splice the one-hot block between the neighbouring columns directly.
    # Writing it back into a single column first only works when ``data``
    # happens to be object dtype.
    expanded = np.concatenate(
        [data[:, :ip_column], encoded_ip, data[:, ip_column + 1:]],
        axis=1,
    )

    # Round-trip through Python lists so NumPy re-infers the dtype: a purely
    # numeric table becomes a numeric array instead of staying object dtype.
    expanded_data = np.array(expanded.tolist())

    expanded_data[:, 0] = expanded_data[:, 0] * target_scale
    expanded_data = expanded_data[:max_rows]

    training_x = expanded_data[:, 1:]
    training_y = expanded_data[:, 0]

    return training_x, training_y


# Load the spreadsheet and derive the feature matrix / target vector.
# NOTE(review): absolute, machine-specific Windows path — the notebook will
# not run on another machine without editing it.
file_path = "D:\\model_fit\\training\\training_data\\output.xlsx"
training_x, training_y = data_preprocess(filepath=file_path)

### TRAIN / VALIDATION / TEST SPLIT

In [120]:
### Data preprocessing using MinMaxScaler
# create MinMaxScaler Object

from sklearn.preprocessing import MinMaxScaler

# NOTE(review): both scalers are instantiated here, but every call that would
# fit or apply them is commented out in the visible code, so the data is
# passed on unscaled.
x_scaler = MinMaxScaler()
y_scaler = MinMaxScaler()


# scaled_training_x = training_x[:, 0].reshape(-1, 1)
# scaled_training_y = training_y[:].reshape(-1, 1)

# training_x[:, 0] = scaled_training_x[:, 0]
# training_y = scaled_training_y

# Define the split points by row position: 80 % / 10 % of the row count,
# truncated to integers.
train_size = int(len(training_x) * 0.8)
val_size = int(len(training_x) * 0.1)

# Split the data
# Training set: the first `train_size` rows.
x_train = training_x[:train_size]
y_train = training_y[:train_size]

# Validation set: everything AFTER the first `train_size + val_size` rows
# (i.e. the tail of the data, not the block that directly follows training).
x_val = training_x[train_size + val_size:]
y_val = training_y[train_size + val_size:]


# "Test" set: the first `train_size + val_size` rows.
# NOTE(review): this slice contains every training row, so metrics computed on
# it are not an out-of-sample estimate. The original note said the test data
# should come from another file — confirm the intended split before changing
# it, because downstream code assumes at least 40 test sequences.
x_test = training_x[:train_size + val_size]
y_test = training_y[:train_size + val_size]

# x_train_first_column = x_train[:, 0].reshape(-1, 1)
# scaled_x_train_first_column = x_scaler.fit_transform(x_train_first_column)
# x_train[:, 0] = scaled_x_train_first_column.flatten()
# y_train_scaled = y_scaler.fit_transform(y_train.reshape(-1, 1))  # reshap to 2D

# apply the same scaling to the validation and test data sets
# x_val_scaled = x_scaler.transform(x_val)
# x_val_first_column = x_val[:, 0].reshape(-1, 1)
# scaled_x_val_first_column = x_scaler.fit_transform(x_val_first_column)
# x_val[:, 0] = scaled_x_val_first_column.flatten()
# y_val_scaled = y_scaler.transform(y_val.reshape(-1, 1))  # reshap to 2D


# x_test_first_column = x_test[:, 0].reshape(-1, 1)
# scaled_x_test_first_column = x_scaler.fit_transform(x_test_first_column)
# x_test[:, 0] = scaled_x_test_first_column.flatten()
# y_test_scaled = y_scaler.transform(y_test.reshape(-1, 1))  # reshap to 2D

### SEQUENCE CREATION & LOSS-HISTORY CALLBACK

In [121]:
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout, Input
from keras.callbacks import Callback, EarlyStopping
from keras.optimizers import Adam


def create_sequences(dataset_x, dataset_y, time_steps):
    """Slice a feature matrix and its targets into sliding windows.

    For every window of ``time_steps`` consecutive rows of ``dataset_x`` the
    target is the ``dataset_y`` entry aligned with the window's last row.

    Args:
        dataset_x: 2-D array of shape ``(n_rows, n_features)``.
        dataset_y: Sequence of ``n_rows`` targets aligned with ``dataset_x``.
        time_steps: Window length; must be a positive integer.

    Returns:
        Tuple ``(X, Y)`` of NumPy arrays:
          * ``n_rows >= time_steps``: ``X`` has shape
            ``(n_rows - time_steps + 1, time_steps, n_features)`` and ``Y``
            has one target per window.
          * ``0 < n_rows < time_steps``: a single window, padded at the
            front by repeating the first row so that it is exactly
            ``time_steps`` long; ``Y`` holds the last available target.
          * ``n_rows == 0``: empty arrays of shape
            ``(0, time_steps, n_features)`` and ``(0,)``.

    Raises:
        ValueError: If ``time_steps`` is smaller than 1.
    """
    if time_steps < 1:
        raise ValueError("time_steps must be a positive integer")

    n_rows = dataset_x.shape[0]

    if n_rows == 0:
        # Nothing to window; keep the trailing dimensions consistent with the
        # non-empty case so callers can still inspect the shape.
        empty_x = np.empty((0, time_steps) + tuple(dataset_x.shape[1:]))
        return empty_x, np.empty((0,))

    if n_rows < time_steps:
        # Too short for a full window: left-pad with copies of the first row
        # so the window length (and the target shape) match the regular case.
        padding = np.repeat(dataset_x[:1], time_steps - n_rows, axis=0)
        window = np.concatenate([padding, dataset_x], axis=0)
        return np.array([window]), np.array([dataset_y[-1]])

    n_windows = n_rows - time_steps + 1
    dataX = [dataset_x[i:i + time_steps, :] for i in range(n_windows)]
    dataY = [dataset_y[i + time_steps - 1] for i in range(n_windows)]

    return np.array(dataX), np.array(dataY)

# Look-back window length shared by all three splits.
time_steps = 10

# Window each split with the same look-back so the resulting tensors all have
# the layout (n_windows, time_steps, n_features).
x_train_sequences, y_train_sequences = create_sequences(
    dataset_x=x_train, dataset_y=y_train, time_steps=time_steps
)
x_val_sequences, y_val_sequences = create_sequences(
    dataset_x=x_val, dataset_y=y_val, time_steps=time_steps
)
x_test_sequences, y_test_sequences = create_sequences(
    dataset_x=x_test, dataset_y=y_test, time_steps=time_steps
)


# Initialize LossHistory with validation data
class LossHistory(Callback):
    """Keras callback that records loss and MAE values after every epoch.

    The values are read from the ``logs`` dict passed to ``on_epoch_end``
    and appended to four parallel lists, one entry per epoch.
    """

    def __init__(self, x_train, y_train, x_val, y_val):
        """Store the data references and start with empty histories.

        Args:
            x_train, y_train: Training inputs / targets, kept as a tuple in
                ``self.train_data``.
            x_val, y_val: Validation inputs / targets, kept as a tuple in
                ``self.validation_data``.
        """
        super().__init__()
        self.train_data = (x_train, y_train)
        self.validation_data = (x_val, y_val)
        self.losses = []        # logs['loss'] per epoch
        self.val_losses = []    # logs['val_loss'] per epoch
        self.train_errors = []  # logs['mean_absolute_error'] per epoch
        self.val_errors = []    # logs['val_mean_absolute_error'] per epoch

    def on_epoch_end(self, epoch, logs=None):
        """Append this epoch's metrics; a missing key is recorded as ``None``.

        Args:
            epoch: Index of the epoch that just finished (unused).
            logs: Metric dict for the epoch. May be ``None``, in which case
                ``None`` is recorded for every metric.
        """
        # `logs` defaults to None in the signature; normalise it so the
        # lookups below cannot fail with a TypeError.
        logs = logs or {}
        self.losses.append(logs.get('loss'))
        self.val_losses.append(logs.get('val_loss'))
        self.train_errors.append(logs.get('mean_absolute_error'))
        self.val_errors.append(logs.get('val_mean_absolute_error'))

### FITTING

In [122]:
from keras.callbacks import TensorBoard
import os
import datetime

# TensorBoard logging: every execution of this cell gets its own
# timestamped directory under logs/fit.
log_dir = os.path.join(
    "logs",
    "fit",
    datetime.datetime.now().strftime("%Y%m%d-%H%M%S"),
)
tensorboard_callback = TensorBoard(histogram_freq=1, log_dir=log_dir)


def add_hidden_layer(model, units, layers, activation=None):
    """Append a stack of LSTM layers to ``model``.

    All layers except the last are added with ``return_sequences=True``;
    the last one is added with ``return_sequences=False``. At least one
    layer is always added, even when ``layers`` is 1 or smaller.

    Args:
        model: Object exposing ``add(layer)``.
        units: ``units`` argument passed to every LSTM layer.
        layers: Total number of LSTM layers requested.
        activation: Activation passed to every LSTM layer. When falsy, the
            argument is omitted so the LSTM layer's own default applies.
    """
    # Only forward `activation` when one was given; omitting the keyword is
    # not the same as passing None explicitly.
    extra_kwargs = {"activation": activation} if activation else {}

    # Number of sequence-returning layers that precede the final layer.
    stacked = layers - 1 if layers > 1 else 0
    for _ in range(stacked):
        model.add(LSTM(units=units, return_sequences=True, **extra_kwargs))

    model.add(LSTM(units=units, return_sequences=False, **extra_kwargs))


# Hyper-parameters read by model_fit()
LSTM_units = 512       # `units` of every LSTM layer
Stop_patience = 20     # patience (in epochs) of the EarlyStopping callback
Batch_size = 64
Learning_rate = 1e-2   # learning rate handed to Adam
Epochs = 500
Layers = 10            # number of stacked LSTM layers
Activation = "tanh"    # activation of every LSTM layer

# Training objective. Earlier experiments tried 'mean_absolute_error',
# 'categorical_crossentropy' and 'sparse_categorical_crossentropy'.
loss_function = "mean_squared_error"

# Callback that collects per-epoch loss / MAE values during training.
history = LossHistory(
    x_train=x_train_sequences,
    y_train=y_train_sequences,
    x_val=x_val_sequences,
    y_val=y_val_sequences,
)


def model_fit(use_early_stopping=False):
    """Build, train and evaluate the stacked-LSTM regression model.

    Reads its configuration from the module-level hyper-parameters
    (``LSTM_units``, ``Layers``, ``Activation``, ``Learning_rate``,
    ``Epochs``, ``Batch_size``, ``Stop_patience``, ``loss_function``) and
    its data from the module-level ``*_sequences`` arrays.

    Args:
        use_early_stopping: When true, an ``EarlyStopping`` callback
            monitoring ``val_loss`` (patience ``Stop_patience``, restoring
            the best weights) is added. Defaults to ``False``, i.e. training
            always runs for the full ``Epochs``.

    Returns:
        The trained ``Sequential`` model, so that it can be inspected or
        saved by later cells.
    """
    print("epochs:", Epochs)
    print("batch:", Batch_size)
    print("units:", LSTM_units)

    # Build the model: input -> stacked LSTMs -> single linear output unit.
    model = Sequential()
    model.add(Input(shape=(x_train_sequences.shape[1], x_train_sequences.shape[2])))

    add_hidden_layer(
        model,
        LSTM_units,
        Layers,
        Activation
    )

    print("hidden layers:", Layers)

    model.add(Dense(units=1))

    model.compile(
        optimizer=Adam(learning_rate=Learning_rate),
        loss=loss_function,
        metrics=["mean_absolute_error"],
    )

    # Early stopping is opt-in; the callback order matches the previously
    # commented-out variant [history, early_stopping, tensorboard_callback].
    callbacks = [history]
    if use_early_stopping:
        callbacks.append(
            EarlyStopping(
                monitor="val_loss",
                patience=Stop_patience,
                restore_best_weights=True,
            )
        )
    callbacks.append(tensorboard_callback)

    model.fit(
        x_train_sequences,
        y_train_sequences,
        epochs=Epochs,
        batch_size=Batch_size,
        validation_data=(x_val_sequences, y_val_sequences),
        callbacks=callbacks,
    )

    prediction_results = model.predict(x_test_sequences)

    # The Dense(1) head yields predictions of shape (n, 1) while the targets
    # are 1-D (n,). Subtracting them directly would broadcast to an (n, n)
    # matrix and average over every pred/target pair, so flatten both first.
    y_true = np.asarray(y_test_sequences).reshape(-1)
    y_pred = np.asarray(prediction_results).reshape(-1)

    mae = np.mean(np.abs(y_true - y_pred))

    print("MAE:", mae)

    # Show at most the first 40 pairs; never index past a shorter test set.
    for i in range(min(40, len(prediction_results))):

        print(f"pred: {prediction_results[i]} | real: {y_test_sequences[i]}")

    return model


# Keep a reference to the trained model for the cells that follow.
model = model_fit()

# MSE

# mse = mean_squared_error(y_test, prediction_results)

# print("Mean Squared Error:", mse)


# RMSE

# rmse = np.sqrt(mse)

# print("Mean Squared Error:", mse)


# MAE

# mae = mean_absolute_error(y_test_sequences, prediction_results)

# print("Mean Absoluted Error:", mae)


# from sklearn.metrics import mean_squared_error


# mse = mean_squared_error(y_test_sequences, prediction_results)

# print("Mean Squared Error:", mse)

epochs: 500
batch: 64
units: 256
hidden layers: 10
Epoch 1/500
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 2s/step - loss: 10962464.0000 - mean_absolute_error: 2587.4246 - val_loss: 20826876.0000 - val_mean_absolute_error: 3100.0063
Epoch 2/500
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 660ms/step - loss: 11260851.0000 - mean_absolute_error: 2613.8584 - val_loss: 20754228.0000 - val_mean_absolute_error: 3088.2661
Epoch 3/500
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 614ms/step - loss: 10949158.0000 - mean_absolute_error: 2571.8538 - val_loss: 20727702.0000 - val_mean_absolute_error: 3083.9692
Epoch 4/500
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 621ms/step - loss: 11328544.0000 - mean_absolute_error: 2637.5024 - val_loss: 20702556.0000 - val_mean_absolute_error: 3079.8889
Epoch 5/500
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 620ms/step - loss: 10787960.0000 - mean_absolute_error: 2545.7246

KeyboardInterrupt: 

: 

### SAVE MODEL

In [None]:
# Save the model
    # model.save("/content/drive/MyDrive/LSTM/predict_model_2_0v.keras")