# Preparing training data

In [103]:
# -- import packages -- 
import numpy as np
import json
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

In [104]:
training_data_file_name = 'training_data_8.json'
training_data_file_path = Path().absolute().parent / 'training_data' / training_data_file_name

with open(training_data_file_path, 'r', encoding='utf-8') as f:
    training_data = json.load(f)

window_size = 6

# waiting_time_array = np.concatenate([np.zeros(window_step), np.array(training_data.get('waiting_time'))])
waiting_time_array = np.array(training_data.get('waiting_time'))
response_time_array = np.array(training_data.get('response_time'))
cpu_spent_usage_array = np.array(training_data.get('cpu_spent_usage'))

request_number = np.array(training_data.get('request_number'))
predicted_processing_time = np.array(training_data.get('predicted_processing_time'))
waiting_jobs = np.array(training_data.get('waiting_jobs'))

X_lagged = []

# -- Create lagged features --
for i in range(window_size, len(waiting_time_array)):
    X_lagged.append([
        # waiting_jobs[i-1], waiting_jobs[i-2], waiting_jobs[i-3],
        predicted_processing_time[i-1], predicted_processing_time[i-2], predicted_processing_time[i-3],  
        request_number[i-1], request_number[i-2], request_number[i-3],
        waiting_time_array[i-1], waiting_time_array[i-2], waiting_time_array[i-3],
        response_time_array[i-1], response_time_array[i-2], response_time_array[i-3],
        cpu_spent_usage_array[i-1], cpu_spent_usage_array[i-2], cpu_spent_usage_array[i-3],
    ])

X_lagged = np.array(X_lagged)

y_lagged = waiting_time_array[window_size:]

scaler = MinMaxScaler(feature_range=(0, 1))
X_scaled = scaler.fit_transform(X_lagged)
y_scaled = scaler.fit_transform(y_lagged.reshape(-1, 1))


X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_scaled, test_size=0.3, shuffle=False)

# -- Further split the training set into train and validation sets --
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.3, shuffle=False)

# -- Reshape input data for LSTM [samples, time steps, features] --
X_train = X_train.reshape(X_train.shape[0], 1, X_train.shape[1])
X_val = X_val.reshape(X_val.shape[0], 1, X_val.shape[1])
X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])

# LSTM

In [105]:
# -- Build LSTM model --
model = Sequential()

# LSTM layer
model.add(LSTM(units=100, return_sequences=False, input_shape=(X_train.shape[1], X_train.shape[2])))
# Dropout layer to prevent overfitting
model.add(Dropout(0.3))

# Output layer
model.add(Dense(units=1))


# Compile model
model.compile(optimizer='adam', loss='mean_squared_error')


# -- Train the LSTM model --
model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_val, y_val))

# -- Make predictions --
y_pred = model.predict(X_test)

# -- Inverse scaling to get real values --
y_pred_real = scaler.inverse_transform(y_pred)
y_test_real = scaler.inverse_transform(y_test)

# -- Evaluate the model --
mse = mean_squared_error(y_test_real, y_pred_real)
mea = mean_absolute_error(y_test_real, y_pred_real)
print(f"Mean Squared Error: {mse}")
print(f"Mean Absolute Error: {mea}")
print(f"Mean of y_pred_real: {np.mean(y_pred_real)}")
print(f"Mean of y_test_real: {np.mean(y_test_real)}")
print(f"Accuracy: {(np.mean(y_test_real) - mea) / np.mean(y_test_real)}")

# -- Print real vs predicted values --
print(f"    real    |    pred   ")
for i in range(len(y_test_real)):
    print(f"    {y_test_real[i][0]}     |     {y_pred_real[i][0]}      ")

Epoch 1/50


  super().__init__(**kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 154ms/step - loss: 0.1131 - val_loss: 0.3149
Epoch 2/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - loss: 0.0782 - val_loss: 0.2504
Epoch 3/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step - loss: 0.0568 - val_loss: 0.1949
Epoch 4/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - loss: 0.0344 - val_loss: 0.1485
Epoch 5/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: 0.0202 - val_loss: 0.1104
Epoch 6/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 0.0157 - val_loss: 0.0803
Epoch 7/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - loss: 0.0105 - val_loss: 0.0582
Epoch 8/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 0.0091 - val_loss: 0.0431
Epoch 9/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1

# Save model

In [106]:
# model.save_model(Path() / 'modelsfile' / 'xgboost_waiting_time_model.json')