In [5]:
import pandas as pd
import torch
import joblib
from preprocessing import *
from JREP import JREP
from PC_transformer import PCTransformer
from RNN import RNN
from LSTM import LSTM
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, root_mean_squared_error

In [6]:
# Load the dataset CSV from the project's output_csv directory.
# A forward slash is used as the separator because it resolves on Windows,
# Linux and macOS alike; a backslash is only a path separator on Windows and
# would otherwise be treated as part of the file name. This also matches the
# 'models/...' paths used when the checkpoints are loaded.
df = pd.read_csv('output_csv/HCMUT-SuperNodeXP-2017-1.0.swf.csv')

In [7]:
# Input columns handed to the data-preparation helpers, and the column to predict.
feature_columns = [
    'requested_processors',
    'requested_time',
    'avg_cpu_time_used',
    'used_memory',
    'submit_time',
    'wait_time',
    'user_id',
    'group_id',
    'executable_id',
    'queue_id',
]
target_column = 'run_time'

# Hyperparameters.
# NOTE(review): these are used to rebuild the networks before saved weights are
# loaded, so they presumably have to match the values used at training time.
input_dim = len(feature_columns)  # one model input per feature column
d_model = 64                      # model dimension
num_heads = 8                     # number of attention heads
d_ff = 256                        # feed-forward dimension
num_layers = 3                    # number of stacked layers / transformer blocks
output_dim = 1                    # output dimension
batch_size = 128                  # batch size passed to prepare_data_seq
seq_len = 20                      # sequence length passed to prepare_data_seq
num_epochs = 10                   # number of training epochs
num_hidden = 64                   # number of hidden units (RNN / LSTM)
dropout = 0.2                     # dropout rate

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


# Sequence pipeline: yields the dataloaders (indexed by split name, e.g. 'test')
# and the scaler later handed to the neural models' evaluate_model calls.
dataloaders, scaler_seq = prepare_data_seq(
    df,
    feature_columns,
    target_column,
    seq_len=seq_len,
    batch_size=batch_size,
)

# Flat train/test pipeline: yields the arrays and scaler later used by JREP.
X_train, X_test, Y_train, Y_test, scaler = prepare_data(df, feature_columns, target_column)

In [8]:
# SECURITY NOTE: torch.load and joblib.load both deserialize via pickle, which
# can execute arbitrary code. Only load checkpoint files from a trusted source.

# --- PCTransformer ---------------------------------------------------------
PCTransformer_model = PCTransformer(input_dim, d_model, num_heads, d_ff, num_layers, output_dim, dropout).to(device)
PCTransformer_model.load_state_dict(torch.load('models/best_pc_transformer_model.pth', map_location=device))
# Switch to inference mode so the dropout layers are disabled while metrics
# are computed; a freshly constructed module starts in training mode.
PCTransformer_model.eval()
PCTransformer_rmse, PCTransformer_mae, PCTransformer_mse, PCTransformer_r2 = PCTransformer_model.evaluate_model(PCTransformer_model, dataloaders['test'], device, scaler_seq, input_dim)

# --- RNN -------------------------------------------------------------------
rnn_model = RNN(input_dim, num_hidden, num_layers, dropout).to(device)
rnn_model.load_state_dict(torch.load('models/best_rnn_model.pth', map_location=device))
rnn_model.eval()  # disable dropout for evaluation
rnn_rmse, rnn_mae, rnn_mse, rnn_r2 = rnn_model.evaluate_model(rnn_model, dataloaders['test'], scaler_seq, input_dim)

# --- LSTM ------------------------------------------------------------------
lstm_model = LSTM(input_dim, num_hidden, num_layers, dropout).to(device)
lstm_model.load_state_dict(torch.load('models/best_lstm_model.pth', map_location=device))
lstm_model.eval()  # disable dropout for evaluation
lstm_rmse, lstm_mae, lstm_mse, lstm_r2 = lstm_model.evaluate_model(lstm_model, dataloaders['test'], scaler_seq, input_dim)

# --- JREP (stacking model) -------------------------------------------------
# The JREP wrapper is created empty and the previously fitted stacking model
# is attached to it before predicting on the flat (non-sequence) test split.
JREP_model = JREP()
stacking_model = joblib.load('models/stacking_model.pkl')
JREP_model.stacking_model = stacking_model
Y_pred = JREP_model.predict(X_test, scaler, len(feature_columns))
JREP_rmse, JREP_mae, JREP_mse, JREP_r2 = JREP_model.evaluate_model(Y_test, Y_pred, scaler, len(feature_columns))

In [9]:
# Report the PCTransformer test metrics, one per line, to four decimals.
print(
    f"PCTransformer RMSE: {PCTransformer_rmse:.4f}",
    f"PCTransformer MAE: {PCTransformer_mae:.4f}",
    f"PCTransformer MSE: {PCTransformer_mse:.4f}",
    f"PCTransformer R2: {PCTransformer_r2:.4f}",
    sep="\n",
)

PCTransformer RMSE: 94262.0653
PCTransformer MAE: 62498.5468
PCTransformer MSE: 8885336957.2278
PCTransformer R2: 0.1522


In [10]:
# Report the JREP test metrics, one per line, to four decimals.
print(
    f"JREP RMSE: {JREP_rmse:.4f}",
    f"JREP MAE: {JREP_mae:.4f}",
    f"JREP MSE: {JREP_mse:.4f}",
    f"JREP R2: {JREP_r2:.4f}",
    sep="\n",
)

JREP RMSE: 28223.2052
JREP MAE: 11600.8960
JREP MSE: 796549314.0460
JREP R2: 0.9499


In [11]:
# Report the RNN test metrics, one per line, to four decimals.
print(
    f"RNN RMSE: {rnn_rmse:.4f}",
    f"RNN MAE: {rnn_mae:.4f}",
    f"RNN MSE: {rnn_mse:.4f}",
    f"RNN R2: {rnn_r2:.4f}",
    sep="\n",
)

RNN RMSE: 61268.9908
RNN MAE: 39725.8433
RNN MSE: 3753889238.2746
RNN R2: 0.7621


In [12]:
# Report the LSTM test metrics, one per line, to four decimals.
print(
    f'LSTM RMSE: {lstm_rmse:.4f}',
    f'LSTM MAE: {lstm_mae:.4f}',
    f'LSTM MSE: {lstm_mse:.4f}',
    f'LSTM R2: {lstm_r2:.4f}',
    sep="\n",
)

LSTM RMSE: 81792.7611
LSTM MAE: 57900.6272
LSTM MSE: 6690055763.9777
LSTM R2: 0.5760
