In [None]:
# python run.py --model transformer --target PM2.5 --use_sam --data AQI --feature_type MS --seq_len 168 --pred_len 48 --train_epochs 200  --learning_rate 0.0001 --add_results --n_block 4 --num_heads 4 --d_model 24 --ff_dim 4096 --batch_size 64 --rho 0.4

In [None]:
# import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns



In [None]:
# Run configuration: selects which saved prediction folder is inspected.
sequence_length = 168
prediction_length = 48
fold = 2
# Sub-folder layout is "<sequence_length>_<prediction_length>/fold<fold>".
folder = f'{sequence_length}_{prediction_length}/fold{fold}'
# Directory prefix shared by every .npy file loaded below.
predictions_dir = 'results/predictions_transformer_AQI/' + folder
# Load the stored targets and predictions for the test, train and validation splits.
y_test_pred = np.load(predictions_dir + '/y_test_pred.npy')
y_test = np.load(predictions_dir + '/y_test.npy')
y_train = np.load(predictions_dir + '/y_train.npy')
y_train_pred = np.load(predictions_dir + '/y_train_pred.npy')
y_val_pred = np.load(predictions_dir + '/y_val_pred.npy')
y_val = np.load(predictions_dir + '/y_val.npy')


In [None]:
# Print the shape of every loaded array, one per line, in the order
# train, train_pred, val, val_pred, test, test_pred.
for loaded_array in (y_train, y_train_pred, y_val, y_val_pred, y_test, y_test_pred):
    print(loaded_array.shape)


In [None]:
# Reshape each target array to (shape[0], shape[1]).
# NOTE(review): this only succeeds when any remaining axes hold a single
# element — presumably a trailing feature axis of length 1; confirm.
y_train, y_val, y_test = (
    split.reshape(split.shape[0], split.shape[1])
    for split in (y_train, y_val, y_test)
)

# Show the shapes after reshaping.
print("y_train shape: ", y_train.shape)
print("y_val shape: ", y_val.shape)
print("y_test shape: ", y_test.shape)

In [None]:
# RMSE and MAE on the training and validation splits.
# Every element is included here; no missing-value mask is applied.
train_residual = y_train - y_train_pred
val_residual = y_val - y_val_pred

rmse_train = np.sqrt(np.mean(np.square(train_residual)))
mae_train = np.mean(np.abs(train_residual))
rmse_val = np.sqrt(np.mean(np.square(val_residual)))
mae_val = np.mean(np.abs(val_residual))

# The test metrics must skip time steps whose target value was missing, so
# load the per-row missing-value flags first.
# NOTE(review): absolute, machine-specific path — consider a project-relative or
# configurable data directory so the notebook can run on another machine.
missing_indexes = pd.read_csv('C:/Users/zhang/Desktop/Thesis/Data/missing_indexes.csv')
# Keep only the rows from index label 35064 onwards (the test portion).
# NOTE(review): 35064 = 1461 days * 24 rows; presumably the hourly rows that
# precede the test period — confirm against the CSV.
missing_indexes = missing_indexes.loc[35064:]
# Remove the 'time' column so that only the flag column(s) remain.
missing_indexes = missing_indexes.drop('time', axis=1)
# `prediction_length` is deliberately not reassigned here: it is already set in
# the data-loading cell, and hard-coding 48 again would silently override that
# setting for this cell and for every later cell that reads it.
def create_sequences(df, pred_length):
    """Build every contiguous window of ``pred_length`` items from ``df``.

    Window ``k`` is ``df[k:k + pred_length]``; windows start at every position
    from 0 up to ``len(df) - pred_length`` inclusive.

    Args:
        df: Sliceable container supporting ``len()`` and ``df[a:b]``.
        pred_length: Number of consecutive items in each window.

    Returns:
        ``np.array`` built from the list of windows, in start order.
    """
    window_count = len(df) - pred_length + 1
    return np.array([df[start:start + pred_length] for start in range(window_count)])
# One window of missing-value flags per forecast window.
missing_indexes_sequence = create_sequences(missing_indexes, prediction_length)
# Reshape to (windows, horizon) and force a boolean dtype: `~` is only a
# logical NOT on booleans. On 0/1 integers it is a bitwise NOT (-1/-2), which
# would turn the masking below into integer fancy indexing.
missing_indexes_sequence = missing_indexes_sequence.reshape(
    missing_indexes_sequence.shape[0], missing_indexes_sequence.shape[1]
).astype(bool)
# Fail early with a readable message if the mask does not line up with y_test.
assert missing_indexes_sequence.shape == y_test.shape, (
    "missing-value mask shape %s does not match y_test shape %s"
    % (missing_indexes_sequence.shape, y_test.shape)
)

# Select the positions that are NOT flagged as missing and score only those.
observed_mask = ~missing_indexes_sequence
test_residual = y_test[observed_mask] - y_test_pred[observed_mask]
rmse_test = np.sqrt(np.mean(test_residual**2))
mae_test = np.mean(np.abs(test_residual))

# Disabled alternative: test metrics without the missing-value mask.
# rmse_test = np.sqrt(np.mean((y_test - y_test_pred)**2))
# mae_test = np.mean(np.abs(y_test - y_test_pred))

# Report every metric: train, validation and (masked) test.
for metric_label, metric_value in (
    ("rmse_train: ", rmse_train),
    ("mae_train: ", mae_train),
    ("rmse_val: ", rmse_val),
    ("mae_val: ", mae_val),
    ("rmse_test: ", rmse_test),
    ("mae_test: ", mae_test),
):
    print(metric_label, metric_value)

# Persist the metrics as a one-row CSV in the same folder as the predictions.
results = pd.DataFrame(
    columns=['rmse_train', 'mae_train', 'rmse_val', 'mae_val', 'rmse_test', 'mae_test']
)
results.loc[0] = [rmse_train, mae_train, rmse_val, mae_val, rmse_test, mae_test]
results.to_csv(
    'results/predictions_transformer_AQI/' + folder + '/results.csv',
    index=False,
)

In [None]:
# Split the test arrays into four seasons by row position.
# Seasons start on March 1 (spring), June 1 (summer), September 1 (fall) and
# December 1 (winter); the boundaries are day offsets multiplied by 24 rows.
# NOTE(review): assumes one row per hour starting on January 1 — confirm.
spring_rows = slice(59*24, 151*24)
summer_rows = slice(151*24, 243*24)
fall_rows = slice(243*24, 334*24)
winter_late_rows = slice(334*24, None)   # from the winter start to the end
winter_early_rows = slice(None, 59*24)   # from the beginning to the spring start

# True values per season; winter joins the late rows with the early rows.
y_test_spring = y_test[spring_rows]
y_test_summer = y_test[summer_rows]
y_test_fall = y_test[fall_rows]
y_test_winter = np.concatenate((y_test[winter_late_rows], y_test[winter_early_rows]))

print("y_test_spring shape: ", y_test_spring.shape)

# Predictions per season, sliced with the same row ranges.
y_test_pred_spring = y_test_pred[spring_rows]
y_test_pred_summer = y_test_pred[summer_rows]
y_test_pred_fall = y_test_pred[fall_rows]
y_test_pred_winter = np.concatenate((y_test_pred[winter_late_rows], y_test_pred[winter_early_rows]))
                                    

In [None]:
# RMSE and MAE for each season, scored only on positions selected by the
# inverted missing-value mask.
def _masked_rmse_mae(actual, predicted, missing):
    """Return ``(rmse, mae)`` over the positions selected by ``~missing``."""
    keep = ~missing
    residual = actual[keep] - predicted[keep]
    return np.sqrt(np.mean(residual**2)), np.mean(np.abs(residual))

# spring
rmse_test_spring, mae_test_spring = _masked_rmse_mae(
    y_test_spring, y_test_pred_spring, missing_indexes_sequence[59*24:151*24]
)
# summer
rmse_test_summer, mae_test_summer = _masked_rmse_mae(
    y_test_summer, y_test_pred_summer, missing_indexes_sequence[151*24:243*24]
)
# fall
rmse_test_fall, mae_test_fall = _masked_rmse_mae(
    y_test_fall, y_test_pred_fall, missing_indexes_sequence[243*24:334*24]
)
# winter: the mask is assembled from the same two row ranges as the winter data
missing_indexes_sequence_winter = np.concatenate(
    (missing_indexes_sequence[334*24:], missing_indexes_sequence[:59*24])
)
rmse_test_winter, mae_test_winter = _masked_rmse_mae(
    y_test_winter, y_test_pred_winter, missing_indexes_sequence_winter
)

# Report the seasonal metrics.
for season_label, season_value in (
    ("rmse_test_spring: ", rmse_test_spring),
    ("mae_test_spring: ", mae_test_spring),
    ("rmse_test_summer: ", rmse_test_summer),
    ("mae_test_summer: ", mae_test_summer),
    ("rmse_test_fall: ", rmse_test_fall),
    ("mae_test_fall: ", mae_test_fall),
    ("rmse_test_winter: ", rmse_test_winter),
    ("mae_test_winter: ", mae_test_winter),
):
    print(season_label, season_value)


In [None]:
# Disabled alternative: per-season RMSE/MAE computed WITHOUT the missing-value
# mask. The code below is wrapped in a string literal, so none of it executes;
# the cell only evaluates that string.
'''# calcuate rmse and mae for each season without mask
# spring
rmse_test_spring = np.sqrt(np.mean((y_test_spring - y_test_pred_spring)**2))
mae_test_spring = np.mean(np.abs(y_test_spring - y_test_pred_spring))
# summer
rmse_test_summer = np.sqrt(np.mean((y_test_summer - y_test_pred_summer)**2))
mae_test_summer = np.mean(np.abs(y_test_summer - y_test_pred_summer))
# fall
rmse_test_fall = np.sqrt(np.mean((y_test_fall - y_test_pred_fall)**2))
mae_test_fall = np.mean(np.abs(y_test_fall - y_test_pred_fall))
# winter
rmse_test_winter = np.sqrt(np.mean((y_test_winter - y_test_pred_winter)**2))
mae_test_winter = np.mean(np.abs(y_test_winter - y_test_pred_winter))

print("rmse_test_spring: ", rmse_test_spring)
print("mae_test_spring: ", mae_test_spring)
print("rmse_test_summer: ", rmse_test_summer)
print("mae_test_summer: ", mae_test_summer)
print("rmse_test_fall: ", rmse_test_fall)
print("mae_test_fall: ", mae_test_fall)
print("rmse_test_winter: ", rmse_test_winter)
print("mae_test_winter: ", mae_test_winter)'''

In [None]:
# Disabled code: would build an hourly date range and use slices of it as the
# index of DataFrames wrapping the train/validation/test arrays. It is wrapped
# in a string literal, so none of it executes.
'''# add time labels
# train: '2018-01-01 00:00:00') - '2020-12-31 23:00:00'
# validation: '2021-01-01 00:00:00' - '2021-12-31 23:00:00'
# test: '2022-01-01 00:00:00 - time <= '2022-12-31 23:00:00'
time = pd.date_range(start='2018-01-01 00:00:00', end='2022-12-31 23:00:00', freq='H')
time_train = time[0:y_train.shape[0]]
time_val = time[y_train.shape[0]:y_train.shape[0]+y_val.shape[0]]
time_test = time[y_train.shape[0]+y_val.shape[0]:]

# create dataframes with time as index
df_train = pd.DataFrame(y_train)
df_train_pred = pd.DataFrame(y_train_pred)
df_val = pd.DataFrame(y_val)
df_val_pred = pd.DataFrame(y_val_pred)
df_test = pd.DataFrame(y_test)
df_test_pred = pd.DataFrame(y_test_pred)

df_train.index = time_train
df_train_pred.index = time_train
df_val.index = time_val
df_val_pred.index = time_val
df_test.index = time_test
df_test_pred.index = time_test'''


In [None]:
# Average every test row along axis 1, for both the true values and the predictions.
y_test_avg = y_test.mean(axis=1)
y_test_pred_avg = y_test_pred.mean(axis=1)


In [None]:
# Plot the row-averaged test targets against the row-averaged predictions.
fig_test, ax_test = plt.subplots(figsize=(14, 7))
ax_test.plot(y_test_avg, label='True')
ax_test.plot(y_test_pred_avg, 'r', label='Predicted')
ax_test.set_xlabel('Time')
ax_test.legend()
ax_test.set_ylabel('PM2.5')

# Save the figure in the same folder as the predictions.
fig_test.savefig('results/predictions_transformer_AQI/' + folder + f'/SAMFormer{prediction_length}.png')


In [None]:
# Average every validation row along axis 1, for both the true values and the predictions.
y_val_avg = y_val.mean(axis=1)
y_val_pred_avg = y_val_pred.mean(axis=1)


In [None]:
# Plot the row-averaged validation targets against the row-averaged predictions.
fig_val, ax_val = plt.subplots(figsize=(14, 7))
ax_val.plot(y_val_avg, label='True')
ax_val.plot(y_val_pred_avg, 'r', label='Predicted')
ax_val.set_xlabel('Time')
ax_val.legend()
ax_val.set_ylabel('PM2.5')

# Save the figure in the same folder as the predictions.
fig_val.savefig('results/predictions_transformer_AQI/' + folder + f'/SAMFormer{prediction_length}_val.png')