In [1]:
import os
import sys
import numpy as np
import utils
import plot
from tqdm import tqdm

from sklearn.metrics import mean_absolute_error, mean_squared_error

In [3]:
train_f, train_a = utils.get_training_data()
test_f, test_a = utils.get_test_data()
val_f, val_a = utils.get_validation_data()
train_f_norm = utils.normalize_numpy(train_f, per_sample=False, minusone_one=True)
test_f_norm = utils.normalize_numpy(test_f, per_sample=False, minusone_one=True)
val_f_norm = utils.normalize_numpy(val_f, per_sample=False, minusone_one=True)

In [4]:
full_set = np.concatenate([train_f,val_f])
full_set_norm = np.concatenate([train_f_norm,val_f_norm])
full_attribute = np.concatenate([train_a,val_a])

Completely random sampling

without normalization

In [38]:
#no normalization
energy_scores = []
mse = []
mae = []
for org_sample,_ in tqdm(zip(test_f,test_a)):

    #Take 250 random samples from the full dataset
    random_samples = full_set[np.random.choice(full_set.shape[0], 250, replace=False)]

    probality_array = [1/len(random_samples)]*len(random_samples) #All have the same probability of occuring!
    energy_scores.append(utils.calculate_energy_score(probality_array,random_samples,org_sample))

    mae_temp = []
    mse_temp = []
    for s in random_samples:
        mae_temp.append(np.abs(org_sample - s))
        mse_temp.append(np.square(org_sample - s))
    mae.append(np.mean(mae_temp))
    mse.append(np.mean(mse_temp))

print(f'Energy scores | Median: {np.nanmedian(energy_scores):.3f}   Mean: {np.nanmean(energy_scores) :.3f} ')
print(f'MSE | Median: {np.nanmedian(mse):.3f}   Mean: {np.nanmean(mse) :.3f} ')
print(f'MAE | Median: {np.nanmedian(mae):.3f}   Mean: {np.nanmean(mae) :.3f} ')

11414it [00:22, 506.26it/s]

Energy scores | Median: 0.779   Mean: 1.185 
MSE | Median: 0.060   Mean: 0.089 
MAE | Median: 0.126   Mean: 0.155 





Look-alike days

In [6]:
energy_scores = []
mse = []
mae = []
test_a_1 = utils.undo_onehot_encoding(test_a,[4,7,8,9])
full_a_1 = utils.undo_onehot_encoding(full_attribute,[4,7,8,9])
for i in range(7):
    print(i)
    for org_sample,org_attribute in tqdm(zip(test_f,test_a_1)):

        #Take 250 samples from the dataset that with a +- similar day
        samples = utils.get_similar_days(full_set, full_a_1[:,:i], org_attribute[:i], k = 250)

        probality_array = [1/len(samples)]*len(samples) #All have the same probability of occuring!
        energy_scores.append(utils.calculate_energy_score(probality_array,samples,org_sample))

        mae_temp = []
        mse_temp = []
        for s in samples:
            mae_temp.append(np.abs(org_sample - s))
            mse_temp.append(np.square(org_sample - s))
        mae.append(np.mean(mae_temp))
        mse.append(np.mean(mse_temp))

    print(f'Energy scores | Median: {np.nanmedian(energy_scores):.3f}   Mean: {np.nanmean(energy_scores) :.3f} ')
    print(f'MSE | Median: {np.nanmedian(mse):.3f}   Mean: {np.nanmean(mse) :.3f} ')
    print(f'MAE | Median: {np.nanmedian(mae):.3f}   Mean: {np.nanmean(mae) :.3f} ')

0


11414it [00:15, 757.94it/s]


Energy scores | Median: 0.772   Mean: 1.185 
MSE | Median: 0.046   Mean: 0.077 
MAE | Median: 0.115   Mean: 0.145 
1


11414it [00:18, 627.14it/s]


Energy scores | Median: 0.754   Mean: 1.064 
MSE | Median: 0.042   Mean: 0.069 
MAE | Median: 0.109   Mean: 0.131 
2


11414it [00:20, 565.64it/s]


Energy scores | Median: 0.749   Mean: 1.024 
MSE | Median: 0.040   Mean: 0.067 
MAE | Median: 0.106   Mean: 0.126 
3


11414it [00:25, 454.23it/s]


Energy scores | Median: 0.749   Mean: 1.008 
MSE | Median: 0.038   Mean: 0.066 
MAE | Median: 0.105   Mean: 0.125 
4


11414it [00:27, 420.06it/s]


Energy scores | Median: 0.748   Mean: 0.997 
MSE | Median: 0.038   Mean: 0.065 
MAE | Median: 0.104   Mean: 0.125 
5


11414it [00:29, 384.59it/s]


Energy scores | Median: 0.747   Mean: 0.991 
MSE | Median: 0.037   Mean: 0.065 
MAE | Median: 0.103   Mean: 0.124 
6


11414it [00:32, 348.70it/s]

Energy scores | Median: 0.746   Mean: 0.987 
MSE | Median: 0.037   Mean: 0.064 
MAE | Median: 0.103   Mean: 0.124 



