# Metric Presentation and Visualization
## Necessary packages and function imports

- DDPM-TS: Interpretable Diffusion for Time Series Generation
- Metrics: 
    - discriminative_metrics
    - predictive_metrics
    - visualization

In [7]:
# Environment setup for the evaluation notebook: quiet TensorFlow logging,
# make the project-local `Utils` package importable, and configure GPUs.
import os
# Must be set before `tensorflow` is imported below; '3' is the quietest
# setting of TensorFlow's native (C++) log level.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import sys
# '__file__' is a string literal here (not the module attribute), so
# os.path.dirname() returns '' and the entry appended is simply '../',
# resolved against the current working directory.
# NOTE(review): presumably this is what makes the `Utils` imports below work;
# it assumes the notebook is run from its own directory.
sys.path.append(os.path.join(os.path.dirname('__file__'), '../'))

import warnings
# Silences every Python warning for the rest of the session.
warnings.filterwarnings("ignore")

import numpy as np
import tensorflow as tf
# Ask TensorFlow to enable memory growth on each visible GPU.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        # Best-effort: report the failure and continue rather than abort.
        print(e)

# Project-local helpers; they rely on the sys.path entry added above.
from Utils.metric_utils import display_scores
from Utils.discriminative_metric import discriminative_score_metrics
from Utils.predictive_metric import predictive_score_metrics

## Data Loading

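Load the original dataset and the generated (synthetic) samples from disk. No further preprocessing is applied in this notebook; both arrays are used as stored.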

In [8]:
# Number of times each metric below is recomputed.
iterations = 5
# Dataset identifier and window length; both are interpolated into the
# ground-truth file name below.
dataset_name = 'energy'
seq_length = 24
# Alternative ground-truth source, kept for reference:
# ori_data = np.load('../toy_exp/samples/energy_ground_truth_24_train.npy')
# Original (real) training windows.
# NOTE(review): the directory '../energy_results' is hard-coded even though
# the file name uses `dataset_name`; changing `dataset_name` alone will not
# switch the results directory.
ori_data = np.load(f'../energy_results/samples/{dataset_name}_norm_truth_{seq_length}_train.npy')
# Generated samples to be evaluated against `ori_data`.
# NOTE(review): this path is fully hard-coded to the energy dataset; the
# '0_to1' suffix presumably indicates values scaled to [0, 1] - confirm.
fake_data = np.load('../energy_results/ddpm_fake_energy_0_to1.npy')

## Evaluate the generated data

### 1. Discriminative score

This metric evaluates how well a post-hoc RNN classifier can distinguish original data from synthetic data. The output is |classification accuracy − 0.5|.

- `iterations`: the number of times the metric computation is repeated.

In [9]:
# Discriminative score: run the post-hoc classifier evaluation `iterations`
# times and summarize the per-run scores.
discriminative_score = []

# Use at most as many synthetic samples as there are real ones. The slice does
# not change between runs, so it is taken once outside the loop.
fake_matched = fake_data[:ori_data.shape[0]]

for i in range(iterations):
    # Each call returns three values, unpacked here as the score and the
    # accuracies named `fake_acc` and `real_acc`.
    temp_disc, fake_acc, real_acc = discriminative_score_metrics(ori_data, fake_matched)
    discriminative_score.append(temp_disc)
    print(f'Iter {i}: ', temp_disc, ',', fake_acc, ',', real_acc, '\n')

# Label the summary with the configured dataset instead of a hard-coded name,
# so the printed header stays correct when `dataset_name` is changed.
print(f'{dataset_name}:')
display_scores(discriminative_score)
print()

training: 100%|██████████| 2000/2000 [00:35<00:00, 55.56it/s]


Iter 0:  0.12807506974384986 , 0.6469693127060614 , 0.6091808267816383 



training: 100%|██████████| 2000/2000 [00:35<00:00, 55.78it/s]


Iter 1:  0.14316510271366978 , 0.6614253106771494 , 0.6249048947501902 



training: 100%|██████████| 2000/2000 [00:36<00:00, 55.41it/s]


Iter 2:  0.12046664975906674 , 0.7859497844281005 , 0.454983515090033 



training: 100%|██████████| 2000/2000 [00:36<00:00, 54.20it/s]


Iter 3:  0.13695155972609685 , 0.6325133147349734 , 0.6413898047172204 



training: 100%|██████████| 2000/2000 [00:36<00:00, 54.96it/s]


Iter 4:  0.12566573674866854 , 0.6697945726604109 , 0.5815369008369262 

energy:
Final Score:  0.13086482373827035 ± 0.011299832660122229



## Evaluate the generated data

### 2. Predictive score

This metric evaluates prediction performance in the train-on-synthetic, test-on-real (TSTR) setting. More specifically, a post-hoc RNN is trained to predict one step ahead, and performance is reported as mean absolute error (MAE).

The model learns to predict the last feature dimension one step ahead.

In [10]:
# Predictive score: run the post-hoc predictor evaluation `iterations` times
# and summarize the per-run scores.
predictive_score = []

# Use at most as many synthetic samples as there are real ones. The slice does
# not change between runs, so it is taken once outside the loop.
fake_matched = fake_data[:ori_data.shape[0]]

for i in range(iterations):
    temp_pred = predictive_score_metrics(ori_data, fake_matched)
    predictive_score.append(temp_pred)
    # NOTE: `i` counts repetitions of the metric, not training epochs; the
    # printed label is kept unchanged so it matches previously saved output.
    print(i, ' epoch: ', temp_pred, '\n')

# Label the summary with the configured dataset instead of a hard-coded name,
# so the printed header stays correct when `dataset_name` is changed.
print(f'{dataset_name}:')
display_scores(predictive_score)
print()

training: 100%|██████████| 5000/5000 [00:59<00:00, 84.21it/s]


0  epoch:  0.25052372534130823 



training: 100%|██████████| 5000/5000 [00:59<00:00, 84.33it/s]


1  epoch:  0.2509181311757774 



training: 100%|██████████| 5000/5000 [00:59<00:00, 84.60it/s]


2  epoch:  0.2508194930258369 



training: 100%|██████████| 5000/5000 [00:59<00:00, 84.52it/s]


3  epoch:  0.2505354706207486 



training: 100%|██████████| 5000/5000 [00:59<00:00, 84.54it/s]


4  epoch:  0.2502706216536169 

energy:
Final Score:  0.25061348836345754 ± 0.0003207646826894412

