In [1]:
! pip install matplotlib
!pip install scipy

You should consider upgrading via the '/usr/local/bin/python3.7 -m pip install --upgrade pip' command.[0m
You should consider upgrading via the '/usr/local/bin/python3.7 -m pip install --upgrade pip' command.[0m


In [3]:
import os, warnings
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # or any {'0', '1', '2'}
warnings.filterwarnings('ignore') 

In [10]:
import numpy as np, pandas as pd
import matplotlib.pyplot as plt
import scipy
import sys 

from metrics.discriminative_metrics import discriminative_score_metrics
from metrics.predictive_metrics import predictive_score_metrics
from metrics.visualization_metrics import visualization

In [11]:
orig_data_dir = './data/orig/'
gen_data_dir = './data/generated/'

In [12]:
dataset = 'energy'      # sine, stocks, energy, air

# Scaler

In [13]:
class MinMaxScaler():
    """Min Max normalizer.
    Args:
    - data: original data

    Returns:
    - norm_data: normalized data
    """
    def fit(self, data):    
        self.mini = np.min(data, 0)
        numerator = data - self.mini
        self.range = np.max(data, 0) - self.mini
        norm_data = numerator / (self.range + 1e-7)
        return norm_data
    
    def inverse_transform(self, data):
        data *= self.range
        data += self.mini
        return data

In [14]:

# Custom scaler for 3d data
class MinMaxScaler_Feat_Dim():
    '''Scales history and forecast parts of time-series based on history data'''
    def __init__(self, scaling_len, input_dim, upper_bound = 3., lower_bound = -3.):         
        self.scaling_len = scaling_len
        self.min_vals_per_d = None      
        self.max_vals_per_d = None  
        self.input_dim = input_dim
        self.upper_bound = upper_bound
        self.lower_bound = lower_bound
        

    def fit(self, X, y=None): 

        if self.scaling_len < 1: 
            msg = f''' Error scaling series. 
            scaling_len needs to be at least 2. Given length is {self.scaling_len}.  '''
            raise Exception(msg)

        X_f = X[ :,  : self.scaling_len , : ]
        self.min_vals_per_d = np.expand_dims(np.expand_dims(X_f.min(axis=0).min(axis=0), axis=0), axis=0)
        self.max_vals_per_d = np.expand_dims(np.expand_dims(X_f.max(axis=0).max(axis=0), axis=0), axis=0)

        self.range_per_d = self.max_vals_per_d - self.min_vals_per_d
        self.range_per_d = np.where(self.range_per_d == 0, 1e-5, self.range_per_d)

        # print(self.min_vals_per_d.shape); print(self.max_vals_per_d.shape)
              
        return self
    
    def transform(self, X, y=None): 
        assert X.shape[-1] == self.min_vals_per_d.shape[-1], "Error: Dimension of array to scale doesn't match fitted array."
         
        X = X - self.min_vals_per_d
        X = np.divide(X, self.range_per_d )        
        X = np.where( X < self.upper_bound, X, self.upper_bound)
        X = np.where( X > self.lower_bound, X, self.lower_bound)
        return X
    
    def fit_transform(self, X, y=None):
        X = X.copy()
        self.fit(X)
        return self.transform(X)
        

    def inverse_transform(self, X):
        X = X.copy()
        X = X * self.range_per_d 
        X = X + self.min_vals_per_d
        # print(X.shape)
        return X

# Main Calculations

In [15]:
def confidence_interval(data, confidence=0.95):
    a = 1.0 * np.array(data)
    n = len(a)
    m, se = np.mean(a), scipy.stats.sem(a)
    h = se * scipy.stats.t.ppf((1 + confidence) / 2., n-1)
    return m, h

In [None]:

metric_iteration = 1

training_sizes = [2, 5, 10, 20, 100]
datasets = ['sine', 'stocks', 'energy']

training_sizes = [2, 5, 10]
datasets = ['sine']

for dataset in datasets:

    for training_size in training_sizes:

        print('-'*90); print('-'*90)
        print(f"Data: {dataset}; Training Size: {training_size}")

    #     original data
        fname = f'{orig_data_dir + dataset}_subsampled_train_perc_{training_size}.npz'
        loaded = np.load(fname)
        ori_data = loaded['data']

        scaler_orig = MinMaxScaler( )  
        ori_data = scaler_orig.fit(ori_data)
        print('orig data shape: ', ori_data.shape)

    #     generated data
        #loaded = np.load("new_Time_vae_files/vae_conv_I_gen_samples_sine_perc_"+str(training_size)+".npz")        
        #generated_data = MinMaxScaler(loaded["data"])


        sample_file_name = gen_data_dir + f'vae_conv_I_gen_samples_{dataset}_perc_{training_size}.npz'
        loaded = np.load(sample_file_name)
        generated_data = loaded['data']

        # Abu scaler 
    #     gen_scaler = MinMaxScaler_Feat_Dim( scaling_len = T, input_dim = D, upper_bound = 3.0, lower_bound = -3.0 )  
    #     generated_data = gen_scaler.fit_transform(generated_data)

        gen_scaler = MinMaxScaler()  
        generated_data = gen_scaler.fit(generated_data)    
    #     print('generated_data shape:', generated_data.shape)

    #     print('means: ', ori_data.mean(axis=(0, 2)))
    #     print('means: ', generated_data.mean(axis=(0, 2)))

    #     print('min: ', ori_data.min(axis=(0, 2)))
    #     print('min: ', generated_data.min(axis=(0, 2)))

    #     print('max: ', ori_data.max(axis=(0, 2)))
    #     print('max: ', generated_data.max(axis=(0, 2)))
    #     sys.exit()
    #     ---------------------------------------------------------------------------
    #     print("-"*90); print('Visualizations:')
    #     visualization(ori_data[0:generated_data.shape[0]], generated_data, 'pca')
    #     visualization(ori_data[0:generated_data.shape[0]], generated_data, 'tsne')

        #     ---------------------------------------------------------------------------
    #     print("-"*90); print('Discrimination Score :')
    #     discriminative_score = list()
    #     for iter in range(metric_iteration):
    #         temp_disc = discriminative_score_metrics(ori_data, generated_data)
    #         discriminative_score.append(temp_disc)  
    #         print("---------- done disc iter: ", iter, 'score: ', temp_disc, '----------')

    #     #     ---------------------------------------------------------------------------
    #     print("-"*90); print('Discrimination Score :')
    #     print('Discriminative score: ' + str(np.round(np.mean(discriminative_score), 4)))
    #     print("Discriminative score CI: ", confidence_interval(discriminative_score)[1])

        #     ---------------------------------------------------------------------------             
        print("-"*90); print('Predictive Score :')
        predictive_score = list()
        for tt in range(metric_iteration):
            temp_pred = predictive_score_metrics(ori_data, ori_data, iterations = 4000)
            predictive_score.append(temp_pred)   
        print('Predictive score: ' + str(np.round(np.mean(predictive_score), 4)))
        print("Predictive score CI: ", confidence_interval(predictive_score)[1])

        print("\n")
        #     ---------------------------------------------------------------------------
    #     visualization(ori_data[0:generated_data.shape[0]], generated_data, 'pca', "sine_pca_" + str(int(training_size)))
    #     visualization(ori_data[0:generated_data.shape[0]], generated_data, 'tsne', "sine_tsne_" +str(int(training_size)))
        
print("all done")

------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------
Data: sine; Training Size: 2
orig data shape:  (200, 24, 5)
------------------------------------------------------------------------------------------
Predictive Score :
itt: 500 ploss: 0.31419238
itt: 1000 ploss: 0.30140588
itt: 1500 ploss: 0.31075373
itt: 2000 ploss: 0.30696774
itt: 2500 ploss: 0.31377903
itt: 3000 ploss: 0.31716377
itt: 3500 ploss: 0.30621698
Final p_loss 0.3080899 predictive_score: 0.30808991724221463
Predictive score: 0.3081
Predictive score CI:  nan


------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------
Data: sine; Training Size: 5
orig data shape:  (500, 24, 5)
------------------------------------------------------------------------------------------
Predic