In [1]:
! pip install matplotlib
!pip install scipy

You should consider upgrading via the '/usr/local/bin/python3.6 -m pip install --upgrade pip' command.
You should consider upgrading via the '/usr/local/bin/python3.6 -m pip install --upgrade pip' command.


In [2]:
import os, warnings
# Quieten the notebook output before the heavy imports run.
# TF_CPP_MIN_LOG_LEVEL accepts '0'..'3' (presumably read by TensorFlow inside
# the metrics modules -- confirm); '3' is the quietest setting.
os.environ.update({'TF_CPP_MIN_LOG_LEVEL': '3'})
# Blanket-ignore every Python warning for the rest of the session.
warnings.filterwarnings(action='ignore')

In [3]:
import numpy as np, pandas as pd
import matplotlib.pyplot as plt
import scipy
import sys 
import time

# from metrics.discriminative_metrics import discriminative_score_metrics
# from metrics.predictive_metrics import predictive_score_metrics

from metrics.discriminative_metrics2 import discriminative_score_metrics
from metrics.predictive_metrics2 import predictive_score_metrics
from metrics.visualization_metrics import visualization

In [4]:
# Input locations, relative to the notebook's working directory.
# Both keep their trailing slash because file names are appended to them by
# plain string concatenation further down.
orig_data_dir, gen_data_dir = (
    "../data/processed_orig_data/",  # <dataset>_subsampled_train_perc_<size>.npz
    "../data/generated_data/",       # vae_conv_I_gen_samples_<dataset>_perc_<size>.npz
)

# Scaler

In [5]:
class MinMaxScaler():
    """Min-max normalizer.

    Statistics are taken with ``np.min`` / ``np.max`` along axis 0 of the
    array given to ``fit`` and are then broadcast against whatever array is
    passed to ``transform`` / ``inverse_transform``.

    Attributes (available after ``fit``):
    - mini: minimum of the fitted data along axis 0
    - range: maximum minus minimum of the fitted data along axis 0
    """
    def fit_transform(self, data):
        """Fit the scaler on ``data`` and return the scaled copy of ``data``.

        Args:
        - data: original data

        Returns:
        - scaled_data: normalized data
        """
        return self.fit(data).transform(data)


    def fit(self, data):
        """Record the axis-0 minimum and range of ``data``.

        Returns:
        - self, so calls can be chained
        """
        self.mini = np.min(data, 0)
        self.range = np.max(data, 0) - self.mini
        return self


    def transform(self, data):
        """Scale ``data`` with the fitted statistics.

        Returns:
        - scaled_data: ``(data - mini) / (range + 1e-7)``
        """
        # The 1e-7 keeps the division finite when the fitted range is zero.
        return (data - self.mini) / (self.range + 1e-7)


    def inverse_transform(self, data):
        """Map scaled values back to the original scale.

        A new array is returned and ``data`` itself is left untouched.
        (The previous in-place ``*=`` / ``+=`` overwrote the caller's array
        and failed with a casting error on integer arrays.)

        Returns:
        - ``data * range + mini``
        """
        # NOTE: the 1e-7 stabiliser used in transform() is not re-applied
        # here, so transform -> inverse_transform is only approximately
        # the identity.
        return data * self.range + self.mini

# Main Calculations

In [6]:
def confidence_interval(data, confidence=0.95):
    """Mean and half-width of a two-sided Student-t confidence interval.

    Args:
    - data: sequence of numeric scores
    - confidence: coverage level of the interval (default 0.95)

    Returns:
    - m: mean of ``data``
    - h: half-width of the interval, i.e. the interval is [m - h, m + h];
         NaN when ``data`` holds fewer than two values, because the standard
         error is undefined in that case
    """
    # Import the submodule explicitly: a bare `import scipy` does not
    # guarantee that `scipy.stats` is loaded on older SciPy releases.
    from scipy import stats

    a = 1.0 * np.array(data)
    n = len(a)
    nan = float('nan')
    m = np.mean(a) if n else nan
    if n < 2:
        # sem() and t.ppf(df=0) would only yield NaN (plus runtime warnings).
        return m, nan
    se = stats.sem(a)
    h = se * stats.t.ppf((1 + confidence) / 2., n - 1)
    return m, h

In [None]:
# Evaluate generated samples against the original data for every selected
# (dataset, training size) pair: PCA / t-SNE visualisation, discriminative
# score and predictive score.
start = time.time()

# How many times each metric is computed per (dataset, training size); the
# scores are averaged and a confidence interval is reported. A confidence
# interval needs at least two scores, so with a single iteration it is NaN.
metric_iteration = 1

# full selection of data to run
training_sizes = [2, 5, 10, 20, 100]
datasets = ['sine', 'stocks', 'air', 'energy']


# custom selection (overrides the full selection above; remove to run everything)
training_sizes = [ 100 ]
datasets = ['stocks']

for dataset in datasets:

    for training_size in training_sizes:

        print('-'*90)
        print('-'*90)
        print(f"Data: {dataset}; Training Size: {training_size}")

        # original data
        fname = f'{orig_data_dir + dataset}_subsampled_train_perc_{training_size}.npz'
        # The context manager closes the .npz archive once the array is read.
        with np.load(fname) as loaded:
            ori_data = loaded['data']
        print('orig data shape: ', ori_data.shape)

        # generated data
        sample_file_name = gen_data_dir + f'vae_conv_I_gen_samples_{dataset}_perc_{training_size}.npz'
        with np.load(sample_file_name) as loaded:
            gen_data = loaded['data']

        # print('orig means: ', ori_data.mean(axis=(0, 2)))
        # print('gen means: ', gen_data.mean(axis=(0, 2)))

        # Fit the scaler on the original data only and apply the same
        # statistics to the generated data so both share one scale.
        scaler_orig = MinMaxScaler()
        scaled_ori_data = scaler_orig.fit_transform(ori_data)
        scaled_gen_data = scaler_orig.transform(gen_data)

        # ---------------------------------------------------------------------------
        print("-"*90)
        print('Visualizations:')
        # Compare against the first len(generated) original samples.
        n_gen = scaled_gen_data.shape[0]
        visualization(scaled_ori_data[0:n_gen], scaled_gen_data, 'pca')
        visualization(scaled_ori_data[0:n_gen], scaled_gen_data, 'tsne')

        # ---------------------------------------------------------------------------
        print("-"*90)
        print('Discrimination Score :')
        discriminative_score = list()
        for tt in range(metric_iteration):
            temp_disc = discriminative_score_metrics(scaled_ori_data, scaled_gen_data,  epochs = 1000)
            discriminative_score.append(temp_disc)
            print("----------  disc iter: ", tt, 'score: ', temp_disc, '----------')

        # Summary header is printed again after the per-iteration log lines.
        print("-"*90)
        print('Discrimination Score :')
        print('Discriminative score: ' + str(np.round(np.mean(discriminative_score), 4)))
        print("Discriminative score CI: ", confidence_interval(discriminative_score)[1])

        # ---------------------------------------------------------------------------
        print("-"*90)
        print('Predictive Score :')
        predictive_score = list()
        for tt in range(metric_iteration):
            temp_pred = predictive_score_metrics(scaled_ori_data, scaled_gen_data, epochs = 500)
            predictive_score.append(temp_pred)
            print("----------  pred iter: ", tt, 'score: ', temp_pred, '----------')
        print('Predictive score: ' + str(np.round(np.mean(predictive_score), 4)))
        print("Predictive score CI: ", confidence_interval(predictive_score)[1])

        print("\n")
        # ---------------------------------------------------------------------------

end = time.time()
print(f"Total run time: {np.round((end - start)/60.0, 2)} minutes")

------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------
Data: stocks; Training Size: 100
orig data shape:  (3661, 24, 6)
------------------------------------------------------------------------------------------
Predictive Score :
Avg. train loss for epoch 0: 0.119 
