In [1]:
import os
import datetime
import numpy as np
import matplotlib.pyplot as plt
from scipy.linalg import sqrtm
from dtaidistance.dtw_ndim import distance as multi_dtw_distance
from src.ds_ps import discriminative_score_metrics, predictive_score_metrics
from src.ts2vec import initialize_ts2vec
from src.feature_based_measures import calculate_mdd, calculate_acd, calculate_sd, calculate_kd
from src.visualization import visualize_tsne, visualize_distribution
from src.utils import show_with_start_divider, show_with_end_divider, determine_device, write_json_data
import pandas as pd

2025-01-16 21:35:55.175307: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-01-16 21:35:55.249804: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1737034555.278371 2295664 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1737034555.286729 2295664 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-01-16 21:35:55.357746: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

In [2]:
def sliding_window_view(data, window_size, step=1):
    if data.ndim != 2:
        raise ValueError("Input array must be 2D")
    L, C = data.shape  # Length and Channels
    if L < window_size:
        raise ValueError("Window size must be less than or equal to the length of the array")

    # Calculate the number of windows B
    B = L // window_size
    # B = L - window_size + 1

    # Shape of the output array
    new_shape = (B, window_size, C)

    # Calculate strides
    original_strides = data.strides
    new_strides = (window_size * original_strides[0],) + original_strides  # (stride for L, stride for W, stride for C)
    # new_strides = (original_strides[0],) + original_strides  # (stride for L, stride for W, stride for C)

    # Create the sliding window view
    strided_array = np.lib.stride_tricks.as_strided(data, shape=new_shape, strides=new_strides)
    # strided_array = np.transpose(strided_array, axes=(0, 2, 1)) #(b c l)
    return strided_array

In [41]:
def sliding_window_view_tsgbench(data, window_size, step=1):
    if data.ndim != 2:
        raise ValueError("Input array must be 2D")
    L, C = data.shape  # Length and Channels
    if L < window_size:
        raise ValueError("Window size must be less than or equal to the length of the array")

    # Calculate the number of windows B
    B = L - window_size + 1
    
    # Shape of the output array
    new_shape = (B, window_size, C)
    
    # Calculate strides
    original_strides = data.strides
    new_strides = (original_strides[0],) + original_strides  # (stride for L, stride for W, stride for C)

    # Create the sliding window view
    strided_array = np.lib.stride_tricks.as_strided(data, shape=new_shape, strides=new_strides)
    return strided_array

In [3]:
def normalize(x):
    x_norm = (x-np.min(x))/(np.max(x)-np.min(x))
    return x_norm

In [4]:
def calculate_fid(act1, act2):
    # calculate mean and covariance statistics
    mu1, sigma1 = act1.mean(axis=0), np.cov(act1, rowvar=False)
    mu2, sigma2 = act2.mean(axis=0), np.cov(act2, rowvar=False)
    # calculate sum squared difference between means
    ssdiff = np.sum((mu1 - mu2)**2.0)
    # calculate sqrt of product between cov
    covmean = sqrtm(sigma1.dot(sigma2))
    # check and correct imaginary numbers from sqrt
    if np.iscomplexobj(covmean):
        covmean = covmean.real
    # calculate score
    fid = ssdiff + np.trace(sigma1 + sigma2 - 2.0 * covmean)
    return fid

In [5]:
def calculate_ed(ori_data,gen_data):
    n_samples = ori_data.shape[0]
    n_series = ori_data.shape[2]
    distance_eu = []
    for i in range(n_samples):
        total_distance_eu = 0
        for j in range(n_series):
            distance = np.linalg.norm(ori_data[i, :, j] - gen_data[i, :, j])
            total_distance_eu += distance
        distance_eu.append(total_distance_eu / n_series)

    distance_eu = np.array(distance_eu)
    average_distance_eu = distance_eu.mean()
    return average_distance_eu

In [6]:
def calculate_dtw(ori_data,comp_data):
    distance_dtw = []
    n_samples = ori_data.shape[0]
    for i in range(n_samples):
        distance = multi_dtw_distance(ori_data[i].astype(np.double), comp_data[i].astype(np.double), use_c=True)
        distance_dtw.append(distance)

    distance_dtw = np.array(distance_dtw)
    average_distance_dtw = distance_dtw.mean()
    return average_distance_dtw

In [7]:
# df_gen = np.load("../../Experiments/Cond_TimeVQVAE/exp_saved_models/Exp_orignial/Walmart.npy")
# # df_gen = df_gen.flatten()

In [57]:
df_gen = pd.read_csv("../../Experiments/Cond_TimeVQVAE/synthetic_data/synthetic-Walmart.csv")
df_gen = df_gen.to_numpy()
print(df_gen.shape)

(49764, 1)


In [56]:
# df_real = pd.read_csv("../../Experiments/Cond_TimeVQVAE/datasets/CustomDataset/Walmart_test.csv")
# df_real = df_real['Weekly_Sales']
# df_real.to_csv("../../Experiments/Cond_TimeVQVAE/datasets/CustomDataset/Walmart_test_weekly_sales.csv", index=False)

In [60]:
df_real = pd.read_csv("../../Experiments/Cond_TimeVQVAE/datasets/CustomDataset/Walmart_test_weekly_sales.csv")
df_real = df_real.to_numpy()
print(df_real.shape)
if len(df_real.shape) != 2:
    df_real = df_real.reshape((df_real.shape[0],1))
print(df_real.shape)

(49764, 1)
(49764, 1)


In [46]:
df_real_tsgbench = sliding_window_view_tsgbench(df_real, 143)
print(df_real_tsgbench.shape)

(49628, 143, 1)


In [47]:
df_real = sliding_window_view(df_real, 143)

In [48]:
print(df_real.shape)
print(df_gen.shape)

(348, 143, 1)
(348, 143, 1)


In [20]:
ed = calculate_ed(normalize(df_real), normalize(df_gen))
print(ed)

0.8382124527340145


In [21]:
dtw = calculate_dtw(normalize(df_real), normalize(df_gen))
print(dtw)

0.7595295411922902


In [22]:
fid = calculate_fid(df_real, df_gen)

ValueError: m has more than 2 dimensions

In [20]:
df_real.shape

(49770,)