In [1]:
!ls

01_load_data.ipynb	       compressed64_50k_seismic.npy
02_try_wity_pywavelets.ipynb   CompressionDWT_v2.ipynb
03_haar_dwt.ipynb	       CompressionDWT_v3.ipynb
04_example_pywavelets.ipynb    humidity.csv
05_db_dwt.ipynb		       reconstructed_128to256_50k_seismic.npy
1d.ipynb		       reconstructed_16to256_50k_seismic.npy
compressed128_50k.npy	       reconstructed_32to256_50k_seismic.npy
compressed128_50k_seismic.npy  reconstructed_64to256_50k_seismic.npy
compressed16_50k.npy	       seismic-query_size100_len256_znorm.bin
compressed16_50k_seismic.npy   seismic_size50k_len256_znorm.bin
compressed32_50k.npy	       synthetic-query_size100_len256_znorm.bin
compressed32_50k_seismic.npy   synthetic_size50k_len256_znorm.bin
compressed64_50k.npy


In [2]:
import struct
import pywt # python library for wavelet transformation
import numpy as np
import matplotlib.pyplot as plt
#plt.rcParams['figure.figsize'] = [15, 7]

In [3]:
# load a data set with 100 signals
filename = 'synthetic_size50k_len256_znorm.bin'
#time_series = np.fromfile(filename, dtype=np.float32).reshape(-1,256)
with open(filename, 'rb') as in_file:
    time_series = np.array(struct.unpack('f' * 50000 * 256, in_file.read())).reshape(-1,256)

In [4]:
# compression of data set 'x' to half size, e.g. 256 -> 128
def compress(x):
    compressed_x, cD = pywt.dwt(x, 'db1')
    return compressed_x
# reconstruction of data set 'y' to twice size, e.g. 64 -> 128
def reconstruct(y):
    reconstructed_y = pywt.idwt(y, None, 'db1')
    return reconstructed_y

In [5]:
# compression of data set 'x' to [x.original_size / 2**i] 
def com_iterative(x, i):
    for k in range(i):
        compressed_x = compress(x)
        x = compressed_x
    return x
# reconstruction of data set 'y' to [y.original_size * 2**i]
def rec_iterative(y, i):
    tmp = y
    for k in range(i):
        reconstructed_y = reconstruct(tmp)
        tmp = reconstructed_y
    return reconstructed_y

In [6]:
# Create interface functions to encode/decode easily
def encoder128(x):
    return com_iterative(x, int(np.power(x.shape[0]/128, 1/2)))
def encoder64(x):
    return com_iterative(x, int(np.power(x.shape[0]/64, 1/2)))
def encoder32(x):
    return com_iterative(x, int(np.power(x.shape[0]/32, 1/2)))
def decoder256(y):
    return rec_iterative(y, int(np.power(256/y.shape[0], 1/2)))

In [7]:
# let x be the first signal of 256 float values
x = time_series[0,]
# encode(compress) to 128 values
resized = encoder128(x)
# reconstruct a 256 value signal from the resized signal
y = decoder256(resized)

In [8]:
# size of x(original signal), resized(compressed signal), y(reconstructed signal)
print(x.shape, resized.shape, y.shape)

(256,) (128,) (256,)


In [9]:
# calculate euclidean distance between original signal and reconstructed signal
dist = np.linalg.norm(x-y)
print(dist)

1.2925096653235377


In [10]:
'''
'''

'\n'

In [11]:
# Calculate average error between orignial signal and reconstructed signal

In [12]:
# initialize 3 average error count
error_avg_128 = 0.0
error_avg_64 = 0.0
error_avg_32 = 0.0
data_size = 50000

In [13]:
# calculate sum of error among all signals in data set
for i in range(data_size):
    x = time_series[i,]
    
    resized = encoder128(x)
    y = decoder256(resized)
    error_avg_128 = error_avg_128 + float(np.linalg.norm(x-y))
    
    resized = encoder64(x)
    y = decoder256(resized)
    error_avg_64 = error_avg_64 + float(np.linalg.norm(x-y))
    
    resized = encoder32(x)
    y = decoder256(resized)
    error_avg_32 = error_avg_32 + float(np.linalg.norm(x-y))    

In [14]:
print(error_avg_128, error_avg_64, error_avg_32)

76273.23288734269 120327.73912184678 120327.73912184678


In [15]:
# calculate average error
error_avg_128 = error_avg_128 / data_size
error_avg_64 = error_avg_64 / data_size
error_avg_32 = error_avg_32 / data_size

print(error_avg_128, error_avg_64, error_avg_32)

1.5254646577468538 2.4065547824369355 2.4065547824369355


In [16]:
time_series.shape

(50000, 256)