Time Series Data Compression and Reconstruction Using the Discrete Wavelet Transform

In [1]:
import numpy as np
import math
import matplotlib.pyplot as plt
import pywt # python library for wavelet transformation

Load seismic and synthetic data from files

In [2]:
import struct  # not used by this cell any more; kept in case another cell relies on it

# Both files are expected to hold 50000 signals of 256 native float32 values each.
SERIES_COUNT = 50000
SERIES_LENGTH = 256


def _read_float32_matrix(filename, n_series=SERIES_COUNT, series_length=SERIES_LENGTH):
    """Read a raw binary file of native float32 values into a 2-D float64 array.

    Parameters
    ----------
    filename : str
        Path of the binary file to read.
    n_series : int
        Number of time series the file must contain.
    series_length : int
        Number of values per time series (row length of the result).

    Returns
    -------
    numpy.ndarray
        Array of dtype float64 with one time series per row.

    Raises
    ------
    ValueError
        If the file does not contain exactly n_series * series_length values.
    """
    # np.fromfile parses the buffer directly, without building millions of
    # intermediate Python float objects.
    values = np.fromfile(filename, dtype=np.dtype('f'))
    expected = n_series * series_length
    if values.size != expected:
        raise ValueError(
            "%s holds %d float32 values, expected %d" % (filename, values.size, expected)
        )
    # Widen to float64 so downstream arithmetic runs in double precision.
    return values.astype(np.float64).reshape(-1, series_length)


# load data set: 50000 seismic signals
filename_seismic = 'seismic_size50k_len256_znorm.bin'
time_series_seismic = _read_float32_matrix(filename_seismic)

# load data set: 50000 synthetic signals
filename_synthetic = 'synthetic_size50k_len256_znorm.bin'
time_series_synthetic = _read_float32_matrix(filename_synthetic)

Functions to compress and reconstruct time series

In [3]:
# One level of compression: data set 'x' shrinks to half its size, e.g. 256 -> 128
def compress(x):
    """Return the first element of the pair produced by pywt.dwt(x, 'db1').

    The second element of the pair (named cD in the original code) is dropped.
    """
    coefficients = pywt.dwt(x, 'db1')
    return coefficients[0]
# One level of reconstruction: data set 'y' grows to twice its size, e.g. 128 -> 256
def reconstruct(y):
    """Return pywt.idwt applied to 'y' with None as the second argument and wavelet 'db1'."""
    return pywt.idwt(y, None, 'db1')

In [4]:
# Apply compress() to 'x' i times in a row, giving data of size [x.size / 2**i].
# e.g. com_iterative(x, 3) on an x holding 256 float numbers
#      yields [256 / 2**3] = 32 float numbers
def com_iterative(x, i):
    """Feed 'x' through compress() 'i' times and return the final result.

    When 'i' is zero or negative no compression happens and 'x' is returned as is.
    """
    current = x
    for _ in range(i):
        current = compress(current)
    return current
# reconstruction of data set 'y' to [y.original_size * 2**i]
def rec_iterative(y, i):
    """Apply reconstruct() to 'y' 'i' times in a row and return the final result.

    Parameters
    ----------
    y : compressed data accepted by reconstruct().
    i : int
        Number of reconstruction steps.

    Returns
    -------
    The result of the last reconstruct() call, or 'y' itself when 'i' is zero
    or negative. Previously that case raised UnboundLocalError because the
    result variable was only assigned inside the loop.
    """
    reconstructed_y = y
    for _ in range(i):
        reconstructed_y = reconstruct(reconstructed_y)
    return reconstructed_y

In [5]:
# Interface functions to encode/decode easily
def encoder128(x):
    """Compress 'x' with com_iterative, using int(log2(len / 128)) halving steps."""
    ratio = x.shape[0] / 128
    levels = int(math.log(ratio, 2))
    return com_iterative(x, levels)
def encoder64(x):
    """Compress 'x' with com_iterative, using int(log2(len / 64)) halving steps."""
    ratio = x.shape[0] / 64
    levels = int(math.log(ratio, 2))
    return com_iterative(x, levels)
def encoder32(x):
    """Compress 'x' with com_iterative, using int(log2(len / 32)) halving steps."""
    ratio = x.shape[0] / 32
    levels = int(math.log(ratio, 2))
    return com_iterative(x, levels)
def encoder16(x):
    """Compress 'x' with com_iterative, using int(log2(len / 16)) halving steps."""
    ratio = x.shape[0] / 16
    levels = int(math.log(ratio, 2))
    return com_iterative(x, levels)
def decoder256(y):
    """Expand 'y' with rec_iterative, using int(log2(256 / len)) doubling steps."""
    ratio = 256 / y.shape[0]
    levels = int(math.log(ratio, 2))
    return rec_iterative(y, levels)

Data compression and reconstruction for seismic time series

In [6]:
# The variables below are used to store the compressed and reconstructed time series.
# For seismic data:
# NOTE: the initialisation is disabled by wrapping it in a triple-quoted string,
# so none of these arrays are actually created; the cell only echoes the string.
'''
resized128_seismic = np.empty((0,128),float)
resized64_seismic = np.empty((0,64),float)
resized32_seismic = np.empty((0,32),float)
resized16_seismic = np.empty((0,16),float)

reconstructed_128to256_seismic = np.empty((0,256),float)
reconstructed_64to256_seismic = np.empty((0,256),float)
reconstructed_32to256_seismic = np.empty((0,256),float)
reconstructed_16to256_seismic = np.empty((0,256),float)
'''

'\nresized128_seismic = np.empty((0,128),float)\nresized64_seismic = np.empty((0,64),float)\nresized32_seismic = np.empty((0,32),float)\nresized16_seismic = np.empty((0,16),float)\n\nreconstructed_128to256_seismic = np.empty((0,256),float)\nreconstructed_64to256_seismic = np.empty((0,256),float)\nreconstructed_32to256_seismic = np.empty((0,256),float)\nreconstructed_16to256_seismic = np.empty((0,256),float)\n'

In [7]:
# Running totals of the reconstruction error, one per compressed length (128/64/32/16)
error_128_seismic = error_64_seismic = error_32_seismic = error_16_seismic = 0.0

In [8]:
# Compress and reconstruct the seismic data.
# Each time series is compressed from 256 to 128/64/32/16 values,
# then the compressed data is "enlarged" from 128/64/32/16 back to 256 values.
# The Euclidean norm of (original - reconstruction) is summed per compressed length.

data_size = 50000

# Reset the accumulators in this cell too, so that re-running the cell on its
# own starts from zero instead of adding a second pass to the earlier totals.
error_128_seismic = 0.0
error_64_seismic = 0.0
error_32_seismic = 0.0
error_16_seismic = 0.0

for i in range(data_size):

    x = time_series_seismic[i,] # x contains 256 float numbers

    # 256 -> 128
    resized_128 = encoder128(x)
    reconstructed_128 = decoder256(resized_128)
    error_128_seismic += float(np.linalg.norm(x - reconstructed_128))

    # 256 -> 64
    resized_64 = encoder64(x)
    reconstructed_64 = decoder256(resized_64)
    error_64_seismic += float(np.linalg.norm(x - reconstructed_64))

    # 256 -> 32
    resized_32 = encoder32(x)
    reconstructed_32 = decoder256(resized_32)
    error_32_seismic += float(np.linalg.norm(x - reconstructed_32))

    # 256 -> 16
    resized_16 = encoder16(x)
    reconstructed_16 = decoder256(resized_16)
    error_16_seismic += float(np.linalg.norm(x - reconstructed_16))

    # The disabled code below (kept as a string, so it never runs) saves every
    # compressed and reconstructed series to an array and then to disk.
    # Saving the data takes much more time than you might expect.
    # Please use the saved data in the folder "compressed_50k";
    # a simple instruction for loading it is in the "ReadMe" file.
    '''
    resized128_seismic = np.vstack((resized128_seismic, resized_128))
    reconstructed_128to256_seismic = np.vstack((reconstructed_128to256_seismic, reconstructed_128))
    resized64_seismic = np.vstack((resized64_seismic, resized_64))
    reconstructed_64to256_seismic = np.vstack((reconstructed_64to256_seismic, reconstructed_64))
    resized32_seismic = np.vstack((resized32_seismic, resized_32))
    reconstructed_32to256_seismic = np.vstack((reconstructed_32to256_seismic,reconstructed_32))
    resized16_seismic = np.vstack((resized16_seismic, resized_16))
    reconstructed_16to256_seismic = np.vstack((reconstructed_16to256_seismic,reconstructed_16))

np.save("compressed128_50k_seismic.npy", resized128_seismic)
np.save("compressed64_50k_seismic.npy", resized64_seismic)
np.save("compressed32_50k_seismic.npy", resized32_seismic)
np.save("compressed16_50k_seismic.npy", resized16_seismic)

np.save("reconstructed_128to256_50k_seismic.npy", reconstructed_128to256_seismic)
np.save("reconstructed_64to256_50k_seismic.npy", reconstructed_64to256_seismic)
np.save("reconstructed_32to256_50k_seismic.npy", reconstructed_32to256_seismic)
np.save("reconstructed_16to256_50k_seismic.npy", reconstructed_16to256_seismic)
'''

In [10]:
# Totals first, then per-series averages, both in the order 128 / 64 / 32 / 16.
seismic_totals = (error_128_seismic, error_64_seismic, error_32_seismic, error_16_seismic)
seismic_averages = tuple(total / data_size for total in seismic_totals)
print("Overall reconstruction error for ", data_size, " time series: (128 / 64 / 32 / 16)")
print(*seismic_totals)
print("Average reconstruction error for ", data_size, " time series: (128 / 64 / 32 / 16)")
print(*seismic_averages)

Overall reconstruction error for  50000  time series: (128 / 64 / 32 / 16)
632932.170386034 771074.4015698319 790471.3970702328 797346.2607445099
Average reconstruction error for  50000  time series: (128 / 64 / 32 / 16)
12.658643407720678 15.421488031396638 15.809427941404655 15.946925214890197


Data compression and reconstruction for synthetic time series

In [11]:
# The variables below are used to store the compressed and reconstructed time series.
# For synthetic data:
# NOTE: the initialisation is disabled by wrapping it in a triple-quoted string,
# so none of these arrays are actually created; the cell only echoes the string.
'''
resized128_synthetic = np.empty((0,128),float)
resized64_synthetic = np.empty((0,64),float)
resized32_synthetic = np.empty((0,32),float)
resized16_synthetic = np.empty((0,16),float)

reconstructed_128to256_synthetic = np.empty((0,256),float)
reconstructed_64to256_synthetic = np.empty((0,256),float)
reconstructed_32to256_synthetic = np.empty((0,256),float)
reconstructed_16to256_synthetic = np.empty((0,256),float)
'''

'\nresized128_synthetic = np.empty((0,128),float)\nresized64_synthetic = np.empty((0,64),float)\nresized32_synthetic = np.empty((0,32),float)\nresized16_synthetic = np.empty((0,16),float)\n\nreconstructed_128to256_synthetic = np.empty((0,256),float)\nreconstructed_64to256_synthetic = np.empty((0,256),float)\nreconstructed_32to256_synthetic = np.empty((0,256),float)\nreconstructed_16to256_synthetic = np.empty((0,256),float)\n'

In [12]:
# Running totals of the reconstruction error, one per compressed length (128/64/32/16)
error_128_synthetic = error_64_synthetic = error_32_synthetic = error_16_synthetic = 0.0

In [13]:
# Compress and reconstruct the synthetic data.
# Each time series is compressed from 256 to 128/64/32/16 values,
# then the compressed data is "enlarged" from 128/64/32/16 back to 256 values.
# The Euclidean norm of (original - reconstruction) is summed per compressed length.

data_size = 50000

# Reset the accumulators in this cell too, so that re-running the cell on its
# own starts from zero instead of adding a second pass to the earlier totals.
error_128_synthetic = 0.0
error_64_synthetic = 0.0
error_32_synthetic = 0.0
error_16_synthetic = 0.0

for i in range(data_size):

    x = time_series_synthetic[i,] # x contains 256 float numbers

    # 256 -> 128
    resized_128 = encoder128(x)
    reconstructed_128 = decoder256(resized_128)
    error_128_synthetic += float(np.linalg.norm(x - reconstructed_128))

    # 256 -> 64
    resized_64 = encoder64(x)
    reconstructed_64 = decoder256(resized_64)
    error_64_synthetic += float(np.linalg.norm(x - reconstructed_64))

    # 256 -> 32
    resized_32 = encoder32(x)
    reconstructed_32 = decoder256(resized_32)
    error_32_synthetic += float(np.linalg.norm(x - reconstructed_32))

    # 256 -> 16
    resized_16 = encoder16(x)
    reconstructed_16 = decoder256(resized_16)
    error_16_synthetic += float(np.linalg.norm(x - reconstructed_16))

    # The disabled code below (kept as a string, so it never runs) saves every
    # compressed and reconstructed series to an array and then to disk.
    # Saving the data takes much more time than you might expect.
    # Please use the saved data in the folder "compressed_50k";
    # a simple instruction for loading it is in the "ReadMe" file.
    '''
    resized128_synthetic = np.vstack((resized128_synthetic, resized_128))
    reconstructed_128to256_synthetic = np.vstack((reconstructed_128to256_synthetic, reconstructed_128))
    resized64_synthetic = np.vstack((resized64_synthetic, resized_64))
    reconstructed_64to256_synthetic = np.vstack((reconstructed_64to256_synthetic, reconstructed_64))
    resized32_synthetic = np.vstack((resized32_synthetic, resized_32))
    reconstructed_32to256_synthetic = np.vstack((reconstructed_32to256_synthetic,reconstructed_32))
    resized16_synthetic = np.vstack((resized16_synthetic, resized_16))
    reconstructed_16to256_synthetic = np.vstack((reconstructed_16to256_synthetic,reconstructed_16))

np.save("compressed128_50k_synthetic.npy", resized128_synthetic)
np.save("compressed64_50k_synthetic.npy", resized64_synthetic)
np.save("compressed32_50k_synthetic.npy", resized32_synthetic)
np.save("compressed16_50k_synthetic.npy", resized16_synthetic)

np.save("reconstructed_128to256_50k_synthetic.npy", reconstructed_128to256_synthetic)
np.save("reconstructed_64to256_50k_synthetic.npy", reconstructed_64to256_synthetic)
np.save("reconstructed_32to256_50k_synthetic.npy", reconstructed_32to256_synthetic)
np.save("reconstructed_16to256_50k_synthetic.npy", reconstructed_16to256_synthetic)
'''

In [14]:
# Totals first, then per-series averages, both in the order 128 / 64 / 32 / 16.
synthetic_totals = (error_128_synthetic, error_64_synthetic, error_32_synthetic, error_16_synthetic)
synthetic_averages = tuple(total / data_size for total in synthetic_totals)
print("Overall reconstruction error for ", data_size, " time series: (128 / 64 / 32 / 16)")
print(*synthetic_totals)
print("Average reconstruction error for ", data_size, " time series: (128 / 64 / 32 / 16)")
print(*synthetic_averages)

Overall reconstruction error for  50000  time series: (128 / 64 / 32 / 16)
76273.23288734269 120327.73912184678 173555.64466821024 244335.4583699993
Average reconstruction error for  50000  time series: (128 / 64 / 32 / 16)
1.5254646577468538 2.4065547824369355 3.4711128933642046 4.886709167399986


In [15]:
# Placeholder cell: it holds only a triple-quoted string with no code in it.
'''
'''

'\n'

In [16]:
# Similarity search
# TODO: not implemented yet; to be added in a later update