## Setup and configs

 Good article: https://www.techbriefs.com/component/content/article/tb/techbriefs/information-sciences/13857?start=1


In [20]:
from math import floor, ceil
from typing import AnyStr


ASCII_TO_INT = {i.to_bytes(1, 'big'): i for i in range(256)}
INT_TO_ASCII = {i: b for b, i in ASCII_TO_INT.items()}


def compress(data: AnyStr) -> bytes:
    """Compress *data* with LZW using fixed-width 9-bit codes.

    The phrase table starts with the 256 single-byte strings (codes 0-255)
    and grows up to code 511.  As soon as the table is full it is reset to
    the initial 256 entries, so every emitted code fits in 9 bits.  The
    codes are concatenated into one bit string and packed big-endian; the
    result is left-padded with zero bits to a whole number of bytes.

    Args:
        data: Payload to compress.  A ``str`` is converted with
            ``str.encode()`` first.

    Returns:
        The packed code stream, or ``b''`` when *data* is empty.
    """
    if isinstance(data, str):
        data = data.encode()
    if not data:
        # No phrase to emit, and int('', 2) below would raise.
        return b''

    keys = ASCII_TO_INT.copy()
    n_keys = 256
    compressed = []

    # `phrase` is the longest prefix of the unread input already in the table.
    phrase = data[:1]
    for pos in range(1, len(data)):
        symbol = data[pos:pos + 1]
        extended = phrase + symbol
        if extended in keys:
            phrase = extended
            continue
        # Longest match found: emit it and learn the one-byte-longer phrase.
        compressed.append(keys[phrase])
        keys[extended] = n_keys
        n_keys += 1
        if n_keys >= 512:
            # 9-bit code space exhausted; start over with the base table.
            keys = ASCII_TO_INT.copy()
            n_keys = 256
        phrase = symbol
    compressed.append(keys[phrase])

    bits = ''.join(format(code, '09b') for code in compressed)
    return int(bits, 2).to_bytes(ceil(len(bits) / 8), 'big')


def decompress(data: AnyStr) -> bytes:
    """Decode a 9-bit LZW code stream back into the original bytes.

    The input is read as one big-endian integer; the trailing
    ``(len(data) * 8 // 9) * 9`` bits are split into 9-bit codes and any
    leading bits are ignored as padding.  The phrase table starts with the
    256 single-byte strings and is reset to that state whenever 512 entries
    have been reached.

    Args:
        data: Packed code stream.  A ``str`` is converted with
            ``str.encode()`` first.

    Returns:
        The decoded bytes, or ``b''`` when *data* is empty.

    Raises:
        IndexError: If *data* is non-empty but too short to hold one code.
    """
    if isinstance(data, str):
        data = data.encode()
    if not data:
        # An empty stream carries no codes; there is no first phrase to seed with.
        return b''

    keys = INT_TO_ASCII.copy()
    n_keys = 256

    bits = bin(int.from_bytes(data, 'big'))[2:].zfill(len(data) * 8)
    n_codes = len(bits) // 9
    bits = bits[-n_codes * 9:]  # drop the leading padding bits
    codes = [int(bits[pos:pos + 9], 2) for pos in range(0, n_codes * 9, 9)]

    previous = keys[codes[0]]
    uncompressed = [previous]
    for code in codes[1:]:
        if n_keys >= 512:
            keys = INT_TO_ASCII.copy()
            n_keys = 256
        try:
            current = keys[code]
        except KeyError:
            # Code is not in the table yet: treat it as the phrase about to
            # be added, i.e. the previous phrase plus its own first byte.
            current = previous + previous[:1]
        uncompressed.append(current)
        keys[n_keys] = previous + current[:1]
        previous = current
        n_keys += 1
    return b''.join(uncompressed)

In [22]:
import numpy as np
import pandas as pd
#import Lempel_Ziv
import scipy.fftpack as fft


def coefs_compress(coefs):
    """Pack a block of coefficients: drop the zeros, then LZW the rest.

    Layout follows
    https://ntrs.nasa.gov/archive/nasa/casi.ntrs.nasa.gov/20080009460.pdf

    Args:
        coefs: One-dimensional sequence of real coefficients.

    Returns:
        ``(compressed_data, ctrl)`` where ``compressed_data`` is the
        LZW-compressed float64 byte image of the non-zero coefficients
        followed by ``len(coefs)``, and ``ctrl`` is an int bitmask with one
        bit per coefficient (first coefficient in the most significant
        position, 1 = coefficient present, 0 = coefficient was zero).
    """
    values = np.asarray(coefs, dtype=float)
    present = values != 0

    # Build the control word MSB-first, one bit per coefficient.
    ctrl = 0
    for bit in present.tolist():
        ctrl = (ctrl << 1) | int(bit)

    # Non-zero coefficients in order, with the block length as the last value.
    payload = np.append(values[present], len(coefs))
    # Lempel_Ziv is not imported in this notebook; use the compress()
    # defined in the LZW cell instead.
    compressed_data = compress(payload.tobytes())
    return compressed_data, ctrl


def decompress_coefs(compressed_data):
    """Rebuild a coefficient block from a ``(payload, ctrl)`` pair.

    Args:
        compressed_data: Indexable pair; item 0 is the LZW-compressed byte
            image of float values (non-zero coefficients followed by the
            block length), item 1 is the int bitmask marking which
            coefficients are present.

    Returns:
        Array of ``block_size`` floats with the stored coefficients put back
        at the positions flagged in the bitmask and zeros elsewhere.
    """
    # Lempel_Ziv is not imported in this notebook; use the decompress()
    # defined in the LZW cell instead.
    decompressed = np.frombuffer(decompress(compressed_data[0]), dtype='float')
    ctrl = compressed_data[1]

    block_size = int(decompressed[-1])  # last stored value is the block length
    coefs = np.zeros(block_size)

    # The lowest ctrl bit describes the LAST coefficient, so fill the block
    # back-to-front (index 0 = last coefficient) and flip it at the end.
    src = len(decompressed) - 2  # last stored coefficient, just before the length
    for dst in range(int(ctrl).bit_length()):
        if (ctrl >> dst) & 1:
            coefs[dst] = decompressed[src]
            src -= 1

    return np.flip(coefs)


def block_compress(sample):
    """Apply the orthonormal DCT to one block of samples.

    Returns a ``(coefs, n)`` pair: the result of
    ``fft.dct(sample, norm='ortho')`` and the number of samples in the block.
    """
    dct_coefs = fft.dct(sample, norm='ortho')
    n_samples = len(sample)
    return dct_coefs, n_samples


def block_decompress(coefs):
    """Rebuild a block of samples from its coefficients via the orthonormal inverse DCT."""
    return fft.idct(coefs, norm='ortho')


if __name__ == '__main__':
    # Round-trip check: DCT a recorded block, invert it, print the L2 error.
    data = pd.read_csv('../../ecg_temp.csv', header=None)[1]
    # block_compress takes only the sample block and block_decompress only
    # the coefficients; passing a second argument raised TypeError.
    coefs, sample_size = block_compress(data)
    recovered = block_decompress(coefs)
    print(np.linalg.norm(recovered - data))




FileNotFoundError: [Errno 2] File b'../../ecg_temp.csv' does not exist: b'../../ecg_temp.csv'

In [16]:
import json
from os import getcwd
#from os import listdir,getcwd,chdir
#chdir('../')
from os.path import isfile, join
#import sys
#sys.path.append('src/DataLoader')
#sys.path.append('../src/compression_funcs')

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
from sklearn.model_selection import train_test_split

import Adaptive_chebyshev_tests as cheb_tests
import chebyshev_compression as cheb
import Company_Loader as company_data_loader
import MIT_BIH_Loader as ecg_data_loader
import REDD_Loader as power_data_loader

# NOTE(review): the value returned by getcwd() is discarded here; the call
# has no effect beyond checking that the name resolves.
getcwd()
print('hello')

ModuleNotFoundError: No module named 'chebyshev_compression'

In [15]:
%matplotlib inl2q9ine

KeyError: 'inl2q9ine'

In [None]:
# Dataset locations, resolved against the current working directory.
# Each path component is passed to join() separately so the separator matches
# the host OS; a literal backslash is only a separator on Windows.
company_path_bin = join(getcwd(), 'Data', 'TheCompany')
company_path_csv = join(getcwd(), 'Data', 'TheCompany', 'csv', 'temp_company.csv')

mit_bih_csv = join(getcwd(), 'Data', 'MIT_BIH_CSV')

gas_path_bin = join(getcwd(), 'Data', 'GAS')

REDD_path_csv = join(getcwd(), 'Data', 'REDD')

# Dataset directory used by the cells that follow.
data_path = mit_bih_csv

In [None]:
# Load the recording from the selected dataset directory and plot its first
# ten entries as a quick visual sanity check.
# NOTE(review): load_data comes from MIT_BIH_Loader, which is not shown here;
# the code below assumes its result supports slicing and has a .shape.
raw_data = ecg_data_loader.load_data(data_path)
plt.plot(raw_data[:10])
# The title reports raw_data.shape[0] as the total sample count.
plt.title('Sanity check total sample {0}'.format(raw_data.shape[0]))

In [None]:
def calc_stats(coefs, orignal, reconstrcuted):
    """Summarise how well a block survived a compress/decompress round trip.

    Args:
        coefs: Transform coefficients (after any thresholding).
        orignal: The original block of samples.
        reconstrcuted: The block rebuilt from ``coefs``.

    Returns:
        dict with the keys ``"block_size:"``, ``"num of zeros in coefs"``,
        ``"loss norm2"`` (L2 norm of the reconstruction error),
        ``"loss factor"`` (relative change in standard deviation) and
        ``"compression gain"`` (block length divided by the number of
        non-zero coefficients; ``inf`` when every coefficient is zero).
    """
    block_size = len(orignal)
    n_zeros = int(np.count_nonzero(coefs == 0))
    n_retained = block_size - n_zeros
    original_std = np.std(orignal)

    # With nothing retained the ratio is unbounded; report inf rather than
    # raising ZeroDivisionError.
    gain = float(block_size) / n_retained if n_retained else float('inf')

    return {
        "block_size:": block_size,
        "num of zeros in coefs": n_zeros,
        "loss norm2": np.linalg.norm(reconstrcuted - orignal),
        "loss factor": (np.std(reconstrcuted) - original_std) / original_std,
        "compression gain": gain,
    }

def print_lot_statistics(coefs, orignal, reconstrcuted):
    """Print the round-trip statistics for a block and plot the results.

    Draws two figures: the original block overlaid with its reconstruction,
    and the coefficient values.

    Args:
        coefs: Transform coefficients (after any thresholding).
        orignal: The original block of samples.
        reconstrcuted: The block rebuilt from ``coefs``.

    Returns:
        The statistics dict produced by ``calc_stats``.
    """
    stats = calc_stats(coefs, orignal, reconstrcuted)
    print(stats)

    # Original vs. reconstruction.
    plt.plot(orignal)
    plt.plot(reconstrcuted)
    plt.legend(('original', 'decompressed'))
    # Take the block size from the data passed in, not from a notebook-level
    # variable that may be undefined or stale.
    plt.title('Compressed vs decompressed block_size {0}'.format(len(orignal)))
    plt.show()

    # Coefficient values.
    plt.plot(coefs)
    plt.title('coeficient. count={0}'.format(len(coefs)))
    plt.show()
    return stats

## DCT - RAW

In [None]:
# Block length and start offset of the slice taken from raw_data.
i=512
offset = 500
company_cheb_transform_sample = raw_data[offset:offset+i]

# Orthonormal DCT of the block, then the inverse transform with every
# coefficient left untouched.
coefs = scipy.fftpack.dct(company_cheb_transform_sample,norm = 'ortho')
company_cheb_transform_sample_reconstructed = scipy.fftpack.idct(coefs,norm = 'ortho')

# Print the statistics and plot original vs. reconstruction.
print_lot_statistics(coefs,company_cheb_transform_sample,company_cheb_transform_sample_reconstructed)

## Chebyshev transform

In [None]:
# Sample a subset of the data: block length i starting at offset.
i=512
offset = 100
company_cheb_transform_sample = raw_data[offset:offset+i]

# Chebyshev transform and back (cheb is chebyshev_compression).
coefs,sample_size= cheb.block_compress(company_cheb_transform_sample)
company_cheb_transform_sample_reconstructed = cheb.block_decompress(coefs)

# Print the statistics and plot original vs. reconstruction.
print_lot_statistics(coefs,company_cheb_transform_sample,company_cheb_transform_sample_reconstructed)

## Chebyshev transform threshold retain

In [None]:
def evaluate_chebysehv(company_cheb_transform_sample_thres,threshold_idx=-1,print_stats=False):
    """Transform a block, optionally zero its small coefficients, and score the round trip.

    Args:
        company_cheb_transform_sample_thres: Block of samples to evaluate.
        threshold_idx: When greater than -1, the coefficient magnitudes are
            sorted ascending and the value at this index becomes the cutoff;
            only coefficients whose magnitude is strictly above the cutoff
            are kept, the rest are set to 0.  The default (-1) keeps all.
        print_stats: When true, report through ``print_lot_statistics``
            (prints and plots); otherwise through ``calc_stats``.

    Returns:
        The statistics dict returned by the selected reporting function.
    """
    block = company_cheb_transform_sample_thres
    coefs, _ = cheb.block_compress(block)

    if threshold_idx > -1:
        magnitudes = np.abs(coefs)
        cutoff = sorted(magnitudes)[threshold_idx]
        coefs = np.where(magnitudes > cutoff, coefs, 0)

    rebuilt = cheb.block_decompress(coefs)

    report = print_lot_statistics if print_stats else calc_stats
    return report(coefs, block, rebuilt)


In [None]:
# Sample a subset of the data: block length i starting at offset.
offset = 8000000
i=1200
company_cheb_transform_sample_thres = raw_data[offset:offset+i]
# threshold_idx=1100, print_stats=True: keep only the coefficients whose
# magnitude is above the one at sorted index 1100, then print and plot.
evaluate_chebysehv(company_cheb_transform_sample_thres,1100,True)

In [None]:
# Save the slice selected by offset/i to ecg_temp.csv in the working directory.
# to_csv is called with its defaults, so the index column and a header row
# are written too.
pd.DataFrame(raw_data[offset:offset+i]).to_csv('ecg_temp.csv')

## Sliding Chebyshev

In [None]:

# Run the test routine from Adaptive_chebyshev_tests on ecg_temp.csv.
# NOTE(review): Sens_Test_Automatic is not shown here; what it expects from
# the CSV layout needs to be confirmed against that module.
cheb_tests.Sens_Test_Automatic('ecg_temp.csv')

## IoT Data Compression Sensor - Agnostic Approach ##
https://ieeexplore-ieee-org.ezprimo1.idc.ac.il/stamp/stamp.jsp?arnumber=7149287

https://infoscience.epfl.ch/record/181860/files/TKDE.pdf

https://infoscience.epfl.ch/record/181860/files/TKDE.pdf

https://www-user.tu-chemnitz.de/~potts/paper/sparse_Cheb.pdf

 - https://ntrs.nasa.gov/archive/nasa/casi.ntrs.nasa.gov/20080009460.pdf
 

In [None]:
# Placeholder cell: prints a fixed string.
print('hello')