In [3]:
import gymnasium as gym
import numpy as np
from random import sample
import pandas as pd
from utilities import split_in_blocks, float_to_bits, int_to_bits, read_timestamps, bits_to_int, unsigned_to_signed, bits_to_int_unsigned, bits_to_float
from transform_primitives import delta_of_delta
from compression_primitives import bitmask, trailing_zero
from constants import parameters_dict
import math
import struct
import ammmo_lazy as aml
from time import time

class CompressionEnv(gym.Env):
    def __init__(self):
        self.action_space = gym.spaces.Box(low=np.array([0, 0, 0, 0, 0, 0, 0, 0]), high=np.array([3, 5, 5, 5, 7, 1, 1, 5]), shape=(8,),  dtype=np.float64)
        self.observation_space = gym.spaces.Box(low=np.array([0 for i in range(34 * 8)] ), high=np.array([1 for i in range(34 * 8)]), shape=(34 * 8,),  dtype=np.float32)
        self.idx = 0
        #self.timeseries = []#, 'grok_asg_anomaly.csv', 'occupancy_t4013.csv','Sunspots.csv', 'monthly-beer-production.csv', 'monthly-housing.csv', 'cpu_utilization.csv', 'art-price.csv', 'Gold.csv', 'Electric_Production.csv', 'daily-temperatures.csv', 'oil.csv', 'rogue_agent_key_updown.csv']
        self.timeseries = ['art-price.csv', 'monthly-housing.csv']
        self.timestamps = []
        self.timestamps_ratio = 0
        self.metrics = []
        self.paramters = parameters_dict
        self.last_2_metric = []
        self.compressed_block = ''
        self.compare_ratio = 0
        self.compress_time = 0
        self.decompress_time = 0
        self.get_ts()
        

    def get_ts(self):
        my_ts = sample(self.timeseries, 1)
        #print(my_ts[0])
        df = pd.read_csv(f'timeseries/{my_ts[0]}')
        self.timestamps = read_timestamps(df)
        self.compress_time -= time()
        self.timestamps_ratio, compression = self.compress_timestamps(self.timestamps)
        _ = self.decompress_timestamps(compression)
        self.compress_time += time()
        self.idx = 0
        
        self.metrics = (df.iloc[:, 1]).astype(np.float64).values
        self.metrics[np.isnan(self.metrics)==True] = 0
        self.timestamps[np.isnan(self.timestamps)==True] = 0
        self.last_2_metric = self.metrics[:2]
        self.metrics = split_in_blocks(self.metrics[2:])

        self.state = self.metrics[0]

    def compress_timestamps(self, timestamps):
        sum = 192
        header = int_to_bits(len(timestamps) - 2)
        data = int_to_bits(timestamps[0]) + int_to_bits(timestamps[1])
        last_2 = [timestamps[0], timestamps[1]]

        for i in range(2, len(timestamps)):
            val = delta_of_delta(last_2[0], last_2[1], timestamps[i])
            if val == 0:
                header += '0'
                sum += 1
            else:
                header += '1'
                sum += 1
                if val > 0:
                    val -= 1
                if -3 <= val <= 4:
                    data += '1'
                    sum += 4
                    data += int_to_bits(val)[-3:]
                elif -31 <= val <= 32:
                    data += '01'
                    sum += 8
                    data += int_to_bits(val)[-6:]
                else:
                    ct_bitmask, compressed_bitmask = bitmask(int_to_bits(val), 0)
                    ct_trailing_zeros, compressed_trailing = trailing_zero(int_to_bits(val))

                    if ct_bitmask < ct_trailing_zeros:
                        data += '001'
                        sum += ct_bitmask + 3
                        data += compressed_bitmask
                    else:
                        data += '000'
                        sum += ct_trailing_zeros + 3
                        data += compressed_trailing
            last_2 = [last_2[1], timestamps[i]]
        
        return (len(timestamps) * 64 / sum), header + data

    def decompress_timestamps(self, compressed):
        size = int(compressed[:64], 2)
        header = compressed[64:64+size]
        data = compressed[64+size:]

        orig_timestamps = []

        orig_timestamps.append(int(data[:64], 2))
        orig_timestamps.append(int(data[64:128], 2))

        last_2 = [orig_timestamps[0], orig_timestamps[1]]

        data_idx = 128

        for x in header:
            if x == '0':
                orig_timestamps.append(2 * last_2[1] - last_2[0])
            else:
                if data[data_idx] == '1':
                    val = bits_to_int(data[data_idx+1:data_idx+4])
                    if val >= 0:
                        val += 1
                    orig_timestamps.append(val + 2 * last_2[1] - last_2[0])
                    data_idx += 4
                else:
                    if data[data_idx+1] == '1':
                        val = bits_to_int(data[data_idx+2:data_idx+8])
                        if val >= 0:
                            val += 1
                        orig_timestamps.append(val + 2 * last_2[1] - last_2[0])
                        data_idx += 8
                    else:
                        val = 0
                        if data[data_idx+2] == '1':
                            last_bit = data_idx + 11
                            mask = data[data_idx+3:data_idx+11]
                            for bit in mask:
                                if bit == '0':
                                    val <<= 8
                                else:
                                    crt_byte = data[last_bit:last_bit+8]
                                    val = (val << 8) + int(crt_byte, 2)
                                    last_bit += 8
                            data_idx = last_bit + 8
                        else:
                            ct_zero_bytes = int(data[data_idx+3:data_idx+6], 2)
                            ct_non_zero_bytes = int(data[data_idx+6:data_idx+9], 2) + 1

                            for i in range(ct_non_zero_bytes):
                                crt_byte = data[data_idx+9+i*8:data_idx+9+(i+1)*8]
                                val = (val << 8) + int(crt_byte, 2)
                            val = (val << 8 * ct_zero_bytes)
                            data_idx += 9 + ct_non_zero_bytes * 8
                        val = unsigned_to_signed(val)
                        if val >= 0:
                            val += 1
                        orig_timestamps.append(val + 2 * last_2[1] - last_2[0])
                            
            last_2 = [last_2[1], orig_timestamps[-1]]
        return orig_timestamps


    def bytes(self, vec):
        res = []
        for x in vec:
            y = float_to_bits(x)
            for start in range(0, 64, 8):
                byte = y[start:start+8]
                byte = int(byte, 2)
                res.append(byte / 255)
        return np.asarray(res).astype(np.float32)

    def decompress_metrics(self, compressed_block):
        params = compressed_block[:19]
        header = compressed_block[19:51]
        majorMode = bits_to_int_unsigned(params[:2])
        transTypes = [self.paramters[4][bits_to_int_unsigned(params[2:5])], self.paramters[4][bits_to_int_unsigned(params[5:8])], self.paramters[4][bits_to_int_unsigned(params[8:11])]]
        encodings = {
            '00': 0,
            '01': 1,
            '10': 2,
            '11': 3
        }
        possibilites = self.paramters[3][majorMode]
        offByteShift1 = bits_to_int_unsigned(params[11:14])
        offByteShift2 = bits_to_int_unsigned(params[14:15])
        offByteShift3 = bits_to_int_unsigned(params[15:16])
        offBitmask = bits_to_int_unsigned(params[16:19])
        proper_block = compressed_block[51:]

        orig_block = []
        prev = self.last_2_metric
        step = 0

        for x in header:
            if x == '0':
                orig_block.append(prev[1])
                prev = [prev[1], orig_block[-1]]
            else:
                encoding = proper_block[step:step+2]
                idx = encodings[encoding]
                step += 2
                transIdx, compresser, mode = possibilites[idx]

                if compresser.__name__ == 'bitmask':
                    mask = proper_block[step: step + 8 - offBitmask]
                    val = ''
                    step = step + 8 - offBitmask
                    for j in mask:
                        if j == '0':
                            val += '00000000'
                        else:
                            val += proper_block[step:step + 8]
                            step += 8
                    val += (offBitmask * 8) * '0'
                    val = bits_to_float(val)
                    orig_block.append(transTypes[transIdx](prev[0], prev[1], val))
                elif compresser.__name__ == 'offset':
                    offset = offByteShift1
                    match mode:
                        case 1:
                            offset = offByteShift1
                        case 2:
                            offset = offByteShift1 - offByteShift2
                        case 3:
                            offset = offByteShift1 - offByteShift2 - offByteShift3
                    leading_zeros = 64 - offset * 8 - mode * 7
                    val = '0' * leading_zeros + proper_block[step:step+mode * 7] + '0' * (offset * 8)
                    value = bits_to_float(val)
                    value = transTypes[transIdx](prev[0], prev[1], value)
                    orig_block.append(value)
                    step = step + mode * 7
                else:
                    if proper_block[step:step+4] == '1111':
                        val = transTypes[transIdx](prev[0], prev[1], 0.0)
                        step = step + 4
                        orig_block.append(val)
                    else:
                        ct_zero_bytes = int(proper_block[step:step+3], 2)
                        ct_non_zero_bytes = int(proper_block[step+3:step+6], 2) + 1

                        val = 8 * (8 - ct_zero_bytes - ct_non_zero_bytes) * '0'

                        for j in range(ct_non_zero_bytes):
                            crt_byte = proper_block[step+6+j*8:step+6+(j+1)*8]
                            val += crt_byte
                        val += ct_zero_bytes * 8 * '0'
                        step += 6 + ct_non_zero_bytes * 8
                        val = bits_to_float(val)
                        val = transTypes[transIdx](prev[0], prev[1], val)
                        
                        orig_block.append(val)
                
                prev = [prev[1], orig_block[-1]]

        return orig_block


    def step(self, action):
        params = np.round(action).astype(int)
        prev_2 = self.last_2_metric
        self.compare_ratio = len(aml.compress_metrics(np.concatenate([prev_2, self.state]))[128:])
        self.compress_time -= time()
        bits_count = self.compress(params)
        self.compress_time += time()

        self.decompress_time -= time()
        self.last_2_metric = prev_2
        orig_block = self.decompress_metrics(self.compressed_block)
        self.decompress_time += time()

        # for i in range(32):
        #     if orig_block[i] != self.state[i]:
        #         print('Error')
        #         print(orig_block[i], self.state[i])
        # self.last_2_metric = [self.state[-2], self.state[-1]]
        # ###########

        reward = self.evaluate(bits_count)
        self.idx += 1
        if self.idx >= len(self.metrics):
            state = np.pad(self.state, (0, 32 - len(self.state)), 'constant', constant_values=(-1e14, ))
            return self.bytes(np.concatenate([self.last_2_metric, state])), reward, True, False, {'block': self.compressed_block, 'params': self.write_params(action)}

        self.state = self.metrics[self.idx]

        state = np.pad(self.state, (0, 32 - len(self.state)), 'constant', constant_values=(-1e14, ))
        return self.bytes(np.concatenate([self.last_2_metric, state])), reward, False, False, {'block': self.compressed_block, 'params': self.write_params(action)}

    def write_params(self, action):
        params = np.round(action).astype(int)
        params_bits = ''
        params_bits += int_to_bits(params[0])[-2:]
        for i in range(4):
            params_bits += int_to_bits(params[i+1])[-3:]
        params_bits += int_to_bits(params[5])[-1:]
        params_bits += int_to_bits(params[6])[-1:]
        params_bits += int_to_bits(params[7])[-3:]
        return params_bits

    def reset(self, seed=None):
        super().reset(seed=seed)
        sz = len(self.timestamps)
        print(self.compress_time / sz * 10000, self.decompress_time / sz * 10000)
        self.get_ts()
        return self.bytes(np.concatenate([self.last_2_metric, self.state])), {}

    def evaluate(self, compressed):
        val = compressed / self.compare_ratio
        return -math.tanh(val - 1.2) / 0.5

    def compress(self, params):
        self.compressed_block = ''
        header = self.write_params(params)
        majorMode = params[0]
        transTypes = [self.paramters[1][params[1]], self.paramters[1][params[2]], self.paramters[1][params[3]]]
        offByteShift1 = params[4]
        offByteShift2 = params[5]
        offByteShift3 = params[6]
        offBitmask = params[7] 
        #endian = params[10]
        posibilities = self.paramters[3][majorMode]
        encodings = ['00', '01', '10', '11']

        for i in range(len(self.state)):
            nr = self.state[i]
            opt = 100
            opt_compressed = ''
            idx_op = -1

            last_2 = self.last_2_metric
            val = 0
            if nr == last_2[1]:
                header += '0'
                self.last_2_metric = [last_2[1], nr]
                continue
                
            header += '1'

            for (j, (transformer, compresser, param)) in  enumerate(posibilities):
                val = transTypes[transformer](last_2[0], last_2[1], nr)
                val = float_to_bits(val)
                if compresser.__name__ == 'offset' and param == 1:
                    ct_bits, compressed = compresser(val, offByteShift1, 1)
                elif compresser.__name__ == 'offset' and param == 2:
                    ct_bits, compressed = compresser(val, offByteShift1 - offByteShift2, 2)
                elif compresser.__name__ == 'offset' and param == 3:
                    ct_bits, compressed = compresser(val, offByteShift1 - offByteShift2 - offByteShift3, 3)
                elif compresser.__name__ == 'bitmask':
                    ct_bits, compressed = compresser(val, offBitmask)
                elif compresser.__name__ == 'trailing_zero':
                    ct_bits, compressed = compresser(val)
                    
                if ct_bits < opt:
                    opt = ct_bits
                    opt_compressed = compressed
                    idx_op = j
                        
            self.last_2_metric = [last_2[1], self.state[i]]
            self.compressed_block += encodings[idx_op] + opt_compressed
        self.compressed_block = header + self.compressed_block
        return len(self.compressed_block)


    def render(self):
        #useless
        pass

    def close(self):
        #useless
        pass

In [5]:
env = CompressionEnv()
action = env.action_space.sample()
print(action)
state, reward, done, truncated, info = env.step(action)
sum = reward
ct = 1
maxi = 0
while not done:
    action = env.action_space.sample()
    state, reward, done, truncarted, info = env.step(action)
    #print(reward, action, state)
    # if env.idx >= 1000:
    #     done = True
#     maxi = max(maxi, reward)
#     sum += reward
#     ct += 1
print(sum / ct, maxi, ct)

[0.21107405 0.6743525  4.75969004 4.57592862 1.76091775 0.53941143
 0.48176116 0.25349283]
0.12710120445484477 0 1


In [6]:
from stable_baselines3.common.env_checker import check_env
env = CompressionEnv()
obs, _ = env.reset()
check_env(env)

2024-06-08 17:32:05.719637: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-06-08 17:32:05.719787: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-06-08 17:32:05.810493: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-06-08 17:32:06.006134: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [7]:
import warnings
warnings.filterwarnings("ignore", category=np.VisibleDeprecationWarning) 

In [4]:
#Use SAC from stable baselines

from stable_baselines3 import TD3, DDPG, SAC, A2C, PPO
from stable_baselines3.sac.policies import MlpPolicy, MultiInputPolicy
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise

env = CompressionEnv()
env = DummyVecEnv([lambda: env])

n_actions = env.action_space.shape[-1]
action_noise = NormalActionNoise(mean=np.zeros(n_actions), sigma=0.1 * np.ones(n_actions))

#Add progress bar for learning
# I have this eerror TypeError: TD3Policy.forward() takes from 2 to 3 positional arguments but 4 were given

model = SAC('MlpPolicy', env, action_noise=action_noise, verbose=1, gamma=0.90, policy_kwargs=dict(net_arch=dict(pi=[1024, 512], qf=[1024, 512])))
#model = PPO('MlpPolicy', env, verbose=1, gamma=0.01, use_sde=True, net_arch=[1024, 512])
#model3 = model.learn(total_timesteps=50000, progress_bar=True, log_interval=20)


Using cuda device


In [None]:
model3 = model.learn(total_timesteps=1000000, progress_bar=True, log_interval=80)


In [5]:
model4 = SAC.load('SAC_3m.zip', env=env)

In [None]:
model4.learn(total_timesteps=1000000, progress_bar=True, log_interval=80)

In [None]:
#save the model
model4.save("SAC_3m")

In [None]:
import tensorflow as tf
tf.config.list_physical_devices('GPU')

In [20]:
env = CompressionEnv()
obs, _ = env.reset()

print(env.timestamps_ratio)

action, _states = model4.predict(obs)
state, reward, done, truncated, info = env.step(action)
sum = reward
ct = 1
ratio = 128 + len(info['block'])
original = 32 * 64
while not done:
    action, _state = model4.predict(state)
    state, reward, done, truncarted, info = env.step(action)
    ratio += len(info['block'])
    original += 32 * 64
    #print(reward)
    sum += reward
    ct += 1
env.reset()
#print(original / ratio, env.timestamps_ratio, (original + len(env.timestamps) * 64) / (ratio + len(env.timestamps) * 64 / env.timestamps_ratio) , sum / ct)

0.008812972477504186 0.0
61.11984841307437
0.5725463704457359 0.14662033035641625


(array([0.2509804 , 0.2       , 0.7607843 , 0.88235295, 0.40392157,
        0.60784316, 0.40392157, 0.10980392, 0.2509804 , 0.20392157,
        0.5019608 , 0.21176471, 0.6117647 , 0.23529412, 0.4117647 ,
        0.7764706 , 0.2509804 , 0.2       , 0.9647059 , 0.18039216,
        0.13333334, 0.22745098, 0.60784316, 0.05882353, 0.2509804 ,
        0.20784314, 0.49019608, 0.50980395, 0.07450981, 0.8117647 ,
        0.5921569 , 0.38431373, 0.2509804 , 0.20392157, 0.1882353 ,
        0.05882353, 0.6901961 , 0.6313726 , 0.25882354, 0.90588236,
        0.2509804 , 0.2       , 0.9254902 , 0.31764707, 0.9647059 ,
        0.7921569 , 0.47843137, 0.63529414, 0.2509804 , 0.20784314,
        0.69803923, 0.7921569 , 0.6039216 , 0.11764706, 0.8980392 ,
        0.9764706 , 0.2509804 , 0.20392157, 0.8784314 , 0.9647059 ,
        0.30980393, 0.22352941, 0.22352941, 0.5529412 , 0.2509804 ,
        0.19607843, 0.44705883, 0.22745098, 0.13725491, 0.56078434,
        0.7607843 , 0.6431373 , 0.2509804 , 0.19

In [None]:
from transform_primitives import xor, delta_xor, delta, delta_of_delta, rev_delta, rev_delta_of_delta
from compression_primitives import bitmask, offset, trailing_zero
env = CompressionEnv()
#env.get_ts()
print(env.metrics[0])
sample = env.metrics[0]
prev = env.last_2_metric
print(prev)
transformers = [delta, delta_of_delta, rev_delta, rev_delta_of_delta]
avg = 0
ct = 0
for j in range(len(env.metrics)):
    sample = env.metrics[j]
    for i in range(len(sample)):
        ct += 1
        mini = 64
        for t in transformers:
            val = t(prev[0], prev[1], sample[i])
            val = int(num_to_bits(val), 2)
            mini = min(mini, bitmask(val, 0), offset(val, 0), trailing_zero(val), offset(val, 1), offset(val, 2), bitmask(val, 1))
        avg += mini
        prev[0] = prev[1]
        prev[1] = sample[i]
print(avg / ct)
#daily-temperature 61.26 avg
#gold 61.82

In [None]:
action = [3, 0, 1, 3, 0, 1, 0, 0, 0, 0]
env2 = CompressionEnv()
print(env2.state)
state, reward, done, truncated, info = env2.step(action)
print(reward)

In [None]:
df = pd.read_csv('Gold.csv')
x = pd.to_datetime(df['DATE'], format='%Y/%m/%d')
print(x.astype(np.int64))

In [None]:
from utilities import int_to_bits, float_to_bits
print(int_to_bits(57), float_to_bits(2.257))