In [None]:
import gymnasium as gym
import numpy as np
from random import sample
import pandas as pd
from utilities import split_in_blocks, float_to_bits, int_to_bits, read_timestamps
from constants import parameters_dict
import math

class CompressionEnv(gym.Env):
    """Gymnasium environment that scores compression-parameter choices per block.

    An episode loads one randomly chosen CSV time series, splits its timestamp
    column and its metric column into blocks (via ``split_in_blocks``) and
    walks first through all timestamp blocks, then through all metric blocks.

    Action (``Box`` of 9 floats, rounded to ints in ``step``):
        * ``[0]``, ``[1]`` -- indices into ``paramters[1]`` / ``paramters[2]``
          selecting the two candidate transforms;
        * ``[2]``, ``[3]``, ``[4]`` -- indices into ``paramters[4]`` /
          ``paramters[5]`` / ``paramters[6]`` selecting the three candidate
          ``(compressor, param)`` pairs;
        * ``[5]``, ``[6]``, ``[7]`` -- byte-shift arguments for ``offset``;
        * ``[8]`` -- argument for ``bitmask``.

    Observation (``Box`` of ``34 * 8`` values in ``[0, 255]``): the two most
    recent values followed by the current block padded to 32 values, each
    value expanded into its 8 bytes by ``bytes``.

    Note: the attribute name ``paramters`` (sic) is kept as-is because code
    outside this class may read it.
    """

    BLOCK_SIZE = 32        # values per (padded) block in an observation
    HISTORY_SIZE = 2       # previous values prepended to every observation
    BYTES_PER_VALUE = 8    # bytes() emits 8 bytes for each 64-bit value
    PAD_VALUE = -1e14      # filler used to pad short blocks up to BLOCK_SIZE
    PARAM_BITS = 21        # length of write_params(): 6 * 3 bits + 3 * 1 bit

    def __init__(self):
        obs_len = (self.BLOCK_SIZE + self.HISTORY_SIZE) * self.BYTES_PER_VALUE
        self.action_space = gym.spaces.Box(
            low=np.zeros(9, dtype=np.float64),
            high=np.array([5, 5, 4, 4, 4, 7, 1, 1, 1], dtype=np.float64),
            shape=(9,),
            dtype=np.float64,
        )
        self.observation_space = gym.spaces.Box(
            low=np.zeros(obs_len, dtype=np.float32),
            high=np.full(obs_len, 255, dtype=np.float32),
            shape=(obs_len,),
            dtype=np.float32,
        )
        self.idx = 0
        # Duplicated file names are intentional input data: they make those
        # series proportionally more likely to be drawn.
        self.timeseries = ['monthly-beer-production.csv', 'monthly-housing.csv', 'Twitter_volume_AMZN.csv', 'nyc_taxi.csv', 'network.csv', 'monthly-housing.csv', 'cpu_utilization.csv', 'art-price.csv', 'Electric_Production.csv', 'Gold.csv', 'Electric_Production.csv', 'daily-temperatures.csv', 'oil.csv', 'transactions.csv']
        self.timestamps = []
        self.metrics = []
        self.crt_vec = 'timestamps'
        self.paramters = parameters_dict
        self.last_2_time = []
        self.last_2_metric = []
        self.compressed_block = ''
        self.get_ts()

    def get_ts(self):
        """Load a randomly chosen series and prepare its blocks.

        Sets ``timestamps`` / ``metrics`` to lists of blocks, seeds the
        two-value histories from the first two rows, and points ``state`` at
        the first timestamp block. NaNs in either column are replaced by 0.
        """
        # Draw from the environment's own generator instead of the
        # module-level ``random.sample``: this honours ``reset(seed=...)`` and
        # keeps working even if another notebook cell rebinds the global name
        # ``sample``.
        pick = int(self.np_random.integers(len(self.timeseries)))
        df = pd.read_csv(self.timeseries[pick])

        self.timestamps = read_timestamps(df)

        self.metrics = (df.iloc[:, 1]).astype(np.float64).values
        self.metrics[np.isnan(self.metrics)] = 0
        self.timestamps[np.isnan(self.timestamps)] = 0
        self.last_2_time = self.timestamps[:2]
        self.last_2_metric = self.metrics[:2]
        self.timestamps = split_in_blocks(self.timestamps[2:])
        self.metrics = split_in_blocks(self.metrics[2:])

        self.state = self.timestamps[0]

    def bytes(self, vec):
        """Expand every value of ``vec`` into its 8 bytes.

        Values are converted with ``int_to_bits`` while the environment is in
        the timestamp phase and with ``float_to_bits`` otherwise; the first 64
        bits of each result are cut into 8-bit groups.

        Returns:
            ``np.float32`` array holding ``8 * len(vec)`` byte values.
        """
        to_bits = int_to_bits if self.crt_vec == 'timestamps' else float_to_bits
        res = []
        for x in vec:
            y = to_bits(x)
            for start in range(0, 64, 8):
                res.append(int(y[start:start + 8], 2))
        return np.asarray(res).astype(np.float32)

    def _observation(self):
        """Build the observation for the current block and phase.

        Uses the history that matches ``crt_vec`` and pads the block to
        ``BLOCK_SIZE`` values so the result always matches
        ``observation_space``.
        """
        history = self.last_2_time if self.crt_vec == 'timestamps' else self.last_2_metric
        padded = np.pad(
            self.state,
            (0, self.BLOCK_SIZE - len(self.state)),
            'constant',
            constant_values=(self.PAD_VALUE, ),
        )
        return self.bytes(np.concatenate([history, padded]))

    def step(self, action):
        """Compress the current block with ``action`` and move to the next one.

        Args:
            action: array-like with (at least) 9 entries; rounded to ints.

        Returns:
            ``(observation, reward, terminated, truncated, info)`` where
            ``info['block']`` is the compressed bit string of the block just
            processed and ``info['params']`` the bit string of the parameters.
            ``terminated`` becomes True once the last metric block has been
            processed; ``truncated`` is always False.
        """
        params = np.round(action).astype(int)
        bits_count = self.compress(params)

        # The parameter header (PARAM_BITS) is charged to every block.
        reward = self.evaluate(self.state, bits_count + self.PARAM_BITS)
        info = {'block': self.compressed_block, 'params': self.write_params(action)}

        self.idx += 1
        if self.crt_vec == 'timestamps' and self.idx >= len(self.timestamps):
            # Timestamp blocks exhausted: continue with the metric blocks.
            self.idx = 0
            self.crt_vec = 'metrics'
        elif self.crt_vec == 'metrics' and self.idx >= len(self.metrics):
            # Episode finished; the last block is reported again as the
            # terminal observation (now with the matching metric history).
            return self._observation(), reward, True, False, info

        blocks = self.timestamps if self.crt_vec == 'timestamps' else self.metrics
        self.state = blocks[self.idx]
        return self._observation(), reward, False, False, info

    def write_params(self, action):
        """Serialise ``action`` as a bit string.

        The first six (rounded) entries contribute the last 3 bits of their
        ``int_to_bits`` form, the remaining three contribute their last bit.
        """
        params = np.round(action).astype(int)
        pieces = [int_to_bits(params[i])[-3:] for i in range(6)]
        pieces += [int_to_bits(params[i])[-1:] for i in range(6, 9)]
        return ''.join(pieces)

    def reset(self, seed=None, options=None):
        """Start a new episode on a freshly drawn series.

        Args:
            seed: optional seed forwarded to ``gym.Env.reset`` so the series
                choice is reproducible.
            options: accepted for Gymnasium API compatibility; unused.

        Returns:
            ``(observation, info)`` with an empty ``info`` dict.
        """
        super().reset(seed=seed)
        self.get_ts()
        self.crt_vec = 'timestamps'
        # Without this, a second episode would start with the stale block
        # index of the previous one and skip straight to the metrics phase.
        self.idx = 0
        return self._observation(), {}

    def evaluate(self, original, compressed):
        """Turn a compressed size into a reward.

        Args:
            original: the uncompressed block (each value counts as 64 bits).
            compressed: number of bits used after compression.

        Returns:
            ``-tan(ratio) / 1.2 + 0.6`` with ``ratio = compressed / original bits``.
        """
        val = (compressed / (len(original) * 64))
        return -math.tan(val) / 1.2 + 0.6

    def compress(self, params):
        """Compress the current block and return the number of bits used.

        For every value, each selected transform is applied to the two
        previous values and the current one; each selected compressor is then
        tried on the transformed bits and the shortest result is kept. The
        value is written to ``self.compressed_block`` as a 3-bit selector
        followed by the chosen payload (the raw 64-bit form if nothing beat it).

        Args:
            params: integer array as produced in ``step``.

        Returns:
            Sum over all values of ``payload bits + 3``.
        """
        self.compressed_block = ''
        transTypes = [self.paramters[1][params[0]], self.paramters[2][params[1]]]
        compressTypes = [self.paramters[4][params[2]], self.paramters[5][params[3]], self.paramters[6][params[4]]]
        # (transform index, compressor index) -> 3-bit selector;
        # (-1, -1) = stored raw, (3, 3) = transform produced zero.
        encoding = {
            (0, 0): '010',
            (0, 1): '011',
            (0, 2): '100',
            (1, 0): '101',
            (1, 1): '110',
            (1, 2): '111',
            (-1, -1): '001',
            (3, 3): '000'
        }
        offByteShift1 = params[5]
        offByteShift2 = params[6]
        offByteShift3 = params[7]
        offBitmask = params[8]

        is_time = self.crt_vec == 'timestamps'
        to_bits = int_to_bits if is_time else float_to_bits
        total_bits = 0

        for nr in self.state:
            opt = 64
            opt_compressed = to_bits(nr)
            idx_op1 = idx_op2 = -1
            last_2 = self.last_2_time if is_time else self.last_2_metric

            # Distinct loop names: the original reused ``i`` here, which
            # clobbered the block index used for the history update below.
            for t_idx, transformer in enumerate(transTypes):
                val = transformer(last_2[0], last_2[1], nr)

                if val == 0:
                    # Zero residual: '000' selector, empty payload, costed at
                    # 3 bits (the selector's 3 bits are added separately).
                    opt = 3
                    opt_compressed = ''
                    idx_op1 = idx_op2 = 3
                    continue

                val = to_bits(val)

                for c_idx, (compresser, param) in enumerate(compressTypes):
                    name = compresser.__name__
                    if name == 'offset' and param == 1:
                        ct_bits, compressed = compresser(val, offByteShift1, 1)
                    elif name == 'offset' and param == 2:
                        ct_bits, compressed = compresser(val, offByteShift1 - offByteShift2, 2)
                    elif name == 'offset' and param == 3:
                        ct_bits, compressed = compresser(val, offByteShift1 - offByteShift2 - offByteShift3, 3)
                    elif name == 'bitmask':
                        ct_bits, compressed = compresser(val, offBitmask)
                    else:
                        ct_bits, compressed = compresser(val)

                    if ct_bits < opt:
                        opt = ct_bits
                        opt_compressed = compressed
                        idx_op1 = t_idx
                        idx_op2 = c_idx

            # Slide the history window forward by the value just processed.
            if is_time:
                self.last_2_time = [last_2[1], nr]
            else:
                self.last_2_metric = [last_2[1], nr]
            total_bits += opt + 3
            self.compressed_block += encoding[(idx_op1, idx_op2)] + opt_compressed
        return total_bits

    def render(self):
        """No-op; present only to satisfy the ``gym.Env`` interface."""
        pass

    def close(self):
        """No-op; the environment holds no resources that need releasing."""
        pass

In [None]:
# Random-policy baseline: run one full episode with uniformly sampled actions
# and report the mean reward, the best single-step reward and the step count.
env = CompressionEnv()
env.reset()  # Gymnasium contract: reset before the first step

total_reward = 0.0       # avoids shadowing the builtin ``sum``
ct = 0
maxi = float('-inf')     # rewards can be negative, so 0 is not a safe floor
done = False
while not done:
    action = env.action_space.sample()
    state, reward, done, truncated, info = env.step(action)
    #print(reward, action, state)
    maxi = max(maxi, reward)
    total_reward += reward
    ct += 1
print(total_reward / ct, maxi, ct)

In [None]:
# Sanity-check the custom environment with Stable-Baselines3's env checker
# (presumably validates spaces and the reset/step return shapes -- see SB3 docs).
from stable_baselines3.common.env_checker import check_env
env = CompressionEnv()
# Reset once up front; the returned observation is not used afterwards.
obs, _ = env.reset()
check_env(env)

In [None]:
import warnings

# Silence NumPy's VisibleDeprecationWarning.
# The class lives in ``np.exceptions`` since NumPy 1.25 and the top-level alias
# ``np.VisibleDeprecationWarning`` was removed in NumPy 2.0, so look it up in
# the new location first and fall back to the old one for older releases.
try:
    visible_deprecation_warning = np.exceptions.VisibleDeprecationWarning
except AttributeError:  # NumPy < 1.25: no ``np.exceptions`` module
    visible_deprecation_warning = np.VisibleDeprecationWarning
warnings.filterwarnings("ignore", category=visible_deprecation_warning)

In [None]:
# Train a TD3 agent from Stable-Baselines3 on the compression environment.
# NOTE(review): the policy classes imported from ``sac.policies`` and
# ``OrnsteinUhlenbeckActionNoise`` are not used in this cell; TD3 is given the
# policy by its string name 'MlpPolicy' below.
from stable_baselines3 import TD3
from stable_baselines3.sac.policies import MlpPolicy, MultiInputPolicy
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise

# Wrap the single environment in a vectorised env. The lambda reads ``env``
# when it is called; this relies on DummyVecEnv invoking it before the name
# is rebound to the wrapper (presumably in its constructor -- confirm).
env = CompressionEnv()
env = DummyVecEnv([lambda: env])

# Zero-mean Gaussian exploration noise with sigma 0.1 on every action dimension.
n_actions = env.action_space.shape[-1]
action_noise = NormalActionNoise(mean=np.zeros(n_actions), sigma=0.1 * np.ones(n_actions))

# Initial training run of 50000 timesteps with a progress bar.
# NOTE(review): SB3's learn() presumably returns the model itself, which would
# make ``model3`` an alias of ``model`` rather than a separate model -- confirm.
model = TD3('MlpPolicy', env, action_noise=action_noise, verbose=1)
model3 = model.learn(total_timesteps=50000, progress_bar=True)


In [None]:
# Continue training the same ``model`` for another 700000 timesteps, logging
# every 20 episodes.
# NOTE(review): learn() is called without ``reset_num_timesteps=False``, so the
# timestep counter presumably restarts from zero for this run -- confirm this is
# intended when resuming training.
model3 = model.learn(total_timesteps=700000, progress_bar=True, log_interval=20)


In [None]:
#save the model
# Persist the trained agent under the name "TD3_CompressionEnv"
# (path is relative to the notebook's working directory).
model.save("TD3_CompressionEnv")

In [None]:
# Fresh, untrained TD3 agent built with the same env and action noise as ``model``.
# Requires ``env`` and ``action_noise`` from the training cell to be defined.
model2 = TD3('MlpPolicy', env, action_noise=action_noise, verbose=1)

In [None]:
# Check whether a CUDA GPU is visible to the training backend.
# Stable-Baselines3 runs on PyTorch, so the device has to be queried through
# torch; TensorFlow's device list says nothing about where TD3 will train.
import torch
torch.cuda.is_available()

In [None]:
# Evaluate the trained agent on one episode: report the overall compression
# ratio (original bits / compressed bits) and the mean per-step reward.
env = CompressionEnv()
obs, _ = env.reset()

state = obs
total_reward = 0.0      # avoids shadowing the builtin ``sum``
ct = 0
compressed_bits = 0     # compressed payload + parameter header, over all blocks
original_bits = 0       # 64 bits per value actually contained in each block
done = False
while not done:
    # env.step() advances env.state, so record the block size beforehand.
    # Using the real length (instead of a fixed 32) keeps a short final block
    # from inflating the ratio.
    block_len = len(env.state)
    action, _states = model.predict(state, deterministic=True)
    state, reward, done, truncated, info = env.step(action)
    # Every step is counted, including the first one (previously skipped).
    compressed_bits += len(info['block']) + len(info['params'])
    original_bits += block_len * 64
    #print(reward)
    total_reward += reward
    ct += 1
print(original_bits / compressed_bits)
print(total_reward / ct)

In [None]:
# Exhaustive baseline: for every metric value try all listed transforms and
# compressors and report the average of the best size found.
from transform_primitives import xor, delta_xor, delta, delta_of_delta, rev_delta, rev_delta_of_delta
from compression_primitives import bitmask, offset, trailing_zero
env = CompressionEnv()
#env.get_ts()
print(env.metrics[0])
# Work on a copy: the original aliased env.last_2_metric and then mutated the
# environment's history in place while sliding the window.
prev = np.array(env.last_2_metric)
print(prev)
transformers = [delta, delta_of_delta, rev_delta, rev_delta_of_delta]
avg = 0
ct = 0
# The loop variable is called ``block`` rather than ``sample``: rebinding the
# notebook-global ``sample`` would replace ``random.sample``, which
# CompressionEnv.get_ts() relies on, and break every later CompressionEnv().
for block in env.metrics:
    for value in block:
        ct += 1
        mini = 64
        for t in transformers:
            val = t(prev[0], prev[1], value)
            # ``num_to_bits`` is not defined by any import in this notebook;
            # ``float_to_bits`` is what CompressionEnv uses for metric values.
            # NOTE(review): confirm this is the intended helper.
            val = int(float_to_bits(val), 2)
            # NOTE(review): CompressionEnv calls these compressors with bit
            # strings and unpacks a (bits, payload) tuple, and calls ``offset``
            # with three arguments -- verify these calls against the current
            # compression_primitives signatures.
            mini = min(mini, bitmask(val, 0), offset(val, 0), trailing_zero(val), offset(val, 1), offset(val, 2), bitmask(val, 1))
        avg += mini
        prev[0] = prev[1]
        prev[1] = value
print(avg / ct)
#daily-temperature 61.26 avg
#gold 61.82

In [None]:
# Apply one hand-picked action to a fresh environment and show its reward.
# The action has exactly 9 entries to match CompressionEnv.action_space
# (shape (9,)); the previous 10th entry was never read by the environment.
action = np.array([3, 0, 1, 3, 0, 1, 0, 0, 0], dtype=np.float64)
env2 = CompressionEnv()
print(env2.state)
state, reward, done, truncated, info = env2.step(action)
print(reward)

In [None]:
# Inspect how the 'DATE' column of Gold.csv looks once parsed (expected format
# YYYY/MM/DD) and cast to 64-bit integers.
# NOTE(review): the int64 view of a pandas datetime column is presumably
# nanoseconds since the Unix epoch -- confirm against read_timestamps().
df = pd.read_csv('Gold.csv')
x = pd.to_datetime(df['DATE'], format='%Y/%m/%d')
print(x.astype(np.int64))

In [None]:
# Quick look at the bit-string helpers: one integer and one float example.
# (Both helpers are already imported at the top of the notebook.)
from utilities import int_to_bits, float_to_bits
print(int_to_bits(57), float_to_bits(2.257))