In [188]:
import gymnasium as gym
import numpy as np
from random import sample
import pandas as pd
from utilities import split_in_blocks, float_to_bits, int_to_bits
from constants import parameters_dict
import math

class CompressionEnv(gym.Env):
    def __init__(self):
        self.action_space = gym.spaces.Box(low=np.array([0, 0, 0, 0, 0, 0, 0, 0, 0]), high=np.array([5, 5, 5, 5, 5, 7, 1, 1, 1]), shape=(9,),  dtype=np.float64)
        self.observation_space = gym.spaces.Box(low=np.array([0 for i in range(34 * 8)] ), high=np.array([255 for i in range(34 * 8)]), shape=(34 * 8,),  dtype=np.float32)
        self.idx = 0
        self.timeseries = ['monthly-beer-production.csv', 'monthly-housing.csv', 'Twitter_volume_AMZN.csv', 'nyc_taxi.csv', 'network.csv', 'monthly-housing.csv', 'cpu_utilization.csv', 'art-price.csv', 'Electric_Production.csv', 'Gold.csv', 'Electric_Production.csv', 'daily-temperatures.csv', 'oil.csv', 'transactions.csv']
        self.timestamps = []
        self.metrics = []
        self.crt_vec = 'timestamps'
        self.paramters = parameters_dict
        self.last_2_time = []
        self.last_2_metric = []
        self.get_ts()
        

    def get_ts(self):
        my_ts = sample(self.timeseries, 1)
        df = pd.read_csv(my_ts[0])

        try:
            #Convert the date to timestamp
            self.timestamps = pd.to_datetime(df['DATE'], format='%Y-%m-%d').astype(np.int64).values // 1000000
        except:
            try:
                self.timestamps = pd.to_datetime(df['DATE'], format='%d-%m-%Y').astype(np.int64).values // 1000000
            except:
                try:
                    self.timestamps = pd.to_datetime(df['DATE'], format='%m-%d-%Y').astype(np.int64).values // 1000000
                except:
                    try:
                        self.timestamps = pd.to_datetime(df['DATE'], format='%Y/%d/%m').astype(np.int64).values // 1000000
                    except:
                        try:
                            self.timestamps = pd.to_datetime(df['DATE'], format='%d/%m/%Y').astype(np.int64).values // 1000000
                        except:
                            try:
                                self.timestamps = pd.to_datetime(df['DATE'], format='%m/%d/%Y').astype(np.int64).values // 1000000
                            except:
                                try:
                                    self.timestamps = pd.to_datetime(df['DATE'], format='%Y-%m').astype(np.int64).values // 1000000
                                except:
                                    self.timestamps = pd.to_datetime(df['DATE'], format='%Y-%m-%d %H:%M:%S').astype(np.int64).values // 1000000
        self.metrics = (df.iloc[:, 1]).astype(np.float64).values
        self.metrics[np.isnan(self.metrics)==True] = 0
        self.last_2_time = self.timestamps[:2]
        self.last_2_metric = self.metrics[:2]
        self.timestamps = split_in_blocks(self.timestamps[2:])
        self.metrics = split_in_blocks(self.metrics[2:])

        self.state = self.timestamps[0]

    def bytes(self, vec):
        res = []
        for x in vec:
            if self.crt_vec == 'timestamps':
                y = int_to_bits(x)
            else:
                y = float_to_bits(x)
            for start in range(0, 64, 8):
                byte = y[start:start+8]
                byte = int(byte, 2)
                res.append(byte)
        return np.asarray(res).astype(np.float32)

    def step(self, action):
        params = np.round(action).astype(int)
        bits_count = self.compress(params)

        reward = self.evaluate(self.state, bits_count)
        self.idx += 1
        if self.idx >= len(self.timestamps) and self.crt_vec == 'timestamps':
            self.idx = 0
            self.crt_vec = 'metrics'
        elif self.idx >= len(self.metrics) and self.crt_vec == 'metrics':
            state = np.pad(self.state, (0, 32 - len(self.state)), 'constant', constant_values=(-1e14, ))
            return self.bytes(np.concatenate([self.last_2_time, state])), reward, True, False, {}

        if self.crt_vec == 'timestamps':
            self.state = self.timestamps[self.idx]
        else:
            self.state = self.metrics[self.idx]

        if self.crt_vec == 'timestamps':
            if len(self.state) == 32:
                return self.bytes(np.concatenate([self.last_2_time, self.state])), reward, False, False, {}
            state = np.pad(self.state, (0, 32 - len(self.state)), 'constant', constant_values=(-1e14, ))
            return self.bytes(np.concatenate([self.last_2_time, state])), reward, False, False, {}
        else:
            if len(self.state) == 32:
                return self.bytes(np.concatenate([self.last_2_metric, self.state])), reward, False, False, {}
            state = np.pad(self.state, (0, 32 - len(self.state)), 'constant', constant_values=(-1e14, ))
            return self.bytes(np.concatenate([self.last_2_metric, state])), reward, False, False, {}
        
    def reset(self, seed=None):
        self.get_ts()
        self.crt_vec = 'timestamps'
        return self.bytes(np.concatenate([self.last_2_time, self.state])), {}

    def evaluate(self, original, compressed):
        val = (compressed / (len(original) * 64))
        return -math.tan(val) / 1.2 + 0.6

    def compress(self, params):
        transTypes = [self.paramters[1][params[0]], self.paramters[2][params[1]]]
        transCodings = ['0', '1']
        compressTypes = [self.paramters[4][params[2]], self.paramters[5][params[3]], self.paramters[6][params[4]]]
        compressCodings = ['00', '01', '11']
        offByteShift1 = params[5]
        offByteShift2 = params[6]
        offByteShift3 = params[7]
        offBitmask = params[8] 
        #endian = params[10]

        sum = 0

        for i in range(0, len(self.state) - 1, 2):

            first, second = self.state[i], self.state[i + 1]
            opt1 = 64
            opt2 = 64

            if self.crt_vec == 'timestamps':
                last_2 = self.last_2_time
            else:
                last_2 = self.last_2_metric
            val1 = val2 = 0
            for transformer in transTypes:
                if self.crt_vec == 'metrics':
                    val1 = transformer(last_2[0], last_2[1], first)
                    val2 = transformer(last_2[1], first, second)
                else:
                    val1 = transformer(last_2[0], last_2[1], first)
                    val2 = transformer(last_2[1], first, second)

                if self.crt_vec == 'timestamps':
                    val1 = int_to_bits(val1)
                    val2 = int_to_bits(val2)
                else:
                    val1 = float_to_bits(val1)
                    val2 = float_to_bits(val2)

                for (compresser, param) in  compressTypes:
                    if compresser.__name__ == 'bitmask' and param == 2:
                        if self.crt_vec == 'timestamps':
                            self.last_2_time = [self.state[i], self.state[i + 1]]
                        else:
                            self.last_2_metric = [self.state[i], self.state[i + 1]]
                        continue
                    else:
                        if compresser.__name__ == 'offset':
                            match param:
                                case 1:
                                    opt1 = min(opt1, compresser(val1, offByteShift1))
                                    opt2 = min(opt2, compresser(val2, offByteShift1))
                                case 2:
                                    if offByteShift1 - offByteShift2 >= 0:
                                        opt1 = min(opt1, compresser(val1, offByteShift1 - offByteShift2))
                                        opt2 = min(opt2, compresser(val2, offByteShift1 - offByteShift2))
                                case 3:
                                    if offByteShift1 - offByteShift2 - offByteShift3 >= 0:
                                        opt1 = min(opt1, compresser(val1, offByteShift1 - offByteShift2 - offByteShift3))
                                        opt2 = min(opt2, compresser(val2, offByteShift1 - offByteShift2 - offByteShift3))
                        elif compresser.__name__ == 'bitmask':
                            opt1 = min(opt1, compresser(val1, offBitmask))
                            opt2 = min(opt1, compresser(val2, offBitmask))
                        else:
                            opt1 = min(opt1, compresser(val1))
                            opt2 = min(opt1, compresser(val2))
            if self.crt_vec == 'timestamps':
                self.last_2_time = [self.state[i], self.state[i + 1]]
            else:
                self.last_2_metric = [self.state[i], self.state[i + 1]]
            sum += opt1 + opt2
        return sum


    def render(self):
        #useless
        pass

    def close(self):
        #useless
        pass

In [203]:
env = CompressionEnv()
action = env.action_space.sample()
state, reward, done, truncated, info = env.step(action)
sum = reward
ct = 1
maxi = 0
while not done:
    action = env.action_space.sample()
    state, reward, done, truncarted, info = env.step(action)
    #print(reward, action, state)
    maxi = max(maxi, reward)
    sum += reward
    ct += 1
print(sum / ct, maxi, ct)

-0.10581868685286928 0.5232870047489497 18


In [12]:
from stable_baselines3.common.env_checker import check_env
env = CompressionEnv()
obs, _ = env.reset()
print(obs)
check_env(env)

2024-05-24 20:11:48.918758: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-05-24 20:11:48.918810: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-05-24 20:11:48.919905: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-05-24 20:11:48.925921: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


[  0.   0.   0. 110.  55. 251.   4.   0.   0.   0.   0. 110.  61.  33.
  96.   0.   0.   0.   0. 110.  66.  71. 188.   0.   0.   0.   0. 110.
  71. 110.  24.   0.   0.   0.   0. 110.  76. 148. 116.   0.   0.   0.
   0. 110.  81. 186. 208.   0.   0.   0.   0. 110.  86. 225.  44.   0.
   0.   0.   0. 110.  92.   7. 136.   0.   0.   0.   0. 110.  97.  45.
 228.   0.   0.   0.   0. 110. 102.  84.  64.   0.   0.   0.   0. 110.
 107. 122. 156.   0.   0.   0.   0. 110. 112. 160. 248.   0.   0.   0.
   0. 117. 143. 172.  48.   0.   0.   0.   0. 117. 148. 210. 140.   0.
   0.   0.   0. 117. 153. 248. 232.   0.   0.   0.   0. 117. 159.  31.
  68.   0.   0.   0.   0. 117. 164.  69. 160.   0.   0.   0.   0. 117.
 169. 107. 252.   0.   0.   0.   0. 117. 174. 146.  88.   0.   0.   0.
   0. 117. 179. 184. 180.   0.   0.   0.   0. 117. 184. 223.  16.   0.
   0.   0.   0. 117. 190.   5. 108.   0.   0.   0.   0. 117. 195.  43.
 200.   0.   0.   0.   0. 117. 200.  82.  36.   0.   0.   0.   0. 124.
 231. 



In [None]:
import warnings
warnings.filterwarnings("ignore", category=np.VisibleDeprecationWarning) 

In [13]:
#Use SAC from stable baselines
from stable_baselines3 import TD3
from stable_baselines3.sac.policies import MlpPolicy, MultiInputPolicy
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise

env = CompressionEnv()
env = DummyVecEnv([lambda: env])

n_actions = env.action_space.shape[-1]
action_noise = NormalActionNoise(mean=np.zeros(n_actions), sigma=0.1 * np.ones(n_actions))

#Add progress bar for learning
model = TD3('MlpPolicy', env, action_noise=action_noise, verbose=1)
model3 = model.learn(total_timesteps=50000, progress_bar=True)


Using cuda device


Output()

---------------------------------
| time/              |          |
|    episodes        | 4        |
|    fps             | 131      |
|    time_elapsed    | 2        |
|    total_timesteps | 387      |
| train/             |          |
|    actor_loss      | 13.1     |
|    critic_loss     | 0.511    |
|    learning_rate   | 0.001    |
|    n_updates       | 286      |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 8        |
|    fps             | 105      |
|    time_elapsed    | 57       |
|    total_timesteps | 6054     |
| train/             |          |
|    actor_loss      | 3.15     |
|    critic_loss     | 0.541    |
|    learning_rate   | 0.001    |
|    n_updates       | 5953     |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 12       |
|    fps             | 103      |
|    time_elapsed    | 134      |
|    total_timesteps | 13922    |
| train/             |          |
|    actor_loss      | -7.34    |
|    critic_loss     | 0.113    |
|    learning_rate   | 0.001    |
|    n_updates       | 13821    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 16       |
|    fps             | 101      |
|    time_elapsed    | 215      |
|    total_timesteps | 21765    |
| train/             |          |
|    actor_loss      | -15.5    |
|    critic_loss     | 0.117    |
|    learning_rate   | 0.001    |
|    n_updates       | 21664    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 20       |
|    fps             | 101      |
|    time_elapsed    | 318      |
|    total_timesteps | 32363    |
| train/             |          |
|    actor_loss      | -22.7    |
|    critic_loss     | 0.362    |
|    learning_rate   | 0.001    |
|    n_updates       | 32262    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 24       |
|    fps             | 101      |
|    time_elapsed    | 370      |
|    total_timesteps | 37794    |
| train/             |          |
|    actor_loss      | -26      |
|    critic_loss     | 0.239    |
|    learning_rate   | 0.001    |
|    n_updates       | 37693    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 28       |
|    fps             | 101      |
|    time_elapsed    | 371      |
|    total_timesteps | 37899    |
| train/             |          |
|    actor_loss      | -26.3    |
|    critic_loss     | 0.406    |
|    learning_rate   | 0.001    |
|    n_updates       | 37798    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 32       |
|    fps             | 102      |
|    time_elapsed    | 425      |
|    total_timesteps | 43506    |
| train/             |          |
|    actor_loss      | -27.8    |
|    critic_loss     | 0.367    |
|    learning_rate   | 0.001    |
|    n_updates       | 43405    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 36       |
|    fps             | 102      |
|    time_elapsed    | 428      |
|    total_timesteps | 43812    |
| train/             |          |
|    actor_loss      | -27.2    |
|    critic_loss     | 0.693    |
|    learning_rate   | 0.001    |
|    n_updates       | 43711    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 40       |
|    fps             | 102      |
|    time_elapsed    | 481      |
|    total_timesteps | 49160    |
| train/             |          |
|    actor_loss      | -30.3    |
|    critic_loss     | 5.76     |
|    learning_rate   | 0.001    |
|    n_updates       | 49059    |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 44       |
|    fps             | 102      |
|    time_elapsed    | 485      |
|    total_timesteps | 49584    |
| train/             |          |
|    actor_loss      | -31      |
|    critic_loss     | 0.322    |
|    learning_rate   | 0.001    |
|    n_updates       | 49483    |
---------------------------------


In [52]:
model3 = model.learn(total_timesteps=100000, progress_bar=True, log_interval=20)


Output()

---------------------------------
| time/              |          |
|    episodes        | 20       |
|    fps             | 100      |
|    time_elapsed    | 270      |
|    total_timesteps | 27040    |
| train/             |          |
|    actor_loss      | -42      |
|    critic_loss     | 0.285    |
|    learning_rate   | 0.001    |
|    n_updates       | 276639   |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 40       |
|    fps             | 98       |
|    time_elapsed    | 548      |
|    total_timesteps | 54147    |
| train/             |          |
|    actor_loss      | -44.1    |
|    critic_loss     | 0.197    |
|    learning_rate   | 0.001    |
|    n_updates       | 303746   |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 60       |
|    fps             | 96       |
|    time_elapsed    | 786      |
|    total_timesteps | 76087    |
| train/             |          |
|    actor_loss      | -42.6    |
|    critic_loss     | 0.159    |
|    learning_rate   | 0.001    |
|    n_updates       | 325686   |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 80       |
|    fps             | 96       |
|    time_elapsed    | 908      |
|    total_timesteps | 87862    |
| train/             |          |
|    actor_loss      | -43.8    |
|    critic_loss     | 9.98     |
|    learning_rate   | 0.001    |
|    n_updates       | 337461   |
---------------------------------


---------------------------------
| time/              |          |
|    episodes        | 100      |
|    fps             | 96       |
|    time_elapsed    | 984      |
|    total_timesteps | 94993    |
| train/             |          |
|    actor_loss      | -42      |
|    critic_loss     | 0.233    |
|    learning_rate   | 0.001    |
|    n_updates       | 344592   |
---------------------------------


In [53]:
#save the model
model.save("TD3_CompressionEnv")

In [None]:
model2 = TD3('MlpPolicy', env, action_noise=action_noise, verbose=1)

In [None]:
import tensorflow as tf
tf.config.list_physical_devices('GPU')

In [213]:
env = CompressionEnv()
obs, _ = env.reset()

action, _states = model.predict(obs)
state, reward, done, truncated, info = env.step(action)
sum = reward
ct = 1
while not done:
    action, _state = model.predict(state)
    state, reward, done, truncarted, info = env.step(action)
    #print(reward)
    sum += reward
    ct += 1
print(sum / ct)
print('ceva')

0.45448963897032
ceva


In [None]:
from transform_primitives import xor, delta_xor, delta, delta_of_delta, rev_delta, rev_delta_of_delta
from compression_primitives import bitmask, offset, trailing_zero
env = CompressionEnv()
#env.get_ts()
print(env.metrics[0])
sample = env.metrics[0]
prev = env.last_2_metric
print(prev)
transformers = [delta, delta_of_delta, rev_delta, rev_delta_of_delta]
avg = 0
ct = 0
for j in range(len(env.metrics)):
    sample = env.metrics[j]
    for i in range(len(sample)):
        ct += 1
        mini = 64
        for t in transformers:
            val = t(prev[0], prev[1], sample[i])
            val = int(num_to_bits(val), 2)
            mini = min(mini, bitmask(val, 0), offset(val, 0), trailing_zero(val), offset(val, 1), offset(val, 2), bitmask(val, 1))
        avg += mini
        prev[0] = prev[1]
        prev[1] = sample[i]
print(avg / ct)
#daily-temperature 61.26 avg
#gold 61.82

In [None]:
action = [3, 0, 1, 3, 0, 1, 0, 0, 0, 0]
env2 = CompressionEnv()
print(env2.state)
state, reward, done, truncated, info = env2.step(action)
print(reward)

In [None]:
df = pd.read_csv('Gold.csv')
x = pd.to_datetime(df['DATE'], format='%Y/%m/%d')
print(x.astype(np.int64))

In [None]:
from utilities import int_to_bits, float_to_bits
print(int_to_bits(57), float_to_bits(2.257))