In [1]:
from typing import Tuple, Callable, Dict, Optional, Union
import numpy as np
import math
import glob
import os
import sys
from saris.utils import load_data
import json
import torch
from tensordict import TensorDict, from_module, from_modules
from tensordict.nn import TensorDictModule
from torchrl.data import ReplayBuffer, LazyMemmapStorage
from saris.utils import utils, pytorch_utils, running_mean


In [2]:
class Config:
    seed: int = 0
    ep_len: int = 1000
    eval_ep_len: int = 1000
    eval_seed: int = 0
    env_id: str = "wireless-sigmap-v0"
    sionna_config_file: str = "/home/hieule/research/saris/configs/sionna_L_multi_users.yaml"
    num_envs:int = 2
    name:str = "sac"
    load_replay_buffer:str = "/home/hieule/research/saris/local_assets/replay_buffers/TD3__L_shape_static__wireless-sigmap-v0__0865b983"
    buffer_size: int = int(100000)
    batch_size: int = 256
    
config = Config()

In [3]:
def normalize_obs(
    flat_obs: torch.Tensor,
    real_channel_rms: running_mean.RunningMeanStd,
    imag_channel_rms: running_mean.RunningMeanStd,
    epsilon: float = 1e-10,
):
    # real_mean = real_channel_rms.mean.to(flat_obs.device)
    # real_var = real_channel_rms.var.to(flat_obs.device)
    real_channel_len = real_channel_rms.mean.shape[0]
    real_channels = flat_obs[..., :real_channel_len]
    # # whittening
    # real_channels = (real_channels - real_mean) / torch.sqrt(real_var + epsilon)
    # scaling
    min_ = real_channel_rms.min.to(flat_obs.device)
    max_ = real_channel_rms.max.to(flat_obs.device)
    real_channels = (real_channels - min_) / (max_ - min_ + epsilon)

    # imag_mean = imag_channel_rms.mean.to(flat_obs.device)
    # imag_var = imag_channel_rms.var.to(flat_obs.device)
    imag_channel_len = imag_channel_rms.mean.shape[0]
    imag_channels = flat_obs[..., real_channel_len : real_channel_len + imag_channel_len]
    # # whittening
    # imag_channels = (imag_channels - imag_mean) / torch.sqrt(imag_var + epsilon)
    # scaling
    min_ = imag_channel_rms.min.to(flat_obs.device)
    max_ = imag_channel_rms.max.to(flat_obs.device)
    imag_channels = (imag_channels - min_) / (max_ - min_ + epsilon)

    # angles
    angle_len = 72
    angles = flat_obs[
        ..., real_channel_len + imag_channel_len : real_channel_len + imag_channel_len + angle_len
    ]
    init_angles = [math.radians(135.0)] + [math.radians(90.0)] * 7
    init_angles = np.concatenate([init_angles] * 9)
    # offset
    angles = torch.sub(angles, torch.tensor(init_angles, device=angles.device, dtype=angles.dtype))
    # normalize
    angles = torch.div(torch.rad2deg(angles), 45.0)

    pos = flat_obs[..., real_channel_len + imag_channel_len + angle_len :]
    flat_obs = torch.cat([real_channels, imag_channels, angles, pos], dim=-1)
    return flat_obs.float()

def update_channel_rmss(
    flat_obs: Tuple[torch.Tensor, torch.Tensor, torch.Tensor],
    real_channel_rms: running_mean.RunningMeanStd,
    imag_channel_rms: running_mean.RunningMeanStd,
):
    real_channel_len = np.prod(real_channel_rms.mean.shape)
    real_channel_rms.update(flat_obs[..., :real_channel_len])
    imag_channel_len = np.prod(imag_channel_rms.mean.shape)
    imag_channel_rms.update(flat_obs[..., real_channel_len : real_channel_len + imag_channel_len])


In [4]:
real_channel_rms = running_mean.RunningMeanStd(shape=(1104,))
imag_channel_rms = running_mean.RunningMeanStd(shape=(1104,))
obs_rmss = (real_channel_rms, imag_channel_rms)

In [5]:
replay_buffer_dir = "/home/hieule/research/saris/local_assets/replay_buffers"
rb_dir = os.path.join(replay_buffer_dir, "tmptmp")
rb = ReplayBuffer(
        storage=LazyMemmapStorage(config.buffer_size, scratch_dir=rb_dir, existsok=True),
        batch_size=config.batch_size,
    )

In [6]:
rb.loads(config.load_replay_buffer)
stored_obs = np.asarray(rb.storage.get("observations"))
update_channel_rmss(torch.tensor(stored_obs), obs_rmss[0], obs_rmss[1])

In [12]:
cur_gains = np.asarray(rb['path_gains'])
next_gains = np.asarray(rb['next_path_gains'])
gain_diff = np.mean(next_gains - cur_gains, axis=-1)
mean_gains = np.mean(cur_gains, axis=-1)

In [18]:
adjusted_gains = []
gdifs = []
for i, mean_gain in enumerate(mean_gains[:10]):
    if mean_gain < -95:
        adjusted_gain = -1.5 + np.exp(mean_gain + 95)
    elif mean_gain < -85:
        adjusted_gain = -0.5 + (mean_gain + 95) / 20
    else:
        adjusted_gain = np.log(1 + 85 + mean_gain)
    adjusted_gains.append(adjusted_gain)
    
    gdifs.append(gain_diff[i]*0.05)

print(cur_gains[:10])
print(mean_gains[:10])
print(adjusted_gains)
print(gdifs)
# if mean_gain < -95:
#     adjusted_gain = -1.5 + np.exp(mean_gain + 95)
# elif mean_gain < -85:
#     # linear increase from -0.5 to 0 between -95 and -85
#     adjusted_gain = -0.5 + (mean_gain + 95) / 20
# else:
#     adjusted_gain = np.log(1 + 85 + mean_gain)

[[-106.648186  -95.02979   -99.23008 ]
 [-106.10482  -103.20533  -105.8541  ]
 [-106.62189  -101.21948  -104.80565 ]
 [ -89.63991  -103.06411  -105.8839  ]
 [-106.641075 -103.30127  -105.46685 ]
 [-106.4852   -103.26988  -105.853584]
 [-106.64636  -103.02502  -105.820145]
 [-106.64822  -103.30453   -87.88037 ]
 [-101.65854   -98.42372   -80.20291 ]
 [-106.58824  -103.181015 -105.88792 ]]
[-100.30268  -105.05475  -104.215675  -99.529305 -105.136406 -105.20289
 -105.16384   -99.27771   -93.42838  -105.219055]
[-1.4950217704800441, -1.4999570188338682, -1.4999005320776284, -1.489211823406437, -1.4999603890889, -1.4999629368924503, -1.499961461054154, -1.4861256013514996, -0.4214191436767578, -1.4999635312626658]
[0.3437147855758667, -0.00821533203125, 0.01914939880371094, -0.24702785015106202, 0.03609403073787689, 0.048152923583984375, 0.17575111389160158, -0.2800238370895386, -0.4170111179351807, 0.02130991667509079]


In [24]:
74*0.002

0.148

In [99]:
(adjusted_gains)/10

array([-1.1081574 , -0.57496566, -0.96292955, -1.8330529 , -0.36588973,
       -0.95808715, -1.1893791 , -0.8835228 , -1.3083122 , -1.0495536 ,
       -1.7543709 , -1.1871239 , -0.8969658 , -0.81738204, -1.0799583 ,
       -0.5896965 , -1.1098855 , -1.2157784 , -0.38344422, -1.348706  ,
       -0.38696593, -0.8876457 ,  0.4603302 , -0.15166244, -0.59511185,
       -0.5104378 , -1.0158653 , -0.11572723], dtype=float32)

In [105]:
(adjusted_gains + 0.02*gain_diff)/20

array([-0.05262639,  0.1999363 ,  0.02450562, -0.40777674,  0.30832428,
        0.02093914, -0.09587889,  0.05657829, -0.1586167 , -0.02615253,
       -0.37101308, -0.09380568,  0.04968718,  0.09358583, -0.04027845,
        0.19889092, -0.04353157, -0.108305  ,  0.30824268, -0.16974244,
        0.31498998,  0.06353697,  0.7229349 ,  0.4264663 ,  0.19823654,
        0.2487282 , -0.00936182,  0.43931222], dtype=float32)

In [87]:
(adjusted_gains + 0.025*gain_diff - 0.02 * 40)/30

array([-0.39242175, -0.24977407, -0.3327172 , -0.6158101 , -0.17045702,
       -0.34607226, -0.42609972, -0.32532504, -0.47392222, -0.37995714,
       -0.59602606, -0.42298397, -0.3302301 , -0.29343522, -0.38740098,
       -0.23888423, -0.36810064, -0.4329657 , -0.15456945, -0.46470886,
       -0.1344729 , -0.304149  ,  0.10870124, -0.07147709, -0.23555613,
       -0.18694483, -0.36886132, -0.07230286], dtype=float32)

In [None]:
avg_cur_gains = np.mean(cur_gains, axis=-1)
avg_cur_gains

array([-91.08157 , -85.74966 , -89.629295, -98.33053 , -83.6589  ,
       -89.58087 , -91.89379 , -88.83523 , -93.08312 , -90.49554 ,
       -97.54371 , -91.87124 , -88.96966 , -88.17382 , -90.79958 ,
       -85.896965, -91.098854, -92.15778 , -83.83444 , -93.48706 ,
       -83.86966 , -88.87646 , -75.3967  , -81.516624, -85.95112 ,
       -85.10438 , -90.15865 , -81.15727 ], dtype=float32)

In [34]:
gain_diff = np.sum(next_gains - cur_gains, axis=-1)
gain_diff * 0.05

array([ 0.21784249, -1.8871304 ,  0.89555854,  1.3124489 , -1.3096253 ,
       -0.00259171, -0.17840004, -0.24904709, -0.6690888 , -0.20635377,
        0.9258545 , -0.03656082, -0.27448884,  0.34152755, -0.04489212,
       -0.9391228 ,  1.7116692 , -0.06237335, -0.00528297,  0.69159013,
        1.2709442 ,  1.1039753 , -1.0845295 ,  0.34462357, -0.6311302 ,
        0.59206545, -0.2143734 , -0.42362672], dtype=float32)

In [47]:
rewards = avg_cur_gains + gain_diff * 0.05
(rewards + 80)/80

array([-0.13579664, -0.09545984, -0.10917167, -0.21272603, -0.06210651,
       -0.11979332, -0.15090236, -0.11355343, -0.17190266, -0.13377361,
       -0.20772314, -0.14884749, -0.11555185, -0.09790363, -0.13555594,
       -0.08545113, -0.1173398 , -0.15275192, -0.04799652, -0.15994339,
       -0.03248396, -0.09715605,  0.0439847 , -0.01464996, -0.08227806,
       -0.05640392, -0.1296628 , -0.01976128], dtype=float32)

In [44]:
-60/80

-0.75

In [38]:
rewards = total_cur_gains + gain_diff * 0.05
rewards

array([-82.491615, -78.040306, -77.27826 , -91.35905 , -77.49554 ,
       -78.06777 , -79.97712 , -83.04637 , -83.51123 , -78.3449  ,
       -91.81504 , -76.17207 , -77.59145 , -77.30848 , -83.05356 ,
       -81.49085 , -81.196335, -82.56216 , -74.7204  , -77.6953  ,
       -72.27406 , -79.524605, -71.08144 , -76.07428 , -74.541595,
       -73.32467 , -81.23369 , -73.20522 ], dtype=float32)

In [7]:
print(np.max(stored_obs[..., :1104], axis=0))
print(np.min(stored_obs[..., :1104], axis=0))


[3.5390137e-06 5.2522237e-06 5.1776506e-06 ... 5.4840098e-06 1.4956104e-06
 1.5843410e-06]
[-4.4215444e-06 -4.2042370e-06 -6.4661021e-06 ... -6.3247408e-06
 -1.7040716e-06 -1.4679688e-06]


In [8]:
print(f"real mean: {obs_rmss[0].mean}")
print(f"real var: {obs_rmss[0].var}")
print(f"real max: {obs_rmss[0].max}")
print(f"real min: {obs_rmss[0].min}")
print(f"real normalized_max: {obs_rmss[0].normalized_max}")
print(f"real normalized_min: {obs_rmss[0].normalized_min}")

real mean: tensor([-4.1666e-08,  4.9725e-08, -6.1573e-08,  ..., -2.1733e-09,
        -4.3409e-09,  5.0589e-09])
real var: tensor([1.9988e-13, 2.7937e-13, 4.1933e-13,  ..., 2.6501e-13, 3.8605e-14,
        3.1103e-14])
real max: tensor([3.5390e-06, 5.2522e-06, 5.1777e-06,  ..., 5.4840e-06, 1.4956e-06,
        1.5843e-06])
real min: tensor([-4.4215e-06, -4.2042e-06, -6.4661e-06,  ..., -6.3247e-06,
        -1.7041e-06, -1.4680e-06])
real normalized_max: tensor([1., 1., 1.,  ..., 1., 1., 1.])
real normalized_min: tensor([1., 1., 1.,  ..., 1., 1., 1.])


In [30]:
obs = rb['observations']
b=normalize_obs(obs, obs_rmss[0], obs_rmss[1])[...,:]

In [31]:
torch.min(b[...,:1104], dim =0).values

tensor([0., 0., 0.,  ..., 0., 0., 0.])

In [32]:
torch.max(b[...,:1104], dim =0).values

tensor([1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000])

In [23]:
obs[20000,:1104]

MemoryMappedTensor([-9.3064e-08,  1.1109e-07, -1.3749e-07,  ...,
                    -1.9914e-08,  1.7944e-08, -1.6314e-08])

In [15]:
print(f"real: {b[:1104]}")
print(f"imag: {b[1104:2208]}")
print(f"angle: {b[2208:2280]}")
print(f"pos: {b[2280:]}")

real: tensor([0.5431, 0.4559, 0.5431,  ..., 0.5335, 0.5365, 0.4740])
imag: tensor([0.5105, 0.4909, 0.5045,  ..., 0.5050, 0.5280, 0.4744])
angle: tensor([-0.6667,  0.1943,  0.1575,  0.1204,  0.0831,  0.0456,  0.0081, -0.0294,
        -0.6667,  0.5613,  0.5315,  0.5012,  0.4702,  0.4386,  0.4065,  0.3738,
         0.1456, -0.4751, -0.5161, -0.5559, -0.5947, -0.6323, -0.6667, -0.6667,
        -0.6667,  0.2754,  0.2318,  0.1876,  0.1429,  0.0979,  0.0526,  0.0072,
        -0.1305, -0.5939, -0.6187, -0.6429, -0.6667, -0.6667, -0.6667, -0.6667,
        -0.3208,  0.1271,  0.0998,  0.0725,  0.0451,  0.0176, -0.0099, -0.0373,
         0.5999,  0.3432,  0.3133,  0.2830,  0.2525,  0.2216,  0.1905,  0.1591,
         0.3894, -0.0422, -0.0767, -0.1111, -0.1454, -0.1794, -0.2132, -0.2466,
         0.5192,  0.1542,  0.1223,  0.0902,  0.0580,  0.0257, -0.0066, -0.0389])
pos: tensor([-10.0000,  -2.7500,   1.5000,  -8.0000,  -4.2500,   1.5000, -11.0000,
         -3.2500,   1.5000,   2.0922,  -3.8880,   2

In [19]:
obs = rb['observations']

min_obs = torch.min(obs[...,:1104], axis=0).values
max_obs = torch.max(obs[...,:1104], axis=0).values
print(f"max_obs: {max_obs}")
print(f"min_obs: {min_obs}")

normalized_min_obs = (obs[...,:1104] - min_obs) / (max_obs - min_obs)


b=normalize_obs(obs, obs_rmss[0], obs_rmss[1])[...,:1104]
max_b = torch.max(b, dim=0).values
min_b = torch.min(b, dim=0).values
print(f"max_b: {max_b}")
print(f"min_b: {min_b}")

tmp = (b - min_b) / (max_b - min_b)
print(torch.max(tmp, dim=0).values)
print(torch.min(tmp, dim=0).values)

max_obs: tensor([3.5390e-06, 5.2522e-06, 5.1777e-06,  ..., 5.4840e-06, 1.4956e-06,
        1.5843e-06])
min_obs: tensor([-4.4215e-06, -4.2042e-06, -6.4661e-06,  ..., -6.3247e-06,
        -1.7041e-06, -1.4680e-06])
max_b: tensor([0.0358, 0.0520, 0.0524,  ..., 0.0549, 0.0150, 0.0158])
min_b: tensor([-0.0438, -0.0425, -0.0640,  ..., -0.0632, -0.0170, -0.0147])
tensor([1., 1., 1.,  ..., 1., 1., 1.])
tensor([0., 0., 0.,  ..., 0., 0., 0.])


In [17]:
tmp

tensor([[0.8838, 0.1171, 0.8816,  ..., 0.6036, 0.4370, 0.5761],
        [0.5790, 0.4202, 0.5808,  ..., 0.3509, 0.5475, 0.4342],
        [0.7300, 0.2748, 0.7195,  ..., 0.5501, 0.9099, 0.0907],
        ...,
        [0.5482, 0.4518, 0.5482,  ..., 0.5312, 0.5476, 0.4662],
        [0.5369, 0.4630, 0.5371,  ..., 0.5321, 0.5447, 0.4691],
        [0.5517, 0.4484, 0.5514,  ..., 0.5317, 0.5459, 0.4678]])

In [13]:
b.shape

torch.Size([20736, 1104])

In [19]:
obs = rb['observations'][:20735]
obs.shape

torch.Size([20735, 2292])

In [11]:
obs[...,:1104]

MemoryMappedTensor([[ 1.5021e-07, -1.7631e-07,  2.1409e-07,  ...,
                      1.7353e-07, -1.6117e-07,  1.5047e-07]])

In [20]:
a=normalize_obs(obs, obs_rmss[0], obs_rmss[1])[...,:1104]
print(torch.max(torch.abs(a)))
print(torch.min(torch.abs(a)))

tensor(0.7777)
tensor(0.2691)


In [14]:
print(f"obs_rmss[0].max: {obs_rmss[0].max}")
print(f"obs_rmss[0].min: {obs_rmss[0].min}")
print(f"obs_rmss[1].max: {obs_rmss[1].max}")
print(f"obs_rmss[1].min: {obs_rmss[1].min}")

obs_rmss[0].max: tensor([3.5390e-06, 5.2522e-06, 5.1777e-06,  ..., 5.4840e-06, 1.4956e-06,
        1.5843e-06])
obs_rmss[0].min: tensor([-4.4215e-06, -4.2042e-06, -6.4661e-06,  ..., -6.3247e-06,
        -1.7041e-06, -1.4680e-06])
obs_rmss[1].max: tensor([3.7836e-06, 4.6288e-06, 5.5574e-06,  ..., 5.7870e-06, 2.1018e-06,
        2.1972e-06])
obs_rmss[1].min: tensor([-3.9293e-06, -4.4992e-06, -5.6388e-06,  ..., -5.8671e-06,
        -2.4127e-06, -1.9483e-06])


In [15]:
print(obs_rmss[0].normalized_max)

tensor([ 8.0091,  9.8430,  8.0908,  ..., 10.6571,  7.6341,  8.9549])


In [16]:
a=normalize_obs(obs, obs_rmss[0], obs_rmss[1])[...,:1104]
print(a)
print(torch.max(torch.abs(a)))
print(torch.min(torch.abs(a)))

tensor([[0.5503, 0.4497, 0.5502,  ..., 0.5355, 0.5311, 0.4827]])
tensor(0.7771)
tensor(0.2698)


In [17]:
a=normalize_obs(obs, obs_rmss[0], obs_rmss[1])[...,1104:1104+1104]
print(a)
print(torch.max(torch.abs(a)))
print(torch.min(torch.abs(a)))

tensor([[0.5164, 0.4860, 0.5104,  ..., 0.5072, 0.5237, 0.4809]])
tensor(0.6640)
tensor(0.2145)


In [22]:
a=normalize_obs(obs, obs_rmss[0], obs_rmss[1])[...,:1104]
print(a)
print(torch.max(torch.abs(a)))
print(torch.min(torch.abs(a)))


tensor([[ 0.0019, -0.0023,  0.0028,  ...,  0.0018, -0.0016,  0.0015]])
tensor(0.1093)
tensor(1.6117e-06)


In [9]:
normalize_obs(obs, obs_rmss[0], obs_rmss[1])[...,:1104]

tensor([[ 15.0208, -17.6304,  21.4082,  ...,  17.3526, -16.1169,  15.0469]])

In [10]:
obs[...,1104: 1104+1104]

MemoryMappedTensor([[ 5.9157e-08, -6.8491e-08,  8.1607e-08,  ...,
                     -1.4328e-08,  1.3809e-08, -1.3292e-08]])

In [11]:
normalize_obs(obs, obs_rmss[0], obs_rmss[1])[...,1104: 1104+1104]

tensor([[ 5.9156, -6.8489,  8.1604,  ..., -1.4328,  1.3809, -1.3292]])

In [25]:
init_angles = [math.radians(135.0)] + [math.radians(90.0)] * 7
init_angles = np.concatenate([init_angles] * 9)
init_angles

array([2.35619449, 1.57079633, 1.57079633, 1.57079633, 1.57079633,
       1.57079633, 1.57079633, 1.57079633, 2.35619449, 1.57079633,
       1.57079633, 1.57079633, 1.57079633, 1.57079633, 1.57079633,
       1.57079633, 2.35619449, 1.57079633, 1.57079633, 1.57079633,
       1.57079633, 1.57079633, 1.57079633, 1.57079633, 2.35619449,
       1.57079633, 1.57079633, 1.57079633, 1.57079633, 1.57079633,
       1.57079633, 1.57079633, 2.35619449, 1.57079633, 1.57079633,
       1.57079633, 1.57079633, 1.57079633, 1.57079633, 1.57079633,
       2.35619449, 1.57079633, 1.57079633, 1.57079633, 1.57079633,
       1.57079633, 1.57079633, 1.57079633, 2.35619449, 1.57079633,
       1.57079633, 1.57079633, 1.57079633, 1.57079633, 1.57079633,
       1.57079633, 2.35619449, 1.57079633, 1.57079633, 1.57079633,
       1.57079633, 1.57079633, 1.57079633, 1.57079633, 2.35619449,
       1.57079633, 1.57079633, 1.57079633, 1.57079633, 1.57079633,
       1.57079633, 1.57079633])

In [None]:
angles = np.array(rb['observations'][20735][-84:-12])

In [33]:
angles = np.array(rb['observations'][20735][-84:-12])
print(angles)
offset = angles - init_angles
print(f"offset: {offset}")
print(f"offset in degrees: {np.degrees(offset)}")

[1.9896753 1.1867217 1.138107  1.09158   1.0471976 1.0471976 1.0471976
 1.0471976 2.8797932 2.0943952 2.09211   2.0859375 2.0797215 2.0734622
 2.0671597 2.0608137 1.8325957 1.0884247 1.0744745 1.060732  1.0471976
 1.0471976 1.0471976 1.0471976 2.78053   2.0943952 2.0943952 2.0943952
 2.0943952 2.0500128 2.0034878 1.9548732 2.8797932 2.0943952 2.0943952
 2.0943952 2.0943952 2.0881872 2.081935  2.0756388 2.8797932 2.0943952
 2.0943952 2.0943952 2.0943952 2.0500128 2.0034878 1.9548732 2.8797932
 1.4679533 1.4535496 1.4391946 1.4248939 1.410653  1.3964773 1.382372
 1.8325957 2.0943952 2.0943952 2.0943952 2.0943952 2.053379  2.0105243
 1.965868  1.8325957 2.0943952 2.0943952 2.0943952 2.0943952 2.0751016
 2.0553882 2.035257 ]
offset: [-0.36651921 -0.38407464 -0.43268938 -0.47921629 -0.52359875 -0.52359875
 -0.52359875 -0.52359875  0.52359868  0.52359883  0.52131359  0.51514117
  0.50892512  0.50266592  0.49636333  0.49001734 -0.52359878 -0.48237164
 -0.49632187 -0.51006432 -0.52359875 -0.52

In [27]:
offset = angles - init_angles
offset

array([ 0.06316352,  0.03851752,  0.01531367, -0.00790675, -0.03111859,
       -0.05429705, -0.07741721, -0.10045464,  0.01431943,  0.02105193,
       -0.00958605, -0.04020603, -0.07075091, -0.10116406, -0.13138934,
       -0.16137655, -0.03092861,  0.0602961 ,  0.03736274,  0.01438992,
       -0.00859805, -0.03157695, -0.05452247, -0.07741077,  0.05629326,
        0.13126199,  0.11051743,  0.0896769 ,  0.06875805,  0.04777865,
        0.026758  ,  0.00571279,  0.08832431, -0.0229059 , -0.04744895,
       -0.07193489, -0.09633489, -0.12062021, -0.14476355, -0.16873808,
       -0.02204823,  0.01128678, -0.01037593, -0.03202899, -0.053652  ,
       -0.07522483, -0.09672697, -0.11814053,  0.088938  ,  0.04018955,
        0.01553516, -0.00913818, -0.03380032, -0.05842145, -0.08297189,
       -0.10742255, -0.00653362,  0.15716962,  0.13604896,  0.11480562,
        0.0934575 ,  0.07202379,  0.05052452,  0.02897732,  0.05667067,
        0.04013376,  0.01753454, -0.00508256, -0.02769454, -0.05

In [28]:
print(f"init angles in degrees: {np.degrees(init_angles)}")
print(f"angles in degrees: {np.degrees(angles)}")
print(f"offset in degrees: {np.degrees(offset)}")

init angles in degrees: [135.  90.  90.  90.  90.  90.  90.  90. 135.  90.  90.  90.  90.  90.
  90.  90. 135.  90.  90.  90.  90.  90.  90.  90. 135.  90.  90.  90.
  90.  90.  90.  90. 135.  90.  90.  90.  90.  90.  90.  90. 135.  90.
  90.  90.  90.  90.  90.  90. 135.  90.  90.  90.  90.  90.  90.  90.
 135.  90.  90.  90.  90.  90.  90.  90. 135.  90.  90.  90.  90.  90.
  90.  90.]
angles in degrees: [138.619     92.20689   90.8774    89.546974  88.21703   86.889
  85.564316  84.24437  135.82043   91.206184  89.45075   87.69636
  85.946266  84.20372   82.47194   80.7538   133.22792   93.454704
  92.140724  90.82448   89.50736   88.19077   86.87609   85.56468
 138.22536   97.52075   96.33218   95.1381    93.939545  92.73751
  91.53312   90.32732  140.06061   88.687584  87.28137   85.87843
  84.480415  83.08897   81.70566   80.332016 133.73672   90.64668
  89.405495  88.16487   86.925964  85.689926  84.45795   83.23104
 140.09576   92.30269   90.89009   89.47642   88.06338   86.652

In [29]:
offset_deg = np.degrees(offset)
normalized_offset = offset_deg / 45.0
normalized_offset

array([ 0.0804223 ,  0.04904203,  0.01949797, -0.01006719, -0.03962142,
       -0.06913315, -0.09857065, -0.12790283,  0.01823206,  0.02680415,
       -0.01220534, -0.05119191, -0.09008285, -0.12880608, -0.1672901 ,
       -0.20547101, -0.03937952,  0.07677138,  0.04757172,  0.01832181,
       -0.01094737, -0.04020502, -0.06942017, -0.09856246,  0.0716748 ,
        0.16712795,  0.14071516,  0.11418018,  0.08754547,  0.06083367,
        0.03406934,  0.00727375,  0.11245801, -0.0291647 , -0.06041388,
       -0.09159035, -0.12265739, -0.15357842, -0.18431867, -0.214844  ,
       -0.02807268,  0.01437077, -0.01321105, -0.04078058, -0.06831185,
       -0.09577923, -0.1231566 , -0.1504212 ,  0.11323938,  0.05117092,
        0.01977998, -0.0116351 , -0.0430359 , -0.0743845 , -0.10564309,
       -0.13677463, -0.00831886,  0.20011458,  0.17322291,  0.14617506,
        0.11899379,  0.09170354,  0.06432981,  0.03689507,  0.07215534,
        0.05109989,  0.02232567, -0.00647132, -0.03526178, -0.06

In [6]:
data = {}
data_names = [
        "observations",
        "actions",
        "rewards",
        "next_observations",
        "truncations",
        "terminations",
    ]
for i, sub_dir in enumerate(glob.glob(replay_buffer_dir + "/SAC*")):
    print(f"Loading data from {sub_dir}")
    for name in data_names:
        filename = os.path.join(sub_dir, f"{name}.txt")
        with open(filename, "r") as f:
            d_ = f.readlines()
            d_ = [list(json.loads(d).values())[0] for d in d_]
            d_ = np.array(d_)
            if name == "truncations":
                d_[-1, :, :] = 1
            d_ = np.transpose(d_, (1, 0, 2))
            d_ = np.reshape(d_, (-1, d_.shape[-1]))
            if data.get(name) is None:
                data[name] = d_
            else:
                data[name] = np.concatenate([data[name], d_], axis=0)
    for k, v in data.items():
        print(k, v.shape)
    print()

# print("Saving data")
# offline_replay_buffer_dir = os.path.join(replay_buffer_dir, "offline")
# os.makedirs(offline_replay_buffer_dir, exist_ok=True)
# for k, v in data.items():
#     np.save(os.path.join(offline_replay_buffer_dir, f"{k}.npy"), v)

Loading data from /home/hieule/research/saris/local_assets/replay_buffers/SAC__L_shape_static__wireless-sigmap-v0__trained


FileNotFoundError: [Errno 2] No such file or directory: '/home/hieule/research/saris/local_assets/replay_buffers/SAC__L_shape_static__wireless-sigmap-v0__trained/observations.txt'

In [5]:
data['rewards'].shape

(53792, 1)

In [6]:
min_reward = np.min(data['rewards'])
max_reward = np.max(data['rewards'])
mean_reward = np.mean(data['rewards'])
std_reward = np.std(data['rewards'])
print(f"min_reward: {min_reward}")
print(f"max_reward: {max_reward}")
print(f"mean_reward: {mean_reward}")
print(f"std_reward: {std_reward}")

min_reward: -35.05443771649152
max_reward: 3.2991380989551544
mean_reward: -22.54736009551046
std_reward: 8.381205452487608
