In [17]:
import os

# Run the notebook from the repository root: later cells import the project
# packages (`config`, `diffuser`) relative to the working directory.
# The location can be overridden with the DIFFUSION_RL_ROOT environment variable;
# when it is unset the original absolute path is used, so existing setups are unaffected.
os.chdir(os.environ.get('DIFFUSION_RL_ROOT', '/root/DiffusionBasedRL'))

In [18]:
# Change this import to select a different experiment configuration module.
from config.maze2d_dit import base
# Only the 'diffusion' entry of the configuration is used; it is handed to `argsmaker` below.
config = base['diffusion']

In [19]:
from loguru import logger
import os
# Log the working directory to confirm which checkout this kernel runs from.
logger.info(os.getcwd())
import sys
# Log the interpreter path to confirm which Python environment is active.
logger.info(sys.executable)
# Make the working directory importable before the `diffuser` imports that follow.
# NOTE(review): each re-run of this cell appends another duplicate entry to sys.path.
sys.path.append(os.getcwd())
import numpy as np

from diffuser.models.dit import LDiT_models

import diffuser.utils as utils


#-----------------------------------------------------------------------------#
#----------------------------------- setup -----------------------------------#
#-----------------------------------------------------------------------------#

class argsmaker:
    """Attribute-style namespace holding the experiment arguments.

    Every key/value pair of ``inputargs`` is copied onto the instance. Afterwards
    ``logbase``, ``horizon`` and ``n_diffusion_steps`` are set from the keyword
    arguments (they take precedence over same-named keys in ``inputargs``) and
    ``savepath`` is derived from them together with ``dataset``.

    Args:
        inputargs: mapping of argument names to values.
        logbase: root directory of the logs, e.g. 'logs_dif_UMAZE_1M' or
            'logs_dit_final_1M'.
        horizon: planning horizon (384 for large, 256 for medium, 128 for small).
        n_diffusion_steps: number of diffusion steps (256 for large, 256 for
            medium, 64 for small).
    """

    # Change this
    dataset: str = 'maze2d-large-v1' #maze2d-large-v1, maze2d-medium-v1, maze2d-umaze-v1

    def __init__(
        self,
        inputargs,
        logbase: str = 'logs_dit_final_1M',
        horizon: int = 384,
        n_diffusion_steps: int = 256,
    ):
        for k, v in inputargs.items():
            setattr(self, k, v)
        # These three are applied after the copy above, so they override any
        # `logbase`, `horizon` or `n_diffusion_steps` present in `inputargs`.
        self.logbase = logbase
        self.horizon = horizon
        self.n_diffusion_steps = n_diffusion_steps
        # `self.dataset` resolves to the class default unless `inputargs` supplied one.
        self.savepath = f'{self.logbase}/{self.dataset}/diffusion/H{self.horizon}_T{self.n_diffusion_steps}'

# Build the argument namespace from the selected configuration dict.
args = argsmaker(config)

[32m2024-06-07 19:24:15.852[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m3[0m - [1m/root/DiffusionBasedRL[0m
[32m2024-06-07 19:24:15.854[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m5[0m - [1m/opt/conda/envs/diffuser/bin/python[0m


In [20]:
#-----------------------------------------------------------------------------#
#---------------------------------- dataset ----------------------------------#
#-----------------------------------------------------------------------------#

# Dataset config: built around `args.loader` and saved as 'dataset_config.pkl'
# under `args.savepath`.
dataset_config = utils.Config(
    args.loader,
    savepath=(args.savepath, 'dataset_config.pkl'),
    env=args.dataset,
    # the remaining keyword arguments are forwarded from `args` under the same name
    **{
        field: getattr(args, field)
        for field in (
            'horizon',
            'normalizer',
            'preprocess_fns',
            'use_padding',
            'max_path_length',
        )
    }
)

# Renderer config: only needs to know the environment name.
render_config = utils.Config(
    args.renderer, savepath=(args.savepath, 'render_config.pkl'), env=args.dataset
)

# Both configs exist before either of them is instantiated.
dataset = dataset_config()
renderer = render_config()

# Dimensions reported by the dataset; used to size the model below.
observation_dim, action_dim = dataset.observation_dim, dataset.action_dim

[ utils/config ] Imported diffuser.datasets:GoalDataset

[utils/config ] Config: <class 'diffuser.datasets.sequence.GoalDataset'>
    env: maze2d-large-v1
    horizon: 384
    max_path_length: 40000
    normalizer: LimitsNormalizer
    preprocess_fns: ['maze2d_set_terminals']
    use_padding: False

[ utils/config ] Saved config to: logs_dit_final_1M/maze2d-large-v1/diffusion/H384_T256/dataset_config.pkl

[ utils/config ] Imported diffuser.utils:Maze2dRenderer

[utils/config ] Config: <class 'diffuser.utils.rendering.Maze2dRenderer'>
    env: maze2d-large-v1

[ utils/config ] Saved config to: logs_dit_final_1M/maze2d-large-v1/diffusion/H384_T256/render_config.pkl



load datafile: 100%|██████████| 8/8 [00:01<00:00,  7.05it/s]


[ utils/preprocessing ] Segmented maze2d-large-v1 | 1061 paths | min length: 67 | max length: 30470
[ datasets/buffer ] Finalized replay buffer | 1062 episodes
[ datasets/buffer ] Fields:
    actions: (1062, 40000, 2)
    infos/goal: (1062, 40000, 2)
    infos/qpos: (1062, 40000, 2)
    infos/qvel: (1062, 40000, 2)
    observations: (1062, 40000, 4)
    rewards: (1062, 40000, 1)
    terminals: (1062, 40000, 1)
    timeouts: (1062, 40000, 1)
    next_observations: (1062, 40000, 4)
    normed_observations: (1062, 40000, 4)
    normed_actions: (1062, 40000, 2)


In [21]:

#-----------------------------------------------------------------------------#
#------------------------------ model & trainer ------------------------------#
#-----------------------------------------------------------------------------#

# Diffusion config; the denoising model is supplied later, when the config is called.
diffusion_config = utils.Config(
    args.diffusion,
    savepath=(args.savepath, 'diffusion_config.pkl'),
    horizon=args.horizon,
    observation_dim=observation_dim,
    action_dim=action_dim,
    n_timesteps=args.n_diffusion_steps,
    # forwarded from `args` under the same name (loss settings, then the device)
    **{
        field: getattr(args, field)
        for field in (
            'loss_type',
            'clip_denoised',
            'predict_epsilon',
            'action_weight',
            'loss_weights',
            'loss_discount',
            'device',
        )
    }
)

# Trainer config; `label_freq` is the number of training steps per save.
trainer_config = utils.Config(
    utils.Trainer,
    savepath=(args.savepath, 'trainer_config.pkl'),
    train_batch_size=args.batch_size,
    train_lr=args.learning_rate,
    **{
        field: getattr(args, field)
        for field in ('gradient_accumulate_every', 'ema_decay', 'sample_freq', 'save_freq')
    },
    label_freq=int(args.n_train_steps // args.n_saves),
    save_parallel=args.save_parallel,
    results_folder=args.savepath,
    **{field: getattr(args, field) for field in ('bucket', 'n_reference', 'n_samples')}
)

[ utils/config ] Imported diffuser.models:GaussianDiffusion

[utils/config ] Config: <class 'diffuser.models.diffusion.GaussianDiffusion'>
    action_dim: 2
    action_weight: 1
    clip_denoised: True
    horizon: 384
    loss_discount: 1
    loss_type: l2
    loss_weights: None
    n_timesteps: 256
    observation_dim: 4
    predict_epsilon: False

[ utils/config ] Saved config to: logs_dit_final_1M/maze2d-large-v1/diffusion/H384_T256/diffusion_config.pkl


[utils/config ] Config: <class 'diffuser.utils.training.Trainer'>
    bucket: None
    ema_decay: 0.995
    gradient_accumulate_every: 2
    label_freq: 20000
    n_reference: 50
    n_samples: 10
    results_folder: logs_dit_final_1M/maze2d-large-v1/diffusion/H384_T256
    sample_freq: 10000
    save_freq: 5000
    save_parallel: False
    train_batch_size: 32
    train_lr: 0.0002

[ utils/config ] Saved config to: logs_dit_final_1M/maze2d-large-v1/diffusion/H384_T256/trainer_config.pkl



In [36]:
#-----------------------------------------------------------------------------#
#-------------------------------- instantiate --------------------------------#
#-----------------------------------------------------------------------------#

# Choose the denoising backbone: a model name containing 'DiT' is looked up in
# `LDiT_models`; any other name goes through the generic config mechanism.
if 'DiT' not in args.model:
    model_config = utils.Config(
        args.model,
        savepath=(args.savepath, 'model_config.pkl'),
        horizon=args.horizon,
        transition_dim=observation_dim + action_dim,
        cond_dim=observation_dim,
        dim_mults=args.dim_mults,
        device=args.device,
    )
    model = model_config()
else:
    # One input channel per observation and action dimension.
    model = LDiT_models[args.model](
        in_channels=observation_dim + action_dim,
        max_in_len=args.horizon,
    ).to(device=args.device)


# Wrap the backbone in the diffusion model, then hand everything to the trainer.
diffusion = diffusion_config(model)

trainer = trainer_config(diffusion, dataset, renderer)

In [37]:
# Step numbers of all '.pt' files in the trainer log directory, parsed from the
# second '_'-separated token of each file name (up to its first '.').
checkpoints = sorted(
    int(fname.split('_')[1].split('.')[0])
    for fname in os.listdir(trainer.logdir)
    if fname.endswith('.pt')
)
# TODO: Change this to 1M
# The discovered list is discarded here: only this hand-picked step is used.
checkpoints = [360000]

In [38]:
from diffuser.guides.policies import Policy

In [39]:
# Restore the trainer from the last step in `checkpoints`.
print(checkpoints[-1])
trainer.load(checkpoints[-1])
# NOTE(review): this takes `trainer.model`. The trainer is configured with
# `ema_decay`, so it may also hold an EMA copy -- confirm which one planning should use.
diffusion = trainer.model

360000


In [40]:
import diffuser.datasets as datasets
# Load the environment named by `args.dataset` (the same name the dataset was built from).
env = datasets.load_environment(args.dataset)

In [46]:
# Reset the environment and show the initial observation; the rollout loop
# conditions the plan on this value.
observation = env.reset()
print(observation)

[ 6.91533664  1.90086368 -0.04659669  0.05104924]


In [47]:
# Goal of the maze. Read through `env.unwrapped`, exactly as the rollout loop
# does, so both places agree and the private attribute is looked up on the base
# environment rather than on a wrapper (wrappers may refuse private attributes).
target = env.unwrapped._target
print(target)

(7, 9)


In [43]:
# Planning constraints, keyed by step index within the plan.
cond = {}
# Pin the last step of the horizon to the goal: the target coordinates followed by two zeros.
cond[diffusion.horizon - 1] = np.array(list(target) + [0, 0])

In [44]:
## observations for rendering
# Starts with a copy of the current observation; the rollout loop appends every
# subsequent observation.
rollout = [observation.copy()]


In [48]:
# Running sum of rewards; the rollout loop adds to it in place, so re-run this
# cell before starting a new rollout.
total_reward = 0

In [49]:
from loguru import logger

In [55]:
# Planner built from the loaded diffusion model and the dataset's normalizer.
policy = Policy(diffusion, dataset.normalizer)
# Passed to the policy as `batch_size` when planning; the loop uses plan 0 of the batch.
batch_size = 1

In [57]:
# NOTE(review): not referenced by any code visible in this part of the notebook.
vis_freq = 10

In [60]:
# Roll out one episode by tracking the planned state sequence.
#
# Uses state created by earlier cells: `env`, `policy`, `cond`, `batch_size`,
# `observation`, `rollout`, `total_reward` and `logger`. `total_reward` and
# `rollout` are updated in place, so re-running this cell without re-running
# those cells keeps adding to the previous values.
for t in range(env.max_episode_steps):

    state = env.state_vector().copy()
    logger.info(f'state: {state}')

    ## can replan if desired, but the open-loop plans are good enough for maze2d
    ## that we really only need to plan once
    if t == 0:
        ## condition the first step of the plan on the current observation
        cond[0] = observation

        action, samples = policy(cond, batch_size=batch_size)
        ## first plan of the batch; `actions` is kept for inspection only,
        ## the controller below tracks the planned observations instead
        actions = samples.actions[0]
        sequence = samples.observations[0]

    ## follow the plan while it lasts; afterwards hold its final waypoint with
    ## everything past the first two components zeroed
    if t < len(sequence) - 1:
        next_waypoint = sequence[t+1]
    else:
        next_waypoint = sequence[-1].copy()
        next_waypoint[2:] = 0
    logger.info(f'next_waypoint: {next_waypoint}')

    ## can use actions or define a simple controller based on state predictions:
    ## error in the first two components (position) plus error in the remaining
    ## ones (presumably velocity -- confirm against the env's state layout)
    action = next_waypoint[:2] - state[:2] + (next_waypoint[2:] - state[2:])
    logger.info(f'action: {action}')

    ## the step info is bound to `info`, not `_`: assigning `_` in a notebook
    ## overrides IPython's "last output" variable (the log label is unchanged)
    next_observation, reward, terminal, info = env.step(action)
    logger.info(f'next_observation: {next_observation}')
    logger.info(f'reward: {reward}')
    logger.info(f'terminal: {terminal}')
    logger.info(f'_: {info}')
    total_reward += reward
    score = env.get_normalized_score(total_reward)
    ## the action gets its own labelled field; it used to be glued directly
    ## onto the ref_min_score value
    print(
        f't: {t} | r: {reward:.2f} |  R: {total_reward:.2f} | score: {score:.4f} | '
        f'ref_max_score: {env.ref_max_score} | ref_min_score: {env.ref_min_score} | '
        f'action: {action}'
    )

    xy = next_observation[:2]
    goal = env.unwrapped._target
    print(f'maze | pos: {xy} | goal: {goal}')

    ## update rollout observations
    rollout.append(next_observation.copy())

    if terminal:
        break

    observation = next_observation


[32m2024-06-07 09:04:46.352[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [1mstate: [ 6.91049418  1.90342088 -0.22499396  0.12603058][0m



[F                                                                                                    
[F1 / 256 [                                                            ]   0% | 156.6 Hz
t : 255
[F[F                                                                                                    
                                                                                                    
[F[F2 / 256 [                                                            ]   0% | 167.9 Hz
t : 254
[F[F                                                                                                    
                                                                                                    
[F[F3 / 256 [                                                            ]   1% | 173.4 Hz
t : 253
[F[F                                                                                                    
                                                                        

[32m2024-06-07 09:04:47.748[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m27[0m - [1mnext_waypoint: [ 6.861636    1.93315    -0.36993265  0.10362434][0m
[32m2024-06-07 09:04:47.749[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m30[0m - [1maction: [-0.1937967   0.00732294][0m
[32m2024-06-07 09:04:47.751[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m46[0m - [1mnext_observation: [ 6.90778804  1.90469563 -0.27061348  0.12747448][0m
[32m2024-06-07 09:04:47.751[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m47[0m - [1mreward: 0.0[0m
[32m2024-06-07 09:04:47.751[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m48[0m - [1mterminal: False[0m
[32m2024-06-07 09:04:47.752[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m49[0m - [1m_: {}[0m
[32m2024-06-07 09:04:47.753[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [1mstate: [ 6.90778804 

[F[F                                                                                                    
                                                                                                    
[F[F227 / 256 [#####################################################       ]  88% | 184.8 Hz
t : 29
[F[F                                                                                                    
                                                                                                    
[F[F228 / 256 [#####################################################       ]  89% | 184.7 Hz
t : 28
[F[F                                                                                                    
                                                                                                    
[F[F229 / 256 [#####################################################       ]  89% | 184.6 Hz
t : 27
[F[F                                                                

[32m2024-06-07 09:04:47.789[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m30[0m - [1maction: [-0.83980513  0.17238616][0m
[32m2024-06-07 09:04:47.791[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m46[0m - [1mnext_observation: [ 6.85300053  1.92210322 -1.28579579  0.34349936][0m
[32m2024-06-07 09:04:47.791[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m47[0m - [1mreward: 0.0[0m
[32m2024-06-07 09:04:47.791[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m48[0m - [1mterminal: False[0m
[32m2024-06-07 09:04:47.792[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m49[0m - [1m_: {}[0m
[32m2024-06-07 09:04:47.793[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [1mstate: [ 6.85300053  1.92210322 -1.28579579  0.34349936][0m
[32m2024-06-07 09:04:47.793[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m27[0m - [1mnext_waypoint: [ 6.737054   

sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 7 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[-0.83980513  0.17238616]
maze | pos: [6.85300053 1.92210322] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 8 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[-0.89870337  0.37458849]
maze | pos: [6.83803281 1.92642217] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 9 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[-0.95752168  0.43707015]
maze | pos: [6.82082027 1.93177178] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 10 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[-0.84392941  0.46367726]
maze | pos: [6.80163878 1.

[32m2024-06-07 09:04:47.990[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m48[0m - [1mterminal: False[0m
[32m2024-06-07 09:04:47.991[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m49[0m - [1m_: {}[0m
[32m2024-06-07 09:04:47.992[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [1mstate: [ 5.35758132  2.19981288 -5.08934211  1.38972914][0m
[32m2024-06-07 09:04:47.993[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m27[0m - [1mnext_waypoint: [ 5.1375065  2.2568176 -5.226232   1.33002  ][0m
[32m2024-06-07 09:04:47.993[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m30[0m - [1maction: [-0.35696477 -0.00270449][0m
[32m2024-06-07 09:04:47.994[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m46[0m - [1mnext_observation: [ 5.30685024  2.19914985 -5.07310791 -0.0663032 ][0m
[32m2024-06-07 09:04:47.995[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[

t: 45 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[-0.4251288   0.12092888]
maze | pos: [5.35758132 2.19981288] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 46 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[-0.35696477 -0.00270449]
maze | pos: [5.30685024 2.19914985] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 47 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[-0.36130724  1.31715007]
maze | pos: [5.25647085 2.19554016] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 48 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[-0.4062927   1.60191109]
maze | pos: [5.20562229 2.1943207 ] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sp

[32m2024-06-07 09:04:48.192[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m27[0m - [1mnext_waypoint: [ 4.2577276   1.869642   -0.56531715 -2.7011378 ][0m
[32m2024-06-07 09:04:48.193[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m30[0m - [1maction: [-0.70509018  0.3601951 ][0m
[32m2024-06-07 09:04:48.194[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m46[0m - [1mnext_observation: [ 4.40165157e+00  1.81057608e+00 -2.87775552e-03 -2.93864767e+00][0m
[32m2024-06-07 09:04:48.195[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m47[0m - [1mreward: 0.0[0m
[32m2024-06-07 09:04:48.196[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m48[0m - [1mterminal: False[0m
[32m2024-06-07 09:04:48.196[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m49[0m - [1m_: {}[0m
[32m2024-06-07 09:04:48.197[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [1mstat

sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 84 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[-0.70509018  0.3601951 ]
maze | pos: [4.40165157 1.81057608] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 85 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[-0.78811612  0.52968387]
maze | pos: [4.40162046 1.78252111] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 86 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[-0.65124002  0.73897392]
maze | pos: [4.40161928 1.75629292] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 87 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[-0.92663503  0.74480841]
maze | pos: [4.40158034

[32m2024-06-07 09:04:48.395[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m46[0m - [1mnext_observation: [4.40159762e+00 2.09216867e+00 3.08497999e-03 4.10759519e+00][0m
[32m2024-06-07 09:04:48.396[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m47[0m - [1mreward: 0.0[0m
[32m2024-06-07 09:04:48.397[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m48[0m - [1mterminal: False[0m
[32m2024-06-07 09:04:48.398[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m49[0m - [1m_: {}[0m
[32m2024-06-07 09:04:48.399[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [1mstate: [4.40159762e+00 2.09216867e+00 3.08497999e-03 4.10759519e+00][0m
[32m2024-06-07 09:04:48.400[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m27[0m - [1mnext_waypoint: [3.8790984  2.3455238  0.08241272 4.068431  ][0m
[32m2024-06-07 09:04:48.401[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>

t: 120 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[-0.55393216  0.48515993]
maze | pos: [4.40159762 2.09216867] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 121 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[-0.44317147  0.21419088]
maze | pos: [4.40163329 2.13365692] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 122 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[-0.434233    0.09006032]
maze | pos: [4.40165934 2.17526085] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 123 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[-0.36286315 -0.02703937]
maze | pos: [4.40168783 2.21670129] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0

[32m2024-06-07 09:04:48.600[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m27[0m - [1mnext_waypoint: [ 3.655783   3.4009683 -0.5315633  4.436371 ][0m
[32m2024-06-07 09:04:48.601[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m30[0m - [1maction: [-1.27727811  5.63884153][0m
[32m2024-06-07 09:04:48.604[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m46[0m - [1mnext_observation: [ 4.40149796e+00  2.19850207e+00 -1.10715536e-07 -2.52457527e-06][0m
[32m2024-06-07 09:04:48.605[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m47[0m - [1mreward: 0.0[0m
[32m2024-06-07 09:04:48.606[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m48[0m - [1mterminal: False[0m
[32m2024-06-07 09:04:48.606[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m49[0m - [1m_: {}[0m
[32m2024-06-07 09:04:48.607[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [1mstate: [

sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 152 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[-1.27727811  5.63884153]
maze | pos: [4.40149796 2.19850207] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 153 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[-1.15055467  5.87695517]
maze | pos: [4.40149796 2.19850206] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 154 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[-0.90762359  6.14203692]
maze | pos: [4.40150921 2.19850205] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 155 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[-0.90631642  6.42766725]
maze | pos: [4.4015

[32m2024-06-07 09:04:48.803[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m49[0m - [1m_: {}[0m
[32m2024-06-07 09:04:48.804[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [1mstate: [ 4.40149861e+00  2.19850204e+00 -4.95340660e-05 -1.47836871e-13][0m
[32m2024-06-07 09:04:48.805[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m27[0m - [1mnext_waypoint: [ 3.5909348  4.839097  -0.7224593  4.886448 ][0m
[32m2024-06-07 09:04:48.806[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m30[0m - [1maction: [-1.53297364  7.52704289][0m
[32m2024-06-07 09:04:48.807[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m46[0m - [1mnext_observation: [ 4.40149833e+00  2.19850204e+00 -2.80353142e-05 -1.09084211e-13][0m
[32m2024-06-07 09:04:48.807[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m47[0m - [1mreward: 0.0[0m
[32m2024-06-07 09:04:48.808[0m | [1mINFO    [0m | [36m

t: 181 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[-1.6603272   7.48573163]
maze | pos: [4.40149861 2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 182 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[-1.53297364  7.52704289]
maze | pos: [4.40149833 2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 183 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[-1.67920505  7.69056847]
maze | pos: [4.40149817 2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 184 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[-1.42187495  7.83089736]
maze | pos: [4.40149808 2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0

[32m2024-06-07 09:04:49.005[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [1mstate: [4.57639396e+00 2.19850204e+00 2.46006538e+00 6.32763881e-15][0m
[32m2024-06-07 09:04:49.006[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m27[0m - [1mnext_waypoint: [3.9277704  5.935937   4.0400267  0.15245628][0m
[32m2024-06-07 09:04:49.006[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m30[0m - [1maction: [0.9313377  3.88989117][0m
[32m2024-06-07 09:04:49.008[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m46[0m - [1mnext_observation: [4.60315413e+00 2.19850204e+00 2.67601736e+00 6.32763881e-15][0m
[32m2024-06-07 09:04:49.009[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m47[0m - [1mreward: 0.0[0m
[32m2024-06-07 09:04:49.009[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m48[0m - [1mterminal: False[0m
[32m2024-06-07 09:04:49.010[0m | [1mINFO    [0m | [36m

sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 219 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[0.9313377  3.88989117]
maze | pos: [4.60315413 2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 220 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[0.91631601 3.83314374]
maze | pos: [4.63203291 2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 221 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[0.91057781 3.98564151]
maze | pos: [4.66301157 2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 222 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[0.92967254 4.16384127]
maze | pos: [4.6961306  2.1

[32m2024-06-07 09:04:49.208[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m47[0m - [1mreward: 0.0[0m
[32m2024-06-07 09:04:49.208[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m48[0m - [1mterminal: False[0m
[32m2024-06-07 09:04:49.209[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m49[0m - [1m_: {}[0m
[32m2024-06-07 09:04:49.210[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [1mstate: [ 5.47309194e+00  2.19850204e+00 -6.22720746e-01  6.32763881e-15][0m
[32m2024-06-07 09:04:49.211[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m27[0m - [1mnext_waypoint: [ 4.8791704  6.3343024 -0.8118696  3.630044 ][0m
[32m2024-06-07 09:04:49.212[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m30[0m - [1maction: [-0.7830704   7.76584437][0m
[32m2024-06-07 09:04:49.213[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m46[0m - [1mnext_observation: [

t: 257 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[-0.72850601  7.595624  ]
maze | pos: [5.47309194 2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 258 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[-0.7830704   7.76584437]
maze | pos: [5.46501458 2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 259 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[-0.65005522  8.04315903]
maze | pos: [5.45540825 2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 260 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[-0.54759489  8.18383458]
maze | pos: [5.44452063 2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0

[32m2024-06-07 09:04:49.410[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m46[0m - [1mnext_observation: [5.54002019e+00 2.19850196e+00 2.76344289e+00 6.06093261e-06][0m
[32m2024-06-07 09:04:49.411[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m47[0m - [1mreward: 0.0[0m
[32m2024-06-07 09:04:49.411[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m48[0m - [1mterminal: False[0m
[32m2024-06-07 09:04:49.412[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m49[0m - [1m_: {}[0m
[32m2024-06-07 09:04:49.413[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [1mstate: [5.54002019e+00 2.19850196e+00 2.76344289e+00 6.06093261e-06][0m
[32m2024-06-07 09:04:49.414[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m27[0m - [1mnext_waypoint: [ 5.2421017   7.368535    3.9355793  -0.04349184][0m
[32m2024-06-07 09:04:49.414[0m | [1mINFO    [0m | [36m__main__[0m:[36m<mod

0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 295 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[0.84531175 5.13916154]
maze | pos: [5.54002019 2.19850196] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 296 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[0.8742179  5.12653518]
maze | pos: [5.56967087 2.19850199] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 297 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[0.89507787 5.07883308]
maze | pos: [5.60138269 2.19850201] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 298 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[0.9288886  5.17383911]
maze | pos: [5.63523126 2.19850202] | goal: (7

[32m2024-06-07 09:04:49.611[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [1mstate: [7.09871391e+00 2.19850204e+00 4.01436420e+00 6.33118650e-15][0m
[32m2024-06-07 09:04:49.612[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m27[0m - [1mnext_waypoint: [6.8699813  7.4570823  3.4096613  0.36900568][0m
[32m2024-06-07 09:04:49.612[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m30[0m - [1maction: [-0.83343553  5.62758591][0m
[32m2024-06-07 09:04:49.614[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m46[0m - [1mnext_observation: [7.13677700e+00 2.19850204e+00 3.80630923e+00 6.33118650e-15][0m
[32m2024-06-07 09:04:49.615[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m47[0m - [1mreward: 0.0[0m
[32m2024-06-07 09:04:49.615[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m48[0m - [1mterminal: False[0m
[32m2024-06-07 09:04:49.616[0m | [1mINFO    [0m | [3

sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 334 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[-0.83343553  5.62758591]
maze | pos: [7.136777   2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 335 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[-0.8478628   5.78154233]
maze | pos: [7.17273014 2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 336 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[-0.86726651  6.06669524]
maze | pos: [7.20653214 2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 337 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[-0.78757731  6.32888844]
maze | pos: [7.2038

[32m2024-06-07 09:04:49.815[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m47[0m - [1mreward: 0.0[0m
[32m2024-06-07 09:04:49.815[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m48[0m - [1mterminal: False[0m
[32m2024-06-07 09:04:49.816[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m49[0m - [1m_: {}[0m
[32m2024-06-07 09:04:49.817[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [1mstate: [ 7.16068130e+00  2.19850204e+00 -3.80135296e-01  6.29734138e-15][0m
[32m2024-06-07 09:04:49.818[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m27[0m - [1mnext_waypoint: [ 7.051571    8.8519125  -0.48151827  1.722846  ][0m
[32m2024-06-07 09:04:49.819[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m30[0m - [1maction: [-0.21049338  8.37625649][0m
[32m2024-06-07 09:04:49.820[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m46[0m - [1mnext_observatio

t: 374 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[-0.28408468  8.35661795]
maze | pos: [7.1606813  2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 375 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[-0.21049338  8.37625649]
maze | pos: [7.15638768 2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 376 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[-0.1513756   8.17706302]
maze | pos: [7.15174376 2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 377 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[-0.07772218  8.13260224]
maze | pos: [7.1469258  2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0

[32m2024-06-07 09:04:50.018[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m46[0m - [1mnext_observation: [ 7.09184538e+00  2.19850204e+00 -9.50039573e-02  6.29734138e-15][0m
[32m2024-06-07 09:04:50.019[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m47[0m - [1mreward: 0.0[0m
[32m2024-06-07 09:04:50.019[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m48[0m - [1mterminal: False[0m
[32m2024-06-07 09:04:50.020[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m49[0m - [1m_: {}[0m
[32m2024-06-07 09:04:50.020[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [1mstate: [ 7.09184538e+00  2.19850204e+00 -9.50039573e-02  6.29734138e-15][0m
[32m2024-06-07 09:04:50.023[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m27[0m - [1mnext_waypoint: [7. 9. 0. 0.][0m
[32m2024-06-07 09:04:50.023[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m30[0m - [1ma

t: 413 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[3.19905681e-03 6.80149796e+00]
maze | pos: [7.09184538 2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 414 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[3.15857259e-03 6.80149796e+00]
maze | pos: [7.09090513 2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 415 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[3.12030408e-03 6.80149796e+00]
maze | pos: [7.08997455 2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 416 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[3.08380945e-03 6.80149796e+00]
maze | pos: [7.08905352 2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, spar

[32m2024-06-07 09:04:50.221[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m48[0m - [1mterminal: False[0m
[32m2024-06-07 09:04:50.222[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m49[0m - [1m_: {}[0m
[32m2024-06-07 09:04:50.223[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [1mstate: [ 7.06276795e+00  2.19850204e+00 -6.49083174e-02  6.29734138e-15][0m
[32m2024-06-07 09:04:50.224[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m27[0m - [1mnext_waypoint: [7. 9. 0. 0.][0m
[32m2024-06-07 09:04:50.225[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m30[0m - [1maction: [2.14037076e-03 6.80149796e+00][0m
[32m2024-06-07 09:04:50.226[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m46[0m - [1mnext_observation: [ 7.06212551e+00  2.19850204e+00 -6.42439704e-02  6.29734138e-15][0m
[32m2024-06-07 09:04:50.226[0m | [1mINFO    [0m | [36m__main__[0m:[36m<mod

t: 450 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[2.16250476e-03 6.80149796e+00]
maze | pos: [7.06276795 2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 451 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[2.14037076e-03 6.80149796e+00]
maze | pos: [7.06212551 2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 452 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[2.11846340e-03 6.80149796e+00]
maze | pos: [7.06148964 2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 453 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[2.09678034e-03 6.80149796e+00]
maze | pos: [7.06086029 2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, spar

[32m2024-06-07 09:04:50.423[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m46[0m - [1mnext_observation: [ 7.04289659e+00  2.19850204e+00 -4.43593558e-02  6.29734138e-15][0m
[32m2024-06-07 09:04:50.424[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m47[0m - [1mreward: 0.0[0m
[32m2024-06-07 09:04:50.424[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m48[0m - [1mterminal: False[0m
[32m2024-06-07 09:04:50.425[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m49[0m - [1m_: {}[0m
[32m2024-06-07 09:04:50.426[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [1mstate: [ 7.04289659e+00  2.19850204e+00 -4.43593558e-02  6.29734138e-15][0m
[32m2024-06-07 09:04:50.428[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m27[0m - [1mnext_waypoint: [7. 9. 0. 0.][0m
[32m2024-06-07 09:04:50.428[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m30[0m - [1ma

t: 487 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[1.47788817e-03 6.80149796e+00]
maze | pos: [7.04289659 2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 488 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[1.46276176e-03 6.80149796e+00]
maze | pos: [7.04245754 2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 489 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[1.44779016e-03 6.80149796e+00]
maze | pos: [7.04202298 2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 490 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[1.43297181e-03 6.80149796e+00]
maze | pos: [7.04159287 2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, spar

[32m2024-06-07 09:04:50.627[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m47[0m - [1mreward: 0.0[0m
[32m2024-06-07 09:04:50.628[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m48[0m - [1mterminal: False[0m
[32m2024-06-07 09:04:50.628[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m49[0m - [1m_: {}[0m
[32m2024-06-07 09:04:50.629[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [1mstate: [ 7.03054778e+00  2.19850204e+00 -3.15894470e-02  6.29734138e-15][0m
[32m2024-06-07 09:04:50.630[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m27[0m - [1mnext_waypoint: [7. 9. 0. 0.][0m
[32m2024-06-07 09:04:50.631[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m30[0m - [1maction: [1.04167056e-03 6.80149796e+00][0m
[32m2024-06-07 09:04:50.632[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m46[0m - [1mnext_observation: [ 7.03023512e+00  2.1985020

t: 520 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[1.05244247e-03 6.80149796e+00]
maze | pos: [7.03054778 2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 521 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[1.04167056e-03 6.80149796e+00]
maze | pos: [7.03023512 2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 522 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[1.03100890e-03 6.80149796e+00]
maze | pos: [7.02992565 2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 523 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[1.02045637e-03 6.80149796e+00]
maze | pos: [7.02961936 2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, spar

[32m2024-06-07 09:04:50.831[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m46[0m - [1mnext_observation: [ 7.02243574e+00  2.19850204e+00 -2.32007885e-02  6.29734138e-15][0m
[32m2024-06-07 09:04:50.831[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m47[0m - [1mreward: 0.0[0m
[32m2024-06-07 09:04:50.832[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m48[0m - [1mterminal: False[0m
[32m2024-06-07 09:04:50.833[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m49[0m - [1m_: {}[0m
[32m2024-06-07 09:04:50.834[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [1mstate: [ 7.02243574e+00  2.19850204e+00 -2.32007885e-02  6.29734138e-15][0m
[32m2024-06-07 09:04:50.835[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m27[0m - [1mnext_waypoint: [7. 9. 0. 0.][0m
[32m2024-06-07 09:04:50.835[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m30[0m - [1ma

sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 550 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[7.72963681e-04 6.80149796e+00]
maze | pos: [7.02243574 2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 551 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[7.65052277e-04 6.80149796e+00]
maze | pos: [7.0222061  2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 552 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[7.57221848e-04 6.80149796e+00]
maze | pos: [7.02197882 2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 553 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[7.49471565e-04 6.80149796e

[32m2024-06-07 09:04:51.032[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m47[0m - [1mreward: 0.0[0m
[32m2024-06-07 09:04:51.033[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m48[0m - [1mterminal: False[0m
[32m2024-06-07 09:04:51.033[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m49[0m - [1m_: {}[0m
[32m2024-06-07 09:04:51.034[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [1mstate: [ 7.01614229e+00  2.19850204e+00 -1.66927356e-02  6.29734138e-15][0m
[32m2024-06-07 09:04:51.034[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m27[0m - [1mnext_waypoint: [7. 9. 0. 0.][0m
[32m2024-06-07 09:04:51.035[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m30[0m - [1maction: [5.50447472e-04 6.80149796e+00][0m
[32m2024-06-07 09:04:51.036[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m46[0m - [1mnext_observation: [ 7.01597707e+00  2.1985020

t: 582 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[5.56139648e-04 6.80149796e+00]
maze | pos: [7.01614229 2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 583 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[5.50447472e-04 6.80149796e+00]
maze | pos: [7.01597707 2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 584 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[5.44813556e-04 6.80149796e+00]
maze | pos: [7.01581354 2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 585 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[5.39237305e-04 6.80149796e+00]
maze | pos: [7.01565169 2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, spar

[32m2024-06-07 09:04:51.234[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m48[0m - [1mterminal: False[0m
[32m2024-06-07 09:04:51.235[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m49[0m - [1m_: {}[0m
[32m2024-06-07 09:04:51.236[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [1mstate: [ 7.01103189e+00  2.19850204e+00 -1.14080760e-02  6.29734138e-15][0m
[32m2024-06-07 09:04:51.242[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m27[0m - [1mnext_waypoint: [7. 9. 0. 0.][0m
[32m2024-06-07 09:04:51.243[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m30[0m - [1maction: [3.76184391e-04 6.80149796e+00][0m
[32m2024-06-07 09:04:51.247[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m46[0m - [1mnext_observation: [ 7.01091898e+00  2.19850204e+00 -1.12913125e-02  6.29734138e-15][0m
[32m2024-06-07 09:04:51.249[0m | [1mINFO    [0m | [36m__main__[0m:[36m<mod

t: 619 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[3.80074512e-04 6.80149796e+00]
maze | pos: [7.01103189 2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 620 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[3.76184391e-04 6.80149796e+00]
maze | pos: [7.01091898 2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 621 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[3.72334085e-04 6.80149796e+00]
maze | pos: [7.01080722 2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 622 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[3.68523188e-04 6.80149796e+00]
maze | pos: [7.01069661 2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, spar

[32m2024-06-07 09:04:51.438[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m49[0m - [1m_: {}[0m
[32m2024-06-07 09:04:51.439[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [1mstate: [ 7.00761733e+00  2.19850204e+00 -7.87708024e-03  6.29734138e-15][0m
[32m2024-06-07 09:04:51.440[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m27[0m - [1mnext_waypoint: [7. 9. 0. 0.][0m
[32m2024-06-07 09:04:51.441[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m30[0m - [1maction: [2.59748852e-04 6.80149796e+00][0m
[32m2024-06-07 09:04:51.442[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m46[0m - [1mnext_observation: [ 7.00753937e+00  2.19850204e+00 -7.79645710e-03  6.29734138e-15][0m
[32m2024-06-07 09:04:51.443[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m47[0m - [1mreward: 0.0[0m
[32m2024-06-07 09:04:51.443[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>

t: 655 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[2.62434914e-04 6.80149796e+00]
maze | pos: [7.00761733 2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 656 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[2.59748852e-04 6.80149796e+00]
maze | pos: [7.00753937 2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 657 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[2.57090282e-04 6.80149796e+00]
maze | pos: [7.0074622  2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 658 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[2.54458923e-04 6.80149796e+00]
maze | pos: [7.00738582 2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, spar

[32m2024-06-07 09:04:51.639[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m47[0m - [1mreward: 0.0[0m
[32m2024-06-07 09:04:51.640[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m48[0m - [1mterminal: False[0m
[32m2024-06-07 09:04:51.640[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m49[0m - [1m_: {}[0m
[32m2024-06-07 09:04:51.642[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [1mstate: [ 7.00525964e+00  2.19850204e+00 -5.43898843e-03  6.29734138e-15][0m
[32m2024-06-07 09:04:51.642[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m27[0m - [1mnext_waypoint: [7. 9. 0. 0.][0m
[32m2024-06-07 09:04:51.643[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m30[0m - [1maction: [1.79352115e-04 6.80149796e+00][0m
[32m2024-06-07 09:04:51.644[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m46[0m - [1mnext_observation: [ 7.00520580e+00  2.1985020

t: 691 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[1.81206794e-04 6.80149796e+00]
maze | pos: [7.00525964 2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 692 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[1.79352115e-04 6.80149796e+00]
maze | pos: [7.0052058  2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 693 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[1.77516418e-04 6.80149796e+00]
maze | pos: [7.00515252 2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 694 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[1.75699510e-04 6.80149796e+00]
maze | pos: [7.00509978 2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, spar

[32m2024-06-07 09:04:51.842[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [1mstate: [ 7.00355773e+00  2.19850204e+00 -3.67904456e-03  6.29734138e-15][0m
[32m2024-06-07 09:04:51.842[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m27[0m - [1mnext_waypoint: [7. 9. 0. 0.][0m
[32m2024-06-07 09:04:51.843[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m30[0m - [1maction: [1.21317490e-04 6.80149796e+00][0m
[32m2024-06-07 09:04:51.845[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m46[0m - [1mnext_observation: [ 7.00352131e+00  2.19850204e+00 -3.64138897e-03  6.29734138e-15][0m
[32m2024-06-07 09:04:51.846[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m47[0m - [1mreward: 0.0[0m
[32m2024-06-07 09:04:51.847[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m48[0m - [1mterminal: False[0m
[32m2024-06-07 09:04:51.847[0m | [1mINFO    [0m | [36m__main__[0m:[3

sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 730 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[1.21317490e-04 6.80149796e+00]
maze | pos: [7.00352131 2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 731 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[1.20075786e-04 6.80149796e+00]
maze | pos: [7.00348527 2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 732 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[1.18846792e-04 6.80149796e+00]
maze | pos: [7.0034496  2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 733 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[1.17630377e-04 6.80149796e

[32m2024-06-07 09:04:52.046[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [1mstate: [ 7.00240652e+00  2.19850204e+00 -2.48858204e-03  6.29734138e-15][0m
[32m2024-06-07 09:04:52.049[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m27[0m - [1mnext_waypoint: [7. 9. 0. 0.][0m
[32m2024-06-07 09:04:52.049[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m30[0m - [1maction: [8.20616658e-05 6.80149796e+00][0m
[32m2024-06-07 09:04:52.050[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m46[0m - [1mnext_observation: [ 7.00238189e+00  2.19850204e+00 -2.46311101e-03  6.29734138e-15][0m
[32m2024-06-07 09:04:52.051[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m47[0m - [1mreward: 0.0[0m
[32m2024-06-07 09:04:52.051[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m48[0m - [1mterminal: False[0m
[32m2024-06-07 09:04:52.052[0m | [1mINFO    [0m | [36m__main__[0m:[3

t: 767 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[8.29102653e-05 6.80149796e+00]
maze | pos: [7.00240652 2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 768 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[8.20616658e-05 6.80149796e+00]
maze | pos: [7.00238189 2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 769 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[8.12217519e-05 6.80149796e+00]
maze | pos: [7.00235751 2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, sparse is getting called
0.0, sparse, ... is getting called
t: 770 | r: 0.00 |  R: 0.00 | score: -0.0251 | ref_max_score: 273.99 | ref_min_score: 6.7[8.03904347e-05 6.80149796e+00]
maze | pos: [7.00233338 2.19850204] | goal: (7, 9)
sparse, mess it up
0.0, sparse, spar