In [1]:
import sys
import logging
import numpy as np
from joblib import hash, dump, load
import os

from deer.default_parser import process_args
from deer.agent import NeuralAgent
from deer.learning_algos.CRAR_torch import CRAR
from simple_maze_env import MyEnv as simple_maze_env
import deer.experiment.base_controllers as bc

from deer.policies import EpsilonGreedyPolicy

In [2]:
class Defaults:
    # ----------------------
    # Experiment Parameters
    # ----------------------
    steps_per_epoch = 5000
    epochs = 50
    steps_per_test = 1000
    period_btw_summary_perfs = 1
    
    # ----------------------
    # Environment Parameters
    # ----------------------
    frame_skip = 2

    # ----------------------
    # DQN Agent parameters:
    # ----------------------
    update_rule = 'rmsprop'
    learning_rate = 5 * 1E-7 # 1E-4
    learning_rate_decay = 0.9
    discount = 0.9
    discount_inc = 1
    discount_max = 0.99
    rms_decay = 0.9
    rms_epsilon = 0.0001
    momentum = 0
    clip_norm = 1.0
    epsilon_start = 1.0
    epsilon_min = 1.0
    epsilon_decay = 10000
    update_frequency = 1
    replay_memory_size = 1000000 #replacing with 200000 will works just fine (in case you dont have 18gb of memory)
    batch_size = 32
    freeze_interval = 1000
    deterministic = False

higher_dim_obs = True

In [3]:

parameters = Defaults()
if parameters.deterministic:
    rng = np.random.RandomState(123456)
else:
    rng = np.random.RandomState()

# --- Instantiate environment ---
env = simple_maze_env(rng, higher_dim_obs=higher_dim_obs)

# --- Instantiate learning_algo ---
learning_algo = CRAR(
    env,
    parameters.rms_decay,
    parameters.rms_epsilon,
    parameters.momentum,
    parameters.clip_norm,
    parameters.freeze_interval,
    parameters.batch_size,
    parameters.update_rule,
    rng,
    high_int_dim=False,
    internal_dim=2, lr=parameters.learning_rate)

test_policy = EpsilonGreedyPolicy(learning_algo, env.nActions(), rng, 1.)

# --- Instantiate agent ---
agent = NeuralAgent(
    env,
    learning_algo,
    parameters.replay_memory_size,
    max(env.inputDimensions()[i][0] for i in range(len(env.inputDimensions()))),
    parameters.batch_size,
    rng,
    test_policy=test_policy)

# --- Create unique filename for FindBestController ---
h = hash(vars(parameters), hash_name="sha1")
fname = "test_" + h
print("The parameters hash is: {}".format(h))
print("The parameters are: {}".format(parameters))

# As for the discount factor and the learning rate, one can update periodically the parameter of the epsilon-greedy
# policy implemented by the agent. This controllers has a bit more capabilities, as it allows one to choose more
# precisely when to update epsilon: after every X action, episode or epoch. This parameter can also be reset every
# episode or epoch (or never, hence the resetEvery='none').
agent.attach(bc.EpsilonController(
    initial_e=parameters.epsilon_start,
    e_decays=parameters.epsilon_decay,
    e_min=parameters.epsilon_min,
    evaluate_on='action',
    periodicity=1,
    reset_every='none'))

agent.run(10, 500)
print("end gathering data")

# --- Bind controllers to the agent ---
# Before every training epoch (periodicity=1), we want to print a summary of the agent's epsilon, discount and 
# learning rate as well as the training epoch number.
agent.attach(bc.VerboseController(
    evaluate_on='epoch', 
    periodicity=1))

# Every epoch end, one has the possibility to modify the learning rate using a LearningRateController. Here we 
# wish to update the learning rate after every training epoch (periodicity=1), according to the parameters given.
agent.attach(bc.LearningRateController(
    initial_learning_rate=parameters.learning_rate, 
    learning_rate_decay=parameters.learning_rate_decay,
    periodicity=1))

# Same for the discount factor.
agent.attach(bc.DiscountFactorController(
    initial_discount_factor=parameters.discount, 
    discount_factor_growth=parameters.discount_inc, 
    discount_factor_max=parameters.discount_max,
    periodicity=1))

# During training epochs, we want to train the agent after every [parameters.update_frequency] action it takes.
# Plus, we also want to display after each training episode (!= than after every training) the average bellman
# residual and the average of the V values obtained during the last episode, hence the two last arguments.
agent.attach(bc.TrainerController(
    evaluate_on='action', 
    periodicity=parameters.update_frequency, 
    show_episode_avg_V_value=True, 
    show_avg_Bellman_residual=True))

# We wish to discover, among all versions of our neural network (i.e., after every training epoch), which one 
# has the highest validation score.
# To achieve this goal, one can use the FindBestController along with an InterleavedTestEpochControllers. It is 
# important that the validationID is the same than the id argument of the InterleavedTestEpochController.
# The FindBestController will dump on disk the validation scores for each and every network, as well as the 
# structure of the neural network having the best validation score. These dumps can then used to plot the evolution 
# of the validation and test scores (see below) or simply recover the resulting neural network for your 
# application.
agent.attach(bc.FindBestController(
    validationID=simple_maze_env.VALIDATION_MODE,
    testID=None,
    unique_fname=fname))

# All previous controllers control the agent during the epochs it goes through. However, we want to interleave a 
# "validation epoch" between each training epoch. For each validation epoch, we want also to display the sum of all 
# rewards obtained, hence the showScore=True. Finally, we want to call the summarizePerformance method of ALE_env 
# every [parameters.period_btw_summary_perfs] *validation* epochs.
agent.attach(bc.InterleavedTestEpochController(
    id=simple_maze_env.VALIDATION_MODE, 
    epoch_length=parameters.steps_per_test,
    periodicity=1,
    show_score=True,
    summarize_every=1))

The parameters hash is: 62977be8e45d8a56a5537c11dfd5d2fd8dda69e0
The parameters are: <__main__.Defaults object at 0x2b58315f7a60>
end gathering data


In [4]:
try:
    os.mkdir("params")
except Exception:
    pass
dump(vars(parameters), "params/" + fname + ".jldump")
agent.gathering_data=False
agent.run(parameters.epochs, parameters.steps_per_epoch)

# --- Show results ---
basename = "scores/" + fname
scores = load(basename + "_scores.jldump")
print (scores)

Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
2 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([-0.2001, -0.0236], device='cuda:0') tensor([ 0.0084, -0.2321], device='cuda:0') tensor([-0.2001, -0.0236], device='cuda:0')
R[0]
tensor([0.0068], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lossR/500., self.loss_gamma/500., self.loss_Q/500., self.loss_disentangle_t/500., self.loss_disambiguate1/500., self.loss_disambiguate2/500.
0.04756144569069147 0.004027055965736508 1.1906292102336884 0.0 1.7083818734136003e-06 0.015413872413337231 3.389960375670853e-06
self.loss_interpret/500.
0.0
Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
1 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([-0.1307, -0.0745], device='cuda:0') tensor([ 0.0872, -0.2843], device='cuda:0') tensor([-0.1316, -0.0743], device='cuda:0')
R[0]
tensor([0.0654], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., sel

  line3 = ax.scatter(all_possib_abs_states[length_block[i][0]:length_block[i][1],0], all_possib_abs_states[length_block[i][0]:length_block[i][1],1], c=colors[i], marker='x', edgecolors='k', alpha=0.5, s=100)


Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
3 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.4866, -0.6834], device='cuda:0') tensor([ 0.6708, -0.8451], device='cuda:0') tensor([ 0.4866, -0.6834], device='cuda:0')
R[0]
tensor([0.1438], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lossR/500., self.loss_gamma/500., self.loss_Q/500., self.loss_disentangle_t/500., self.loss_disambiguate1/500., self.loss_disambiguate2/500.
0.0289008993729949 0.07184129229723477 1.1544127140045166 0.0 2.424506373634472e-05 0.3727572676539421 5.25310458669992e-07
self.loss_interpret/500.
0.0
Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
0 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.5149, -0.7220], device='cuda:0') tensor([ 0.7003, -0.8971], device='cuda:0') tensor([ 0.5141, -0.7224], device='cuda:0')
R[0]
tensor([0.0652], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.loss

  line3 = ax.scatter(all_possib_abs_states[length_block[i][0]:length_block[i][1],0], all_possib_abs_states[length_block[i][0]:length_block[i][1],1], c=colors[i], marker='x', edgecolors='k', alpha=0.5, s=100)


Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
1 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.6583, -0.9561], device='cuda:0') tensor([ 0.8042, -1.0824], device='cuda:0') tensor([ 0.6582, -0.9560], device='cuda:0')
R[0]
tensor([0.1117], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lossR/500., self.loss_gamma/500., self.loss_Q/500., self.loss_disentangle_t/500., self.loss_disambiguate1/500., self.loss_disambiguate2/500.
0.01916776829585433 0.17670471388171427 1.1370086216926574 0.0 2.8285180472495952e-06 0.6835689687728882 3.5883744619269464e-08
self.loss_interpret/500.
0.0
Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
2 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.6658, -0.9713], device='cuda:0') tensor([ 0.8032, -1.0888], device='cuda:0') tensor([ 0.6658, -0.9713], device='cuda:0')
R[0]
tensor([0.0557], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.

  line3 = ax.scatter(all_possib_abs_states[length_block[i][0]:length_block[i][1],0], all_possib_abs_states[length_block[i][0]:length_block[i][1],1], c=colors[i], marker='x', edgecolors='k', alpha=0.5, s=100)


Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
3 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.7242, -1.0744], device='cuda:0') tensor([ 0.8472, -1.1711], device='cuda:0') tensor([ 0.7243, -1.0744], device='cuda:0')
R[0]
tensor([0.1614], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lossR/500., self.loss_gamma/500., self.loss_Q/500., self.loss_disentangle_t/500., self.loss_disambiguate1/500., self.loss_disambiguate2/500.
0.012608046386390924 0.29727718187193386 1.129894293308258 0.0 1.8188895339721344e-07 0.8451109858751297 3.751461224332786e-09
self.loss_interpret/500.
0.0
Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
1 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.7289, -1.0818], device='cuda:0') tensor([ 0.8444, -1.1786], device='cuda:0') tensor([ 0.7290, -1.0818], device='cuda:0')
R[0]
tensor([0.1179], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.l

  line3 = ax.scatter(all_possib_abs_states[length_block[i][0]:length_block[i][1],0], all_possib_abs_states[length_block[i][0]:length_block[i][1],1], c=colors[i], marker='x', edgecolors='k', alpha=0.5, s=100)


Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
2 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.7658, -1.1393], device='cuda:0') tensor([ 0.8587, -1.2114], device='cuda:0') tensor([ 0.7658, -1.1393], device='cuda:0')
R[0]
tensor([0.0638], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lossR/500., self.loss_gamma/500., self.loss_Q/500., self.loss_disentangle_t/500., self.loss_disambiguate1/500., self.loss_disambiguate2/500.
0.008036040920764207 0.4258656803339254 1.125619303226471 0.0 2.991443569588803e-08 0.9459905099868774 6.239165262211799e-10
self.loss_interpret/500.
0.0
Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
0 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.7685, -1.1441], device='cuda:0') tensor([ 0.8815, -1.2414], device='cuda:0') tensor([ 0.7685, -1.1441], device='cuda:0')
R[0]
tensor([0.0857], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.los

  line3 = ax.scatter(all_possib_abs_states[length_block[i][0]:length_block[i][1],0], all_possib_abs_states[length_block[i][0]:length_block[i][1],1], c=colors[i], marker='x', edgecolors='k', alpha=0.5, s=100)


Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
2 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.7881, -1.1847], device='cuda:0') tensor([ 0.8613, -1.2381], device='cuda:0') tensor([ 0.7881, -1.1847], device='cuda:0')
R[0]
tensor([0.0660], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lossR/500., self.loss_gamma/500., self.loss_Q/500., self.loss_disentangle_t/500., self.loss_disambiguate1/500., self.loss_disambiguate2/500.
0.004912124789319933 0.5593158588863444 1.1227729935646058 0.0 3.1948029961537118e-09 1.0149186344146728 1.5480277815171918e-10
self.loss_interpret/500.
0.0
Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
0 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.7894, -1.1883], device='cuda:0') tensor([ 0.8834, -1.2676], device='cuda:0') tensor([ 0.7893, -1.1883], device='cuda:0')
R[0]
tensor([0.0878], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.

  line3 = ax.scatter(all_possib_abs_states[length_block[i][0]:length_block[i][1],0], all_possib_abs_states[length_block[i][0]:length_block[i][1],1], c=colors[i], marker='x', edgecolors='k', alpha=0.5, s=100)


Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
1 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.7962, -1.2203], device='cuda:0') tensor([ 0.8484, -1.2591], device='cuda:0') tensor([ 0.7962, -1.2203], device='cuda:0')
R[0]
tensor([0.1245], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lossR/500., self.loss_gamma/500., self.loss_Q/500., self.loss_disentangle_t/500., self.loss_disambiguate1/500., self.loss_disambiguate2/500.
0.002853866083547473 0.6964666336949449 1.1203613846302032 0.0 6.384031706607729e-10 1.0635317630767822 8.848910959535772e-11
self.loss_interpret/500.
0.0
Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
3 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.7966, -1.2233], device='cuda:0') tensor([ 0.8509, -1.2563], device='cuda:0') tensor([ 0.7966, -1.2233], device='cuda:0')
R[0]
tensor([0.1673], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lo

  line3 = ax.scatter(all_possib_abs_states[length_block[i][0]:length_block[i][1],0], all_possib_abs_states[length_block[i][0]:length_block[i][1],1], c=colors[i], marker='x', edgecolors='k', alpha=0.5, s=100)


Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
0 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.7983, -1.2487], device='cuda:0') tensor([ 0.8621, -1.3001], device='cuda:0') tensor([ 0.7983, -1.2487], device='cuda:0')
R[0]
tensor([0.0905], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lossR/500., self.loss_gamma/500., self.loss_Q/500., self.loss_disentangle_t/500., self.loss_disambiguate1/500., self.loss_disambiguate2/500.
0.0015211258821655066 0.8363316186300944 1.11802392578125 0.0 2.4738396486867485e-10 1.099682912349701 6.753746739640576e-11
self.loss_interpret/500.
0.0
Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
3 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.7984, -1.2509], device='cuda:0') tensor([ 0.8358, -1.2694], device='cuda:0') tensor([ 0.7984, -1.2509], device='cuda:0')
R[0]
tensor([0.1681], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.los

  line3 = ax.scatter(all_possib_abs_states[length_block[i][0]:length_block[i][1],0], all_possib_abs_states[length_block[i][0]:length_block[i][1],1], c=colors[i], marker='x', edgecolors='k', alpha=0.5, s=100)


Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
1 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.7978, -1.2711], device='cuda:0') tensor([ 0.8188, -1.2834], device='cuda:0') tensor([ 0.7978, -1.2711], device='cuda:0')
R[0]
tensor([0.1268], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lossR/500., self.loss_gamma/500., self.loss_Q/500., self.loss_disentangle_t/500., self.loss_disambiguate1/500., self.loss_disambiguate2/500.
0.0007608040190534666 0.9787233109499793 1.1159811396598815 0.0 1.6571085570404875e-10 1.1272138969898224 5.985127282670532e-11
self.loss_interpret/500.
0.0
Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
2 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.7976, -1.2730], device='cuda:0') tensor([ 0.8236, -1.2845], device='cuda:0') tensor([ 0.7976, -1.2730], device='cuda:0')
R[0]
tensor([0.0702], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.

  line3 = ax.scatter(all_possib_abs_states[length_block[i][0]:length_block[i][1],0], all_possib_abs_states[length_block[i][0]:length_block[i][1],1], c=colors[i], marker='x', edgecolors='k', alpha=0.5, s=100)


Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
1 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.7956, -1.2891], device='cuda:0') tensor([ 0.8045, -1.2925], device='cuda:0') tensor([ 0.7956, -1.2891], device='cuda:0')
R[0]
tensor([0.1276], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lossR/500., self.loss_gamma/500., self.loss_Q/500., self.loss_disentangle_t/500., self.loss_disambiguate1/500., self.loss_disambiguate2/500.
0.0003592694269900676 1.1225401675782631 1.1143136048316955 0.0 1.2000849125559653e-10 1.14822442984581 5.598521868321882e-11
self.loss_interpret/500.
0.0
Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
2 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.7954, -1.2905], device='cuda:0') tensor([ 0.8100, -1.2937], device='cuda:0') tensor([ 0.7954, -1.2905], device='cuda:0')
R[0]
tensor([0.0710], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lo

  line3 = ax.scatter(all_possib_abs_states[length_block[i][0]:length_block[i][1],0], all_possib_abs_states[length_block[i][0]:length_block[i][1],1], c=colors[i], marker='x', edgecolors='k', alpha=0.5, s=100)


Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
3 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.7926, -1.3032], device='cuda:0') tensor([ 0.7938, -1.2971], device='cuda:0') tensor([ 0.7926, -1.3032], device='cuda:0')
R[0]
tensor([0.1695], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lossR/500., self.loss_gamma/500., self.loss_Q/500., self.loss_disentangle_t/500., self.loss_disambiguate1/500., self.loss_disambiguate2/500.
0.00018889752913673873 1.2679356243468356 1.1135774509906768 0.0 9.483655205588804e-11 1.163835164785385 6.119197770715346e-11
self.loss_interpret/500.
0.0
Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
2 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.7922, -1.3043], device='cuda:0') tensor([ 0.7974, -1.3022], device='cuda:0') tensor([ 0.7922, -1.3043], device='cuda:0')
R[0]
tensor([0.0717], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.l

  line3 = ax.scatter(all_possib_abs_states[length_block[i][0]:length_block[i][1],0], all_possib_abs_states[length_block[i][0]:length_block[i][1],1], c=colors[i], marker='x', edgecolors='k', alpha=0.5, s=100)


Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
1 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.7891, -1.3138], device='cuda:0') tensor([ 0.7823, -1.3102], device='cuda:0') tensor([ 0.7891, -1.3138], device='cuda:0')
R[0]
tensor([0.1286], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lossR/500., self.loss_gamma/500., self.loss_Q/500., self.loss_disentangle_t/500., self.loss_disambiguate1/500., self.loss_disambiguate2/500.
0.0001342989746844978 1.4138984736397398 1.113268494606018 0.0 8.287730779432679e-11 1.1747352283000947 7.141386060283139e-11
self.loss_interpret/500.
0.0
Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
1 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.7888, -1.3146], device='cuda:0') tensor([ 0.7817, -1.3110], device='cuda:0') tensor([ 0.7888, -1.3146], device='cuda:0')
R[0]
tensor([0.1286], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lo

  line3 = ax.scatter(all_possib_abs_states[length_block[i][0]:length_block[i][1],0], all_possib_abs_states[length_block[i][0]:length_block[i][1],1], c=colors[i], marker='x', edgecolors='k', alpha=0.5, s=100)


Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
1 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.7866, -1.3212], device='cuda:0') tensor([ 0.7790, -1.3174], device='cuda:0') tensor([ 0.7866, -1.3211], device='cuda:0')
R[0]
tensor([0.1289], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lossR/500., self.loss_gamma/500., self.loss_Q/500., self.loss_disentangle_t/500., self.loss_disambiguate1/500., self.loss_disambiguate2/500.
0.00011505333585955668 1.5607783199779224 1.1129174389839172 0.0 7.977230065403518e-11 1.1824612276554107 8.676205620616884e-11
self.loss_interpret/500.
0.0
Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
1 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.7866, -1.3217], device='cuda:0') tensor([ 0.7790, -1.3180], device='cuda:0') tensor([ 0.7866, -1.3217], device='cuda:0')
R[0]
tensor([0.1289], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.

  line3 = ax.scatter(all_possib_abs_states[length_block[i][0]:length_block[i][1],0], all_possib_abs_states[length_block[i][0]:length_block[i][1],1], c=colors[i], marker='x', edgecolors='k', alpha=0.5, s=100)


Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
0 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.7873, -1.3259], device='cuda:0') tensor([ 0.8056, -1.3436], device='cuda:0') tensor([ 0.7873, -1.3259], device='cuda:0')
R[0]
tensor([0.0938], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lossR/500., self.loss_gamma/500., self.loss_Q/500., self.loss_disentangle_t/500., self.loss_disambiguate1/500., self.loss_disambiguate2/500.
0.00010098152165301144 1.7078936416327488 1.1121408791542053 0.0 9.714770066171497e-11 1.1892647473812104 1.01342362279766e-10
self.loss_interpret/500.
0.0
Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
1 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.7875, -1.3263], device='cuda:0') tensor([ 0.7806, -1.3229], device='cuda:0') tensor([ 0.7875, -1.3263], device='cuda:0')
R[0]
tensor([0.1291], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.l

  line3 = ax.scatter(all_possib_abs_states[length_block[i][0]:length_block[i][1],0], all_possib_abs_states[length_block[i][0]:length_block[i][1],1], c=colors[i], marker='x', edgecolors='k', alpha=0.5, s=100)


Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
0 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.7905, -1.3291], device='cuda:0') tensor([ 0.8078, -1.3458], device='cuda:0') tensor([ 0.7905, -1.3291], device='cuda:0')
R[0]
tensor([0.0940], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lossR/500., self.loss_gamma/500., self.loss_Q/500., self.loss_disentangle_t/500., self.loss_disambiguate1/500., self.loss_disambiguate2/500.
9.18041870900197e-05 1.8558226051523816 1.1113502502441406 0.0 9.760126862268237e-11 1.196040969133377 1.1668707777801046e-10
self.loss_interpret/500.
0.0
Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
0 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.7908, -1.3294], device='cuda:0') tensor([ 0.8081, -1.3459], device='cuda:0') tensor([ 0.7908, -1.3294], device='cuda:0')
R[0]
tensor([0.0940], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lo

  line3 = ax.scatter(all_possib_abs_states[length_block[i][0]:length_block[i][1],0], all_possib_abs_states[length_block[i][0]:length_block[i][1],1], c=colors[i], marker='x', edgecolors='k', alpha=0.5, s=100)


Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
2 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.7950, -1.3312], device='cuda:0') tensor([ 0.7906, -1.3258], device='cuda:0') tensor([ 0.7950, -1.3312], device='cuda:0')
R[0]
tensor([0.0730], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lossR/500., self.loss_gamma/500., self.loss_Q/500., self.loss_disentangle_t/500., self.loss_disambiguate1/500., self.loss_disambiguate2/500.
8.191801493376261e-05 2.003864876595093 1.113055708169937 0.0 7.552264080779026e-11 1.2023213169574738 1.265982532139276e-10
self.loss_interpret/500.
0.0
Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
3 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.7954, -1.3314], device='cuda:0') tensor([ 0.7905, -1.3254], device='cuda:0') tensor([ 0.7954, -1.3314], device='cuda:0')
R[0]
tensor([0.1703], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.los

  line3 = ax.scatter(all_possib_abs_states[length_block[i][0]:length_block[i][1],0], all_possib_abs_states[length_block[i][0]:length_block[i][1],1], c=colors[i], marker='x', edgecolors='k', alpha=0.5, s=100)


Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
0 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.7998, -1.3327], device='cuda:0') tensor([ 0.8156, -1.3478], device='cuda:0') tensor([ 0.7998, -1.3327], device='cuda:0')
R[0]
tensor([0.0942], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lossR/500., self.loss_gamma/500., self.loss_Q/500., self.loss_disentangle_t/500., self.loss_disambiguate1/500., self.loss_disambiguate2/500.
7.569316616718424e-05 2.1525594152777923 1.112010358095169 0.0 9.164116832049985e-11 1.2080748040676117 1.3456835823433267e-10
self.loss_interpret/500.
0.0
Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
1 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8002, -1.3328], device='cuda:0') tensor([ 0.7949, -1.3301], device='cuda:0') tensor([ 0.8002, -1.3328], device='cuda:0')
R[0]
tensor([0.1295], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.l

  line3 = ax.scatter(all_possib_abs_states[length_block[i][0]:length_block[i][1],0], all_possib_abs_states[length_block[i][0]:length_block[i][1],1], c=colors[i], marker='x', edgecolors='k', alpha=0.5, s=100)


Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
3 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8044, -1.3337], device='cuda:0') tensor([ 0.8001, -1.3283], device='cuda:0') tensor([ 0.8044, -1.3337], device='cuda:0')
R[0]
tensor([0.1705], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lossR/500., self.loss_gamma/500., self.loss_Q/500., self.loss_disentangle_t/500., self.loss_disambiguate1/500., self.loss_disambiguate2/500.
6.934018764150097e-05 2.301338797005592 1.1116603989601135 0.0 1.0530591454394056e-10 1.2131535222530365 1.5356398908039638e-10
self.loss_interpret/500.
0.0
Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
2 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8048, -1.3338], device='cuda:0') tensor([ 0.8002, -1.3285], device='cuda:0') tensor([ 0.8048, -1.3338], device='cuda:0')
R[0]
tensor([0.0731], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.

  line3 = ax.scatter(all_possib_abs_states[length_block[i][0]:length_block[i][1],0], all_possib_abs_states[length_block[i][0]:length_block[i][1],1], c=colors[i], marker='x', edgecolors='k', alpha=0.5, s=100)


Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
0 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8087, -1.3345], device='cuda:0') tensor([ 0.8236, -1.3485], device='cuda:0') tensor([ 0.8087, -1.3345], device='cuda:0')
R[0]
tensor([0.0943], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lossR/500., self.loss_gamma/500., self.loss_Q/500., self.loss_disentangle_t/500., self.loss_disambiguate1/500., self.loss_disambiguate2/500.
6.66936635680031e-05 2.4504674919906537 1.1115087985992431 0.0 1.1127866450522106e-10 1.2175967428684236 1.584509332630546e-10
self.loss_interpret/500.
0.0
Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
0 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8091, -1.3345], device='cuda:0') tensor([ 0.8239, -1.3486], device='cuda:0') tensor([ 0.8091, -1.3345], device='cuda:0')
R[0]
tensor([0.0943], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.l

  line3 = ax.scatter(all_possib_abs_states[length_block[i][0]:length_block[i][1],0], all_possib_abs_states[length_block[i][0]:length_block[i][1],1], c=colors[i], marker='x', edgecolors='k', alpha=0.5, s=100)


Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
2 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8125, -1.3351], device='cuda:0') tensor([ 0.8077, -1.3299], device='cuda:0') tensor([ 0.8125, -1.3351], device='cuda:0')
R[0]
tensor([0.0731], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lossR/500., self.loss_gamma/500., self.loss_Q/500., self.loss_disentangle_t/500., self.loss_disambiguate1/500., self.loss_disambiguate2/500.
6.256369883703883e-05 2.599122018308146 1.1120598158836366 0.0 1.1115786646698212e-10 1.2214728147983551 1.6610711894848863e-10
self.loss_interpret/500.
0.0
Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
2 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8129, -1.3351], device='cuda:0') tensor([ 0.8081, -1.3299], device='cuda:0') tensor([ 0.8129, -1.3351], device='cuda:0')
R[0]
tensor([0.0731], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.

  line3 = ax.scatter(all_possib_abs_states[length_block[i][0]:length_block[i][1],0], all_possib_abs_states[length_block[i][0]:length_block[i][1],1], c=colors[i], marker='x', edgecolors='k', alpha=0.5, s=100)


Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
2 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8159, -1.3354], device='cuda:0') tensor([ 0.8112, -1.3303], device='cuda:0') tensor([ 0.8159, -1.3354], device='cuda:0')
R[0]
tensor([0.0732], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lossR/500., self.loss_gamma/500., self.loss_Q/500., self.loss_disentangle_t/500., self.loss_disambiguate1/500., self.loss_disambiguate2/500.
5.944107773757423e-05 2.7483770389172713 1.1120813755989074 0.0 1.0776901049336374e-10 1.224657421350479 1.7525123707784473e-10
self.loss_interpret/500.
0.0
Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
0 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8162, -1.3354], device='cuda:0') tensor([ 0.8303, -1.3487], device='cuda:0') tensor([ 0.8162, -1.3354], device='cuda:0')
R[0]
tensor([0.0943], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.

  line3 = ax.scatter(all_possib_abs_states[length_block[i][0]:length_block[i][1],0], all_possib_abs_states[length_block[i][0]:length_block[i][1],1], c=colors[i], marker='x', edgecolors='k', alpha=0.5, s=100)


Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
0 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8188, -1.3357], device='cuda:0') tensor([ 0.8327, -1.3487], device='cuda:0') tensor([ 0.8188, -1.3357], device='cuda:0')
R[0]
tensor([0.0944], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lossR/500., self.loss_gamma/500., self.loss_Q/500., self.loss_disentangle_t/500., self.loss_disambiguate1/500., self.loss_disambiguate2/500.
5.741214783483883e-05 2.897612730481429 1.1124332993030548 0.0 1.0641354142304493e-10 1.2273742735385895 1.8153858882818953e-10
self.loss_interpret/500.
0.0
Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
2 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8191, -1.3357], device='cuda:0') tensor([ 0.8143, -1.3307], device='cuda:0') tensor([ 0.8191, -1.3357], device='cuda:0')
R[0]
tensor([0.0732], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.

  line3 = ax.scatter(all_possib_abs_states[length_block[i][0]:length_block[i][1],0], all_possib_abs_states[length_block[i][0]:length_block[i][1],1], c=colors[i], marker='x', edgecolors='k', alpha=0.5, s=100)


Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
2 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8213, -1.3359], device='cuda:0') tensor([ 0.8165, -1.3309], device='cuda:0') tensor([ 0.8213, -1.3359], device='cuda:0')
R[0]
tensor([0.0732], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lossR/500., self.loss_gamma/500., self.loss_Q/500., self.loss_disentangle_t/500., self.loss_disambiguate1/500., self.loss_disambiguate2/500.
5.4551544886635385e-05 3.046538685102714 1.1126277360916137 0.0 1.0135529115995467e-10 1.2296699047088624 1.823880868556671e-10
self.loss_interpret/500.
0.0
Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
1 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8215, -1.3359], device='cuda:0') tensor([ 0.8176, -1.3339], device='cuda:0') tensor([ 0.8215, -1.3359], device='cuda:0')
R[0]
tensor([0.1297], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.

  line3 = ax.scatter(all_possib_abs_states[length_block[i][0]:length_block[i][1],0], all_possib_abs_states[length_block[i][0]:length_block[i][1],1], c=colors[i], marker='x', edgecolors='k', alpha=0.5, s=100)


Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
3 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8233, -1.3361], device='cuda:0') tensor([ 0.8199, -1.3318], device='cuda:0') tensor([ 0.8233, -1.3361], device='cuda:0')
R[0]
tensor([0.1710], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lossR/500., self.loss_gamma/500., self.loss_Q/500., self.loss_disentangle_t/500., self.loss_disambiguate1/500., self.loss_disambiguate2/500.
5.390763998002512e-05 3.1959645866907667 1.112726744890213 0.0 9.998740169159248e-11 1.2315236446857452 1.9563400099897877e-10
self.loss_interpret/500.
0.0
Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
2 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8235, -1.3361], device='cuda:0') tensor([ 0.8187, -1.3311], device='cuda:0') tensor([ 0.8235, -1.3361], device='cuda:0')
R[0]
tensor([0.0732], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.l

  line3 = ax.scatter(all_possib_abs_states[length_block[i][0]:length_block[i][1],0], all_possib_abs_states[length_block[i][0]:length_block[i][1],1], c=colors[i], marker='x', edgecolors='k', alpha=0.5, s=100)


Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
3 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8251, -1.3362], device='cuda:0') tensor([ 0.8218, -1.3320], device='cuda:0') tensor([ 0.8251, -1.3362], device='cuda:0')
R[0]
tensor([0.1710], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lossR/500., self.loss_gamma/500., self.loss_Q/500., self.loss_disentangle_t/500., self.loss_disambiguate1/500., self.loss_disambiguate2/500.
5.2758430520043476e-05 3.345164226541994 1.113185384273529 0.0 9.457540750545946e-11 1.2330884063243865 1.911305385071671e-10
self.loss_interpret/500.
0.0
Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
2 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8252, -1.3362], device='cuda:0') tensor([ 0.8204, -1.3313], device='cuda:0') tensor([ 0.8252, -1.3362], device='cuda:0')
R[0]
tensor([0.0732], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lo

  line3 = ax.scatter(all_possib_abs_states[length_block[i][0]:length_block[i][1],0], all_possib_abs_states[length_block[i][0]:length_block[i][1],1], c=colors[i], marker='x', edgecolors='k', alpha=0.5, s=100)


Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
3 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8265, -1.3362], device='cuda:0') tensor([ 0.8233, -1.3322], device='cuda:0') tensor([ 0.8265, -1.3362], device='cuda:0')
R[0]
tensor([0.1710], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lossR/500., self.loss_gamma/500., self.loss_Q/500., self.loss_disentangle_t/500., self.loss_disambiguate1/500., self.loss_disambiguate2/500.
5.078931165553513e-05 3.4945806886430364 1.1123921275138855 0.0 1.1251586162774885e-10 1.2344103398323059 1.9767854575469813e-10
self.loss_interpret/500.
0.0
Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
3 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8267, -1.3362], device='cuda:0') tensor([ 0.8235, -1.3322], device='cuda:0') tensor([ 0.8267, -1.3362], device='cuda:0')
R[0]
tensor([0.1710], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self

  line3 = ax.scatter(all_possib_abs_states[length_block[i][0]:length_block[i][1],0], all_possib_abs_states[length_block[i][0]:length_block[i][1],1], c=colors[i], marker='x', edgecolors='k', alpha=0.5, s=100)


Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
2 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8278, -1.3363], device='cuda:0') tensor([ 0.8230, -1.3315], device='cuda:0') tensor([ 0.8278, -1.3363], device='cuda:0')
R[0]
tensor([0.0732], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lossR/500., self.loss_gamma/500., self.loss_Q/500., self.loss_disentangle_t/500., self.loss_disambiguate1/500., self.loss_disambiguate2/500.
5.0480591991799885e-05 3.643854836578248 1.1134809279441833 0.0 9.933187361532703e-11 1.2355253303050995 1.9704524856223317e-10
self.loss_interpret/500.
0.0
Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
1 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8279, -1.3363], device='cuda:0') tensor([ 0.8244, -1.3345], device='cuda:0') tensor([ 0.8279, -1.3363], device='cuda:0')
R[0]
tensor([0.1298], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.

  line3 = ax.scatter(all_possib_abs_states[length_block[i][0]:length_block[i][1],0], all_possib_abs_states[length_block[i][0]:length_block[i][1],1], c=colors[i], marker='x', edgecolors='k', alpha=0.5, s=100)


Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
2 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8289, -1.3363], device='cuda:0') tensor([ 0.8241, -1.3315], device='cuda:0') tensor([ 0.8289, -1.3363], device='cuda:0')
R[0]
tensor([0.0732], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lossR/500., self.loss_gamma/500., self.loss_Q/500., self.loss_disentangle_t/500., self.loss_disambiguate1/500., self.loss_disambiguate2/500.
4.9868772464833456e-05 3.7929476716022474 1.112191870689392 0.0 1.243030077757723e-10 1.2364569597244264 1.932406656912633e-10
self.loss_interpret/500.
0.0
Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
0 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8290, -1.3363], device='cuda:0') tensor([ 0.8419, -1.3485], device='cuda:0') tensor([ 0.8290, -1.3363], device='cuda:0')
R[0]
tensor([0.0944], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.l

  line3 = ax.scatter(all_possib_abs_states[length_block[i][0]:length_block[i][1],0], all_possib_abs_states[length_block[i][0]:length_block[i][1],1], c=colors[i], marker='x', edgecolors='k', alpha=0.5, s=100)


Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
2 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8298, -1.3363], device='cuda:0') tensor([ 0.8250, -1.3316], device='cuda:0') tensor([ 0.8298, -1.3364], device='cuda:0')
R[0]
tensor([0.0732], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lossR/500., self.loss_gamma/500., self.loss_Q/500., self.loss_disentangle_t/500., self.loss_disambiguate1/500., self.loss_disambiguate2/500.
4.911299122250057e-05 3.94197722506593 1.1126674773693084 0.0 1.1320806392323135e-10 1.2372021479606627 2.0403606804109131e-10
self.loss_interpret/500.
0.0
Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
0 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8298, -1.3363], device='cuda:0') tensor([ 0.8428, -1.3484], device='cuda:0') tensor([ 0.8298, -1.3363], device='cuda:0')
R[0]
tensor([0.0944], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.l

  line3 = ax.scatter(all_possib_abs_states[length_block[i][0]:length_block[i][1],0], all_possib_abs_states[length_block[i][0]:length_block[i][1],1], c=colors[i], marker='x', edgecolors='k', alpha=0.5, s=100)


Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
3 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8304, -1.3364], device='cuda:0') tensor([ 0.8274, -1.3324], device='cuda:0') tensor([ 0.8305, -1.3363], device='cuda:0')
R[0]
tensor([0.1711], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lossR/500., self.loss_gamma/500., self.loss_Q/500., self.loss_disentangle_t/500., self.loss_disambiguate1/500., self.loss_disambiguate2/500.
4.903517221828224e-05 4.091477985511301 1.1120158543586731 0.0 1.1402537014593151e-10 1.2377540392875672 2.0674805001341667e-10
self.loss_interpret/500.
0.0
Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
2 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8305, -1.3363], device='cuda:0') tensor([ 0.8257, -1.3316], device='cuda:0') tensor([ 0.8305, -1.3363], device='cuda:0')
R[0]
tensor([0.0732], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.

  line3 = ax.scatter(all_possib_abs_states[length_block[i][0]:length_block[i][1],0], all_possib_abs_states[length_block[i][0]:length_block[i][1],1], c=colors[i], marker='x', edgecolors='k', alpha=0.5, s=100)


Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
1 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8309, -1.3363], device='cuda:0') tensor([ 0.8274, -1.3345], device='cuda:0') tensor([ 0.8309, -1.3363], device='cuda:0')
R[0]
tensor([0.1298], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lossR/500., self.loss_gamma/500., self.loss_Q/500., self.loss_disentangle_t/500., self.loss_disambiguate1/500., self.loss_disambiguate2/500.
4.9146392379043394e-05 4.240703396287514 1.1123111720085144 0.0 1.0929499172362967e-10 1.2381223757267 2.0500463038253258e-10
self.loss_interpret/500.
0.0
Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
1 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8309, -1.3363], device='cuda:0') tensor([ 0.8274, -1.3345], device='cuda:0') tensor([ 0.8309, -1.3363], device='cuda:0')
R[0]
tensor([0.1298], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lo

  line3 = ax.scatter(all_possib_abs_states[length_block[i][0]:length_block[i][1],0], all_possib_abs_states[length_block[i][0]:length_block[i][1],1], c=colors[i], marker='x', edgecolors='k', alpha=0.5, s=100)


Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
3 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8312, -1.3363], device='cuda:0') tensor([ 0.8281, -1.3325], device='cuda:0') tensor([ 0.8312, -1.3363], device='cuda:0')
R[0]
tensor([0.1711], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lossR/500., self.loss_gamma/500., self.loss_Q/500., self.loss_disentangle_t/500., self.loss_disambiguate1/500., self.loss_disambiguate2/500.
4.9090084365161605e-05 4.390062394902809 1.1124470522403718 0.0 1.1696062940380613e-10 1.2383638801574708 2.0302334857724702e-10
self.loss_interpret/500.
0.0
Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
2 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8312, -1.3363], device='cuda:0') tensor([ 0.8265, -1.3316], device='cuda:0') tensor([ 0.8312, -1.3363], device='cuda:0')
R[0]
tensor([0.0732], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self

  line3 = ax.scatter(all_possib_abs_states[length_block[i][0]:length_block[i][1],0], all_possib_abs_states[length_block[i][0]:length_block[i][1],1], c=colors[i], marker='x', edgecolors='k', alpha=0.5, s=100)


Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
3 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8314, -1.3363], device='cuda:0') tensor([ 0.8284, -1.3325], device='cuda:0') tensor([ 0.8314, -1.3363], device='cuda:0')
R[0]
tensor([0.1711], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lossR/500., self.loss_gamma/500., self.loss_Q/500., self.loss_disentangle_t/500., self.loss_disambiguate1/500., self.loss_disambiguate2/500.
4.729811408105888e-05 4.5397241863000675 1.1126740248203277 0.0 1.1570353519729792e-10 1.2385387237071992 2.064568394022359e-10
self.loss_interpret/500.
0.0
Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
0 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8314, -1.3363], device='cuda:0') tensor([ 0.8443, -1.3483], device='cuda:0') tensor([ 0.8314, -1.3363], device='cuda:0')
R[0]
tensor([0.0944], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.

  line3 = ax.scatter(all_possib_abs_states[length_block[i][0]:length_block[i][1],0], all_possib_abs_states[length_block[i][0]:length_block[i][1],1], c=colors[i], marker='x', edgecolors='k', alpha=0.5, s=100)


Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
0 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8316, -1.3363], device='cuda:0') tensor([ 0.8444, -1.3483], device='cuda:0') tensor([ 0.8316, -1.3363], device='cuda:0')
R[0]
tensor([0.0944], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lossR/500., self.loss_gamma/500., self.loss_Q/500., self.loss_disentangle_t/500., self.loss_disambiguate1/500., self.loss_disambiguate2/500.
4.772432455501985e-05 4.68940576366405 1.1116541159152984 0.0 1.187923248657796e-10 1.2386711885929107 2.0598387640013982e-10
self.loss_interpret/500.
0.0
Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
0 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8316, -1.3363], device='cuda:0') tensor([ 0.8444, -1.3483], device='cuda:0') tensor([ 0.8316, -1.3363], device='cuda:0')
R[0]
tensor([0.0944], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lo

  line3 = ax.scatter(all_possib_abs_states[length_block[i][0]:length_block[i][1],0], all_possib_abs_states[length_block[i][0]:length_block[i][1],1], c=colors[i], marker='x', edgecolors='k', alpha=0.5, s=100)


Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
3 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8317, -1.3363], device='cuda:0') tensor([ 0.8287, -1.3325], device='cuda:0') tensor([ 0.8317, -1.3363], device='cuda:0')
R[0]
tensor([0.1711], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lossR/500., self.loss_gamma/500., self.loss_Q/500., self.loss_disentangle_t/500., self.loss_disambiguate1/500., self.loss_disambiguate2/500.
4.839709871157538e-05 4.838957248152001 1.1126555263996125 0.0 1.218146585824087e-10 1.2387639501094818 2.0736882322403006e-10
self.loss_interpret/500.
0.0
Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
0 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8317, -1.3363], device='cuda:0') tensor([ 0.8445, -1.3482], device='cuda:0') tensor([ 0.8318, -1.3363], device='cuda:0')
R[0]
tensor([0.0944], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.l

  line3 = ax.scatter(all_possib_abs_states[length_block[i][0]:length_block[i][1],0], all_possib_abs_states[length_block[i][0]:length_block[i][1],1], c=colors[i], marker='x', edgecolors='k', alpha=0.5, s=100)


Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
3 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8318, -1.3363], device='cuda:0') tensor([ 0.8288, -1.3325], device='cuda:0') tensor([ 0.8318, -1.3363], device='cuda:0')
R[0]
tensor([0.1711], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lossR/500., self.loss_gamma/500., self.loss_Q/500., self.loss_disentangle_t/500., self.loss_disambiguate1/500., self.loss_disambiguate2/500.
4.7972774751542605e-05 4.988225975082023 1.112813239812851 0.0 1.0173821851644504e-10 1.2388302528858184 2.072411815490227e-10
self.loss_interpret/500.
0.0
Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
0 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8318, -1.3363], device='cuda:0') tensor([ 0.8446, -1.3482], device='cuda:0') tensor([ 0.8318, -1.3363], device='cuda:0')
R[0]
tensor([0.0944], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.l

  line3 = ax.scatter(all_possib_abs_states[length_block[i][0]:length_block[i][1],0], all_possib_abs_states[length_block[i][0]:length_block[i][1],1], c=colors[i], marker='x', edgecolors='k', alpha=0.5, s=100)


Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
0 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8319, -1.3363], device='cuda:0') tensor([ 0.8447, -1.3482], device='cuda:0') tensor([ 0.8319, -1.3363], device='cuda:0')
R[0]
tensor([0.0944], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lossR/500., self.loss_gamma/500., self.loss_Q/500., self.loss_disentangle_t/500., self.loss_disambiguate1/500., self.loss_disambiguate2/500.
4.746792416335666e-05 5.138097159885569 1.1125962965488434 0.0 1.1785820830390036e-10 1.2388805556297302 2.106698848702848e-10
self.loss_interpret/500.
0.0
Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
3 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8319, -1.3363], device='cuda:0') tensor([ 0.8288, -1.3325], device='cuda:0') tensor([ 0.8319, -1.3363], device='cuda:0')
R[0]
tensor([0.1711], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.l

  line3 = ax.scatter(all_possib_abs_states[length_block[i][0]:length_block[i][1],0], all_possib_abs_states[length_block[i][0]:length_block[i][1],1], c=colors[i], marker='x', edgecolors='k', alpha=0.5, s=100)


Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
0 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8319, -1.3363], device='cuda:0') tensor([ 0.8447, -1.3482], device='cuda:0') tensor([ 0.8319, -1.3363], device='cuda:0')
R[0]
tensor([0.0944], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lossR/500., self.loss_gamma/500., self.loss_Q/500., self.loss_disentangle_t/500., self.loss_disambiguate1/500., self.loss_disambiguate2/500.
4.786895080906106e-05 5.287512224438367 1.112356137752533 0.0 1.181423348040056e-10 1.2389210093021392 2.0835657510431816e-10
self.loss_interpret/500.
0.0
Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
0 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8319, -1.3363], device='cuda:0') tensor([ 0.8447, -1.3482], device='cuda:0') tensor([ 0.8319, -1.3363], device='cuda:0')
R[0]
tensor([0.0944], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lo

  line3 = ax.scatter(all_possib_abs_states[length_block[i][0]:length_block[i][1],0], all_possib_abs_states[length_block[i][0]:length_block[i][1],1], c=colors[i], marker='x', edgecolors='k', alpha=0.5, s=100)


Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
0 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8320, -1.3363], device='cuda:0') tensor([ 0.8447, -1.3482], device='cuda:0') tensor([ 0.8320, -1.3363], device='cuda:0')
R[0]
tensor([0.0944], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lossR/500., self.loss_gamma/500., self.loss_Q/500., self.loss_disentangle_t/500., self.loss_disambiguate1/500., self.loss_disambiguate2/500.
4.7521749533188995e-05 5.437202481162502 1.1127480630874633 0.0 1.0336045364045532e-10 1.2389499657154084 2.0771170738953515e-10
self.loss_interpret/500.
0.0
Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
3 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8320, -1.3363], device='cuda:0') tensor([ 0.8289, -1.3325], device='cuda:0') tensor([ 0.8320, -1.3363], device='cuda:0')
R[0]
tensor([0.1711], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self

  line3 = ax.scatter(all_possib_abs_states[length_block[i][0]:length_block[i][1],0], all_possib_abs_states[length_block[i][0]:length_block[i][1],1], c=colors[i], marker='x', edgecolors='k', alpha=0.5, s=100)


Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
2 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8320, -1.3363], device='cuda:0') tensor([ 0.8273, -1.3316], device='cuda:0') tensor([ 0.8320, -1.3363], device='cuda:0')
R[0]
tensor([0.0732], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lossR/500., self.loss_gamma/500., self.loss_Q/500., self.loss_disentangle_t/500., self.loss_disambiguate1/500., self.loss_disambiguate2/500.
4.813793456924031e-05 5.58663078188966 1.1129433143138885 0.0 1.1325241500159677e-10 1.2389695589542389 2.0880691620206449e-10
self.loss_interpret/500.
0.0
Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
2 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8320, -1.3363], device='cuda:0') tensor([ 0.8273, -1.3316], device='cuda:0') tensor([ 0.8320, -1.3363], device='cuda:0')
R[0]
tensor([0.0732], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.l

  line3 = ax.scatter(all_possib_abs_states[length_block[i][0]:length_block[i][1],0], all_possib_abs_states[length_block[i][0]:length_block[i][1],1], c=colors[i], marker='x', edgecolors='k', alpha=0.5, s=100)


Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
0 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8320, -1.3363], device='cuda:0') tensor([ 0.8448, -1.3482], device='cuda:0') tensor([ 0.8320, -1.3363], device='cuda:0')
R[0]
tensor([0.0944], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lossR/500., self.loss_gamma/500., self.loss_Q/500., self.loss_disentangle_t/500., self.loss_disambiguate1/500., self.loss_disambiguate2/500.
4.866789374864311e-05 5.735876455961494 1.1119943754673005 0.0 1.2345988320561218e-10 1.238983099937439 2.0582409931968472e-10
self.loss_interpret/500.
0.0
Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
0 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8320, -1.3363], device='cuda:0') tensor([ 0.8448, -1.3482], device='cuda:0') tensor([ 0.8320, -1.3363], device='cuda:0')
R[0]
tensor([0.0944], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.l

  line3 = ax.scatter(all_possib_abs_states[length_block[i][0]:length_block[i][1],0], all_possib_abs_states[length_block[i][0]:length_block[i][1],1], c=colors[i], marker='x', edgecolors='k', alpha=0.5, s=100)


Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
1 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8320, -1.3363], device='cuda:0') tensor([ 0.8285, -1.3345], device='cuda:0') tensor([ 0.8320, -1.3363], device='cuda:0')
R[0]
tensor([0.1298], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lossR/500., self.loss_gamma/500., self.loss_Q/500., self.loss_disentangle_t/500., self.loss_disambiguate1/500., self.loss_disambiguate2/500.
4.784563044086099e-05 5.885118919645203 1.1118559873104095 0.0 1.359070710416077e-10 1.2389933309555055 2.107593375377803e-10
self.loss_interpret/500.
0.0
Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
3 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8320, -1.3363], device='cuda:0') tensor([ 0.8290, -1.3325], device='cuda:0') tensor([ 0.8320, -1.3363], device='cuda:0')
R[0]
tensor([0.1711], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lo

  line3 = ax.scatter(all_possib_abs_states[length_block[i][0]:length_block[i][1],0], all_possib_abs_states[length_block[i][0]:length_block[i][1],1], c=colors[i], marker='x', edgecolors='k', alpha=0.5, s=100)


Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
0 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8320, -1.3363], device='cuda:0') tensor([ 0.8448, -1.3482], device='cuda:0') tensor([ 0.8320, -1.3363], device='cuda:0')
R[0]
tensor([0.0944], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lossR/500., self.loss_gamma/500., self.loss_Q/500., self.loss_disentangle_t/500., self.loss_disambiguate1/500., self.loss_disambiguate2/500.
4.85915669323731e-05 6.034470290737925 1.112084337234497 0.0 1.1701392566010326e-10 1.2390002415180206 2.0924761523488654e-10
self.loss_interpret/500.
0.0
Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
0 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8320, -1.3363], device='cuda:0') tensor([ 0.8448, -1.3482], device='cuda:0') tensor([ 0.8320, -1.3363], device='cuda:0')
R[0]
tensor([0.0944], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lo

  line3 = ax.scatter(all_possib_abs_states[length_block[i][0]:length_block[i][1],0], all_possib_abs_states[length_block[i][0]:length_block[i][1],1], c=colors[i], marker='x', edgecolors='k', alpha=0.5, s=100)


Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
1 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8320, -1.3363], device='cuda:0') tensor([ 0.8285, -1.3345], device='cuda:0') tensor([ 0.8320, -1.3363], device='cuda:0')
R[0]
tensor([0.1298], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lossR/500., self.loss_gamma/500., self.loss_Q/500., self.loss_disentangle_t/500., self.loss_disambiguate1/500., self.loss_disambiguate2/500.
4.805360362297506e-05 6.183872760326369 1.1127513194084167 0.0 1.1879920946977763e-10 1.2390048191547394 2.1078428247278679e-10
self.loss_interpret/500.
0.0
Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
3 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8320, -1.3363], device='cuda:0') tensor([ 0.8290, -1.3325], device='cuda:0') tensor([ 0.8321, -1.3363], device='cuda:0')
R[0]
tensor([0.1711], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.

  line3 = ax.scatter(all_possib_abs_states[length_block[i][0]:length_block[i][1],0], all_possib_abs_states[length_block[i][0]:length_block[i][1],1], c=colors[i], marker='x', edgecolors='k', alpha=0.5, s=100)


Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
0 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8321, -1.3363], device='cuda:0') tensor([ 0.8449, -1.3482], device='cuda:0') tensor([ 0.8321, -1.3363], device='cuda:0')
R[0]
tensor([0.0944], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lossR/500., self.loss_gamma/500., self.loss_Q/500., self.loss_disentangle_t/500., self.loss_disambiguate1/500., self.loss_disambiguate2/500.
4.810692688261042e-05 6.333087351373164 1.1122574353218078 0.0 1.1596747950815e-10 1.2390080547332765 2.0664620103971742e-10
self.loss_interpret/500.
0.0
Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
3 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8320, -1.3363], device='cuda:0') tensor([ 0.8290, -1.3325], device='cuda:0') tensor([ 0.8321, -1.3363], device='cuda:0')
R[0]
tensor([0.1711], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.los

  line3 = ax.scatter(all_possib_abs_states[length_block[i][0]:length_block[i][1],0], all_possib_abs_states[length_block[i][0]:length_block[i][1],1], c=colors[i], marker='x', edgecolors='k', alpha=0.5, s=100)


Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
0 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8320, -1.3363], device='cuda:0') tensor([ 0.8448, -1.3482], device='cuda:0') tensor([ 0.8320, -1.3363], device='cuda:0')
R[0]
tensor([0.0944], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lossR/500., self.loss_gamma/500., self.loss_Q/500., self.loss_disentangle_t/500., self.loss_disambiguate1/500., self.loss_disambiguate2/500.
4.763801954686642e-05 6.482532545790774 1.1118934760093688 0.0 1.279538123233337e-10 1.2390106036663056 2.0976447423493027e-10
self.loss_interpret/500.
0.0
Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
1 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8320, -1.3363], device='cuda:0') tensor([ 0.8286, -1.3345], device='cuda:0') tensor([ 0.8320, -1.3363], device='cuda:0')
R[0]
tensor([0.1298], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.l

  line3 = ax.scatter(all_possib_abs_states[length_block[i][0]:length_block[i][1],0], all_possib_abs_states[length_block[i][0]:length_block[i][1],1], c=colors[i], marker='x', edgecolors='k', alpha=0.5, s=100)


Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
1 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8320, -1.3363], device='cuda:0') tensor([ 0.8285, -1.3345], device='cuda:0') tensor([ 0.8320, -1.3363], device='cuda:0')
R[0]
tensor([0.1298], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lossR/500., self.loss_gamma/500., self.loss_Q/500., self.loss_disentangle_t/500., self.loss_disambiguate1/500., self.loss_disambiguate2/500.
4.8580508770101006e-05 6.6317620106458195 1.1121244020462036 0.0 1.1421930423693993e-10 1.2390126464366913 2.1251517989817615e-10
self.loss_interpret/500.
0.0
Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
2 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8321, -1.3363], device='cuda:0') tensor([ 0.8273, -1.3316], device='cuda:0') tensor([ 0.8321, -1.3363], device='cuda:0')
R[0]
tensor([0.0732], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., sel

  line3 = ax.scatter(all_possib_abs_states[length_block[i][0]:length_block[i][1],0], all_possib_abs_states[length_block[i][0]:length_block[i][1],1], c=colors[i], marker='x', edgecolors='k', alpha=0.5, s=100)


Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
2 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8320, -1.3363], device='cuda:0') tensor([ 0.8273, -1.3316], device='cuda:0') tensor([ 0.8320, -1.3363], device='cuda:0')
R[0]
tensor([0.0732], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lossR/500., self.loss_gamma/500., self.loss_Q/500., self.loss_disentangle_t/500., self.loss_disambiguate1/500., self.loss_disambiguate2/500.
4.741977329467772e-05 6.781372242339654 1.1116471552848817 0.0 1.282266386404274e-10 1.2390137729644775 2.0475328388336322e-10
self.loss_interpret/500.
0.0
Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
1 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8320, -1.3363], device='cuda:0') tensor([ 0.8286, -1.3345], device='cuda:0') tensor([ 0.8320, -1.3363], device='cuda:0')
R[0]
tensor([0.1298], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.l

  line3 = ax.scatter(all_possib_abs_states[length_block[i][0]:length_block[i][1],0], all_possib_abs_states[length_block[i][0]:length_block[i][1],1], c=colors[i], marker='x', edgecolors='k', alpha=0.5, s=100)


Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
0 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8320, -1.3363], device='cuda:0') tensor([ 0.8448, -1.3482], device='cuda:0') tensor([ 0.8320, -1.3363], device='cuda:0')
R[0]
tensor([0.0944], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lossR/500., self.loss_gamma/500., self.loss_Q/500., self.loss_disentangle_t/500., self.loss_disambiguate1/500., self.loss_disambiguate2/500.
4.7545775360049447e-05 6.930870803453726 1.1127134034633637 0.0 1.241879643565369e-10 1.2390146329402925 2.0724060267873767e-10
self.loss_interpret/500.
0.0
Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
2 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8321, -1.3363], device='cuda:0') tensor([ 0.8273, -1.3316], device='cuda:0') tensor([ 0.8321, -1.3363], device='cuda:0')
R[0]
tensor([0.0732], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.

  line3 = ax.scatter(all_possib_abs_states[length_block[i][0]:length_block[i][1],0], all_possib_abs_states[length_block[i][0]:length_block[i][1],1], c=colors[i], marker='x', edgecolors='k', alpha=0.5, s=100)


Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
2 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8321, -1.3363], device='cuda:0') tensor([ 0.8273, -1.3316], device='cuda:0') tensor([ 0.8321, -1.3363], device='cuda:0')
R[0]
tensor([0.0732], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.lossR/500., self.loss_gamma/500., self.loss_Q/500., self.loss_disentangle_t/500., self.loss_disambiguate1/500., self.loss_disambiguate2/500.
4.755297958399751e-05 7.080294619026361 1.1125031604766846 0.0 1.1813911726665792e-10 1.239015313386917 2.0653780974377867e-10
self.loss_interpret/500.
0.0
Printing a few elements useful for debugging:
actions_val[0], rewards_val[0], terminals_val[0]
0 0.0 False
Es[0], TEs[0], Esp_[0]
tensor([ 0.8320, -1.3363], device='cuda:0') tensor([ 0.8448, -1.3482], device='cuda:0') tensor([ 0.8320, -1.3363], device='cuda:0')
R[0]
tensor([0.0944], device='cuda:0', grad_fn=<SelectBackward0>)
self.loss_T/500., self.l

  line3 = ax.scatter(all_possib_abs_states[length_block[i][0]:length_block[i][1],0], all_possib_abs_states[length_block[i][0]:length_block[i][1],1], c=colors[i], marker='x', edgecolors='k', alpha=0.5, s=100)


Best neural net obtained after 1 epochs, with validation score 0.0
{'vs': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], 'ts': []}
