In [2]:
import numpy as np
from PIL import Image
import cv2 #opencv
import io
import time
import pandas as pd
import numpy as np
from IPython.display import clear_output
from random import randint
import os

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys

#keras imports
from keras.models import model_from_json
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.optimizers import SGD , Adam
from keras.callbacks import TensorBoard
from collections import deque
import random
import pickle
from io import BytesIO
import base64
import json

Using TensorFlow backend.


In [3]:
# --- Locations -------------------------------------------------------------
# Browser-side resources.
game_url = "chrome://dino"
chrome_driver_path = "./chromedriver"

# CSV logs written during training.
loss_file_path = "./objects/loss_df.csv"
actions_file_path = "./objects/actions_df.csv"
q_value_file_path = "./objects/q_values.csv"
scores_file_path = "./objects/scores_df.csv"

# --- JavaScript snippets executed in the page --------------------------------
# Give the game canvas an id so that later lookups can use getElementById.
init_script = "document.getElementsByClassName('runner-canvas')[0].id = 'runner-canvas'"

# Export the canvas as a data URL and return it from character 22 onwards
# (presumably skipping the 22-character "data:image/png;base64," prefix so
# that only the base64 payload is returned - confirm against the browser).
getbase64Script = (
    "canvasRunner = document.getElementById('runner-canvas'); "
    "return canvasRunner.toDataURL().substring(22)"
)


Set the file paths and the JavaScript snippets used to read frames from the game canvas.

In [4]:
class Game:
    """Selenium bridge between Python and the dino game running in Chrome.

    * __init__(): launch the browser window using the attributes in chrome_options
    * get_crashed(): return the game's ``crashed`` JavaScript flag
    * get_playing(): return the game's ``playing`` JavaScript flag
    * restart(): ask the browser-side game to restart
    * press_up(): send an ARROW_UP key press to the page
    * press_down(): send an ARROW_DOWN key press to the page
    * get_score(): read the current score from the game's JavaScript state
    * pause(): pause the game
    * resume(): resume a paused game
    * end(): shut the browser down and end the WebDriver session
    """

    def __init__(self,custom_config=True):
        # NOTE(review): ``custom_config`` is accepted but never read; the
        # settings below are always applied.
        chrome_options = Options()
        chrome_options.add_argument("disable-infobars")
        chrome_options.add_argument("--mute-audio")
        self._driver = webdriver.Chrome(executable_path = chrome_driver_path,chrome_options=chrome_options)
        self._driver.set_window_position(x=-10,y=0)
        self._driver.get(game_url)
        # Presumably keeps the game speed constant - confirm against the game source.
        self._driver.execute_script("Runner.config.ACCELERATION=0")
        self._driver.execute_script(init_script)

    def _send_key(self, key):
        """Send a single key press to the page body."""
        self._driver.find_element_by_tag_name("body").send_keys(key)

    def get_crashed(self):
        """Return the value of ``Runner.instance_.crashed``."""
        return self._driver.execute_script("return Runner.instance_.crashed")

    def get_playing(self):
        """Return the value of ``Runner.instance_.playing``."""
        return self._driver.execute_script("return Runner.instance_.playing")

    def restart(self):
        """Restart the game in the browser."""
        self._driver.execute_script("Runner.instance_.restart()")

    def press_up(self):
        """Press the up arrow key."""
        self._send_key(Keys.ARROW_UP)

    def press_down(self):
        """Press the down arrow key."""
        self._send_key(Keys.ARROW_DOWN)

    def get_score(self):
        """Return the current score as an int (0 when no digits are available yet).

        The JavaScript side exposes the score as an array of digits, e.g.
        [1, 0, 0] for 100.
        """
        score_array = self._driver.execute_script("return Runner.instance_.distanceMeter.digits")
        if not score_array:
            # Empty or missing digit array: int('') would raise ValueError.
            return 0
        # str() makes the join work whether the digits arrive as strings or numbers.
        return int(''.join(str(digit) for digit in score_array))

    def pause(self):
        """Pause the game."""
        return self._driver.execute_script("return Runner.instance_.stop()")

    def resume(self):
        """Resume the game."""
        return self._driver.execute_script("return Runner.instance_.play()")

    def end(self):
        """Close the browser and terminate the WebDriver session."""
        # quit() (unlike close()) also ends the session and the driver process,
        # so no chromedriver process is left behind.
        self._driver.quit()

銜接browser-javascript與python的模組，運行chrome並且連接至小恐龍遊戲的環境。
這裡可以得到狀態:撞毀、遊戲中、分數。指令:重啟遊戲、跳躍、暫停、恢復遊戲、結束。

In [5]:
class DinoAgent:
    """The dino itself: forwards actions and status queries to a game object."""

    def __init__(self, game):
        """Store ``game`` and jump once, which is what starts the game."""
        self._game = game
        self.jump()

    def is_running(self):
        """Return whatever the game reports for get_playing()."""
        return self._game.get_playing()

    def is_crashed(self):
        """Return whatever the game reports for get_crashed()."""
        return self._game.get_crashed()

    def jump(self):
        """Jump by pressing the up key."""
        self._game.press_up()

    def duck(self):
        """Duck by pressing the down key; the game object must provide press_down()."""
        self._game.press_down()

這裡為DinoAgent的模組，可知Agent是否撞毀，並有跳躍與蹲下兩種行為。我們在遊戲中只考慮跳躍與不跳躍兩種。

In [6]:
class Game_sate:
    """Environment wrapper: applies an action and returns (image, reward, is_over)."""

    def __init__(self,agent,game):
        self._agent = agent
        self._game = game
        # show_img() is a generator-based coroutine that displays each frame it is sent.
        self._display = show_img()
        # Advance to the first ``yield`` so that send() can be used afterwards.
        next(self._display)

    def get_state(self,actions):
        """Perform ``actions`` and return the resulting experience tuple.

        ``actions`` is indexed as [do nothing, jump]; the agent jumps when
        ``actions[1] == 1``.

        Returns (image, reward, is_over): the processed screen capture, 0.1
        while the dino is alive or -1 when it crashed, and the crash flag.
        On a crash the score is logged and the game is restarted.
        """
        actions_df.loc[len(actions_df)] = actions[1] # storing actions in a dataframe
        score = self._game.get_score()
        reward = 0.1
        is_over = False #game over
        if actions[1] == 1:
            self._agent.jump()
        image = grab_screen(self._game._driver)
        self._display.send(image) #display the image on screen
        if self._agent.is_crashed():
            # Append at the end of scores_df. Indexing by len(loss_df) wrote
            # scores to unrelated row labels and overwrote earlier entries.
            scores_df.loc[len(scores_df)] = score # log the score when game is over
            self._game.restart()
            reward = -1
            is_over = True
        return image, reward, is_over #return the Experience tuple

為了取得Game的state，這裡透過Selenium在瀏覽器中執行JavaScript，從遊戲的canvas擷取畫面，再用OpenCV做影像處理(OpenCV中影像處理的速度有經過特別優化)。get_state方法的輸入為一個action，回傳下一狀態的影像、reward(如果撞毀=-1，沒撞毀=0.1)、is_over(是否撞毀)。這樣的擷取方式平均可以達到約5FPS，對於表示每個Action之後的State轉換已經足夠。

In [7]:
def save_obj(obj, name ):
    """Pickle ``obj`` to ``objects/<name>.pkl``, creating the folder if needed.

    An existing file with the same name is overwritten.
    """
    path = 'objects/'+ name + '.pkl'
    # Without this, the first save fails when the folder has not been created by hand.
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, 'wb') as f: #dump files into objects folder
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)
def load_obj(name):
    """Return the object unpickled from ``objects/<name>.pkl``."""
    pickle_path = 'objects/' + name + '.pkl'
    # NOTE: pickle.load executes code embedded in the file; only load files
    # that come from a trusted source.
    with open(pickle_path, 'rb') as handle:
        restored = pickle.load(handle)
    return restored

def grab_screen(_driver):
    """Capture the game canvas through ``_driver`` and return the processed frame.

    The canvas-export script returns a base64 string; it is decoded, opened
    as an image, turned into an array and passed through process_img().
    """
    encoded_frame = _driver.execute_script(getbase64Script)
    raw_bytes = base64.b64decode(encoded_frame)
    frame = np.array(Image.open(BytesIO(raw_bytes)))
    return process_img(frame) #processing image as required

def process_img(image):
    """Convert a captured frame to an 80x80 grayscale image.

    ``image`` is an array in RGB or RGBA channel order (as produced from a
    PIL image), or an already single-channel 2-D array. The frame is
    converted to grayscale, cropped to at most 300 rows by 500 columns and
    resized to 80x80.
    """
    if image.ndim == 3:
        # The frame is in RGB(A) order, so use the RGB conversion codes
        # rather than the BGR one.
        code = cv2.COLOR_RGBA2GRAY if image.shape[2] == 4 else cv2.COLOR_RGB2GRAY
        image = cv2.cvtColor(image, code)
    image = image[:300, :500] #Crop Region of Interest(ROI)
    image = cv2.resize(image, (80,80))
    return  image

def show_img(graphs = False):
    """
    Coroutine that shows every image it is sent in an OpenCV window.

    Prime it with next() and then feed frames with send(). The window is
    titled "logs" when ``graphs`` is true and "game_play" otherwise.
    Pressing 'q' in the window closes all OpenCV windows and finishes the
    coroutine, so the send() that delivered that frame raises StopIteration.
    """
    window_title = "logs" if graphs else "game_play"
    while True:
        screen = (yield)
        # WINDOW_NORMAL makes the window resizable, so the frame is shown
        # as-is; the former per-frame resize produced a copy that was never used.
        cv2.namedWindow(window_title, cv2.WINDOW_NORMAL)
        cv2.imshow(window_title, screen)
        if (cv2.waitKey(1) & 0xFF) == ord('q'):
            cv2.destroyAllWindows()
            break

儲存object方法，後面會儲存的資料有:行為，格式為(S,a,r,St+1,是否結束)、時間(為了辨認FPS的更新狀態)、epsilon(隨機行為的機率隨時間遞減的參數)，螢幕截圖(單純擷取chrome中的遊戲影像)，影像處理(只保留有興趣的影像區域，在小恐龍的遊戲中其實只在乎仙人掌與螢幕左邊界的距離。size為80*80)

In [8]:
# Initialize the log structures: load each one from its own CSV file when that
# file exists, otherwise start with an empty single-column DataFrame.
def _load_log(path, column):
    """Return the CSV at ``path`` as a DataFrame, or an empty frame with ``column`` if it is absent."""
    return pd.read_csv(path) if os.path.isfile(path) else pd.DataFrame(columns=[column])

loss_df = _load_log(loss_file_path, 'loss')
# Each log checks and reads its own file: scores used to be gated on the loss
# file, and q-values used to be read from the actions file.
scores_df = _load_log(scores_file_path, 'scores')
actions_df = _load_log(actions_file_path, 'actions')
q_values_df = _load_log(q_value_file_path, 'qvalues')

使用DataFrame儲存loss、score、action、q_values。

In [9]:
# --- Hyper-parameters of the game / learner -----------------------------------
ACTIONS = 2                # size of the action vector: index 0 = do nothing, index 1 = jump
GAMMA = 0.99               # discount applied to the best next-state Q value in the training target
OBSERVATION = 100.0        # timesteps to observe before training starts
EXPLORE = 100000           # frames over which epsilon is meant to be annealed
FINAL_EPSILON = 1e-4       # final value of epsilon
INITIAL_EPSILON = 0.1      # starting value of epsilon
REPLAY_MEMORY = 50000      # maximum number of transitions kept in the replay memory
BATCH = 16                 # minibatch size
FRAME_PER_ACTION = 1       # an action is chosen every FRAME_PER_ACTION timesteps
LEARNING_RATE = 0.0001     # learning rate passed to the Adam optimizer

# Network input: 80x80 frames, 4 of them stacked per state.
img_rows = img_cols = 80
img_channels = 4

In [12]:
# Training variables are checkpointed to the filesystem so that training can
# resume from the same step.
def init_cache():
    """Write the initial checkpoint values; meant to be run only once.

    Saves the starting epsilon, a timestep counter of 0 and an empty replay
    memory under the names "epsilon", "time" and "D".
    """
    initial_values = (
        (INITIAL_EPSILON, "epsilon"),
        (0, "time"),
        (deque(), "D"),
    )
    for value, key in initial_values:
        save_obj(value, key)

In [13]:
'''Call only once to init file structure
'''
# NOTE: every run of this cell overwrites the cached "epsilon", "time" and "D"
# objects with their initial values, discarding any saved training progress.
init_cache()

如果想要執行訓練，第一次需要執行init_cache()，它會創建所需的檔案。在這之前須先建立名為objects的空白資料夾以存放這些檔案。

In [14]:
def buildmodel():
    """Build and compile the convolutional Q-network.

    The input is a stack of ``img_channels`` frames of ``img_rows`` x
    ``img_cols`` pixels; the output has one value per action (ACTIONS).
    The model is compiled with MSE loss and the Adam optimizer.

    When no ``model.h5`` weights file exists yet, the freshly initialised
    weights are saved to it so that a later load_weights("model.h5") works.
    Existing weights are never overwritten here.
    """
    print("Now we build the model")
    model = Sequential()
    # Frames are (rows, cols) arrays, so the input shape is rows first.
    model.add(Conv2D(32, (8, 8), padding='same',strides=(4, 4),input_shape=(img_rows,img_cols,img_channels)))  #80*80*4
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Activation('relu'))
    model.add(Conv2D(64, (4, 4),strides=(2, 2),  padding='same'))
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Activation('relu'))
    model.add(Conv2D(64, (3, 3),strides=(1, 1),  padding='same'))
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Activation('relu'))
    model.add(Flatten())
    model.add(Dense(512))
    model.add(Activation('relu'))
    model.add(Dense(ACTIONS))
    adam = Adam(lr=LEARNING_RATE)
    model.compile(loss='mse',optimizer=adam)

    # Create the weights file only if it is missing. Checking the loss log
    # instead could skip creation (weights absent, log present) or overwrite
    # trained weights (weights present, log absent).
    if not os.path.isfile('model.h5'):
        model.save_weights('model.h5')
    print("We finish building the model")
    return model

# 建立模型
模型輸入為80x80x4，即每四張影像為一組State，再經過幾次捲積與池化，再展平成512的特徵向量，最後輸出(0,1)代表不跳與跳，這裡的類神經網路用來替代傳統的Q-table，由於影像為格式的state很難窮舉到Q-table中，因此使用類神經網路，好處是可以舉一反三，State的pixel值即使不盡相同，但是仍可以訓練出其特性。這個方法即為DQN，為一種off-policy learning。

In [15]:
def _save_checkpoint(model, game_state, D, t, epsilon):
    """Persist weights, replay memory, counters and log frames while the game is paused."""
    print("Now we save model")
    game_state._game.pause() #pause game while saving to filesystem
    try:
        model.save_weights("model.h5", overwrite=True)
        save_obj(D,"D") #saving episodes
        save_obj(t,"time") #caching time steps
        save_obj(epsilon,"epsilon") #cache epsilon to avoid repeated randomness in actions
        loss_df.to_csv(loss_file_path,index=False)
        scores_df.to_csv(scores_file_path,index=False)
        actions_df.to_csv(actions_file_path,index=False)
        q_values_df.to_csv(q_value_file_path,index=False)
        # NOTE(review): model.to_json() already returns a JSON string, so the
        # file holds a JSON-encoded string; kept as-is for existing readers.
        with open("model.json", "w") as outfile:
            json.dump(model.to_json(), outfile)
        clear_output()
    finally:
        # Never leave the game paused, even if saving failed.
        game_state._game.resume()


def trainNetwork(model,game_state,observe=False):
    """Main training module: play the game endlessly and train the Q-network.

    Parameters:
    * model => Keras model to be trained
    * game_state => game state module with access to the game environment and dino
    * observe => when True, only play: the timestep never exceeds the
      observation threshold, so no weight updates happen

    The replay memory and timestep counter are resumed from the "D" and
    "time" cache objects and the weights from "model.h5". A checkpoint is
    written every 20000 timesteps. The loop has no exit condition; it only
    ends when an exception propagates (for example from game_state).
    """
    last_time = time.time()
    # store the previous observations in replay memory
    D = load_obj("D") #load from file system
    # get the first state by doing nothing
    do_nothing = np.zeros(ACTIONS)
    do_nothing[0] =1 #0 => do nothing,
                     #1=> jump

    x_t, r_0, terminal = game_state.get_state(do_nothing) # get next step after performing the action

    s_t = np.stack((x_t, x_t, x_t, x_t), axis=2) # stack 4 images to create placeholder input
    s_t = s_t.reshape(1, s_t.shape[0], s_t.shape[1], s_t.shape[2])  #1 x rows x cols x 4

    initial_state = s_t

    if observe :
        OBSERVE = 999999999    #We keep observe, never train
        epsilon = FINAL_EPSILON
        print ("Now we load weight")
        model.load_weights("model.h5")
        adam = Adam(lr=LEARNING_RATE)
        model.compile(loss='mse',optimizer=adam)
        print ("Weight load successfully")
    else:                       #We go to training mode
        OBSERVE = OBSERVATION
        # NOTE: exploration is switched off here: epsilon is fixed at 0 instead
        # of being loaded from the "epsilon" cache and it is not annealed.
        epsilon = 0
        model.load_weights("model.h5")
        adam = Adam(lr=LEARNING_RATE)
        model.compile(loss='mse',optimizer=adam)

    checkpoint_interval = 20000 # timesteps between two checkpoints

    t = load_obj("time") # resume from the previous time step stored in file system
    while (True): #endless running

        loss = 0
        Q_sa = 0
        action_index = 0
        a_t = np.zeros([ACTIONS]) # action at t

        #choose an action epsilon greedy
        if t % FRAME_PER_ACTION == 0: #parameter to skip frames for actions
            if  random.random() <= epsilon: #randomly explore an action
                print("----------Random Action----------")
                action_index = random.randrange(ACTIONS)
            else: # predict the output
                q = model.predict(s_t)       #input a stack of 4 images, get the prediction
                action_index = np.argmax(q)  # choosing index with maximum q value
            # Mark the action that was actually chosen (0 => do nothing,
            # 1 => jump); random exploration used to always mark index 0.
            a_t[action_index] = 1

        #run the selected action and observed next state and reward
        x_t1, r_t, terminal = game_state.get_state(a_t)
        now = time.time()
        # max() guards against a zero interval on coarse clocks
        print('fps: {0}'.format(1 / max(now - last_time, 1e-9))) # helpful for measuring frame rate
        last_time = now
        x_t1 = x_t1.reshape(1, x_t1.shape[0], x_t1.shape[1], 1) #1 x rows x cols x 1
        s_t1 = np.append(x_t1, s_t[:, :, :, :3], axis=3) # put the new image in front of the stack and drop the last one

        # store the transition in D
        D.append((s_t, action_index, r_t, s_t1, terminal))
        if len(D) > REPLAY_MEMORY:
            D.popleft()

        #only train if done observing
        if t > OBSERVE:

            #sample a minibatch to train on (never more than the memory holds)
            minibatch = random.sample(D, min(BATCH, len(D)))
            inputs = np.zeros((len(minibatch), s_t.shape[1], s_t.shape[2], s_t.shape[3]))   #batch, rows, cols, 4
            targets = np.zeros((inputs.shape[0], ACTIONS))                                  #batch, ACTIONS

            #Now we do the experience replay
            # ``terminal_t`` is deliberately a separate name: reusing
            # ``terminal`` here clobbered the flag of the step just played,
            # which decides below whether the state stack is reset.
            for i, (state_t, action_t, reward_t, state_t1, terminal_t) in enumerate(minibatch):
                inputs[i:i + 1] = state_t

                targets[i] = model.predict(state_t)  # predicted q values
                Q_sa = model.predict(state_t1)      #predict q values for next step

                if terminal_t:
                    targets[i, action_t] = reward_t # if terminated, only equals reward
                else:
                    targets[i, action_t] = reward_t + GAMMA * np.max(Q_sa)

            loss += model.train_on_batch(inputs, targets)
            loss_df.loc[len(loss_df)] = loss
            q_values_df.loc[len(q_values_df)] = np.max(Q_sa)
        s_t = initial_state if terminal else s_t1 #reset game to initial frame if terminate
        t = t + 1

        # save progress every ``checkpoint_interval`` iterations
        if t % checkpoint_interval == 0:
            _save_checkpoint(model, game_state, D, t, epsilon)

        # print info
        if t <= OBSERVE:
            state = "observe"
        elif t <= OBSERVE + EXPLORE:
            state = "explore"
        else:
            state = "train"

        print("TIMESTEP", t, "/ STATE", state, "/ EPSILON", epsilon, "/ ACTION", action_index, "/ REWARD", r_t, "/ Q_MAX " , np.max(Q_sa), "/ Loss ", loss)


# Training
這裡寫下訓練的過程，我們先觀察100次，然後到累積10萬筆之前都是EXPLORE狀態，但仍會訓練神經網路，這是因為epsilon(隨機動作的機率)在10萬次以後才會收斂到最低值，後面的狀態我們稱為訓練狀態。
訓練方式是這樣的:使用當前state預測下一步的action並且概率地(取決於epsilon)執行，這時候觀察St+1與reward並記錄下來，然後從replay buffer中隨機取出BATCH筆(程式中BATCH=16)資料出來訓練，得到16組(S,A,r,St+1,terminal)。如果恐龍在State S採取Action A後陣亡了，那麼reward就不更新；但如果沒有陣亡，則reward=reward+GAMMA*max(Q_sa)，意思是此State的Action會讓你的恐龍活著，所以是好棒棒的，所以多給一點reward。然後再拿其S,A,r進行supervised learning，Ptrue為更新過的reward，再進行反向傳播法更新參數，如此一來網路就會更像理想的Q_table。

In [16]:
#main function
def playGame(observe=False):
    """Start the browser game, build the model and run the training loop.

    ``observe`` is forwarded to trainNetwork(). A StopIteration coming out of
    the loop (raised when the display coroutine finishes) is treated as a
    normal exit. The browser is shut down on every exit path, including
    errors and interrupts, which are re-raised after the cleanup.
    """
    game = Game()
    try:
        dino = DinoAgent(game)
        game_state = Game_sate(dino,game)
        model = buildmodel()
        trainNetwork(model,game_state,observe=observe)
    except StopIteration:
        pass
    finally:
        # Always release the browser; it used to stay open on any error
        # other than StopIteration.
        game.end()

In [None]:
# Launch the game in training mode (observe=False). The training loop has no
# exit condition, so this cell keeps running until it is interrupted or an
# exception ends it.
playGame(observe=False);

Now we build the model
We finish building the model
fps: 3.941933531449538
TIMESTEP 1 / STATE observe / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
fps: 21.724715900262087
TIMESTEP 2 / STATE observe / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
fps: 21.72381573904306
TIMESTEP 3 / STATE observe / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
fps: 21.03789455732837
TIMESTEP 4 / STATE observe / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
fps: 21.722915652417107
TIMESTEP 5 / STATE observe / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
fps: 20.604451693087643
TIMESTEP 6 / STATE observe / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
fps: 21.72381573904306
TIMESTEP 7 / STATE observe / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
fps: 10.74498270782631
TIMESTEP 8 / STATE observe / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
fps: 5.693970584641784
TIMESTEP 9 / STATE observe / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q

TIMESTEP 78 / STATE observe / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
fps: 19.985724156005052
TIMESTEP 79 / STATE observe / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
fps: 23.239201258837348
TIMESTEP 80 / STATE observe / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
fps: 21.037683514653587
TIMESTEP 81 / STATE observe / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
fps: 21.037683514653587
TIMESTEP 82 / STATE observe / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
fps: 21.261635009707458
TIMESTEP 83 / STATE observe / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
fps: 21.037366958580357
TIMESTEP 84 / STATE observe / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
fps: 22.206301388719762
TIMESTEP 85 / STATE observe / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
fps: 21.261635009707458
TIMESTEP 86 / STATE observe / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
fps: 21.490185630184502
TIMESTEP 87 / STATE

TIMESTEP 144 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  4.9616914 / Loss  0.6733856201171875
fps: 12.97786743979529
TIMESTEP 145 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  3.2446015 / Loss  0.8149788975715637
fps: 10.68756800281312
TIMESTEP 146 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  2.9308681 / Loss  0.9515091180801392
fps: 10.518788997451999
TIMESTEP 147 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.0303535 / Loss  0.9452688097953796
fps: 10.981319495534745
TIMESTEP 148 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  3.9925394 / Loss  0.29657119512557983
fps: 10.574399907222292
TIMESTEP 149 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  4.7465076 / Loss  0.8415642976760864
fps: 12.491226398236941
TIMESTEP 150 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  3.5524912 / Loss  0.5073165893554688
fps: 13.062623796295142
TIMESTEP 151 / STATE explore / EPSILON 0 / ACTION 1 / RE

TIMESTEP 205 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.145391 / Loss  0.4965343475341797
fps: 10.745092815092251
TIMESTEP 206 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.079951 / Loss  0.5956228971481323
fps: 16.248678970449227
TIMESTEP 207 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.908698 / Loss  0.22969657182693481
fps: 15.613916746703595
TIMESTEP 208 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  7.3131976 / Loss  0.2867915630340576
fps: 12.97778712897327
TIMESTEP 209 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.7353578 / Loss  0.9491228461265564
fps: 15.9886555102352
TIMESTEP 210 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  7.129387 / Loss  0.4151580035686493
fps: 15.736882192048085
TIMESTEP 211 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  6.7158484 / Loss  0.6363164782524109
fps: 15.86161985546324
TIMESTEP 212 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0

TIMESTEP 266 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.945001 / Loss  0.1819562017917633
fps: 10.745120342261332
TIMESTEP 267 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.0749445 / Loss  0.3308781683444977
fps: 10.68754076973255
TIMESTEP 268 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.008649 / Loss  0.1795467883348465
fps: 10.803156745173188
TIMESTEP 269 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.909386 / Loss  0.6657708883285522
fps: 10.355283428797156
TIMESTEP 270 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.537019 / Loss  0.2004561573266983
fps: 10.861935998466905
TIMESTEP 271 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.672122 / Loss  0.11134923994541168
fps: 10.409251997816051
TIMESTEP 272 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.287189 / Loss  0.6676126718521118
fps: 11.041884513523616
TIMESTEP 273 / STATE explore / EPSILON 0 / ACTION 1 / REWARD

fps: 12.977947751611296
TIMESTEP 329 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.258 / Loss  0.49218982458114624
fps: 12.811377291234587
TIMESTEP 330 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.678406 / Loss  0.16769829392433167
fps: 12.649257354826062
TIMESTEP 331 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  12.232459 / Loss  0.21271124482154846
fps: 13.235626942678174
TIMESTEP 332 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.415526 / Loss  6.217686176300049
fps: 12.569688687500749
TIMESTEP 333 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.585981 / Loss  0.22511494159698486
fps: 12.729839811099714
TIMESTEP 334 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  13.064024 / Loss  0.19436722993850708
fps: 12.81129902776802
TIMESTEP 335 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.9165125 / Loss  0.1201055571436882
fps: 12.729917082468344
TIMESTEP 336 / STATE explore / EP

fps: 12.811338159381776
TIMESTEP 392 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.931815 / Loss  0.29639583826065063
fps: 12.648608874494123
TIMESTEP 393 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.120285 / Loss  0.18951958417892456
fps: 12.978550121916502
TIMESTEP 394 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.811976 / Loss  0.10916881263256073
fps: 10.463577216301442
TIMESTEP 395 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  12.12947 / Loss  0.2786596715450287
fps: 15.984877587731333
TIMESTEP 396 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.87115 / Loss  0.24166074395179749
fps: 16.386304324045568
TIMESTEP 397 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.05426 / Loss  0.2344900369644165
fps: 12.893450761592966
TIMESTEP 398 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.814245 / Loss  0.14900639653205872
fps: 12.894283149493981
TIMESTEP 399 / STATE explore / EPS

fps: 12.729723905805656
TIMESTEP 455 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.047813 / Loss  0.15599943697452545
fps: 12.569764026827938
TIMESTEP 456 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  12.276889 / Loss  0.24148115515708923
fps: 12.97778712897327
TIMESTEP 457 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.467405 / Loss  0.10634341835975647
fps: 12.336623645540431
TIMESTEP 458 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.885257 / Loss  0.1088339239358902
fps: 11.041884513523616
TIMESTEP 459 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.761987 / Loss  0.1473103016614914
fps: 10.574506546188891
TIMESTEP 460 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  12.348091 / Loss  0.3251083791255951
fps: 12.811455555657371
TIMESTEP 461 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  13.589461 / Loss  0.07987412810325623
fps: 12.811338159381776
TIMESTEP 462 / STATE explore /

fps: 10.632623106660752
TIMESTEP 516 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  15.471866 / Loss  0.5796822309494019
fps: 12.893847123380317
TIMESTEP 517 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  13.064175 / Loss  0.4304116368293762
fps: 12.81008606629976
TIMESTEP 518 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  14.095497 / Loss  1.146722674369812
fps: 10.804158552543456
TIMESTEP 519 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  15.586706 / Loss  0.75037682056427
fps: 12.729917082468344
TIMESTEP 520 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  15.483211 / Loss  0.7973191142082214
fps: 12.569688687500749
TIMESTEP 521 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  12.477628 / Loss  0.23401418328285217
fps: 13.235835778976933
TIMESTEP 522 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  17.911484 / Loss  0.4586048722267151
fps: 12.811338159381776
TIMESTEP 523 / STATE explore / EPS

fps: 12.894045313412647
TIMESTEP 579 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  15.010529 / Loss  0.2517279386520386
fps: 12.649257354826062
TIMESTEP 580 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  15.664703 / Loss  0.1806877851486206
fps: 12.89412459113155
TIMESTEP 581 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  15.310425 / Loss  0.47375044226646423
fps: 12.97778712897327
TIMESTEP 582 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  12.718001 / Loss  0.23685577511787415
fps: 12.491077597503157
TIMESTEP 583 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  17.207619 / Loss  1.363696813583374
fps: 12.977746973934998
TIMESTEP 584 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  15.966461 / Loss  0.17602160573005676
fps: 16.248616023522757
TIMESTEP 585 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  16.34267 / Loss  0.2609027624130249
fps: 15.861080018151565
TIMESTEP 586 / STATE explore / E

fps: 10.803156745173188
TIMESTEP 642 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  13.125976 / Loss  0.27441856265068054
fps: 10.74506528806421
TIMESTEP 643 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  17.662508 / Loss  0.3197072148323059
fps: 9.99288588589304
TIMESTEP 644 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  17.159155 / Loss  0.7150365710258484
fps: 11.103091910207539
TIMESTEP 645 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  19.962723 / Loss  0.4643216133117676
fps: 10.861879740721847
TIMESTEP 646 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  14.122387 / Loss  0.29414230585098267
fps: 10.687676936523266
TIMESTEP 647 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  17.299786 / Loss  0.36450156569480896
fps: 12.977706819145217
TIMESTEP 648 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  16.59065 / Loss  1.9161603450775146
fps: 10.249183228136538
TIMESTEP 649 / STATE explore / E

fps: 12.72976254066916
TIMESTEP 704 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  13.700189 / Loss  0.5424468517303467
fps: 15.984633912102318
TIMESTEP 705 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  14.098186 / Loss  0.171181321144104
fps: 10.410983113785237
TIMESTEP 706 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  16.01702 / Loss  0.33551037311553955
fps: 11.165278872159636
TIMESTEP 707 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  16.155502 / Loss  0.5791105031967163
fps: 12.81141642332645
TIMESTEP 708 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  17.106241 / Loss  0.21767139434814453
fps: 12.569688687500749
TIMESTEP 709 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  15.930707 / Loss  0.24189741909503937
fps: 12.81141642332645
TIMESTEP 710 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  16.603489 / Loss  0.09719446301460266
fps: 12.649295502798687
TIMESTEP 711 / STATE explore / E

fps: 12.649257354826062
TIMESTEP 767 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  14.082613 / Loss  0.48187553882598877
fps: 10.574586526825334
TIMESTEP 768 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  15.946426 / Loss  0.30245688557624817
fps: 10.80307326961167
TIMESTEP 769 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  15.336994 / Loss  0.19870668649673462
fps: 10.574426566762302
TIMESTEP 770 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  16.000446 / Loss  0.5245969891548157
fps: 10.803184570647037
TIMESTEP 771 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  13.054521 / Loss  0.18416404724121094
fps: 10.745037761177207
TIMESTEP 772 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  14.613702 / Loss  0.2993362545967102
fps: 10.355232296897606
TIMESTEP 773 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  18.161558 / Loss  0.34136858582496643
fps: 12.894164230356578
TIMESTEP 774 / STATE explor

fps: 10.981233243967829
TIMESTEP 828 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  15.490821 / Loss  0.3278936743736267
fps: 10.35530899493629
TIMESTEP 829 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  13.311985 / Loss  0.8976985216140747
fps: 10.803128919842678
TIMESTEP 830 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  15.755998 / Loss  0.3318958282470703
fps: 10.803156745173188
TIMESTEP 831 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  16.390049 / Loss  0.23461520671844482
fps: 12.491151997426902
TIMESTEP 832 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  19.46985 / Loss  0.4445241093635559
fps: 12.894045313412647
TIMESTEP 833 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  14.8654995 / Loss  0.2076835334300995
fps: 10.803101094655505
TIMESTEP 834 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.897346 / Loss  0.3423175513744354
fps: 12.413553884355736
TIMESTEP 835 / STATE explore / 

fps: 12.649104765236482
TIMESTEP 890 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  13.514513 / Loss  0.5255002379417419
fps: 13.413231254337237
TIMESTEP 891 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  14.971975 / Loss  0.11157847940921783
fps: 12.72976254066916
TIMESTEP 892 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  14.867887 / Loss  0.23902998864650726
fps: 10.574533206266608
TIMESTEP 893 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  15.072642 / Loss  0.16962310671806335
fps: 10.92124462960552
TIMESTEP 894 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  14.543624 / Loss  0.12571987509727478
fps: 12.72976254066916
TIMESTEP 895 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  14.556528 / Loss  0.25637906789779663
fps: 10.574533206266608
TIMESTEP 896 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  14.158816 / Loss  0.2646161615848541
fps: 16.79468246976856
TIMESTEP 897 / STATE explore /

TIMESTEP 952 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.709639 / Loss  0.724677562713623
fps: 12.649219207083531
TIMESTEP 953 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  12.780499 / Loss  0.5928736925125122
fps: 10.80307326961167
TIMESTEP 954 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.63856 / Loss  0.32295098900794983
fps: 12.811494688227352
TIMESTEP 955 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  12.706817 / Loss  0.25400951504707336
fps: 12.413590623890139
TIMESTEP 956 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  13.12033 / Loss  0.32349854707717896
fps: 13.235585176209229
TIMESTEP 957 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  13.508079 / Loss  0.28523731231689453
fps: 12.56818209058983
TIMESTEP 958 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.694475 / Loss  0.2007620632648468
fps: 12.650516359424765
TIMESTEP 959 / STATE explore / EPSILON 0 / ACTION 1 / RE

TIMESTEP 1014 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  15.1510725 / Loss  0.3315693736076355
fps: 12.649104765236482
TIMESTEP 1015 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  14.125092 / Loss  0.4553770124912262
fps: 9.002294409924557
TIMESTEP 1016 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  12.4547 / Loss  0.5978546738624573
fps: 10.981261994339588
TIMESTEP 1017 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  14.3384495 / Loss  0.25995227694511414
fps: 8.962207184203384
TIMESTEP 1018 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  16.086813 / Loss  0.31139856576919556
fps: 10.981233243967829
TIMESTEP 1019 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.196327 / Loss  0.9327086210250854
fps: 12.491114797354244
TIMESTEP 1020 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  15.390406 / Loss  0.33997663855552673
fps: 9.25273988316891
TIMESTEP 1021 / STATE explore / EPSILON 0 / ACTIO

fps: 10.861795355196918
TIMESTEP 1075 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  15.174512 / Loss  0.4263279438018799
fps: 10.803128919842678
TIMESTEP 1076 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  13.977103 / Loss  0.3127215504646301
fps: 9.992862078002526
TIMESTEP 1077 / STATE explore / EPSILON 0 / ACTION 1 / REWARD -1 / Q_MAX  16.35633 / Loss  0.24799181520938873
fps: 9.60852563118673
TIMESTEP 1078 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  16.443605 / Loss  1.0316133499145508
fps: 10.804910030011463
TIMESTEP 1079 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  13.155349 / Loss  0.3369404375553131
fps: 9.21140778937844
TIMESTEP 1080 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  12.8203745 / Loss  0.33298182487487793
fps: 10.518841757326793
TIMESTEP 1081 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  15.526103 / Loss  0.9237378835678101
fps: 9.167737324237661
TIMESTEP 1082 / STATE explore

TIMESTEP 1136 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  16.590357 / Loss  0.3571202754974365
fps: 13.062501751817997
TIMESTEP 1137 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  18.870232 / Loss  0.570713996887207
fps: 15.734461750847814
TIMESTEP 1138 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  12.56844 / Loss  0.652664303779602
fps: 12.977546202470938
TIMESTEP 1139 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  16.946072 / Loss  0.4909950792789459
fps: 10.574666508671656
TIMESTEP 1140 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  17.656424 / Loss  0.8148490190505981
fps: 10.68756800281312
TIMESTEP 1141 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  17.501059 / Loss  0.3660222589969635
fps: 10.630736671355612
TIMESTEP 1142 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  18.827513 / Loss  0.5504686832427979
fps: 12.649257354826062
TIMESTEP 1143 / STATE explore / EPSILON 0 / ACTION 1

fps: 9.60859166674303
TIMESTEP 1198 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  27.753372 / Loss  0.6262657642364502
fps: 12.649219207083531
TIMESTEP 1199 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  34.290203 / Loss  0.9786842465400696
fps: 10.630736671355612
TIMESTEP 1200 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  38.38426 / Loss  24.165790557861328
fps: 10.630763615718399
TIMESTEP 1201 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  34.05149 / Loss  11.058721542358398
fps: 12.649181059571093
TIMESTEP 1202 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  33.59369 / Loss  0.8814729452133179
fps: 10.574533206266608
TIMESTEP 1203 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  26.773052 / Loss  1.3039836883544922
fps: 11.041913582359491
TIMESTEP 1204 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  26.789497 / Loss  0.6900613307952881
fps: 10.463681631756993
TIMESTEP 1205 / STATE explore

TIMESTEP 1258 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  14.290547 / Loss  1.448018193244934
fps: 10.862020386177177
TIMESTEP 1259 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  24.465134 / Loss  0.7953934669494629
fps: 12.97734543721883
TIMESTEP 1260 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  22.384388 / Loss  0.5500928163528442
fps: 10.354567628151322
TIMESTEP 1261 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  22.202963 / Loss  0.7518577575683594
fps: 11.043105536423283
TIMESTEP 1262 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  24.616457 / Loss  0.700712263584137
fps: 10.5744798862456
TIMESTEP 1263 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  22.596987 / Loss  0.8771374821662903
fps: 10.862020386177177
TIMESTEP 1264 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  20.848467 / Loss  1.1210222244262695
fps: 12.729839811099714
TIMESTEP 1265 / STATE explore / EPSILON 0 / ACTION 1 

fps: 12.729801175767177
TIMESTEP 1319 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  33.409527 / Loss  1.1025171279907227
fps: 10.687676936523266
TIMESTEP 1320 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  26.009394 / Loss  0.715623676776886
fps: 13.062623796295142
TIMESTEP 1321 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  26.563862 / Loss  1.6668492555618286
fps: 12.490965999279306
TIMESTEP 1322 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  29.973259 / Loss  1.3681461811065674
fps: 10.63079056021777
TIMESTEP 1323 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  29.543646 / Loss  8.600885391235352
fps: 10.921187755824732
TIMESTEP 1324 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  30.97835 / Loss  1.3608063459396362
fps: 10.629632070068375
TIMESTEP 1325 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  28.107668 / Loss  1.239535927772522
fps: 12.977907595579044
TIMESTEP 1326 / STATE explore

TIMESTEP 1382 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  36.59062 / Loss  3.262929916381836
fps: 10.92130150397867
TIMESTEP 1383 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  37.822945 / Loss  2.330178737640381
fps: 12.81141642332645
TIMESTEP 1384 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  32.057987 / Loss  2.0575835704803467
fps: 10.630709727129409
TIMESTEP 1385 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  34.31724 / Loss  2.752070426940918
fps: 12.649142912288744
TIMESTEP 1386 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  34.49954 / Loss  1.9664700031280518
fps: 10.803295873975834
TIMESTEP 1387 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  32.004883 / Loss  3.8213932514190674
fps: 12.729801175767177
TIMESTEP 1388 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  28.356224 / Loss  1.8731889724731445
fps: 15.861080018151565
TIMESTEP 1389 / STATE explore / EPSILON 0 / ACTION 0 / R

fps: 11.967552422012538
TIMESTEP 1443 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  18.319794 / Loss  1.005916953086853
fps: 13.976028549817231
TIMESTEP 1444 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  21.879173 / Loss  2.709127902984619
fps: 12.649295502798687
TIMESTEP 1445 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  17.171824 / Loss  2.166877031326294
fps: 16.248616023522757
TIMESTEP 1446 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  22.015984 / Loss  1.3826186656951904
fps: 12.569575680203304
TIMESTEP 1447 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  19.497953 / Loss  2.5053210258483887
fps: 12.97786743979529
TIMESTEP 1448 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  14.11337 / Loss  0.5167303681373596
fps: 12.811338159381776
TIMESTEP 1449 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  22.73066 / Loss  2.0459213256835938
fps: 12.811494688227352
TIMESTEP 1450 / STATE explore 

fps: 13.148576928020365
TIMESTEP 1506 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  20.459753 / Loss  1.5330216884613037
fps: 12.569688687500749
TIMESTEP 1507 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  25.656454 / Loss  1.041861891746521
fps: 12.649028471822215
TIMESTEP 1508 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  23.9927 / Loss  2.068476915359497
fps: 13.324175876539524
TIMESTEP 1509 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  24.220505 / Loss  0.5526223182678223
fps: 12.811338159381776
TIMESTEP 1510 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  21.94071 / Loss  1.2619242668151855
fps: 12.81141642332645
TIMESTEP 1511 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  17.139801 / Loss  2.008439540863037
fps: 12.811338159381776
TIMESTEP 1512 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  25.201237 / Loss  0.6776688694953918
fps: 10.574559866478756
TIMESTEP 1513 / STATE explore /

TIMESTEP 1568 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  12.796529 / Loss  0.690575361251831
fps: 12.649181059571093
TIMESTEP 1569 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  23.842506 / Loss  1.0874888896942139
fps: 16.65483626313846
TIMESTEP 1570 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  16.590809 / Loss  1.1332149505615234
fps: 15.736705061343939
TIMESTEP 1571 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  26.702442 / Loss  0.8376911878585815
fps: 16.117618577340902
TIMESTEP 1572 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  26.638607 / Loss  0.8058668971061707
fps: 12.649257354826062
TIMESTEP 1573 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  16.56054 / Loss  0.49979186058044434
fps: 13.148618147163566
TIMESTEP 1574 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  24.221197 / Loss  1.5112465620040894
fps: 15.861739824300663
TIMESTEP 1575 / STATE explore / EPSILON 0 / ACTION

fps: 12.811181634361045
TIMESTEP 1629 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  17.397203 / Loss  0.4393133819103241
fps: 12.894322789693897
TIMESTEP 1630 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  13.893654 / Loss  0.8811095952987671
fps: 13.062542433056986
TIMESTEP 1631 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  18.365131 / Loss  0.715989351272583
fps: 12.491151997426902
TIMESTEP 1632 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  17.067371 / Loss  0.3782621920108795
fps: 12.491077597503157
TIMESTEP 1633 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  18.382698 / Loss  0.7748500108718872
fps: 16.654968530982586
TIMESTEP 1634 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  19.955347 / Loss  0.30289673805236816
fps: 16.248678970449227
TIMESTEP 1635 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  15.250775 / Loss  0.333089679479599
fps: 15.613393638207977
TIMESTEP 1636 / STATE exp

TIMESTEP 1691 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  13.758862 / Loss  0.681503415107727
fps: 12.811338159381776
TIMESTEP 1692 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  14.0524025 / Loss  0.573230504989624
fps: 10.574533206266608
TIMESTEP 1693 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  16.01936 / Loss  0.4398805797100067
fps: 10.744955181362414
TIMESTEP 1694 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  16.840225 / Loss  0.7636665105819702
fps: 12.569801696830206
TIMESTEP 1695 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  15.313306 / Loss  0.33726510405540466
fps: 16.38182428896162
TIMESTEP 1696 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  15.969501 / Loss  0.23246434330940247
fps: 10.630655839086753
TIMESTEP 1697 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  15.703398 / Loss  0.643190860748291
fps: 12.649371799434228
TIMESTEP 1698 / STATE explore / EPSILON 0 / ACTION

fps: 10.574453226436738
TIMESTEP 1754 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.104116 / Loss  0.8446015119552612
fps: 10.74506528806421
TIMESTEP 1755 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  14.497739 / Loss  0.507783830165863
fps: 10.574533206266608
TIMESTEP 1756 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.754695 / Loss  0.2320982813835144
fps: 10.63060195159042
TIMESTEP 1757 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.720202 / Loss  0.5208432674407959
fps: 10.862104775198697
TIMESTEP 1758 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.8970127 / Loss  1.0483509302139282
fps: 10.463786049296477
TIMESTEP 1759 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  14.419232 / Loss  0.5159512162208557
fps: 12.491151997426902
TIMESTEP 1760 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  12.572134 / Loss  0.24079863727092743
fps: 11.04176823971063
TIMESTEP 1761 / STATE explor

fps: 12.491077597503157
TIMESTEP 1815 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  12.042627 / Loss  0.4178854525089264
fps: 16.248364240693896
TIMESTEP 1816 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  15.721831 / Loss  0.38462576270103455
fps: 16.117866324401387
TIMESTEP 1817 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  14.320205 / Loss  0.31462210416793823
fps: 12.729569368696755
TIMESTEP 1818 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  16.357195 / Loss  0.28950488567352295
fps: 12.729723905805656
TIMESTEP 1819 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  14.874459 / Loss  0.8894824981689453
fps: 10.981290744861893
TIMESTEP 1820 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  15.530184 / Loss  0.3880072832107544
fps: 12.49104039787364
TIMESTEP 1821 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  15.949235 / Loss  0.5471416711807251
fps: 13.061972918768763
TIMESTEP 1822 / STATE 

TIMESTEP 1878 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  17.071507 / Loss  0.3059508204460144
fps: 10.630009225185772
TIMESTEP 1879 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  12.165875 / Loss  0.43215715885162354
fps: 15.990301292779725
TIMESTEP 1880 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  14.914273 / Loss  1.0296199321746826
fps: 12.81141642332645
TIMESTEP 1881 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  15.54803 / Loss  0.16254429519176483
fps: 12.569613349076677
TIMESTEP 1882 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  15.496934 / Loss  0.6382385492324829
fps: 13.062705160546887
TIMESTEP 1883 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.905139 / Loss  0.3222171366214752
fps: 12.490742808813764
TIMESTEP 1884 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.993452 / Loss  0.3753654956817627
fps: 16.3824641439865
TIMESTEP 1885 / STATE explore / EPSILON 0 / ACTION 0

fps: 10.356382886786816
TIMESTEP 1941 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  13.473194 / Loss  0.3232940435409546
fps: 13.50405027753094
TIMESTEP 1942 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  15.30935 / Loss  0.40635865926742554
fps: 12.569500343133877
TIMESTEP 1943 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  12.074175 / Loss  0.31633061170578003
fps: 12.49130079993329
TIMESTEP 1944 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  14.254 / Loss  0.47524988651275635
fps: 13.235668709410717
TIMESTEP 1945 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  14.145807 / Loss  0.7140528559684753
fps: 12.569726357051453
TIMESTEP 1946 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.988317 / Loss  0.2411259561777115
fps: 12.978991895680482
TIMESTEP 1947 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  12.534139 / Loss  0.4310636818408966
fps: 12.81141642332645
TIMESTEP 1948 / STATE explor

TIMESTEP 2003 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  16.664068 / Loss  0.2856503129005432
fps: 12.568860014503786
TIMESTEP 2004 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  17.069515 / Loss  0.20065902173519135
fps: 12.894084952150243
TIMESTEP 2005 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  14.10624 / Loss  0.6894068717956543
fps: 12.811494688227352
TIMESTEP 2006 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  12.63229 / Loss  0.41926851868629456
fps: 12.977586356266785
TIMESTEP 2007 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  15.491369 / Loss  0.42644354701042175
fps: 15.988960259220432
TIMESTEP 2008 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  15.447235 / Loss  0.46596962213516235
fps: 12.89412459113155
TIMESTEP 2009 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  14.80513 / Loss  0.19602876901626587
fps: 10.574426566762302
TIMESTEP 2010 / STATE explore / EPSILON 0 / ACT

TIMESTEP 2065 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.25768 / Loss  0.25153613090515137
fps: 12.729801175767177
TIMESTEP 2066 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  15.508945 / Loss  0.2486833930015564
fps: 12.649295502798687
TIMESTEP 2067 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  13.761283 / Loss  0.396403431892395
fps: 10.629632070068375
TIMESTEP 2068 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.5996275 / Loss  0.35483235120773315
fps: 12.649219207083531
TIMESTEP 2069 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  15.933381 / Loss  0.620124101638794
fps: 13.148700586225274
TIMESTEP 2070 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  15.380564 / Loss  0.5058531165122986
fps: 10.687595236032474
TIMESTEP 2071 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  3.9006748 / Loss  0.2511458396911621
fps: 10.518841757326793
TIMESTEP 2072 / STATE explore / EPSILON 0 / ACTI

TIMESTEP 2126 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  14.267639 / Loss  0.8172471523284912
fps: 10.63068278303979
TIMESTEP 2127 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.836684 / Loss  0.2622707486152649
fps: 10.409226164624597
TIMESTEP 2128 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.950004 / Loss  0.26789289712905884
fps: 13.062786525812239
TIMESTEP 2129 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.753702 / Loss  0.4643648564815521
fps: 12.977746973934998
TIMESTEP 2130 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.31222 / Loss  0.26847606897354126
fps: 12.811455555657371
TIMESTEP 2131 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.382524 / Loss  0.21545688807964325
fps: 10.144867103165868
TIMESTEP 2132 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  12.404937 / Loss  0.20949305593967438
fps: 11.228256533948691
TIMESTEP 2133 / STATE explore / EPSILON 0 / ACTIO

TIMESTEP 2188 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  5.8594394 / Loss  0.24523982405662537
fps: 10.745092815092251
TIMESTEP 2189 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.5881667 / Loss  0.2950291931629181
fps: 12.413553884355736
TIMESTEP 2190 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.6949563 / Loss  0.27480316162109375
fps: 10.921045573964282
TIMESTEP 2191 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.54588 / Loss  0.202789768576622
fps: 10.408451228500313
TIMESTEP 2192 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  3.9844964 / Loss  0.4433330297470093
fps: 10.981233243967829
TIMESTEP 2193 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.119747 / Loss  0.29026493430137634
fps: 12.731308127534543
TIMESTEP 2194 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.620474 / Loss  0.27050846815109253
fps: 12.729878446666769
TIMESTEP 2195 / STATE explore / EPSILON 0 / ACTI

TIMESTEP 2250 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.5504847 / Loss  0.13296037912368774
fps: 10.80307326961167
TIMESTEP 2251 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.7818565 / Loss  0.2869022786617279
fps: 12.977907595579044
TIMESTEP 2252 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.478596 / Loss  0.207771897315979
fps: 15.49274361256912
TIMESTEP 2253 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  5.8930626 / Loss  0.37669408321380615
fps: 13.148618147163566
TIMESTEP 2254 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.9246025 / Loss  0.32259273529052734
fps: 12.649333651001408
TIMESTEP 2255 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.843756 / Loss  0.17020700871944427
fps: 12.894005674918764
TIMESTEP 2256 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  4.4406514 / Loss  0.1867666244506836
fps: 13.061850886453595
TIMESTEP 2257 / STATE explore / EPSILON 0 / ACTIO

TIMESTEP 2313 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.1545515 / Loss  0.1874384582042694
fps: 12.977746973934998
TIMESTEP 2314 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  13.076878 / Loss  0.1457863301038742
fps: 12.569462674937817
TIMESTEP 2315 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  14.076972 / Loss  0.24834296107292175
fps: 13.148659366565202
TIMESTEP 2316 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  12.66236 / Loss  0.2658807039260864
fps: 15.736882192048085
TIMESTEP 2317 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  13.052056 / Loss  0.373661607503891
fps: 12.413590623890139
TIMESTEP 2318 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.073367 / Loss  0.18716353178024292
fps: 13.503963322365244
TIMESTEP 2319 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.77547 / Loss  0.36110082268714905
fps: 12.81141642332645
TIMESTEP 2320 / STATE explore / EPSILON 0 / ACTIO

fps: 12.648990325460204
TIMESTEP 2375 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  2.545754 / Loss  0.37647533416748047
fps: 10.86213290516393
TIMESTEP 2376 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.859362 / Loss  0.3667234182357788
fps: 12.72976254066916
TIMESTEP 2377 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.681545 / Loss  0.08436132967472076
fps: 12.49104039787364
TIMESTEP 2378 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.820098 / Loss  0.28280314803123474
fps: 12.977827284260032
TIMESTEP 2379 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  3.777584 / Loss  0.10064007341861725
fps: 10.803184570647037
TIMESTEP 2380 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.6700916 / Loss  0.14139612019062042
fps: 12.49104039787364
TIMESTEP 2381 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.935734 / Loss  0.1475813090801239
fps: 10.803156745173188
TIMESTEP 2382 / STATE explor

fps: 12.261178671655752
TIMESTEP 2437 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.054892 / Loss  0.23821282386779785
fps: 10.981348246358143
TIMESTEP 2438 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.4742565 / Loss  0.14886069297790527
fps: 12.569538011555705
TIMESTEP 2439 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.058465 / Loss  0.40499359369277954
fps: 12.811455555657371
TIMESTEP 2440 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.523379 / Loss  0.6327086687088013
fps: 13.062623796295142
TIMESTEP 2441 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.8011446 / Loss  0.3166951537132263
fps: 15.988594561832189
TIMESTEP 2442 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  2.1963384 / Loss  0.23858731985092163
fps: 12.729723905805656
TIMESTEP 2443 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.8798547 / Loss  0.32324934005737305
fps: 13.060915381118162
TIMESTEP 2444 / STATE 

fps: 10.803128919842678
TIMESTEP 2499 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.863446 / Loss  0.17058949172496796
fps: 8.88258639457511
TIMESTEP 2500 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.675251 / Loss  0.3283976912498474
fps: 11.165219428309792
TIMESTEP 2501 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.322475 / Loss  0.4036852717399597
fps: 12.649295502798687
TIMESTEP 2502 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  6.79819 / Loss  0.11788628250360489
fps: 10.68756800281312
TIMESTEP 2503 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.68105 / Loss  0.3608786463737488
fps: 10.687595236032474
TIMESTEP 2504 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  5.8077254 / Loss  0.19818025827407837
fps: 9.125828967293868
TIMESTEP 2505 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.118952 / Loss  0.2551499009132385
fps: 10.463916574151458
TIMESTEP 2506 / STATE explore

fps: 12.894045313412647
TIMESTEP 2560 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.280598 / Loss  0.2949879765510559
fps: 12.81141642332645
TIMESTEP 2561 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.868761 / Loss  0.1382063925266266
fps: 12.649257354826062
TIMESTEP 2562 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.298603 / Loss  0.11491171270608902
fps: 12.729723905805656
TIMESTEP 2563 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.68495 / Loss  3.8402209281921387
fps: 10.63079056021777
TIMESTEP 2564 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.087639 / Loss  0.22413554787635803
fps: 10.803101094655505
TIMESTEP 2565 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.933839 / Loss  0.19085989892482758
fps: 12.569726357051453
TIMESTEP 2566 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.2818666 / Loss  0.31264209747314453
fps: 10.518841757326793
TIMESTEP 2567 / STATE explo

TIMESTEP 2620 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  4.9042354 / Loss  0.22755901515483856
fps: 10.687676936523266
TIMESTEP 2621 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  4.5583396 / Loss  0.16816206276416779
fps: 12.569651018175824
TIMESTEP 2622 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.764696 / Loss  0.1828066110610962
fps: 13.062705160546887
TIMESTEP 2623 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.029196 / Loss  0.5157705545425415
fps: 15.256397292313064
TIMESTEP 2624 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.939446 / Loss  0.427252858877182
fps: 13.062461070832398
TIMESTEP 2625 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.882623 / Loss  0.24451592564582825
fps: 10.861851612067849
TIMESTEP 2626 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.458424 / Loss  0.32082483172416687
fps: 12.81141642332645
TIMESTEP 2627 / STATE explore / EPSILON 0 / ACTIO

TIMESTEP 2683 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.226536 / Loss  0.15537899732589722
fps: 10.803101094655505
TIMESTEP 2684 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  5.697007 / Loss  0.12557865679264069
fps: 16.11520344257886
TIMESTEP 2685 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.26472 / Loss  0.11584547907114029
fps: 12.731346772014922
TIMESTEP 2686 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.923996 / Loss  0.17232570052146912
fps: 10.687595236032474
TIMESTEP 2687 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.395173 / Loss  0.2834242582321167
fps: 10.74506528806421
TIMESTEP 2688 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.133359 / Loss  1.6821880340576172
fps: 10.518815377323238
TIMESTEP 2689 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.730288 / Loss  0.6266148090362549
fps: 10.574533206266608
TIMESTEP 2690 / STATE explore / EPSILON 0 / ACTION 1

TIMESTEP 2744 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  12.494876 / Loss  0.2177724838256836
fps: 15.493029750075724
TIMESTEP 2745 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.1795025 / Loss  0.10831177234649658
fps: 11.10320947913468
TIMESTEP 2746 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.878038 / Loss  0.11703111231327057
fps: 10.35530899493629
TIMESTEP 2747 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  12.782133 / Loss  0.36698004603385925
fps: 13.148576928020365
TIMESTEP 2748 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  12.360629 / Loss  0.6140437126159668
fps: 12.728951257780516
TIMESTEP 2749 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.587016 / Loss  0.27405282855033875
fps: 10.688276111625015
TIMESTEP 2750 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  12.94868 / Loss  0.32829850912094116
fps: 12.97778712897327
TIMESTEP 2751 / STATE explore / EPSILON 0 / ACT

TIMESTEP 2807 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.849331 / Loss  0.7654163837432861
fps: 12.892459963729136
TIMESTEP 2808 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.017034 / Loss  0.2773541510105133
fps: 10.410027053188056
TIMESTEP 2809 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.494263 / Loss  0.40367257595062256
fps: 10.74525798022237
TIMESTEP 2810 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  12.132433 / Loss  0.1369933784008026
fps: 12.89412459113155
TIMESTEP 2811 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  6.571987 / Loss  0.19520239531993866
fps: 10.518815377323238
TIMESTEP 2812 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.491493 / Loss  0.14051717519760132
fps: 13.323837266555907
TIMESTEP 2813 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.099567 / Loss  0.47804921865463257
fps: 10.74506528806421
TIMESTEP 2814 / STATE explore / EPSILON 0 / ACTION 

TIMESTEP 2870 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.394308 / Loss  0.14896060526371002
fps: 12.413517145038801
TIMESTEP 2871 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.167034 / Loss  0.23691213130950928
fps: 13.326800687580029
TIMESTEP 2872 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.914863 / Loss  0.15270614624023438
fps: 12.809812203561687
TIMESTEP 2873 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  4.8200383 / Loss  0.18981347978115082
fps: 15.864139582206521
TIMESTEP 2874 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.022181 / Loss  0.42882534861564636
fps: 12.649257354826062
TIMESTEP 2875 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.020263 / Loss  0.10824118554592133
fps: 13.235543410003881
TIMESTEP 2876 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.511283 / Loss  0.2100282907485962
fps: 12.729955718504446
TIMESTEP 2877 / STATE explore / EPSILON 0 / ACT

TIMESTEP 2932 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.5697975 / Loss  0.11632942408323288
fps: 10.921187755824732
TIMESTEP 2933 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.867279 / Loss  0.19579574465751648
fps: 10.803212396264223
TIMESTEP 2934 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.691685 / Loss  0.07999787479639053
fps: 10.574506546188891
TIMESTEP 2935 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  5.724567 / Loss  0.3004484176635742
fps: 10.630736671355612
TIMESTEP 2936 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  4.770406 / Loss  0.14562946557998657
fps: 10.572880535613455
TIMESTEP 2937 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.515733 / Loss  0.20133161544799805
fps: 10.863736554057029
TIMESTEP 2938 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.612116 / Loss  0.20196595788002014
fps: 10.630736671355612
TIMESTEP 2939 / STATE explore / EPSILON 0 / ACTI

TIMESTEP 2993 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.7553806 / Loss  0.2422754466533661
fps: 10.355334561201666
TIMESTEP 2994 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.793469 / Loss  0.11752837896347046
fps: 11.041971720490404
TIMESTEP 2995 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.460923 / Loss  0.3315493166446686
fps: 10.46381215400697
TIMESTEP 2996 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  5.5221505 / Loss  0.24104398488998413
fps: 10.921216192641081
TIMESTEP 2997 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.324162 / Loss  0.1871538609266281
fps: 9.125868678824906
TIMESTEP 2998 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.584832 / Loss  0.174547016620636
fps: 10.463759944716234
TIMESTEP 2999 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.522913 / Loss  0.08787745982408524
fps: 10.921329941387382
TIMESTEP 3000 / STATE explore / EPSILON 0 / ACTION 1 

TIMESTEP 3055 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  12.382528 / Loss  0.17397716641426086
fps: 8.921469532220463
TIMESTEP 3056 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.243269 / Loss  0.32014065980911255
fps: 11.042931088047434
TIMESTEP 3057 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.642158 / Loss  0.254641056060791
fps: 10.574879799510374
TIMESTEP 3058 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.42461 / Loss  0.15517276525497437
fps: 10.745120342261332
TIMESTEP 3059 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.360963 / Loss  0.1230616346001625
fps: 10.5744798862456
TIMESTEP 3060 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.888313 / Loss  0.13249774277210236
fps: 9.25267864824501
TIMESTEP 3061 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.5402975 / Loss  0.14024509489536285
fps: 10.355206731137187
TIMESTEP 3062 / STATE explore / EPSILON 0 / ACTIO

TIMESTEP 3117 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  3.0081072 / Loss  0.5563504695892334
fps: 12.811377291234587
TIMESTEP 3118 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.485483 / Loss  0.2128274291753769
fps: 12.729839811099714
TIMESTEP 3119 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  3.3742774 / Loss  0.4296914041042328
fps: 12.811377291234587
TIMESTEP 3120 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.855124 / Loss  0.1096004918217659
fps: 10.63068278303979
TIMESTEP 3121 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.049796 / Loss  0.20494093000888824
fps: 10.630709727129409
TIMESTEP 3122 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.368757 / Loss  0.21218767762184143
fps: 9.12596795916458
TIMESTEP 3123 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.525668 / Loss  0.2843620479106903
fps: 10.518841757326793
TIMESTEP 3124 / STATE explore / EPSILON 0 / ACTION 1

TIMESTEP 3178 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.5070305 / Loss  0.3697095215320587
fps: 12.729801175767177
TIMESTEP 3179 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.714506 / Loss  0.219862699508667
fps: 12.97778712897327
TIMESTEP 3180 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.278295 / Loss  0.12351992726325989
fps: 10.463786049296477
TIMESTEP 3181 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  7.8451405 / Loss  0.3103533387184143
fps: 11.041855444840795
TIMESTEP 3182 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.274659 / Loss  0.10077294707298279
fps: 12.729801175767177
TIMESTEP 3183 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.394201 / Loss  0.1048475056886673
fps: 10.687595236032474
TIMESTEP 3184 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.908619 / Loss  0.19794556498527527
fps: 10.63079056021777
TIMESTEP 3185 / STATE explore / EPSILON 0 / ACTION 0 

fps: 13.235710476406863
TIMESTEP 3241 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  6.5911536 / Loss  0.3095276355743408
fps: 12.811377291234587
TIMESTEP 3242 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.563524 / Loss  0.3126662075519562
fps: 12.569726357051453
TIMESTEP 3243 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.650578 / Loss  0.16352763772010803
fps: 13.062664478294316
TIMESTEP 3244 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.323218 / Loss  0.21704798936843872
fps: 12.81141642332645
TIMESTEP 3245 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.864438 / Loss  0.24798284471035004
fps: 10.40927783113573
TIMESTEP 3246 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.935716 / Loss  0.2241853028535843
fps: 10.92124462960552
TIMESTEP 3247 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  5.706028 / Loss  0.18836945295333862
fps: 10.630709727129409
TIMESTEP 3248 / STATE ex

TIMESTEP 3302 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.87088 / Loss  0.17501972615718842
fps: 10.301947501700901
TIMESTEP 3303 / STATE explore / EPSILON 0 / ACTION 1 / REWARD -1 / Q_MAX  8.268402 / Loss  0.19036416709423065
fps: 11.041710103722425
TIMESTEP 3304 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.716018 / Loss  0.22609728574752808
fps: 9.210153711023276
TIMESTEP 3305 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.051378 / Loss  0.3833855390548706
fps: 10.301770380995421
TIMESTEP 3306 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.7774553 / Loss  0.08347269892692566
fps: 11.041884513523616
TIMESTEP 3307 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  3.3679385 / Loss  0.31111183762550354
fps: 10.574399907222292
TIMESTEP 3308 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.350324 / Loss  0.38101738691329956
fps: 9.125987815491731
TIMESTEP 3309 / STATE explore / EPSILON 0 / ACTION 

TIMESTEP 3363 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.928241 / Loss  0.2039203941822052
fps: 9.252760294990326
TIMESTEP 3364 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.5588255 / Loss  0.31215792894363403
fps: 10.630844449626274
TIMESTEP 3365 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  5.196908 / Loss  0.18052369356155396
fps: 10.5744798862456
TIMESTEP 3366 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.157402 / Loss  0.20554697513580322
fps: 12.411937560109491
TIMESTEP 3367 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  7.4941325 / Loss  0.10373914986848831
fps: 11.043163687106711
TIMESTEP 3368 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.7495337 / Loss  0.1651252955198288
fps: 8.961536982355982
TIMESTEP 3369 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.6122575 / Loss  0.05928139388561249
fps: 10.575439728901081
TIMESTEP 3370 / STATE explore / EPSILON 0 / ACTION

TIMESTEP 3424 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.5304065 / Loss  0.1413501501083374
fps: 10.74506528806421
TIMESTEP 3425 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  4.9359217 / Loss  0.11399485170841217
fps: 10.51702184744152
TIMESTEP 3426 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.578661 / Loss  0.1739494651556015
fps: 10.803240222024753
TIMESTEP 3427 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.305674 / Loss  0.2146545797586441
fps: 9.167777401339876
TIMESTEP 3428 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  4.1108966 / Loss  0.0879674181342125
fps: 10.355334561201666
TIMESTEP 3429 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.137583 / Loss  0.2082173228263855
fps: 13.323794941518052
TIMESTEP 3430 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.272236 / Loss  0.15667928755283356
fps: 10.687676936523266
TIMESTEP 3431 / STATE explore / EPSILON 0 / ACTION 1 

TIMESTEP 3486 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.842827 / Loss  0.10311524569988251
fps: 10.249108094107068
TIMESTEP 3487 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  3.64899 / Loss  0.35200488567352295
fps: 11.2280160725137
TIMESTEP 3488 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  4.059963 / Loss  0.30924999713897705
fps: 12.413553884355736
TIMESTEP 3489 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.168048 / Loss  0.3841623067855835
fps: 10.861879740721847
TIMESTEP 3490 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.761435 / Loss  0.16924573481082916
fps: 12.811377291234587
TIMESTEP 3491 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.214742 / Loss  0.21864108741283417
fps: 10.355232296897606
TIMESTEP 3492 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.242675 / Loss  0.29353147745132446
fps: 10.302048716267166
TIMESTEP 3493 / STATE explore / EPSILON 0 / ACTION

TIMESTEP 3547 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  12.026623 / Loss  0.258291631937027
fps: 10.861879740721847
TIMESTEP 3548 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  12.076668 / Loss  0.38809579610824585
fps: 12.811455555657371
TIMESTEP 3549 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  7.2695684 / Loss  0.3874925971031189
fps: 10.74501023443124
TIMESTEP 3550 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.908939 / Loss  0.27025365829467773
fps: 10.463707735946493
TIMESTEP 3551 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  3.9347432 / Loss  0.5245252847671509
fps: 10.745120342261332
TIMESTEP 3552 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.854031 / Loss  0.16464705765247345
fps: 10.687595236032474
TIMESTEP 3553 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.003619 / Loss  0.7223504781723022
fps: 8.962092285338832
TIMESTEP 3554 / STATE explore / EPSILON 0 / ACTIO

fps: 10.408270426673417
TIMESTEP 3608 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  5.2801437 / Loss  0.18077310919761658
fps: 10.631760653169652
TIMESTEP 3609 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.001321 / Loss  0.3435311019420624
fps: 13.235626942678174
TIMESTEP 3610 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.327804 / Loss  0.06214817613363266
fps: 10.574533206266608
TIMESTEP 3611 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.149618 / Loss  0.10275313258171082
fps: 12.647273977270329
TIMESTEP 3612 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.553515 / Loss  0.5891128182411194
fps: 10.745808567329371
TIMESTEP 3613 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.4266267 / Loss  0.24813923239707947
fps: 12.978670602687759
TIMESTEP 3614 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.206611 / Loss  0.5208361148834229
fps: 10.518894517730851
TIMESTEP 3615 / STATE exp

TIMESTEP 3670 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.600108 / Loss  0.28132158517837524
fps: 10.981146993755809
TIMESTEP 3671 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.377035 / Loss  0.13253171741962433
fps: 10.46381215400697
TIMESTEP 3672 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  3.442682 / Loss  0.15772011876106262
fps: 13.062583114549367
TIMESTEP 3673 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  6.3001814 / Loss  0.2829333245754242
fps: 12.649219207083531
TIMESTEP 3674 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.001976 / Loss  0.3356193006038666
fps: 10.861851612067849
TIMESTEP 3675 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.0357003 / Loss  0.2751449942588806
fps: 10.518815377323238
TIMESTEP 3676 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  5.869367 / Loss  0.07231283187866211
fps: 10.745120342261332
TIMESTEP 3677 / STATE explore / EPSILON 0 / ACTIO

fps: 11.103180086669473
TIMESTEP 3732 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.155966 / Loss  0.08085565268993378
fps: 12.18647382015649
TIMESTEP 3733 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.704368 / Loss  0.16314810514450073
fps: 13.413359940645227
TIMESTEP 3734 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  7.320091 / Loss  0.14469294250011444
fps: 12.893966036668592
TIMESTEP 3735 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  5.881235 / Loss  0.23073971271514893
fps: 10.407547282174459
TIMESTEP 3736 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.7216964 / Loss  0.13984912633895874
fps: 10.80496569916766
TIMESTEP 3737 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.189598 / Loss  0.28766754269599915
fps: 9.210072814468033
TIMESTEP 3738 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.318236 / Loss  0.1797845959663391
fps: 10.68756800281312
TIMESTEP 3739 / STATE explo

TIMESTEP 3793 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  4.767898 / Loss  1.1363921165466309
fps: 10.46355111276316
TIMESTEP 3794 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.825603 / Loss  0.11436726152896881
fps: 12.894283149493981
TIMESTEP 3795 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.9376225 / Loss  0.29981470108032227
fps: 10.86182348355954
TIMESTEP 3796 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.0937524 / Loss  0.592189610004425
fps: 10.803101094655505
TIMESTEP 3797 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.423086 / Loss  0.21068693697452545
fps: 10.63079056021777
TIMESTEP 3798 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.6598287 / Loss  0.3188484311103821
fps: 10.5744798862456
TIMESTEP 3799 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.151453 / Loss  0.32122552394866943
fps: 10.301567964789562
TIMESTEP 3800 / STATE explore / EPSILON 0 / ACTION 1 /

TIMESTEP 3854 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.068088 / Loss  0.14900687336921692
fps: 10.68748630398777
TIMESTEP 3855 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.237648 / Loss  0.334983229637146
fps: 10.803128919842678
TIMESTEP 3856 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  5.0808387 / Loss  0.21039380133152008
fps: 10.687622469390615
TIMESTEP 3857 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.121698 / Loss  0.24073219299316406
fps: 12.729801175767177
TIMESTEP 3858 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  5.544658 / Loss  0.2388775646686554
fps: 12.894084952150243
TIMESTEP 3859 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  7.13215 / Loss  0.09748747944831848
fps: 14.804279305512907
TIMESTEP 3860 / STATE explore / EPSILON 0 / ACTION 0 / REWARD -1 / Q_MAX  7.1304846 / Loss  0.19284510612487793
fps: 13.688892370154242
TIMESTEP 3861 / STATE explore / EPSILON 0 / ACTION 

TIMESTEP 3916 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.516399 / Loss  0.45047271251678467
fps: 15.988472666420162
TIMESTEP 3917 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  5.7789497 / Loss  0.15872597694396973
fps: 12.56934967170423
TIMESTEP 3918 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.936397 / Loss  0.1948089897632599
fps: 12.649562545049868
TIMESTEP 3919 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  7.664954 / Loss  0.29067519307136536
fps: 13.148535709135595
TIMESTEP 3920 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.16336 / Loss  0.18478351831436157
fps: 12.977827284260032
TIMESTEP 3921 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.95469 / Loss  0.35139989852905273
fps: 12.413553884355736
TIMESTEP 3922 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.371168 / Loss  0.1436072289943695
fps: 12.977827284260032
TIMESTEP 3923 / STATE explore / EPSILON 0 / ACTI

TIMESTEP 3978 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.171237 / Loss  0.10248111933469772
fps: 10.687649702887546
TIMESTEP 3979 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.724776 / Loss  0.28342294692993164
fps: 12.41329671370395
TIMESTEP 3980 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.451238 / Loss  0.5592145919799805
fps: 11.10320947913468
TIMESTEP 3981 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.7744637 / Loss  0.23416370153427124
fps: 10.745120342261332
TIMESTEP 3982 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  4.5533733 / Loss  0.14250782132148743
fps: 9.943161667788766
TIMESTEP 3983 / STATE explore / EPSILON 0 / ACTION 1 / REWARD -1 / Q_MAX  9.037303 / Loss  0.1984327733516693
fps: 11.486114420135666
TIMESTEP 3984 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  5.7209406 / Loss  0.2600046992301941
fps: 10.745092815092251
TIMESTEP 3985 / STATE explore / EPSILON 0 / ACTION 

TIMESTEP 4040 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  5.408499 / Loss  0.17499591410160065
fps: 13.503919845202335
TIMESTEP 4041 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.305752 / Loss  0.1719542145729065
fps: 12.569726357051453
TIMESTEP 4042 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  6.0669003 / Loss  0.09336983412504196
fps: 12.81141642332645
TIMESTEP 4043 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.980985 / Loss  0.3760993480682373
fps: 12.894084952150243
TIMESTEP 4044 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.349542 / Loss  0.20027950406074524
fps: 12.97778712897327
TIMESTEP 4045 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.141444 / Loss  0.14559313654899597
fps: 12.649257354826062
TIMESTEP 4046 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  7.9498625 / Loss  0.2316141426563263
fps: 16.11755664176581
TIMESTEP 4047 / STATE explore / EPSILON 0 / ACTION 

fps: 9.383020514082459
TIMESTEP 4103 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.062216 / Loss  0.2109038233757019
fps: 10.80307326961167
TIMESTEP 4104 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.257624 / Loss  0.17548829317092896
fps: 12.811455555657371
TIMESTEP 4105 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.167272 / Loss  0.4542926549911499
fps: 10.5744798862456
TIMESTEP 4106 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.598863 / Loss  0.15307673811912537
fps: 10.687595236032474
TIMESTEP 4107 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.1824512 / Loss  0.13423919677734375
fps: 9.084479098982023
TIMESTEP 4108 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.666475 / Loss  0.21043509244918823
fps: 10.861795355196918
TIMESTEP 4109 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.4201593 / Loss  0.1698441207408905
fps: 10.518788997451999
TIMESTEP 4110 / STATE explore

fps: 10.249083049675372
TIMESTEP 4165 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.901074 / Loss  3.053414821624756
fps: 10.574533206266608
TIMESTEP 4166 / STATE explore / EPSILON 0 / ACTION 1 / REWARD -1 / Q_MAX  9.704155 / Loss  0.189034104347229
fps: 10.981146993755809
TIMESTEP 4167 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.1705413 / Loss  0.11377258598804474
fps: 12.977827284260032
TIMESTEP 4168 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.38181 / Loss  0.22894108295440674
fps: 10.745120342261332
TIMESTEP 4169 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.47132 / Loss  0.34256553649902344
fps: 10.5744798862456
TIMESTEP 4170 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.085616 / Loss  0.3398095667362213
fps: 10.921187755824732
TIMESTEP 4171 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  5.494156 / Loss  0.19482430815696716
fps: 12.729801175767177
TIMESTEP 4172 / STATE explore 

TIMESTEP 4225 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.716153 / Loss  0.22285063564777374
fps: 13.062623796295142
TIMESTEP 4226 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  7.22187 / Loss  0.2674577534198761
fps: 12.89412459113155
TIMESTEP 4227 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.13648 / Loss  0.3232465088367462
fps: 12.56818209058983
TIMESTEP 4228 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  6.1392627 / Loss  0.265531450510025
fps: 10.302934428565113
TIMESTEP 4229 / STATE explore / EPSILON 0 / ACTION 1 / REWARD -1 / Q_MAX  10.012124 / Loss  0.21689900755882263
fps: 11.2280160725137
TIMESTEP 4230 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.581964 / Loss  0.14463217556476593
fps: 10.744900128857726
TIMESTEP 4231 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.634671 / Loss  0.19177371263504028
fps: 10.043037786764934
TIMESTEP 4232 / STATE explore / EPSILON 0 / ACTION 1 / RE

fps: 10.630736671355612
TIMESTEP 4287 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.590853 / Loss  0.469197154045105
fps: 10.687595236032474
TIMESTEP 4288 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.314378 / Loss  0.11256957054138184
fps: 10.5744798862456
TIMESTEP 4289 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  3.8163335 / Loss  0.24118797481060028
fps: 10.630628895270297
TIMESTEP 4290 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  3.6503215 / Loss  0.14680761098861694
fps: 10.86196412755797
TIMESTEP 4291 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.106028 / Loss  0.10122692584991455
fps: 12.894758847979537
TIMESTEP 4292 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  5.1506195 / Loss  0.2981342077255249
fps: 12.649295502798687
TIMESTEP 4293 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.928128 / Loss  0.334711492061615
fps: 10.686043164001294
TIMESTEP 4294 / STATE explore

TIMESTEP 4349 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.321675 / Loss  0.38493430614471436
fps: 8.843257557548693
TIMESTEP 4350 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.6299863 / Loss  0.39351320266723633
fps: 8.224625418407786
TIMESTEP 4351 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  2.0828617 / Loss  0.2743930220603943
fps: 9.210072814468033
TIMESTEP 4352 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  4.480368 / Loss  0.21971678733825684
fps: 9.043229281208092
TIMESTEP 4353 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.521114 / Loss  0.3101312518119812
fps: 9.252862355448292
TIMESTEP 4354 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.6348753 / Loss  0.2315429151058197
fps: 9.210012142984187
TIMESTEP 4355 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.242088 / Loss  0.3774026036262512
fps: 9.126027528405258
TIMESTEP 4356 / STATE explore / EPSILON 0 / ACTION 1 / R

TIMESTEP 4411 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.021288 / Loss  0.2575349807739258
fps: 13.235835778976933
TIMESTEP 4412 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  7.912911 / Loss  0.15441130101680756
fps: 9.04330727336234
TIMESTEP 4413 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.444965 / Loss  0.2258807271718979
fps: 10.687595236032474
TIMESTEP 4414 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  5.9831715 / Loss  0.2712041139602661
fps: 10.803435006362077
TIMESTEP 4415 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  3.3793235 / Loss  0.21655036509037018
fps: 10.68756800281312
TIMESTEP 4416 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  4.364491 / Loss  0.27492666244506836
fps: 10.630763615718399
TIMESTEP 4417 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.074585 / Loss  0.1544155478477478
fps: 10.63079056021777
TIMESTEP 4418 / STATE explore / EPSILON 0 / ACTION 1 /

TIMESTEP 4474 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  12.179166 / Loss  0.49291548132896423
fps: 10.745120342261332
TIMESTEP 4475 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.426201 / Loss  0.4034050703048706
fps: 12.413333451716106
TIMESTEP 4476 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  13.359785 / Loss  0.39023900032043457
fps: 13.235710476406863
TIMESTEP 4477 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.746084 / Loss  0.23694944381713867
fps: 12.491189197721134
TIMESTEP 4478 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.1279364 / Loss  0.33749186992645264
fps: 10.921216192641081
TIMESTEP 4479 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.9989204 / Loss  0.2608431577682495
fps: 10.68756800281312
TIMESTEP 4480 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.569478 / Loss  0.14654356241226196
fps: 15.61380049734205
TIMESTEP 4481 / STATE explore / EPSILON 0 / AC

TIMESTEP 4536 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.900606 / Loss  0.11998645961284637
fps: 10.5744798862456
TIMESTEP 4537 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.914716 / Loss  0.29911890625953674
fps: 12.569651018175824
TIMESTEP 4538 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  5.0787735 / Loss  1.1390748023986816
fps: 10.687649702887546
TIMESTEP 4539 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.252317 / Loss  0.1159108355641365
fps: 10.803101094655505
TIMESTEP 4540 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.991569 / Loss  0.20058761537075043
fps: 10.687649702887546
TIMESTEP 4541 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.1901 / Loss  0.2856125831604004
fps: 10.803101094655505
TIMESTEP 4542 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  5.6487536 / Loss  0.24262166023254395
fps: 10.354772132523577
TIMESTEP 4543 / STATE explore / EPSILON 0 / ACTION 1 

TIMESTEP 4598 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  12.289467 / Loss  1.0085153579711914
fps: 10.518657100080501
TIMESTEP 4599 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.026309 / Loss  0.6452968120574951
fps: 10.803351526500292
TIMESTEP 4600 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  12.206792 / Loss  0.4951922595500946
fps: 12.894045313412647
TIMESTEP 4601 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.471435 / Loss  0.49125897884368896
fps: 12.729376202586966
TIMESTEP 4602 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  12.643666 / Loss  0.5857609510421753
fps: 10.57482647599406
TIMESTEP 4603 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  13.400546 / Loss  0.3863952159881592
fps: 13.062542433056986
TIMESTEP 4604 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  5.5027323 / Loss  0.30633604526519775
fps: 12.811377291234587
TIMESTEP 4605 / STATE explore / EPSILON 0 / ACT

fps: 10.745092815092251
TIMESTEP 4661 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  0.5897648 / Loss  0.24523311853408813
fps: 12.259780953410948
TIMESTEP 4662 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  6.378632 / Loss  0.10049629956483841
fps: 13.325530488599142
TIMESTEP 4663 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  7.675591 / Loss  0.16105929017066956
fps: 12.729801175767177
TIMESTEP 4664 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  7.87401 / Loss  0.19489824771881104
fps: 10.518841757326793
TIMESTEP 4665 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.0365148 / Loss  0.14570793509483337
fps: 10.92130150397867
TIMESTEP 4666 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.0515466 / Loss  0.4453839063644409
fps: 12.64784604157746
TIMESTEP 4667 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  3.6327562 / Loss  0.34797924757003784
fps: 12.650478204088035
TIMESTEP 4668 / STATE ex

TIMESTEP 4723 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  5.086855 / Loss  0.25057488679885864
fps: 8.962226334300574
TIMESTEP 4724 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.88737 / Loss  0.236954003572464
fps: 9.210072814468033
TIMESTEP 4725 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.677245 / Loss  0.20841416716575623
fps: 10.630817504853729
TIMESTEP 4726 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  7.879244 / Loss  1.6649270057678223
fps: 10.861795355196918
TIMESTEP 4727 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.004008 / Loss  0.15773051977157593
fps: 9.12590839070156
TIMESTEP 4728 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.984452 / Loss  0.20373231172561646
fps: 10.687595236032474
TIMESTEP 4729 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.8447447 / Loss  0.20358259975910187
fps: 10.5165471919043
TIMESTEP 4730 / STATE explore / EPSILON 0 / ACTION 1 / RE

TIMESTEP 4785 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.878334 / Loss  0.35352760553359985
fps: 10.46362942376873
TIMESTEP 4786 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  12.205221 / Loss  0.35209545493125916
fps: 10.981348246358143
TIMESTEP 4787 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  13.948714 / Loss  0.6530231237411499
fps: 10.63079056021777
TIMESTEP 4788 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.376764 / Loss  0.536153256893158
fps: 10.630763615718399
TIMESTEP 4789 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.931423 / Loss  0.22473622858524323
fps: 10.630709727129409
TIMESTEP 4790 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.481117 / Loss  0.22504857182502747
fps: 9.084459422873245
TIMESTEP 4791 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.798413 / Loss  0.4454975724220276
fps: 13.062623796295142
TIMESTEP 4792 / STATE explore / EPSILON 0 / ACTION

fps: 10.463786049296477
TIMESTEP 4846 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  12.633317 / Loss  0.29029130935668945
fps: 13.323879591862667
TIMESTEP 4847 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  6.218932 / Loss  0.2738625407218933
fps: 12.72976254066916
TIMESTEP 4848 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.742351 / Loss  0.6002694368362427
fps: 10.40803797641115
TIMESTEP 4849 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.021383 / Loss  0.48500654101371765
fps: 13.2375902641014
TIMESTEP 4850 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  2.7341976 / Loss  0.3475649952888489
fps: 10.63079056021777
TIMESTEP 4851 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  7.953611 / Loss  0.6446132063865662
fps: 10.196634430717019
TIMESTEP 4852 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.715227 / Loss  0.23703253269195557
fps: 11.165070821454336
TIMESTEP 4853 / STATE explor

TIMESTEP 4907 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  0.20182635 / Loss  0.1758776605129242
fps: 10.74498270782631
TIMESTEP 4908 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.58522 / Loss  0.3683950901031494
fps: 10.463838258847714
TIMESTEP 4909 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  3.6302528 / Loss  0.46436846256256104
fps: 10.981204493746613
TIMESTEP 4910 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  4.186606 / Loss  0.22848805785179138
fps: 10.574533206266608
TIMESTEP 4911 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.731996 / Loss  0.4403610825538635
fps: 13.062623796295142
TIMESTEP 4912 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  5.746801 / Loss  0.10714124143123627
fps: 10.463759944716234
TIMESTEP 4913 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  3.2364032 / Loss  0.46355879306793213
fps: 10.803212396264223
TIMESTEP 4914 / STATE explore / EPSILON 0 / ACTIO

fps: 10.687595236032474
TIMESTEP 4969 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  4.102041 / Loss  0.6306557059288025
fps: 12.72976254066916
TIMESTEP 4970 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.616881 / Loss  0.11400319635868073
fps: 12.811455555657371
TIMESTEP 4971 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.095406 / Loss  0.38380032777786255
fps: 12.894045313412647
TIMESTEP 4972 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  7.50299 / Loss  0.8842828273773193
fps: 10.687595236032474
TIMESTEP 4973 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.409568 / Loss  0.28109779953956604
fps: 12.811377291234587
TIMESTEP 4974 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.332015 / Loss  0.31522682309150696
fps: 10.301947501700901
TIMESTEP 4975 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.482975 / Loss  0.24454936385154724
fps: 10.861879740721847
TIMESTEP 4976 / STATE explor

TIMESTEP 5032 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.472191 / Loss  0.43290138244628906
fps: 11.896304868225497
TIMESTEP 5033 / STATE explore / EPSILON 0 / ACTION 0 / REWARD -1 / Q_MAX  10.865454 / Loss  0.45606905221939087
fps: 9.749162060517593
TIMESTEP 5034 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  12.131961 / Loss  0.38982659578323364
fps: 9.125888534720032
TIMESTEP 5035 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  12.627949 / Loss  0.5270894765853882
fps: 9.252760294990326
TIMESTEP 5036 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.702057 / Loss  0.8213912844657898
fps: 9.210052590551268
TIMESTEP 5037 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.311084 / Loss  2.6755919456481934
fps: 9.167797440022383
TIMESTEP 5038 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.195197 / Loss  0.4038500189781189
fps: 9.12590839070156
TIMESTEP 5039 / STATE explore / EPSILON 0 / ACTION 1 / 

TIMESTEP 5094 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.473969 / Loss  0.7528895139694214
fps: 10.145063408097563
TIMESTEP 5095 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  7.3880053 / Loss  0.21339471638202667
fps: 11.103474018324434
TIMESTEP 5096 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.487272 / Loss  0.46923041343688965
fps: 10.74506528806421
TIMESTEP 5097 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.067551 / Loss  0.840782105922699
fps: 10.574399907222292
TIMESTEP 5098 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  3.2821548 / Loss  0.5318172574043274
fps: 9.210153711023276
TIMESTEP 5099 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.8384724 / Loss  0.7265430688858032
fps: 10.249108094107068
TIMESTEP 5100 / STATE explore / EPSILON 0 / ACTION 1 / REWARD -1 / Q_MAX  7.517649 / Loss  0.17710071802139282
fps: 11.165219428309792
TIMESTEP 5101 / STATE explore / EPSILON 0 / ACTION 1 

TIMESTEP 5156 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  1.7749267 / Loss  0.09573331475257874
fps: 10.861851612067849
TIMESTEP 5157 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.949846 / Loss  0.1612168252468109
fps: 10.74506528806421
TIMESTEP 5158 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.26972 / Loss  0.23334309458732605
fps: 10.409329498159762
TIMESTEP 5159 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.9148088 / Loss  0.2191477119922638
fps: 11.041884513523616
TIMESTEP 5160 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.56117 / Loss  0.2315348982810974
fps: 10.574453226436738
TIMESTEP 5161 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.292637 / Loss  0.1771266609430313
fps: 10.68754076973255
TIMESTEP 5162 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  2.7325616 / Loss  0.2027631402015686
fps: 10.092675520177872
TIMESTEP 5163 / STATE explore / EPSILON 0 / ACTION 1 /

TIMESTEP 5217 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  4.545709 / Loss  0.15879078209400177
fps: 13.062664478294316
TIMESTEP 5218 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  6.388618 / Loss  0.717657744884491
fps: 10.408838682138994
TIMESTEP 5219 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.876386 / Loss  0.20400479435920715
fps: 12.894283149493981
TIMESTEP 5220 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.040749 / Loss  0.2730930745601654
fps: 10.803156745173188
TIMESTEP 5221 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.0962777 / Loss  0.24961254000663757
fps: 12.729067149004875
TIMESTEP 5222 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  2.0974853 / Loss  0.2107114940881729
fps: 12.894838134472899
TIMESTEP 5223 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.488323 / Loss  0.2208065390586853
fps: 10.518868137462663
TIMESTEP 5224 / STATE explore / EPSILON 0 / ACTION

fps: 8.157489993504043
TIMESTEP 5279 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.030869 / Loss  0.21319568157196045
fps: 7.930869912150332
TIMESTEP 5280 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.347017 / Loss  0.21337401866912842
fps: 8.124355222047678
TIMESTEP 5281 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  12.691371 / Loss  0.33094823360443115
fps: 8.962207184203384
TIMESTEP 5282 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.00004 / Loss  0.1176929771900177
fps: 7.215088315638095
TIMESTEP 5283 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.893794 / Loss  0.31512290239334106
fps: 7.837549868729621
TIMESTEP 5284 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  12.1292095 / Loss  0.549267590045929
fps: 9.339271081976557
TIMESTEP 5285 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  12.608367 / Loss  0.2545253038406372
fps: 7.9624119385757925
TIMESTEP 5286 / STATE explore 

TIMESTEP 5341 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.909821 / Loss  0.26189178228378296
fps: 9.252597002940586
TIMESTEP 5342 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  12.168248 / Loss  0.1789807230234146
fps: 10.630736671355612
TIMESTEP 5343 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  12.994251 / Loss  0.2024553418159485
fps: 9.043443762869371
TIMESTEP 5344 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  12.970245 / Loss  0.4422153830528259
fps: 10.687595236032474
TIMESTEP 5345 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  13.773132 / Loss  0.5103968977928162
fps: 9.25265823678382
TIMESTEP 5346 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.309363 / Loss  1.2304807901382446
fps: 10.518762617713074
TIMESTEP 5347 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.054495 / Loss  0.5769857168197632
fps: 10.463786049296477
TIMESTEP 5348 / STATE explore / EPSILON 0 / ACTION 1

TIMESTEP 5403 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.0011706 / Loss  0.2228023260831833
fps: 10.803184570647037
TIMESTEP 5404 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  12.718208 / Loss  0.5949179530143738
fps: 10.144670805830948
TIMESTEP 5405 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.188395 / Loss  0.6807769536972046
fps: 9.608547642937978
TIMESTEP 5406 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.7881565 / Loss  0.3093052804470062
fps: 10.40927783113573
TIMESTEP 5407 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.794509 / Loss  0.5477738380432129
fps: 9.25267864824501
TIMESTEP 5408 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.454831 / Loss  0.23064890503883362
fps: 10.745092815092251
TIMESTEP 5409 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.859726 / Loss  0.7189788818359375
fps: 10.63068278303979
TIMESTEP 5410 / STATE explore / EPSILON 0 / ACTION 1

TIMESTEP 5465 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  12.003598 / Loss  0.4222627580165863
fps: 10.74487260281693
TIMESTEP 5466 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.531652 / Loss  0.6432938575744629
fps: 10.355334561201666
TIMESTEP 5467 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  13.004769 / Loss  0.28119996190071106
fps: 11.10341523074846
TIMESTEP 5468 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  13.186103 / Loss  0.1255084127187729
fps: 12.649448096990168
TIMESTEP 5469 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.542713 / Loss  0.5727057456970215
fps: 12.893966036668592
TIMESTEP 5470 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  7.8793044 / Loss  0.1888863444328308
fps: 12.491077597503157
TIMESTEP 5471 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  13.613198 / Loss  0.31464883685112
fps: 13.148535709135595
TIMESTEP 5472 / STATE explore / EPSILON 0 / ACTION 0

TIMESTEP 5527 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.650032 / Loss  0.1727575659751892
fps: 10.574453226436738
TIMESTEP 5528 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  5.1427636 / Loss  0.1822972446680069
fps: 10.688630201219139
TIMESTEP 5529 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.585124 / Loss  0.14570794999599457
fps: 12.978309167084394
TIMESTEP 5530 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.941079 / Loss  0.36753714084625244
fps: 10.630736671355612
TIMESTEP 5531 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.27267 / Loss  0.23204249143600464
fps: 10.74506528806421
TIMESTEP 5532 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.748378 / Loss  0.12962985038757324
fps: 10.574533206266608
TIMESTEP 5533 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.74449 / Loss  1.0044574737548828
fps: 10.463681631756993
TIMESTEP 5534 / STATE explore / EPSILON 0 / ACTION 1 

fps: 10.921216192641081
TIMESTEP 5588 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  4.8924303 / Loss  0.22015827894210815
fps: 10.409251997816051
TIMESTEP 5589 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.7632194 / Loss  0.3715929388999939
fps: 13.062542433056986
TIMESTEP 5590 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  6.068214 / Loss  0.2742420732975006
fps: 12.89412459113155
TIMESTEP 5591 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.246631 / Loss  0.5793748497962952
fps: 12.569688687500749
TIMESTEP 5592 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  6.6658573 / Loss  0.13884255290031433
fps: 13.062623796295142
TIMESTEP 5593 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.250688 / Loss  0.19095435738563538
fps: 10.630763615718399
TIMESTEP 5594 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.909678 / Loss  0.3237505257129669
fps: 12.569462674937817
TIMESTEP 5595 / STATE expl

fps: 13.064251273936932
TIMESTEP 5650 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  13.752185 / Loss  0.1484414041042328
fps: 10.68754076973255
TIMESTEP 5651 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.7745986 / Loss  0.5221601128578186
fps: 12.489441023381017
TIMESTEP 5652 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.462824 / Loss  0.8392019867897034
fps: 13.064536124219346
TIMESTEP 5653 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  4.2408633 / Loss  0.31058791279792786
fps: 10.300935465411849
TIMESTEP 5654 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  12.910417 / Loss  0.16334033012390137
fps: 12.813021044946188
TIMESTEP 5655 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  7.9127593 / Loss  0.5589344501495361
fps: 13.235585176209229
TIMESTEP 5656 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.803182 / Loss  0.1872575283050537
fps: 12.649219207083531
TIMESTEP 5657 / STATE e

TIMESTEP 5713 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.427928 / Loss  0.5044195652008057
fps: 12.977746973934998
TIMESTEP 5714 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.17606 / Loss  0.3925952911376953
fps: 10.574453226436738
TIMESTEP 5715 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.90181 / Loss  0.3366449773311615
fps: 10.630736671355612
TIMESTEP 5716 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.990782 / Loss  0.12923133373260498
fps: 10.630763615718399
TIMESTEP 5717 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.062467 / Loss  0.15884831547737122
fps: 12.811377291234587
TIMESTEP 5718 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.3174305 / Loss  0.17657563090324402
fps: 10.630763615718399
TIMESTEP 5719 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.040924 / Loss  0.2623237371444702
fps: 10.803128919842678
TIMESTEP 5720 / STATE explore / EPSILON 0 / ACTION

fps: 10.74501023443124
TIMESTEP 5775 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  4.644209 / Loss  0.21441274881362915
fps: 10.249183228136538
TIMESTEP 5776 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  2.7831566 / Loss  0.3241144120693207
fps: 10.980974497396332
TIMESTEP 5777 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.749988 / Loss  0.25221267342567444
fps: 10.861795355196918
TIMESTEP 5778 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.226216 / Loss  0.4066528081893921
fps: 10.574453226436738
TIMESTEP 5779 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.351666 / Loss  0.8290376663208008
fps: 10.745037761177207
TIMESTEP 5780 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.586854 / Loss  0.23686347901821136
fps: 10.687622469390615
TIMESTEP 5781 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.114166 / Loss  0.23084929585456848
fps: 12.649104765236482
TIMESTEP 5782 / STATE explo

TIMESTEP 5837 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.763458 / Loss  1.4893871545791626
fps: 10.63079056021777
TIMESTEP 5838 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  2.9774795 / Loss  0.492372989654541
fps: 10.463786049296477
TIMESTEP 5839 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  13.482817 / Loss  0.368590772151947
fps: 10.63079056021777
TIMESTEP 5840 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.9097385 / Loss  0.5368348360061646
fps: 10.861739098908725
TIMESTEP 5841 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  12.4310255 / Loss  0.38993462920188904
fps: 10.745092815092251
TIMESTEP 5842 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  13.519201 / Loss  0.24946077167987823
fps: 10.74506528806421
TIMESTEP 5843 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.590924 / Loss  0.3585251569747925
fps: 10.630763615718399
TIMESTEP 5844 / STATE explore / EPSILON 0 / ACTION

fps: 10.687513536790767
TIMESTEP 5900 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  12.080731 / Loss  0.45414969325065613
fps: 10.46362942376873
TIMESTEP 5901 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.5110545 / Loss  0.5221958160400391
fps: 10.687922045490453
TIMESTEP 5902 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.665095 / Loss  0.2677488923072815
fps: 10.687595236032474
TIMESTEP 5903 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.555174 / Loss  0.35663101077079773
fps: 10.80307326961167
TIMESTEP 5904 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  12.589642 / Loss  0.5907294750213623
fps: 10.80207166348603
TIMESTEP 5905 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.890089 / Loss  0.29889947175979614
fps: 10.519817910485196
TIMESTEP 5906 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.5382285 / Loss  0.3809754550457001
fps: 10.687649702887546
TIMESTEP 5907 / STATE ex

fps: 10.46352500935512
TIMESTEP 5963 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  12.958459 / Loss  1.318865418434143
fps: 10.687922045490453
TIMESTEP 5964 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  5.8504806 / Loss  1.1544444561004639
fps: 13.148535709135595
TIMESTEP 5965 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  15.551298 / Loss  0.5231037735939026
fps: 12.894084952150243
TIMESTEP 5966 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.7868395 / Loss  1.3155707120895386
fps: 10.518815377323238
TIMESTEP 5967 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  16.099964 / Loss  0.7614901065826416
fps: 12.894084952150243
TIMESTEP 5968 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.228817 / Loss  0.3964693546295166
fps: 10.630709727129409
TIMESTEP 5969 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  14.072455 / Loss  0.7957946062088013
fps: 10.408580376508192
TIMESTEP 5970 / STATE explo

TIMESTEP 6024 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  12.156656 / Loss  0.3440021574497223
fps: 9.296566459869627
TIMESTEP 6025 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  13.049715 / Loss  1.3438892364501953
fps: 10.687622469390615
TIMESTEP 6026 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.498535 / Loss  1.831106185913086
fps: 12.81141642332645
TIMESTEP 6027 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  15.682505 / Loss  0.2799094617366791
fps: 10.803156745173188
TIMESTEP 6028 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  14.302661 / Loss  1.2384370565414429
fps: 12.647579071794084
TIMESTEP 6029 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  12.119594 / Loss  1.0954400300979614
fps: 12.894084952150243
TIMESTEP 6030 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  14.055214 / Loss  0.905803382396698
fps: 12.811377291234587
TIMESTEP 6031 / STATE explore / EPSILON 0 / ACTION 0

TIMESTEP 6086 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  5.6071105 / Loss  0.6305457353591919
fps: 10.57434658854554
TIMESTEP 6087 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  4.8150325 / Loss  0.3084360957145691
fps: 10.803323700166391
TIMESTEP 6088 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.553856 / Loss  0.24441014230251312
fps: 10.518841757326793
TIMESTEP 6089 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.55164 / Loss  0.3700445294380188
fps: 10.745037761177207
TIMESTEP 6090 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.1166277 / Loss  0.5271158218383789
fps: 10.803128919842678
TIMESTEP 6091 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.0905285 / Loss  0.9819341897964478
fps: 10.300960763895711
TIMESTEP 6092 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.302411 / Loss  0.24249865114688873
fps: 13.237297817613735
TIMESTEP 6093 / STATE explore / EPSILON 0 / ACTION

TIMESTEP 6147 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  5.730817 / Loss  0.5072903633117676
fps: 10.630709727129409
TIMESTEP 6148 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.357494 / Loss  0.3537008762359619
fps: 10.745120342261332
TIMESTEP 6149 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.8937464 / Loss  0.2744021415710449
fps: 12.491077597503157
TIMESTEP 6150 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  5.759427 / Loss  0.43578559160232544
fps: 10.80307326961167
TIMESTEP 6151 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.031782 / Loss  0.23418249189853668
fps: 10.687595236032474
TIMESTEP 6152 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  5.324086 / Loss  0.261048287153244
fps: 10.68642433291124
TIMESTEP 6153 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.3044486 / Loss  0.38875964283943176
fps: 12.571271003051175
TIMESTEP 6154 / STATE explore / EPSILON 0 / ACTION 0 

fps: 10.518894517730851
TIMESTEP 6209 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  5.9307585 / Loss  0.21484440565109253
fps: 10.5744798862456
TIMESTEP 6210 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.143222 / Loss  0.07103286683559418
fps: 10.861879740721847
TIMESTEP 6211 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.848203 / Loss  0.1753736138343811
fps: 12.413517145038801
TIMESTEP 6212 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.103504 / Loss  0.5176688432693481
fps: 10.981204493746613
TIMESTEP 6213 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.788663 / Loss  0.38638007640838623
fps: 12.729801175767177
TIMESTEP 6214 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.159756 / Loss  0.2785283625125885
fps: 12.569651018175824
TIMESTEP 6215 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.777397 / Loss  0.23793894052505493
fps: 10.982383376275163
TIMESTEP 6216 / STATE exp

TIMESTEP 6271 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  5.7031054 / Loss  0.16779965162277222
fps: 10.574239952805097
TIMESTEP 6272 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.274972 / Loss  0.2747114300727844
fps: 10.803268047928622
TIMESTEP 6273 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.981333 / Loss  0.27803105115890503
fps: 12.491077597503157
TIMESTEP 6274 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.747121 / Loss  0.17690031230449677
fps: 12.977305284913816
TIMESTEP 6275 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.813786 / Loss  0.1925288587808609
fps: 12.978429643382068
TIMESTEP 6276 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  4.455798 / Loss  0.21767659485340118
fps: 10.687622469390615
TIMESTEP 6277 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  1.8730301 / Loss  0.28724128007888794
fps: 12.97778712897327
TIMESTEP 6278 / STATE explore / EPSILON 0 / ACTIO

fps: 10.687595236032474
TIMESTEP 6332 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.583008 / Loss  0.35140034556388855
fps: 10.574426566762302
TIMESTEP 6333 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  1.8952014 / Loss  0.19323140382766724
fps: 10.744900128857726
TIMESTEP 6334 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.022622 / Loss  0.5130603313446045
fps: 10.574586526825334
TIMESTEP 6335 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  12.20559 / Loss  0.24254661798477173
fps: 10.803017619954
TIMESTEP 6336 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  0.7628536 / Loss  0.25412100553512573
fps: 9.08451845145528
TIMESTEP 6337 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.306803 / Loss  0.49201691150665283
fps: 10.518841757326793
TIMESTEP 6338 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  4.550086 / Loss  0.4142310917377472
fps: 10.981204493746613
TIMESTEP 6339 / STATE explor

fps: 9.295721707061931
TIMESTEP 6394 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.920915 / Loss  0.414938747882843
fps: 10.301871592081348
TIMESTEP 6395 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  17.551104 / Loss  0.23149403929710388
fps: 8.157537590219444
TIMESTEP 6396 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  16.287708 / Loss  0.5224352478981018
fps: 8.727424056730387
TIMESTEP 6397 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  5.9990234 / Loss  1.3176323175430298
fps: 9.607579221278993
TIMESTEP 6398 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  18.020643 / Loss  0.5552960634231567
fps: 10.631922352541565
TIMESTEP 6399 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.730032 / Loss  1.8136166334152222
fps: 10.51657356053236
TIMESTEP 6400 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.181282 / Loss  0.7591732144355774
fps: 9.339104722207377
TIMESTEP 6401 / STATE explore / 

TIMESTEP 6456 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  22.769289 / Loss  1.9450870752334595
fps: 9.003395884582856
TIMESTEP 6457 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  20.991367 / Loss  1.152503252029419
fps: 10.86182348355954
TIMESTEP 6458 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  20.950989 / Loss  1.0050647258758545
fps: 9.001637518269167
TIMESTEP 6459 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  20.156519 / Loss  0.5854630470275879
fps: 9.254107674351388
TIMESTEP 6460 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  21.27535 / Loss  1.5165019035339355
fps: 9.29578351303731
TIMESTEP 6461 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  21.513424 / Loss  0.4858569800853729
fps: 9.04342426411612
TIMESTEP 6462 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  25.311768 / Loss  1.8769593238830566
fps: 7.898520594172768
TIMESTEP 6463 / STATE explore / EPSILON 0 / ACTION 1 / REWA

TIMESTEP 6517 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  31.959087 / Loss  2.8188445568084717
fps: 12.976261412187643
TIMESTEP 6518 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  14.811225 / Loss  1.3403358459472656
fps: 12.571082611262801
TIMESTEP 6519 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  19.36648 / Loss  1.2998210191726685
fps: 12.89412459113155
TIMESTEP 6520 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  27.628738 / Loss  3.118457317352295
fps: 10.5744798862456
TIMESTEP 6521 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  18.948729 / Loss  2.390957832336426
fps: 10.687649702887546
TIMESTEP 6522 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  17.582798 / Loss  0.9717293381690979
fps: 10.74501023443124
TIMESTEP 6523 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  30.25067 / Loss  2.62308406829834
fps: 10.574319929408798
TIMESTEP 6524 / STATE explore / EPSILON 0 / ACTION 1 / REW

TIMESTEP 6579 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.332493 / Loss  1.0107054710388184
fps: 10.519897065979102
TIMESTEP 6580 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.145794 / Loss  1.1924524307250977
fps: 10.745092815092251
TIMESTEP 6581 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.975778 / Loss  0.21705642342567444
fps: 10.63068278303979
TIMESTEP 6582 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.852346 / Loss  1.331741452217102
fps: 10.249108094107068
TIMESTEP 6583 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  15.182618 / Loss  0.7079035043716431
fps: 12.569651018175824
TIMESTEP 6584 / STATE explore / EPSILON 0 / ACTION 0 / REWARD -1 / Q_MAX  15.405515 / Loss  0.5322784185409546
fps: 11.486114420135666
TIMESTEP 6585 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  7.497313 / Loss  0.8735135793685913
fps: 12.729839811099714
TIMESTEP 6586 / STATE explore / EPSILON 0 / ACTION 0 /

fps: 12.72976254066916
TIMESTEP 6642 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.947383 / Loss  0.588126003742218
fps: 10.981204493746613
TIMESTEP 6643 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  1.593639 / Loss  0.7723218202590942
fps: 10.518894517730851
TIMESTEP 6644 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.026652 / Loss  0.3297346234321594
fps: 10.630709727129409
TIMESTEP 6645 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.560541 / Loss  0.4120924472808838
fps: 12.646625700278001
TIMESTEP 6646 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.8409 / Loss  0.31697338819503784
fps: 10.863764692474655
TIMESTEP 6647 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  7.6600146 / Loss  0.5235839486122131
fps: 12.649257354826062
TIMESTEP 6648 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  4.356565 / Loss  0.6224229335784912
fps: 10.687649702887546
TIMESTEP 6649 / STATE explore / E

TIMESTEP 6705 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  3.9331198 / Loss  0.323971152305603
fps: 12.039485732492487
TIMESTEP 6706 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  6.581518 / Loss  0.2409314215183258
fps: 13.59593124082244
TIMESTEP 6707 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  3.430256 / Loss  0.822674572467804
fps: 12.729878446666769
TIMESTEP 6708 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.172712 / Loss  0.31073224544525146
fps: 12.729839811099714
TIMESTEP 6709 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.192056 / Loss  0.6454058885574341
fps: 9.125888534720032
TIMESTEP 6710 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  3.8955135 / Loss  0.4579669237136841
fps: 13.062583114549367
TIMESTEP 6711 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  4.3542757 / Loss  0.3207101821899414
fps: 12.811259896393315
TIMESTEP 6712 / STATE explore / EPSILON 0 / ACTION 0 /

fps: 10.409303664583634
TIMESTEP 6768 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.098349 / Loss  0.2673397660255432
fps: 9.295680503534939
TIMESTEP 6769 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.6167064 / Loss  0.4794621169567108
fps: 10.518868137462663
TIMESTEP 6770 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.746235 / Loss  0.33591946959495544
fps: 9.295701105252776
TIMESTEP 6771 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.14941 / Loss  0.4493890702724457
fps: 10.5744798862456
TIMESTEP 6772 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  5.744855 / Loss  0.7140748500823975
fps: 9.21003236672332
TIMESTEP 6773 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.713323 / Loss  0.2929701507091522
fps: 10.574559866478756
TIMESTEP 6774 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  0.5650028 / Loss  0.4801417291164398
fps: 12.729801175767177
TIMESTEP 6775 / STATE explore / E

TIMESTEP 6830 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.458462 / Loss  0.703309178352356
fps: 10.920846525579071
TIMESTEP 6831 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.659587 / Loss  0.29350247979164124
fps: 10.80240551155753
TIMESTEP 6832 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  4.995453 / Loss  0.3745066821575165
fps: 10.575599719618156
TIMESTEP 6833 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.3393846 / Loss  0.2649780809879303
fps: 10.745092815092251
TIMESTEP 6834 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.4529986 / Loss  0.2250172197818756
fps: 10.301795683580462
TIMESTEP 6835 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.439304 / Loss  0.31238502264022827
fps: 10.92130150397867
TIMESTEP 6836 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.673587 / Loss  0.217535138130188
fps: 10.687704170297776
TIMESTEP 6837 / STATE explore / EPSILON 0 / ACTION 1 

TIMESTEP 6891 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.770223 / Loss  0.4833696484565735
fps: 10.68756800281312
TIMESTEP 6892 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.985249 / Loss  0.8874422907829285
fps: 12.729878446666769
TIMESTEP 6893 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  5.832783 / Loss  0.21929147839546204
fps: 10.687622469390615
TIMESTEP 6894 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  5.140902 / Loss  0.22952276468276978
fps: 9.209991919333872
TIMESTEP 6895 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.4631863 / Loss  0.4400618374347687
fps: 10.574506546188891
TIMESTEP 6896 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  4.9427605 / Loss  0.30791306495666504
fps: 10.981290744861893
TIMESTEP 6897 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.8959866 / Loss  0.3595731556415558
fps: 10.63079056021777
TIMESTEP 6898 / STATE explore / EPSILON 0 / ACTION 

TIMESTEP 6953 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.806689 / Loss  0.18135300278663635
fps: 10.630655839086753
TIMESTEP 6954 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.900163 / Loss  0.10373885184526443
fps: 12.894520994349449
TIMESTEP 6955 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  7.7571106 / Loss  0.16212788224220276
fps: 8.02645437844458
TIMESTEP 6956 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.067147 / Loss  0.3235965669155121
fps: 8.842884671335867
TIMESTEP 6957 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.509831 / Loss  0.3050577938556671
fps: 9.472017343781758
TIMESTEP 6958 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.150079 / Loss  0.4223026633262634
fps: 10.5744798862456
TIMESTEP 6959 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.2155895 / Loss  0.14303264021873474
fps: 12.89392639866213
TIMESTEP 6960 / STATE explore / EPSILON 0 / ACTION 0 / R

TIMESTEP 7015 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.692303 / Loss  0.22548043727874756
fps: 10.92477196126337
TIMESTEP 7016 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.8653884 / Loss  0.39912542700767517
fps: 10.5744798862456
TIMESTEP 7017 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.882167 / Loss  0.33029764890670776
fps: 10.630763615718399
TIMESTEP 7018 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  5.3314977 / Loss  0.1158137395977974
fps: 10.68650601550119
TIMESTEP 7019 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  4.618804 / Loss  0.15825432538986206
fps: 9.209183046143577
TIMESTEP 7020 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.420352 / Loss  0.4858399033546448
fps: 10.575386399737777
TIMESTEP 7021 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.6310735 / Loss  0.12856774032115936
fps: 9.125193629797232
TIMESTEP 7022 / STATE explore / EPSILON 0 / ACTION 1 

TIMESTEP 7076 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  1.9203191 / Loss  0.1683984100818634
fps: 12.809694837401354
TIMESTEP 7077 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  0.424373 / Loss  0.15609195828437805
fps: 13.062827208825022
TIMESTEP 7078 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  2.852114 / Loss  0.16281408071517944
fps: 12.729801175767177
TIMESTEP 7079 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  6.975235 / Loss  0.23323601484298706
fps: 12.893966036668592
TIMESTEP 7080 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.500896 / Loss  0.32342761754989624
fps: 9.125888534720032
TIMESTEP 7081 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  3.2143888 / Loss  0.19517500698566437
fps: 9.252801118903333
TIMESTEP 7082 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.793181 / Loss  0.10525882244110107
fps: 10.74506528806421
TIMESTEP 7083 / STATE explore / EPSILON 0 / ACTION

TIMESTEP 7137 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.353094 / Loss  0.15991654992103577
fps: 10.745092815092251
TIMESTEP 7138 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  4.7011986 / Loss  0.6378591060638428
fps: 12.81141642332645
TIMESTEP 7139 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.973102 / Loss  0.12260439991950989
fps: 12.894005674918764
TIMESTEP 7140 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  6.626007 / Loss  0.2847133278846741
fps: 10.518841757326793
TIMESTEP 7141 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.4482203 / Loss  0.11790546774864197
fps: 10.803128919842678
TIMESTEP 7142 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.8297944 / Loss  0.29756274819374084
fps: 8.88258639457511
TIMESTEP 7143 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.888501 / Loss  0.16843755543231964
fps: 9.339125516854407
TIMESTEP 7144 / STATE explore / EPSILON 0 / ACTION 

TIMESTEP 7199 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.9147477 / Loss  0.2531837224960327
fps: 10.68754076973255
TIMESTEP 7200 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.7932467 / Loss  0.2162914276123047
fps: 10.745037761177207
TIMESTEP 7201 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.7615957 / Loss  0.3879569172859192
fps: 10.518868137462663
TIMESTEP 7202 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  4.485894 / Loss  0.2215307354927063
fps: 9.295701105252776
TIMESTEP 7203 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.3649545 / Loss  0.06993845105171204
fps: 10.574533206266608
TIMESTEP 7204 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.122038 / Loss  0.07824931293725967
fps: 9.209991919333872
TIMESTEP 7205 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.036249 / Loss  0.2126040905714035
fps: 10.687649702887546
TIMESTEP 7206 / STATE explore / EPSILON 0 / ACTION 1

TIMESTEP 7261 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.015795 / Loss  0.1384660005569458
fps: 8.922209671620964
TIMESTEP 7262 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.3448634 / Loss  0.1779075562953949
fps: 9.339000750361265
TIMESTEP 7263 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.910951 / Loss  0.1848093569278717
fps: 10.518868137462663
TIMESTEP 7264 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.892848 / Loss  0.2056192010641098
fps: 10.803156745173188
TIMESTEP 7265 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.064708 / Loss  0.24597881734371185
fps: 10.744955181362414
TIMESTEP 7266 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  7.3677216 / Loss  0.4568745493888855
fps: 10.355334561201666
TIMESTEP 7267 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  2.8648221 / Loss  0.32387661933898926
fps: 10.981233243967829
TIMESTEP 7268 / STATE explore / EPSILON 0 / ACTION 0

fps: 10.35538569411114
TIMESTEP 7323 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  12.62631 / Loss  0.6158103346824646
fps: 12.649219207083531
TIMESTEP 7324 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  12.301687 / Loss  0.3328136205673218
fps: 9.33841855080208
TIMESTEP 7325 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.3716197 / Loss  0.35519611835479736
fps: 12.979353369312275
TIMESTEP 7326 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.29432 / Loss  0.20315062999725342
fps: 12.41190083035931
TIMESTEP 7327 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  12.866691 / Loss  0.5854198336601257
fps: 13.148700586225274
TIMESTEP 7328 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  7.2301927 / Loss  0.18035143613815308
fps: 10.463603319969964
TIMESTEP 7329 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.6864805 / Loss  0.3188199996948242
fps: 10.981319495534745
TIMESTEP 7330 / STATE explo

TIMESTEP 7384 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  7.307492 / Loss  0.4880461096763611
fps: 9.043365768360863
TIMESTEP 7385 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.03062 / Loss  0.8595080375671387
fps: 12.649219207083531
TIMESTEP 7386 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.273528 / Loss  0.22534361481666565
fps: 13.235668709410717
TIMESTEP 7387 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.372901 / Loss  0.3805184066295624
fps: 10.572720627158377
TIMESTEP 7388 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  7.376903 / Loss  0.2765817642211914
fps: 12.814117115107893
TIMESTEP 7389 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.631879 / Loss  0.37327301502227783
fps: 13.062583114549367
TIMESTEP 7390 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.538757 / Loss  0.14188939332962036
fps: 9.125948102923834
TIMESTEP 7391 / STATE explore / EPSILON 0 / ACTION 

TIMESTEP 7447 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.890222 / Loss  0.3983859717845917
fps: 9.21003236672332
TIMESTEP 7448 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.291327 / Loss  0.2659682035446167
fps: 9.210052590551268
TIMESTEP 7449 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.923757 / Loss  0.19482019543647766
fps: 10.463786049296477
TIMESTEP 7450 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.8989544 / Loss  0.23849526047706604
fps: 10.743909280230744
TIMESTEP 7451 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.873912 / Loss  0.2369147688150406
fps: 9.29578351303731
TIMESTEP 7452 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.324703 / Loss  0.20775292813777924
fps: 10.5744798862456
TIMESTEP 7453 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.147082 / Loss  0.24146956205368042
fps: 9.209971695772373
TIMESTEP 7454 / STATE explore / EPSILON 0 / ACTION 1 / 

TIMESTEP 7508 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.832901 / Loss  0.38449564576148987
fps: 10.302200541845664
TIMESTEP 7509 / STATE explore / EPSILON 0 / ACTION 0 / REWARD -1 / Q_MAX  10.821737 / Loss  0.1777627170085907
fps: 10.98117574367594
TIMESTEP 7510 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.57511 / Loss  0.5698834657669067
fps: 10.687622469390615
TIMESTEP 7511 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  13.090679 / Loss  0.42510926723480225
fps: 12.490928800314483
TIMESTEP 7512 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.8446455 / Loss  0.6389128565788269
fps: 10.981348246358143
TIMESTEP 7513 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.3206472 / Loss  0.6107176542282104
fps: 12.811377291234587
TIMESTEP 7514 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.176676 / Loss  0.23539197444915771
fps: 10.74506528806421
TIMESTEP 7515 / STATE explore / EPSILON 0 / ACTIO

TIMESTEP 7569 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.937395 / Loss  0.23025010526180267
fps: 10.40927783113573
TIMESTEP 7570 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  5.1872053 / Loss  0.25017040967941284
fps: 13.148576928020365
TIMESTEP 7571 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.035469 / Loss  0.3688177764415741
fps: 10.57392005848764
TIMESTEP 7572 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.842509 / Loss  0.6832217574119568
fps: 9.001598880568986
TIMESTEP 7573 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  5.755957 / Loss  0.11802703887224197
fps: 11.043977860983405
TIMESTEP 7574 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  4.269151 / Loss  0.24570332467556
fps: 9.125928246769494
TIMESTEP 7575 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  5.8121114 / Loss  0.3596738576889038
fps: 12.729801175767177
TIMESTEP 7576 / STATE explore / EPSILON 0 / ACTION 0 / R

fps: 10.574399907222292
TIMESTEP 7632 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  5.9646735 / Loss  0.3937682509422302
fps: 13.062827208825022
TIMESTEP 7633 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  7.776868 / Loss  0.2859877049922943
fps: 10.74506528806421
TIMESTEP 7634 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.21537 / Loss  0.4521796703338623
fps: 10.301947501700901
TIMESTEP 7635 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.252438 / Loss  0.3049236238002777
fps: 11.102974343770185
TIMESTEP 7636 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.03201 / Loss  0.74399733543396
fps: 10.517997662836594
TIMESTEP 7637 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.46816 / Loss  0.18099012970924377
fps: 10.688521248076002
TIMESTEP 7638 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.407489 / Loss  0.2197389304637909
fps: 10.355487961445021
TIMESTEP 7639 / STATE explore / E

TIMESTEP 7694 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.573103 / Loss  0.8752081394195557
fps: 10.574533206266608
TIMESTEP 7695 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.194306 / Loss  0.3663293123245239
fps: 10.745037761177207
TIMESTEP 7696 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.220757 / Loss  0.2728136479854584
fps: 10.630736671355612
TIMESTEP 7697 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  12.180504 / Loss  0.19183078408241272
fps: 10.745037761177207
TIMESTEP 7698 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.657373 / Loss  0.3120242655277252
fps: 10.68756800281312
TIMESTEP 7699 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.26905 / Loss  0.8151373267173767
fps: 10.74506528806421
TIMESTEP 7700 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.313212 / Loss  0.32039597630500793
fps: 12.649333651001408
TIMESTEP 7701 / STATE explore / EPSILON 0 / ACTION 0

TIMESTEP 7756 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  5.059187 / Loss  0.2212844043970108
fps: 10.98117574367594
TIMESTEP 7757 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  5.9291835 / Loss  0.3813325762748718
fps: 9.084459422873245
TIMESTEP 7758 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.197971 / Loss  0.23560671508312225
fps: 9.167797440022383
TIMESTEP 7759 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  3.131477 / Loss  0.17251136898994446
fps: 10.407547282174459
TIMESTEP 7760 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.234263 / Loss  0.42332014441490173
fps: 13.065187257225983
TIMESTEP 7761 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  4.380226 / Loss  0.17054837942123413
fps: 9.2958247174775
TIMESTEP 7762 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.724224 / Loss  0.3568291664123535
fps: 10.301719776198219
TIMESTEP 7763 / STATE explore / EPSILON 0 / ACTION 1 / 

TIMESTEP 7818 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.645428 / Loss  0.40492957830429077
fps: 10.574666508671656
TIMESTEP 7819 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.39532 / Loss  0.31081491708755493
fps: 10.86190786952153
TIMESTEP 7820 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.30633 / Loss  0.3199475407600403
fps: 9.16781747879249
TIMESTEP 7821 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  0.99267733 / Loss  0.14730793237686157
fps: 10.518841757326793
TIMESTEP 7822 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.235931 / Loss  0.27476465702056885
fps: 10.86182348355954
TIMESTEP 7823 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.256905 / Loss  0.16716039180755615
fps: 10.301998108735432
TIMESTEP 7824 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.100261 / Loss  0.618822455406189
fps: 10.98117574367594
TIMESTEP 7825 / STATE explore / EPSILON 0 / ACTION 1 /

fps: 10.463707735946493
TIMESTEP 7881 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.588849 / Loss  0.21921879053115845
fps: 10.74481755115843
TIMESTEP 7882 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.340773 / Loss  0.2670835554599762
fps: 9.126087098423612
TIMESTEP 7883 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  1.6150892 / Loss  0.3540240228176117
fps: 9.084459422873245
TIMESTEP 7884 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.593427 / Loss  0.33550313115119934
fps: 9.0844397468497
TIMESTEP 7885 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.886795 / Loss  0.16262148320674896
fps: 10.687649702887546
TIMESTEP 7886 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.446199 / Loss  0.20289692282676697
fps: 9.21003236672332
TIMESTEP 7887 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.0966754 / Loss  0.32335415482521057
fps: 9.125928246769494
TIMESTEP 7888 / STATE explore / 

fps: 10.630871394535408
TIMESTEP 7942 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.692816 / Loss  1.2987840175628662
fps: 11.29143189423328
TIMESTEP 7943 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  4.108609 / Loss  0.19058704376220703
fps: 10.518815377323238
TIMESTEP 7944 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.758141 / Loss  0.42151933908462524
fps: 12.729839811099714
TIMESTEP 7945 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  5.1398516 / Loss  0.38373950123786926
fps: 12.811338159381776
TIMESTEP 7946 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.538175 / Loss  0.24403995275497437
fps: 13.062623796295142
TIMESTEP 7947 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.826457 / Loss  0.21706867218017578
fps: 12.811494688227352
TIMESTEP 7948 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.439642 / Loss  0.29195067286491394
fps: 10.574453226436738
TIMESTEP 7949 / STATE ex

TIMESTEP 8003 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.031334 / Loss  0.6039908528327942
fps: 10.63259615287115
TIMESTEP 8004 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.261214 / Loss  0.6425751447677612
fps: 9.295701105252776
TIMESTEP 8005 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.173113 / Loss  0.2810437083244324
fps: 12.729839811099714
TIMESTEP 8006 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.181978 / Loss  0.223142609000206
fps: 10.518894517730851
TIMESTEP 8007 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  3.5143015 / Loss  0.2447236180305481
fps: 9.209991919333872
TIMESTEP 8008 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  5.3238597 / Loss  0.2913517951965332
fps: 9.167797440022383
TIMESTEP 8009 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.520051 / Loss  0.25026965141296387
fps: 9.210052590551268
TIMESTEP 8010 / STATE explore / EPSILON 0 / ACTION 1 

TIMESTEP 8065 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.892518 / Loss  0.24824415147304535
fps: 9.25265823678382
TIMESTEP 8066 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.3129 / Loss  0.18064670264720917
fps: 10.574559866478756
TIMESTEP 8067 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.188972 / Loss  0.2348504662513733
fps: 9.21003236672332
TIMESTEP 8068 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  2.6961043 / Loss  0.19787998497486115
fps: 9.084400395058307
TIMESTEP 8069 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.391685 / Loss  0.16286998987197876
fps: 9.125372309526554
TIMESTEP 8070 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  5.813289 / Loss  0.35701656341552734
fps: 10.688412297154041
TIMESTEP 8071 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  5.712781 / Loss  0.07910209894180298
fps: 9.084459422873245
TIMESTEP 8072 / STATE explore / EPSILON 0 / ACTION 1 / RE

fps: 10.574453226436738
TIMESTEP 8128 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  5.1572695 / Loss  0.21772319078445435
fps: 9.427245326614416
TIMESTEP 8129 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.435547 / Loss  0.14824612438678741
fps: 9.0433267716111
TIMESTEP 8130 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.648366 / Loss  0.48328444361686707
fps: 10.687622469390615
TIMESTEP 8131 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.438329 / Loss  0.2770421504974365
fps: 8.88207851832087
TIMESTEP 8132 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.859365 / Loss  0.2860983610153198
fps: 9.337774057385078
TIMESTEP 8133 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  5.205592 / Loss  0.27195557951927185
fps: 10.923207137853176
TIMESTEP 8134 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.674314 / Loss  0.2072213888168335
fps: 12.729955718504446
TIMESTEP 8135 / STATE explore / E

fps: 8.090848590184047
TIMESTEP 8189 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  1.9230573 / Loss  0.5710179209709167
fps: 9.210881843975368
TIMESTEP 8190 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.407427 / Loss  0.4275912642478943
fps: 10.687513536790767
TIMESTEP 8191 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.865501 / Loss  0.19231146574020386
fps: 8.962264634740468
TIMESTEP 8192 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  3.6769054 / Loss  0.3124812841415405
fps: 8.192032000125
TIMESTEP 8193 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  4.7261143 / Loss  0.41355717182159424
fps: 10.145014331152419
TIMESTEP 8194 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  4.6894755 / Loss  0.6513142585754395
fps: 9.47199595312673
TIMESTEP 8195 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.986199 / Loss  0.44507116079330444
fps: 9.125948102923834
TIMESTEP 8196 / STATE explore / EP

TIMESTEP 8250 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.8784294 / Loss  0.4201648533344269
fps: 8.92224763079803
TIMESTEP 8251 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  5.5393066 / Loss  0.21296030282974243
fps: 7.267595871944331
TIMESTEP 8252 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.22098 / Loss  0.1041913852095604
fps: 6.385200318475701
TIMESTEP 8253 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.194712 / Loss  0.28629568219184875
fps: 8.058793804746927
TIMESTEP 8254 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.669361 / Loss  0.22370657324790955
fps: 9.043404765446953
TIMESTEP 8255 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.437929 / Loss  0.2802254259586334
fps: 8.961690162512339
TIMESTEP 8256 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.720114 / Loss  0.070778027176857
fps: 10.921984042664
TIMESTEP 8257 / STATE explore / EPSILON 0 / ACTION 0 / REW

fps: 9.25265823678382
TIMESTEP 8312 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.879272 / Loss  0.49334532022476196
fps: 9.04328777519766
TIMESTEP 8313 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  0.6769547 / Loss  0.6767019629478455
fps: 9.339208696368571
TIMESTEP 8314 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  13.41589 / Loss  0.47607964277267456
fps: 10.63079056021777
TIMESTEP 8315 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.0030794 / Loss  0.2853557765483856
fps: 9.295701105252776
TIMESTEP 8316 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.5215745 / Loss  0.151211678981781
fps: 10.355027774348846
TIMESTEP 8317 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.4475737 / Loss  0.19287963211536407
fps: 10.861992256794727
TIMESTEP 8318 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  13.241022 / Loss  0.20988458395004272
fps: 9.339125516854407
TIMESTEP 8319 / STATE explore 

TIMESTEP 8373 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.660803 / Loss  0.41254955530166626
fps: 10.803295873975834
TIMESTEP 8374 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.274782 / Loss  0.23983782529830933
fps: 10.518762617713074
TIMESTEP 8375 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.674357 / Loss  0.19670799374580383
fps: 10.68756800281312
TIMESTEP 8376 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.5774145 / Loss  0.21557891368865967
fps: 9.25273988316891
TIMESTEP 8377 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.37238 / Loss  0.24519680440425873
fps: 10.247355279424196
TIMESTEP 8378 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  7.8995104 / Loss  0.20267188549041748
fps: 9.473429340657354
TIMESTEP 8379 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.488127 / Loss  0.4109715223312378
fps: 10.463786049296477
TIMESTEP 8380 / STATE explore / EPSILON 0 / ACTION

TIMESTEP 8434 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.4792604 / Loss  0.38545435667037964
fps: 9.167837517650197
TIMESTEP 8435 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.034208 / Loss  0.24988749623298645
fps: 9.167777401339876
TIMESTEP 8436 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.82656 / Loss  1.068137526512146
fps: 9.001618199377615
TIMESTEP 8437 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.149824 / Loss  0.16300317645072937
fps: 9.210133486751237
TIMESTEP 8438 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  12.340599 / Loss  0.4054529666900635
fps: 10.68756800281312
TIMESTEP 8439 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.111685 / Loss  0.4022505283355713
fps: 8.882567583307743
TIMESTEP 8440 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  12.418923 / Loss  0.21272218227386475
fps: 9.382957542621803
TIMESTEP 8441 / STATE explore / EPSILON 0 / ACTION 1 /

fps: 10.74501023443124
TIMESTEP 8495 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.3059 / Loss  0.22121724486351013
fps: 12.336950223396151
TIMESTEP 8496 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.055866 / Loss  0.24661695957183838
fps: 9.38299952350166
TIMESTEP 8497 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.2279315 / Loss  0.3504815995693207
fps: 9.084459422873245
TIMESTEP 8498 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.817793 / Loss  0.1453738808631897
fps: 10.74506528806421
TIMESTEP 8499 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.400449 / Loss  0.4400499165058136
fps: 10.86182348355954
TIMESTEP 8500 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.348349 / Loss  0.36150169372558594
fps: 10.63079056021777
TIMESTEP 8501 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  7.695335 / Loss  0.3875526189804077
fps: 10.46362942376873
TIMESTEP 8502 / STATE explore / EPS

TIMESTEP 8556 / STATE explore / EPSILON 0 / ACTION 1 / REWARD -1 / Q_MAX  12.352855 / Loss  0.4962552487850189
fps: 10.921216192641081
TIMESTEP 8557 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.283259 / Loss  0.13693895936012268
fps: 10.921187755824732
TIMESTEP 8558 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  3.024665 / Loss  0.31216368079185486
fps: 12.811377291234587
TIMESTEP 8559 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  7.1757326 / Loss  0.1421157419681549
fps: 12.18643841268181
TIMESTEP 8560 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.810815 / Loss  0.3600846230983734
fps: 9.383041504757175
TIMESTEP 8561 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  12.124429 / Loss  0.31313398480415344
fps: 9.04410677051326
TIMESTEP 8562 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.466428 / Loss  0.24970853328704834
fps: 10.803156745173188
TIMESTEP 8563 / STATE explore / EPSILON 0 / ACTION

TIMESTEP 8618 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  7.8198876 / Loss  0.23616787791252136
fps: 10.630709727129409
TIMESTEP 8619 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  7.4737864 / Loss  0.3925809860229492
fps: 9.210072814468033
TIMESTEP 8620 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.4030204 / Loss  0.20247884094715118
fps: 9.084459422873245
TIMESTEP 8621 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.7656984 / Loss  1.1598732471466064
fps: 10.518868137462663
TIMESTEP 8622 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  7.733853 / Loss  0.12260650843381882
fps: 9.295680503534939
TIMESTEP 8623 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  5.6978545 / Loss  0.2196849286556244
fps: 9.084459422873245
TIMESTEP 8624 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.5327635 / Loss  0.1050354614853859
fps: 10.35530899493629
TIMESTEP 8625 / STATE explore / EPSILON 0 / ACTION 

fps: 8.058809288670451
TIMESTEP 8681 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.617713 / Loss  0.22413098812103271
fps: 9.210012142984187
TIMESTEP 8682 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.624132 / Loss  0.1372281312942505
fps: 9.084498775176035
TIMESTEP 8683 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  12.189705 / Loss  0.2554260194301605
fps: 9.25267864824501
TIMESTEP 8684 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.955153 / Loss  0.3161543011665344
fps: 9.084420070911387
TIMESTEP 8685 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.2462187 / Loss  0.24224485456943512
fps: 9.125928246769494
TIMESTEP 8686 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  12.254159 / Loss  0.3058546483516693
fps: 9.125134071442401
TIMESTEP 8687 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.644268 / Loss  0.13463863730430603
fps: 9.043989762100365
TIMESTEP 8688 / STATE explore /

TIMESTEP 8742 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  7.786062 / Loss  0.5415732860565186
fps: 12.894084952150243
TIMESTEP 8743 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.540914 / Loss  0.2005249559879303
fps: 10.630709727129409
TIMESTEP 8744 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.780869 / Loss  0.6913892030715942
fps: 12.977305284913816
TIMESTEP 8745 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  6.47604 / Loss  0.38923588395118713
fps: 10.519158327690418
TIMESTEP 8746 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  4.819622 / Loss  0.8397818803787231
fps: 12.729801175767177
TIMESTEP 8747 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  6.538485 / Loss  0.16712725162506104
fps: 12.97778712897327
TIMESTEP 8748 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.979642 / Loss  0.18114033341407776
fps: 12.97786743979529
TIMESTEP 8749 / STATE explore / EPSILON 0 / ACTION 0 

TIMESTEP 8804 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  4.5941 / Loss  0.6018881797790527
fps: 11.165249150155594
TIMESTEP 8805 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  4.399751 / Loss  0.1324784755706787
fps: 8.96216888425452
TIMESTEP 8806 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.527833 / Loss  0.8063169717788696
fps: 9.339333468418017
TIMESTEP 8807 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.4044256 / Loss  0.43802255392074585
fps: 7.77660475869984
TIMESTEP 8808 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.238352 / Loss  0.44054991006851196
fps: 9.471931781741318
TIMESTEP 8809 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  5.975722 / Loss  0.3324824273586273
fps: 10.803101094655505
TIMESTEP 8810 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.77525 / Loss  0.15499864518642426
fps: 7.8995172867106
TIMESTEP 8811 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 

TIMESTEP 8866 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.629974 / Loss  0.26335039734840393
fps: 10.86182348355954
TIMESTEP 8867 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.828904 / Loss  0.15417852997779846
fps: 10.574266611538578
TIMESTEP 8868 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  6.0280523 / Loss  0.31585806608200073
fps: 10.4093811656967
TIMESTEP 8869 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.028609 / Loss  0.4588369131088257
fps: 13.235668709410717
TIMESTEP 8870 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.024121 / Loss  0.43161991238594055
fps: 9.893859835350176
TIMESTEP 8871 / STATE explore / EPSILON 0 / ACTION 1 / REWARD -1 / Q_MAX  12.100516 / Loss  0.16095420718193054
fps: 9.894023207044675
TIMESTEP 8872 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.959467 / Loss  0.223806232213974
fps: 9.043385266861867
TIMESTEP 8873 / STATE explore / EPSILON 0 / ACTION 

fps: 10.463838258847714
TIMESTEP 8927 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  2.628245 / Loss  0.17543108761310577
fps: 9.210052590551268
TIMESTEP 8928 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.681326 / Loss  0.14719229936599731
fps: 12.81141642332645
TIMESTEP 8929 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  7.07158 / Loss  0.2962852418422699
fps: 9.043229281208092
TIMESTEP 8930 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.553584 / Loss  0.5730232000350952
fps: 10.86190786952153
TIMESTEP 8931 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  7.0477705 / Loss  0.2197863757610321
fps: 9.12590839070156
TIMESTEP 8932 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.272553 / Loss  0.15397077798843384
fps: 10.518841757326793
TIMESTEP 8933 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.403411 / Loss  0.2612738013267517
fps: 10.574639847921782
TIMESTEP 8934 / STATE explore / 

TIMESTEP 8988 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.904834 / Loss  0.37300628423690796
fps: 10.301871592081348
TIMESTEP 8989 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.208133 / Loss  0.23185841739177704
fps: 8.157553455914673
TIMESTEP 8990 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.036634 / Loss  0.17647983133792877
fps: 7.994282043902467
TIMESTEP 8991 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.366586 / Loss  0.20947445929050446
fps: 8.058824772653475
TIMESTEP 8992 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.31619 / Loss  0.4494595229625702
fps: 10.518683479290178
TIMESTEP 8993 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  4.4755177 / Loss  0.08804495632648468
fps: 8.026500458323207
TIMESTEP 8994 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  12.923511 / Loss  0.278659850358963
fps: 10.74506528806421
TIMESTEP 8995 / STATE explore / EPSILON 0 / ACTION 

TIMESTEP 9051 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  6.1806154 / Loss  0.22067733108997345
fps: 9.003434537711223
TIMESTEP 9052 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  4.3798456 / Loss  1.0362095832824707
fps: 13.235626942678174
TIMESTEP 9053 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  4.1832857 / Loss  0.2155170440673828
fps: 12.811377291234587
TIMESTEP 9054 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.459646 / Loss  0.13033868372440338
fps: 10.574506546188891
TIMESTEP 9055 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.635174 / Loss  0.4153362810611725
fps: 10.5744798862456
TIMESTEP 9056 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.12553 / Loss  0.20726174116134644
fps: 12.894164230356578
TIMESTEP 9057 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.520562 / Loss  0.5783827900886536
fps: 12.811338159381776
TIMESTEP 9058 / STATE explore / EPSILON 0 / ACTION 0

TIMESTEP 9113 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.0968804 / Loss  0.19976381957530975
fps: 12.569613349076677
TIMESTEP 9114 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.956214 / Loss  0.8818640112876892
fps: 13.062664478294316
TIMESTEP 9115 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  12.187931 / Loss  0.32524892687797546
fps: 12.811377291234587
TIMESTEP 9116 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.18043 / Loss  0.28245627880096436
fps: 10.463707735946493
TIMESTEP 9117 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  12.627771 / Loss  0.6779745221138
fps: 10.687622469390615
TIMESTEP 9118 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.891565 / Loss  0.4600497782230377
fps: 9.295350888468306
TIMESTEP 9119 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  13.454689 / Loss  0.24946202337741852
fps: 12.811494688227352
TIMESTEP 9120 / STATE explore / EPSILON 0 / ACTIO

TIMESTEP 9174 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.744959 / Loss  0.5356428623199463
fps: 7.994282043902467
TIMESTEP 9175 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  3.9794316 / Loss  0.36454346776008606
fps: 8.058855740798027
TIMESTEP 9176 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  2.3835864 / Loss  0.4203000068664551
fps: 8.058793804746927
TIMESTEP 9177 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.5934534 / Loss  0.2783254384994507
fps: 10.574506546188891
TIMESTEP 9178 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.545797 / Loss  0.25126680731773376
fps: 12.894084952150243
TIMESTEP 9179 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.915241 / Loss  0.42264309525489807
fps: 10.803156745173188
TIMESTEP 9180 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.45979 / Loss  0.13469386100769043
fps: 10.574453226436738
TIMESTEP 9181 / STATE explore / EPSILON 0 / ACTION 1

TIMESTEP 9235 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  12.926183 / Loss  0.4029839336872101
fps: 9.084420070911387
TIMESTEP 9236 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.193382 / Loss  0.5570820569992065
fps: 9.210052590551268
TIMESTEP 9237 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.869971 / Loss  0.47906064987182617
fps: 9.125173776925905
TIMESTEP 9238 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.729347 / Loss  0.43065696954727173
fps: 10.519923451408449
TIMESTEP 9239 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  4.3727922 / Loss  0.40723657608032227
fps: 9.210052590551268
TIMESTEP 9240 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  0.59876513 / Loss  0.7137011289596558
fps: 10.744955181362414
TIMESTEP 9241 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.887718 / Loss  0.3491889238357544
fps: 10.687622469390615
TIMESTEP 9242 / STATE explore / EPSILON 0 / ACTIO

TIMESTEP 9297 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  5.289005 / Loss  0.19168314337730408
fps: 10.574559866478756
TIMESTEP 9298 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  6.6818748 / Loss  0.38212740421295166
fps: 11.104444103210366
TIMESTEP 9299 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.921157 / Loss  0.5518260598182678
fps: 10.687513536790767
TIMESTEP 9300 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.853979 / Loss  0.3877929151058197
fps: 10.301998108735432
TIMESTEP 9301 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  2.3163826 / Loss  0.3274345099925995
fps: 8.058778320882904
TIMESTEP 9302 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  5.3203163 / Loss  0.36913323402404785
fps: 8.058762837078381
TIMESTEP 9303 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.502802 / Loss  0.6371608972549438
fps: 10.86190786952153
TIMESTEP 9304 / STATE explore / EPSILON 0 / ACTION 

fps: 11.041855444840795
TIMESTEP 9359 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.275344 / Loss  0.25880998373031616
fps: 12.729955718504446
TIMESTEP 9360 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.941262 / Loss  0.23764878511428833
fps: 12.569575680203304
TIMESTEP 9361 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  3.1371684 / Loss  0.5864847302436829
fps: 9.382978533014773
TIMESTEP 9362 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.403579 / Loss  0.530735969543457
fps: 9.0844397468497
TIMESTEP 9363 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  7.4659524 / Loss  0.09538380801677704
fps: 9.210093038473616
TIMESTEP 9364 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.525684 / Loss  0.3800199627876282
fps: 10.463707735946493
TIMESTEP 9365 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  7.813061 / Loss  0.4097031354904175
fps: 11.041884513523616
TIMESTEP 9366 / STATE explore / 

fps: 9.167717285817952
TIMESTEP 9420 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.639152 / Loss  0.14073216915130615
fps: 10.803240222024753
TIMESTEP 9421 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  7.685508 / Loss  0.0573691800236702
fps: 12.491151997426902
TIMESTEP 9422 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.62332 / Loss  0.16393068432807922
fps: 9.295680503534939
TIMESTEP 9423 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  4.86917 / Loss  0.5975100994110107
fps: 9.043404765446953
TIMESTEP 9424 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.188979 / Loss  0.1314108669757843
fps: 10.745037761177207
TIMESTEP 9425 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.068752 / Loss  0.22218096256256104
fps: 9.16775736274497
TIMESTEP 9426 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.34053 / Loss  0.41644883155822754
fps: 10.630844449626274
TIMESTEP 9427 / STATE explore / EP

fps: 12.89412459113155
TIMESTEP 9481 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  2.1107569 / Loss  0.2852189838886261
fps: 10.630628895270297
TIMESTEP 9482 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.17857 / Loss  0.14563369750976562
fps: 9.12596795916458
TIMESTEP 9483 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.80102 / Loss  0.6193180680274963
fps: 9.25265823678382
TIMESTEP 9484 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  5.9411187 / Loss  0.2406175285577774
fps: 10.745092815092251
TIMESTEP 9485 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  7.946382 / Loss  0.25602003931999207
fps: 12.81141642332645
TIMESTEP 9486 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.371174 / Loss  0.42460712790489197
fps: 10.301972805156018
TIMESTEP 9487 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.623368 / Loss  0.2917507290840149
fps: 10.301922198370086
TIMESTEP 9488 / STATE explore /

TIMESTEP 9542 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.026509 / Loss  0.4958513081073761
fps: 10.745147869571454
TIMESTEP 9543 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  7.8906436 / Loss  0.2640497088432312
fps: 10.74501023443124
TIMESTEP 9544 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.1863995 / Loss  0.11396654695272446
fps: 10.803156745173188
TIMESTEP 9545 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  4.2715707 / Loss  0.2285354733467102
fps: 10.463655527697739
TIMESTEP 9546 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  2.817444 / Loss  0.3140835464000702
fps: 9.167917673957044
TIMESTEP 9547 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.9546638 / Loss  0.21733912825584412
fps: 10.630575008047122
TIMESTEP 9548 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.451157 / Loss  0.11287516355514526
fps: 9.383839219994139
TIMESTEP 9549 / STATE explore / EPSILON 0 / ACTION 

fps: 8.882529961012036
TIMESTEP 9604 / STATE explore / EPSILON 0 / ACTION 1 / REWARD -1 / Q_MAX  3.7435982 / Loss  0.4276844561100006
fps: 10.981261994339588
TIMESTEP 9605 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  7.3039227 / Loss  0.32473689317703247
fps: 9.167777401339876
TIMESTEP 9606 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.239195 / Loss  0.19790197908878326
fps: 9.125948102923834
TIMESTEP 9607 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.594147 / Loss  0.17724575102329254
fps: 10.63068278303979
TIMESTEP 9608 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  4.605056 / Loss  0.25226911902427673
fps: 9.33916710642628
TIMESTEP 9609 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.490737 / Loss  0.19587275385856628
fps: 9.084459422873245
TIMESTEP 9610 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.885414 / Loss  0.16027839481830597
fps: 10.803101094655505
TIMESTEP 9611 / STATE explore /

TIMESTEP 9665 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.3326607 / Loss  0.3799923062324524
fps: 10.46381215400697
TIMESTEP 9666 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  12.648012 / Loss  0.3096258044242859
fps: 8.124323748462514
TIMESTEP 9667 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.741728 / Loss  0.5758315324783325
fps: 8.962207184203384
TIMESTEP 9668 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.63394 / Loss  0.10410793125629425
fps: 11.103327050551684
TIMESTEP 9669 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.041706 / Loss  0.3871500492095947
fps: 10.574426566762302
TIMESTEP 9670 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.033227 / Loss  0.2918523848056793
fps: 12.649142912288744
TIMESTEP 9671 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  5.4898224 / Loss  0.3116382956504822
fps: 10.921273066718049
TIMESTEP 9672 / STATE explore / EPSILON 0 / ACTION 0

fps: 8.96241783977367
TIMESTEP 9727 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.064876 / Loss  0.16950833797454834
fps: 8.026439018602613
TIMESTEP 9728 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.28652 / Loss  0.21517077088356018
fps: 12.977827284260032
TIMESTEP 9729 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.43489 / Loss  0.3419265151023865
fps: 12.811455555657371
TIMESTEP 9730 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.257236 / Loss  0.4236423969268799
fps: 8.026423658819432
TIMESTEP 9731 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.637649 / Loss  0.24138426780700684
fps: 7.087151793287801
TIMESTEP 9732 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.6011105 / Loss  0.2988744080066681
fps: 10.745037761177207
TIMESTEP 9733 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.065948 / Loss  0.869080662727356
fps: 10.518815377323238
TIMESTEP 9734 / STATE explore 

TIMESTEP 9787 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.222859 / Loss  0.4475197196006775
fps: 9.428707591874904
TIMESTEP 9788 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.909507 / Loss  0.20006400346755981
fps: 9.001869351435923
TIMESTEP 9789 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.942075 / Loss  0.2798488736152649
fps: 9.168579016973975
TIMESTEP 9790 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.01139 / Loss  0.40849459171295166
fps: 10.408270426673417
TIMESTEP 9791 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.390655 / Loss  0.6111403703689575
fps: 9.42813534710062
TIMESTEP 9792 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.058705 / Loss  0.42070895433425903
fps: 10.745037761177207
TIMESTEP 9793 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.238056 / Loss  0.31179046630859375
fps: 10.518788997451999
TIMESTEP 9794 / STATE explore / EPSILON 0 / ACTION 0 /

TIMESTEP 9849 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.279722 / Loss  0.17423909902572632
fps: 9.126166526324493
TIMESTEP 9850 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.634195 / Loss  0.21827903389930725
fps: 13.062623796295142
TIMESTEP 9851 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.133173 / Loss  0.1979648768901825
fps: 10.463707735946493
TIMESTEP 9852 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  3.8898513 / Loss  0.33006319403648376
fps: 10.981290744861893
TIMESTEP 9853 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.176629 / Loss  0.1590828001499176
fps: 10.518841757326793
TIMESTEP 9854 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.891662 / Loss  0.13488860428333282
fps: 7.962472401990284
TIMESTEP 9855 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.549012 / Loss  0.21868818998336792
fps: 9.38299952350166
TIMESTEP 9856 / STATE explore / EPSILON 0 / ACTION 0 

TIMESTEP 9910 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.941926 / Loss  0.23605743050575256
fps: 8.804164567590261
TIMESTEP 9911 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.140662 / Loss  0.2218674123287201
fps: 9.701850481125092
TIMESTEP 9912 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  7.4198656 / Loss  0.2371504008769989
fps: 10.574533206266608
TIMESTEP 9913 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  5.5217757 / Loss  0.1899099051952362
fps: 10.86176722697998
TIMESTEP 9914 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.977563 / Loss  0.16004212200641632
fps: 10.630817504853729
TIMESTEP 9915 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.850788 / Loss  0.24587708711624146
fps: 7.62817089938237
TIMESTEP 9916 / STATE explore / EPSILON 0 / ACTION 1 / REWARD -1 / Q_MAX  7.569186 / Loss  0.21194249391555786
fps: 6.59598953902187
TIMESTEP 9917 / STATE explore / EPSILON 0 / ACTION 1 / R

fps: 9.04334626994394
TIMESTEP 9971 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  13.098451 / Loss  0.1898932158946991
fps: 10.803156745173188
TIMESTEP 9972 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  12.960108 / Loss  0.42156437039375305
fps: 10.630628895270297
TIMESTEP 9973 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.1726875 / Loss  0.8436142802238464
fps: 10.745147869571454
TIMESTEP 9974 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.6358 / Loss  0.3602231740951538
fps: 8.961479541147387
TIMESTEP 9975 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  4.017271 / Loss  0.7724656462669373
fps: 10.746083882042479
TIMESTEP 9976 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.956827 / Loss  0.40704721212387085
fps: 12.894164230356578
TIMESTEP 9977 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  6.8235407 / Loss  0.44878268241882324
fps: 10.463707735946493
TIMESTEP 9978 / STATE explo

TIMESTEP 10033 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  14.118548 / Loss  1.2679579257965088
fps: 9.38299952350166
TIMESTEP 10034 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  14.414599 / Loss  0.40352821350097656
fps: 8.962264634740468
TIMESTEP 10035 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.4769855 / Loss  0.3187052607536316
fps: 8.124323748462514
TIMESTEP 10036 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  13.977869 / Loss  0.25364255905151367
fps: 9.084459422873245
TIMESTEP 10037 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  13.178432 / Loss  0.45969972014427185
fps: 9.25267864824501
TIMESTEP 10038 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  14.621086 / Loss  1.5747442245483398
fps: 9.00262289170591
TIMESTEP 10039 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  13.982333 / Loss  0.5306468605995178
fps: 9.252699059796253
TIMESTEP 10040 / STATE explore / EPSILON 0 / ACT

fps: 9.209061727419234
TIMESTEP 10095 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.286646 / Loss  0.3676738440990448
fps: 8.88258639457511
TIMESTEP 10096 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.506962 / Loss  0.7116179466247559
fps: 9.56258777609572
TIMESTEP 10097 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.137831 / Loss  0.33565598726272583
fps: 10.574506546188891
TIMESTEP 10098 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.71059 / Loss  1.0930728912353516
fps: 10.630709727129409
TIMESTEP 10099 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.495414 / Loss  0.42682084441185
fps: 8.68959830612765
TIMESTEP 10100 / STATE explore / EPSILON 0 / ACTION 1 / REWARD -1 / Q_MAX  10.667223 / Loss  0.28288736939430237
fps: 9.701760716499662
TIMESTEP 10101 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  13.204835 / Loss  0.38440948724746704
fps: 10.803240222024753
TIMESTEP 10102 / STATE explo

TIMESTEP 10156 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  6.448234 / Loss  0.4322338402271271
fps: 7.112401372183607
TIMESTEP 10157 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.272192 / Loss  0.8740724325180054
fps: 9.427224137698522
TIMESTEP 10158 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.402153 / Loss  0.5644551515579224
fps: 9.043365768360863
TIMESTEP 10159 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.671986 / Loss  0.09125865250825882
fps: 7.0125494470953695
TIMESTEP 10160 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.572222 / Loss  0.21272584795951843
fps: 7.99425157004946
TIMESTEP 10161 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  12.212039 / Loss  0.2633129954338074
fps: 10.861992256794727
TIMESTEP 10162 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.344515 / Loss  0.680185854434967
fps: 7.994312517987805
TIMESTEP 10163 / STATE explore / EPSILON 0 / ACT

TIMESTEP 10218 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  4.251672 / Loss  1.346271276473999
fps: 8.963605356402509
TIMESTEP 10219 / STATE explore / EPSILON 0 / ACTION 1 / REWARD -1 / Q_MAX  11.960101 / Loss  0.10318905860185623
fps: 8.22459316314065
TIMESTEP 10220 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  5.792699 / Loss  1.657836675643921
fps: 9.25273988316891
TIMESTEP 10221 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  14.561428 / Loss  0.4207412600517273
fps: 10.574506546188891
TIMESTEP 10222 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.924832 / Loss  0.6480048894882202
fps: 9.125948102923834
TIMESTEP 10223 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  3.2628696 / Loss  0.25199460983276367
fps: 9.210052590551268
TIMESTEP 10224 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.296919 / Loss  0.17074725031852722
fps: 9.125948102923834
TIMESTEP 10225 / STATE explore / EPSILON 0 / ACTION 

TIMESTEP 10279 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.311102 / Loss  0.37191152572631836
fps: 9.427139382987427
TIMESTEP 10280 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.469001 / Loss  0.572919487953186
fps: 9.167917673957044
TIMESTEP 10281 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.308387 / Loss  0.49805325269699097
fps: 7.868396813098787
TIMESTEP 10282 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.7792563 / Loss  0.5589861273765564
fps: 8.124370958931705
TIMESTEP 10283 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  13.175153 / Loss  0.7018143534660339
fps: 7.112365190259784
TIMESTEP 10284 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  5.457632 / Loss  0.5236932039260864
fps: 7.899561920618359
TIMESTEP 10285 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.996113 / Loss  0.7994521856307983
fps: 8.15745826266901
TIMESTEP 10286 / STATE explore / EPSILON 0 / ACTIO

TIMESTEP 10341 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  16.367088 / Loss  0.44026902318000793
fps: 10.574506546188891
TIMESTEP 10342 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  17.64294 / Loss  0.9228854775428772
fps: 9.167777401339876
TIMESTEP 10343 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  16.276718 / Loss  1.7228648662567139
fps: 9.16781747879249
TIMESTEP 10344 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  16.136156 / Loss  2.3742055892944336
fps: 12.649257354826062
TIMESTEP 10345 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  15.988518 / Loss  0.7146145701408386
fps: 12.649257354826062
TIMESTEP 10346 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.91055 / Loss  0.7494166493415833
fps: 9.295721707061931
TIMESTEP 10347 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  17.30738 / Loss  1.8962807655334473
fps: 8.962207184203384
TIMESTEP 10348 / STATE explore / EPSILON 0 / ACTI

TIMESTEP 10403 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.913467 / Loss  0.6363669633865356
fps: 9.12590839070156
TIMESTEP 10404 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  -0.55662274 / Loss  0.3152921795845032
fps: 10.745092815092251
TIMESTEP 10405 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.2192354 / Loss  0.5140377879142761
fps: 9.043365768360863
TIMESTEP 10406 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.780503 / Loss  0.41492342948913574
fps: 10.803045444711167
TIMESTEP 10407 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.471141 / Loss  0.7274066209793091
fps: 10.745615855423786
TIMESTEP 10408 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  5.650706 / Loss  0.3232990503311157
fps: 9.125948102923834
TIMESTEP 10409 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  2.8776689 / Loss  1.0442628860473633
fps: 10.630763615718399
TIMESTEP 10410 / STATE explore / EPSILON 0 /

TIMESTEP 10464 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.670721 / Loss  0.3227836489677429
fps: 9.043385266861867
TIMESTEP 10465 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.391171 / Loss  0.29811978340148926
fps: 9.2527807069018
TIMESTEP 10466 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.051388 / Loss  0.22259989380836487
fps: 9.125948102923834
TIMESTEP 10467 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.842747 / Loss  0.2211419939994812
fps: 10.630736671355612
TIMESTEP 10468 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  5.57824 / Loss  0.2670024037361145
fps: 10.518815377323238
TIMESTEP 10469 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.368662 / Loss  0.30034756660461426
fps: 9.33914631159404
TIMESTEP 10470 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.024249 / Loss  0.4273843467235565
fps: 10.46198513877214
TIMESTEP 10471 / STATE explore / EPSILON 0 / ACTION 

TIMESTEP 10526 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  7.747508 / Loss  0.6898525953292847
fps: 9.21003236672332
TIMESTEP 10527 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.657573 / Loss  0.3277250826358795
fps: 9.002564922590851
TIMESTEP 10528 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  5.364592 / Loss  0.39099442958831787
fps: 10.687649702887546
TIMESTEP 10529 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  5.340062 / Loss  0.6667595505714417
fps: 9.12596795916458
TIMESTEP 10530 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.702788 / Loss  0.6649786233901978
fps: 9.167797440022383
TIMESTEP 10531 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.528002 / Loss  0.396634578704834
fps: 9.12590839070156
TIMESTEP 10532 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.236967 / Loss  0.718703031539917
fps: 9.252699059796253
TIMESTEP 10533 / STATE explore / EPSILON 0 / ACTION 1 / 

fps: 10.80468735912291
TIMESTEP 10587 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.307363 / Loss  0.6656014919281006
fps: 9.2527807069018
TIMESTEP 10588 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  4.4964156 / Loss  0.3455080986022949
fps: 12.811494688227352
TIMESTEP 10589 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  6.69516 / Loss  0.2993153929710388
fps: 10.630736671355612
TIMESTEP 10590 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  2.3366704 / Loss  0.269317626953125
fps: 10.35530899493629
TIMESTEP 10591 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  0.7340808 / Loss  0.29425105452537537
fps: 10.921187755824732
TIMESTEP 10592 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  7.917675 / Loss  0.2485649287700653
fps: 9.16775736274497
TIMESTEP 10593 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  0.8074124 / Loss  0.4486296474933624
fps: 10.745092815092251
TIMESTEP 10594 / STATE expl

fps: 9.471889001300767
TIMESTEP 10648 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  6.265021 / Loss  0.3508158028125763
fps: 10.74501023443124
TIMESTEP 10649 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.259972 / Loss  0.6110091209411621
fps: 9.210072814468033
TIMESTEP 10650 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  4.92197 / Loss  0.1327207088470459
fps: 8.9622454844796
TIMESTEP 10651 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.507721 / Loss  0.17342060804367065
fps: 9.25265823678382
TIMESTEP 10652 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.684368 / Loss  0.33785584568977356
fps: 10.981233243967829
TIMESTEP 10653 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  5.121502 / Loss  0.19950860738754272
fps: 9.043385266861867
TIMESTEP 10654 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  5.715042 / Loss  0.17962898313999176
fps: 9.25265823678382
TIMESTEP 10655 / STATE explore /

TIMESTEP 10710 / STATE explore / EPSILON 0 / ACTION 0 / REWARD -1 / Q_MAX  6.650728 / Loss  0.22147053480148315
fps: 8.360916644074825
TIMESTEP 10711 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  4.222417 / Loss  0.33078768849372864
fps: 9.3401029697326
TIMESTEP 10712 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.240317 / Loss  0.30792051553726196
fps: 10.74506528806421
TIMESTEP 10713 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.073266 / Loss  0.2758011817932129
fps: 10.574506546188891
TIMESTEP 10714 / STATE explore / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  5.3996887 / Loss  0.18151968717575073
fps: 9.001850031549262
TIMESTEP 10715 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  4.7578344 / Loss  0.4288857579231262
fps: 6.489305181011262
TIMESTEP 10716 / STATE explore / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.4930935 / Loss  0.16255277395248413
fps: 10.68756800281312
TIMESTEP 10717 / STATE explore / EPSILON 0 / AC