In [1]:
import numpy as np
from PIL import Image
import cv2 #opencv
import io
import time
import pandas as pd
import numpy as np
from IPython.display import clear_output
from random import randint
import os

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys

#keras imports
from keras.models import model_from_json
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.optimizers import SGD , Adam
from keras.callbacks import TensorBoard
from collections import deque
import random
import pickle
from io import BytesIO
import base64
import json

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# ---------------------------------------------------------------------------
# Paths
# ---------------------------------------------------------------------------
game_url = "chrome://dino"             # address of the built-in Dino game page
chrome_driver_path = "./chromedriver"  # chromedriver binary handed to Selenium

# CSV logs (one file per tracked quantity)
loss_file_path = "./objects/loss_df.csv"
actions_file_path = "./objects/actions_df.csv"
q_value_file_path = "./objects/q_values.csv"
scores_file_path = "./objects/scores_df.csv"

# ---------------------------------------------------------------------------
# JavaScript snippets executed inside the page
# ---------------------------------------------------------------------------
# Tag the game canvas with an id so later lookups can use getElementById.
init_script = "document.getElementsByClassName('runner-canvas')[0].id = 'runner-canvas'"

# Return the canvas content as base64; substring(22) drops the leading
# "data:image/png;base64," prefix of the data URL.
getbase64Script = (
    "canvasRunner = document.getElementById('runner-canvas'); "
    "return canvasRunner.toDataURL().substring(22)"
)


setting path

In [3]:
class Game:
    """Selenium bridge between Python and the Chrome Dino game page.

    Methods
    -------
    get_crashed() : value of the page's ``Runner.instance_.crashed`` flag.
    get_playing() : value of the page's ``Runner.instance_.playing`` flag.
    restart()     : calls ``Runner.instance_.restart()`` in the page.
    press_up()    : sends an ARROW_UP key press to the page body.
    press_down()  : sends an ARROW_DOWN key press to the page body.
    get_score()   : current score read from the page's distance meter.
    pause()       : calls ``Runner.instance_.stop()`` in the page.
    resume()      : calls ``Runner.instance_.play()`` in the page.
    end()         : shuts the browser session down.
    """

    def __init__(self, custom_config=True):
        """Launch Chrome, open the game and prepare the canvas for capture.

        `custom_config` is kept for interface compatibility; this constructor
        does not consult it and always sets the game's ACCELERATION to 0.
        """
        chrome_options = Options()
        chrome_options.add_argument("disable-infobars")
        chrome_options.add_argument("--mute-audio")
        self._driver = webdriver.Chrome(executable_path=chrome_driver_path, chrome_options=chrome_options)
        self._driver.set_window_position(x=-10, y=0)
        self._driver.get('chrome://dino')
        self._driver.execute_script("Runner.config.ACCELERATION=0")
        # Give the canvas an id so the screenshot script can find it by id.
        self._driver.execute_script(init_script)

    def _send_key(self, key):
        """Send a single key press to the page body."""
        self._driver.find_element_by_tag_name("body").send_keys(key)

    def get_crashed(self):
        """Return the page's ``Runner.instance_.crashed`` value."""
        return self._driver.execute_script("return Runner.instance_.crashed")

    def get_playing(self):
        """Return the page's ``Runner.instance_.playing`` value."""
        return self._driver.execute_script("return Runner.instance_.playing")

    def restart(self):
        """Ask the page to restart the game."""
        self._driver.execute_script("Runner.instance_.restart()")

    def press_up(self):
        """Press the up arrow (jump)."""
        self._send_key(Keys.ARROW_UP)

    def press_down(self):
        """Press the down arrow (duck); DinoAgent.duck() relies on this method."""
        self._send_key(Keys.ARROW_DOWN)

    def get_score(self):
        """Return the current score as an int (0 when no digits are available).

        The page exposes the score as an array of digits, e.g. [1, 0, 0] for
        100. Each digit is converted with str() so both string and numeric
        entries are accepted.
        """
        score_array = self._driver.execute_script("return Runner.instance_.distanceMeter.digits")
        if not score_array:
            return 0
        return int(''.join(str(digit) for digit in score_array))

    def pause(self):
        """Pause the game; returns whatever the page-side call returns."""
        return self._driver.execute_script("return Runner.instance_.stop()")

    def resume(self):
        """Resume the game; returns whatever the page-side call returns."""
        return self._driver.execute_script("return Runner.instance_.play()")

    def end(self):
        """End the browser session.

        quit() is used instead of close(): close() only closes the current
        window, whereas quit() also terminates the driver session/process.
        """
        self._driver.quit()

銜接browser-javascript與python的模組，運行chrome並且連接至小恐龍遊戲的環境。
這裡可以得到狀態:撞毀、遊戲中、分數。指令:重啟遊戲、跳躍、暫停、恢復遊戲、結束。

In [4]:
class DinoAgent:
    """Thin controller that forwards the agent's actions and status queries to a game object."""

    def __init__(self, game):
        """Keep a reference to `game` and jump once, which starts the game."""
        self._game = game
        self.jump()

    def is_running(self):
        """Return the game's playing status."""
        return self._game.get_playing()

    def is_crashed(self):
        """Return the game's crashed status."""
        return self._game.get_crashed()

    def jump(self):
        """Forward a jump to the game (press_up)."""
        self._game.press_up()

    def duck(self):
        """Forward a duck to the game; requires the game object to expose press_down()."""
        self._game.press_down()

這裡為DinoAgent的模組，可知Agent是否撞毀，並有跳躍與蹲下兩種行為。我們在遊戲中只考慮跳躍與不跳躍兩種。

In [5]:
class Game_sate:
    """Environment step wrapper: applies an action and returns (frame, reward, done)."""

    def __init__(self, agent, game):
        """Store the agent/game pair and start the frame-display coroutine."""
        self._agent = agent
        self._game = game
        # show_img() is a generator-based coroutine that displays each frame sent to it.
        self._display = show_img()
        next(self._display)  # advance to the first `yield` so send() can be used

    def get_state(self, actions):
        """Apply `actions`, capture the resulting frame and compute the reward.

        Parameters
        ----------
        actions : indexable of length >= 2
            actions[1] == 1 means "jump"; anything else means "do nothing".

        Returns
        -------
        (image, reward, is_over)
            image   : processed frame returned by grab_screen
            reward  : 0.1 while alive, -1 on the step where a crash is detected
            is_over : True when the agent crashed on this step
        """
        actions_df.loc[len(actions_df)] = actions[1]  # log the chosen action
        score = self._game.get_score()
        reward = 0.1
        is_over = False
        if actions[1] == 1:
            self._agent.jump()
        image = grab_screen(self._game._driver)
        self._display.send(image)  # show the processed frame
        if self._agent.is_crashed():
            # Append to the end of scores_df. Indexing by len(loss_df) (as before)
            # overwrote rows whenever loss_df had not grown between crashes.
            scores_df.loc[len(scores_df)] = score
            self._game.restart()
            reward = -1
            is_over = True
        return image, reward, is_over

為了確認Game的state，這裡透過Selenium在瀏覽器中執行JavaScript，從遊戲的canvas取得畫面影像，再以OpenCV進行影像處理(OpenCV中影像處理的速度有經過特別優化)。這裡的get_state方法為輸入一個action，然後回傳下一狀態的影像、reward(如果撞毀=-1，沒撞毀=0.1)、is_over(是否撞毀)。整個流程平均可以達到約5FPS的擷取速度，這對於表示每個Action之後的State轉換已經足夠。

In [6]:
def save_obj(obj, name ):
    """Pickle `obj` to objects/<name>.pkl using the highest pickle protocol.

    The objects/ directory is created when missing, so the first save on a
    fresh checkout no longer fails with FileNotFoundError.
    """
    os.makedirs('objects', exist_ok=True)
    with open('objects/'+ name + '.pkl', 'wb') as f: #dump files into objects folder
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)
def load_obj(name ):
    """Return the object unpickled from objects/<name>.pkl.

    NOTE: pickle.load can execute arbitrary code; only load files this
    notebook wrote itself.
    """
    pickle_path = 'objects/' + name + '.pkl'
    with open(pickle_path, 'rb') as source:
        loaded = pickle.load(source)
    return loaded

def grab_screen(_driver):
    """Capture the game canvas through `_driver` and return the processed frame.

    Runs the module-level `getbase64Script` in the page, decodes the returned
    base64 payload into an image array and passes it through process_img.
    """
    encoded_frame = _driver.execute_script(getbase64Script)
    raw_bytes = base64.b64decode(encoded_frame)
    frame = np.array(Image.open(BytesIO(raw_bytes)))
    return process_img(frame)  # grayscale / crop / resize

def process_img(image):
    """Convert a captured frame to an 80x80 grayscale image.

    Steps: grayscale conversion, crop to the top-left 300x500 (rows x cols)
    region of interest, then resize to 80x80.

    NOTE(review): grab_screen builds `image` with PIL, which presumably yields
    RGB(A) channel order, while COLOR_BGR2GRAY assumes BGR. For a canvas whose
    pixels are already gray the result is the same - confirm before changing.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    region_of_interest = gray[:300, :500]
    return cv2.resize(region_of_interest, (80,80))

def show_img(graphs = False):
    """
    Coroutine that shows every image sent to it in an OpenCV window.

    Usage: create the generator, advance it once with next(), then call
    .send(image) for each frame. The window is titled "logs" when `graphs`
    is true and "game_play" otherwise.

    Pressing 'q' in the window destroys all OpenCV windows and ends the
    generator, so the .send() call that delivered that frame raises
    StopIteration.
    """
    # The title never changes, so compute it once instead of on every frame.
    window_title = "logs" if graphs else "game_play"
    while True:
        screen = (yield)
        cv2.namedWindow(window_title, cv2.WINDOW_NORMAL)
        # The former per-frame cv2.resize(screen, (800, 400)) result was never
        # displayed or used, so that dead work has been removed.
        cv2.imshow(window_title, screen)
        if (cv2.waitKey(1) & 0xFF == ord('q')):
            cv2.destroyAllWindows()
            break

儲存object方法，後面會儲存的資料有:行為，格式為(S,a,r,St+1,是否結束)、時間(為了辨認FPS的更新狀態)、epsilon(隨機行為的機率隨時間遞減的參數)，螢幕截圖(單純擷取chrome中的遊戲影像)，影像處理(只保留有興趣的影像區域，在小恐龍的遊戲中其實只在乎仙人掌與螢幕左邊界的距離。size為80*80)

In [7]:
# Initialize log structures from file if they exist, else create new ones.
def _load_or_create_log(csv_path, column):
    """Return the CSV at `csv_path` as a DataFrame, or an empty frame with one `column` if the file is missing."""
    if os.path.isfile(csv_path):
        return pd.read_csv(csv_path)
    return pd.DataFrame(columns=[column])

# Each log checks for and reads its OWN file. Previously scores_df tested for
# the loss file and q_values_df read the actions file.
loss_df = _load_or_create_log(loss_file_path, 'loss')
scores_df = _load_or_create_log(scores_file_path, 'scores')
actions_df = _load_or_create_log(actions_file_path, 'actions')
q_values_df = _load_or_create_log(q_value_file_path, 'qvalues')

使用DataFrame儲存loss,score,action,q_values。

In [8]:
# --- game / training hyper-parameters ---------------------------------------
ACTIONS = 2               # number of possible actions: do nothing, jump
GAMMA = 0.99              # discount factor applied to the next state's max Q-value
OBSERVATION = 100.        # timesteps to observe before training starts
EXPLORE = 100000          # frames over which epsilon would be annealed
INITIAL_EPSILON = 0.1     # starting value of epsilon
FINAL_EPSILON = 0.0001    # final value of epsilon
REPLAY_MEMORY = 50000     # number of previous transitions to remember
BATCH = 16                # minibatch size
FRAME_PER_ACTION = 1      # an action is chosen every FRAME_PER_ACTION timesteps
LEARNING_RATE = 1e-4      # Adam learning rate

# --- network input geometry -------------------------------------------------
img_rows = img_cols = 80  # processed frames are 80x80
img_channels = 4          # four stacked frames form one state

In [9]:
# Checkpointed training variables: persisted so training can resume from the same step.
def init_cache():
    """Write the initial checkpoint files; meant to be run only once.

    Stores the starting epsilon, a time-step counter of 0 and an empty
    replay memory under the names "epsilon", "time" and "D".
    """
    save_obj(INITIAL_EPSILON, "epsilon")
    save_obj(0, "time")
    save_obj(deque(), "D")

In [10]:
'''Call only once to init file structure
'''
# Uncomment and run a single time before the first training run: init_cache()
# writes the initial epsilon, time-step and replay-memory pickles. It is left
# commented out because running it again overwrites the saved checkpoints
# with their initial values.
#init_cache()

'Call only once to init file structure\n'

如果想要執行訓練，第一次需要執行init_cache()，它會創建所需的檔案。在這之前須先建立名為objects的空白資料夾，用來存放這些檔案。

In [11]:
def buildmodel():
    """Build and compile the Q-network; create model.h5 if it does not exist.

    Architecture: three Conv2D -> MaxPooling2D -> ReLU stages (32, 64 and 64
    filters), then Flatten, Dense(512) + ReLU, and a Dense(ACTIONS) output
    giving one Q-value per action. Compiled with MSE loss and Adam at
    LEARNING_RATE.

    Returns
    -------
    The compiled Keras Sequential model.
    """
    print("Now we build the model")
    model = Sequential()
    model.add(Conv2D(32, (8, 8), padding='same',strides=(4, 4),input_shape=(img_cols,img_rows,img_channels)))  #80*80*4
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Activation('relu'))
    model.add(Conv2D(64, (4, 4),strides=(2, 2),  padding='same'))
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Activation('relu'))
    model.add(Conv2D(64, (3, 3),strides=(1, 1),  padding='same'))
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Activation('relu'))
    model.add(Flatten())
    model.add(Dense(512))
    model.add(Activation('relu'))
    model.add(Dense(ACTIONS))
    adam = Adam(lr=LEARNING_RATE)
    model.compile(loss='mse',optimizer=adam)

    # Create the weights file only if it is not present. The check must look
    # at model.h5 itself: testing the loss-log path (as before) could overwrite
    # trained weights when the log was missing, or skip creating the file when
    # only the log existed.
    if not os.path.isfile('model.h5'):
        model.save_weights('model.h5')
    print("We finish building the model")
    return model

模型輸入為80*80*4，即每四張影像堆疊為一組State，經過三組捲積與池化後展平，接上512個神經元的全連接層，最後輸出兩個Q值，分別對應動作0(不跳)與動作1(跳)。這裡的類神經網路用來替代傳統的Q-table：由於以影像為格式的state很難窮舉到Q-table中，因此使用類神經網路，好處是可以舉一反三，State的pixel值即使不盡相同，仍可以訓練出其特性。這個方法即為DQN，為一種off-policy learning。

In [12]:
def trainNetwork(model,game_state,observe=False):
    """Main play/train loop of the DQN agent.

    Parameters
    ----------
    model : Keras model to be trained; its weights are loaded from model.h5.
    game_state : environment wrapper whose get_state(action) returns
        (frame, reward, terminal) and whose `_game` offers pause()/resume().
    observe : bool
        True  -> only play: the training threshold is set so high that weights
                 are never updated, and epsilon is FINAL_EPSILON.
        False -> train once the time step exceeds OBSERVATION.

    The loop is endless; it only stops when an exception propagates out of it
    (for example the StopIteration that playGame handles).
    """
    last_time = time.time()
    # Replay memory of (s_t, action_index, r_t, s_t1, terminal) tuples, restored from disk.
    D = load_obj("D")

    # Get the first frame by doing nothing: index 0 => do nothing, index 1 => jump.
    do_nothing = np.zeros(ACTIONS)
    do_nothing[0] = 1
    x_t, r_0, terminal = game_state.get_state(do_nothing)

    # Repeat the first frame four times as a placeholder state, then add a
    # leading batch axis: (1, rows, cols, 4).
    s_t = np.stack((x_t, x_t, x_t, x_t), axis=2)
    s_t = s_t.reshape(1, s_t.shape[0], s_t.shape[1], s_t.shape[2])
    initial_state = s_t

    if observe :
        OBSERVE = 999999999    #We keep observe, never train
        epsilon = FINAL_EPSILON
        print ("Now we load weight")
        model.load_weights("model.h5")
        adam = Adam(lr=LEARNING_RATE)
        model.compile(loss='mse',optimizer=adam)
        print ("Weight load successfully")
    else:                       #We go to training mode
        OBSERVE = OBSERVATION
        # Exploration is switched off here: epsilon is pinned to 0 and the
        # cached value is not loaded. To re-enable, restore the line below.
        #epsilon = load_obj("epsilon")
        epsilon = 0
        model.load_weights("model.h5")
        adam = Adam(lr=LEARNING_RATE)
        model.compile(loss='mse',optimizer=adam)

    t = load_obj("time") # resume from the previous time step stored in file system
    while (True): #endless running

        loss = 0
        Q_sa = 0
        action_index = 0
        r_t = 0 # reward at t
        a_t = np.zeros([ACTIONS]) # one-hot action at t

        #choose an action epsilon greedy
        if t % FRAME_PER_ACTION == 0: #parameter to skip frames for actions
            if  random.random() <= epsilon: #randomly explore an action
                print("----------Random Action----------")
                action_index = random.randrange(ACTIONS)
                # Execute the action that was actually sampled. Previously
                # a_t[0] was always set, so the game did nothing while the
                # replay memory recorded action_index.
                a_t[action_index] = 1
            else: # predict the output
                q = model.predict(s_t)       #input a stack of 4 images, get the prediction
                max_Q = np.argmax(q)         # choosing index with maximum q value
                action_index = max_Q
                a_t[action_index] = 1        # 0 => do nothing, 1 => jump

        # Epsilon annealing is currently disabled (kept for reference):
        #if epsilon >= FINAL_EPSILON and t > OBSERVE:
            #epsilon -= (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE

        #run the selected action and observe next state and reward
        x_t1, r_t, terminal = game_state.get_state(a_t)
        print('fps: {0}'.format(1 / (time.time()-last_time))) # helpful for measuring frame rate
        last_time = time.time()
        x_t1 = x_t1.reshape(1, x_t1.shape[0], x_t1.shape[1], 1) # (1, rows, cols, 1)
        # Newest frame goes first; the oldest of the previous four is dropped.
        s_t1 = np.append(x_t1, s_t[:, :, :, :3], axis=3)

        # store the transition in D
        D.append((s_t, action_index, r_t, s_t1, terminal))
        if len(D) > REPLAY_MEMORY:
            D.popleft()

        # Only train once observing is done and a full minibatch can be drawn
        # (random.sample raises ValueError when D holds fewer than BATCH items).
        if t > OBSERVE and len(D) >= BATCH:

            #sample a minibatch to train on
            minibatch = random.sample(D, BATCH)
            inputs = np.zeros((BATCH, s_t.shape[1], s_t.shape[2], s_t.shape[3]))   # (BATCH, rows, cols, 4)
            targets = np.zeros((inputs.shape[0], ACTIONS))                         # (BATCH, ACTIONS)

            #Now we do the experience replay
            for i in range(0, len(minibatch)):
                state_t = minibatch[i][0]    # 4D stack of images
                action_t = minibatch[i][1]   # action index
                reward_t = minibatch[i][2]   # reward at state_t due to action_t
                state_t1 = minibatch[i][3]   # next state
                # Separate name: reusing `terminal` here clobbered the live
                # episode flag that decides the state reset further down.
                terminal_t = minibatch[i][4] # whether the agent died due to the action

                inputs[i:i + 1] = state_t

                targets[i] = model.predict(state_t)  # predicted q values
                Q_sa = model.predict(state_t1)       # predicted q values for next step

                if terminal_t:
                    targets[i, action_t] = reward_t # if terminated, only equals reward
                else:
                    targets[i, action_t] = reward_t + GAMMA * np.max(Q_sa)

            loss += model.train_on_batch(inputs, targets)
            loss_df.loc[len(loss_df)] = loss
            q_values_df.loc[len(q_values_df)] = np.max(Q_sa)
        s_t = initial_state if terminal else s_t1 #reset to the initial frame stack if the episode ended
        t = t + 1

        # save progress every 20000 iterations
        if t % 20000 == 0:
            print("Now we save model")
            game_state._game.pause() #pause game while saving to filesystem
            model.save_weights("model.h5", overwrite=True)
            save_obj(D,"D") #saving episodes
            save_obj(t,"time") #caching time steps
            save_obj(epsilon,"epsilon") #cache epsilon to avoid repeated randomness in actions
            loss_df.to_csv(loss_file_path,index=False)
            scores_df.to_csv(scores_file_path,index=False)
            actions_df.to_csv(actions_file_path,index=False)
            q_values_df.to_csv(q_value_file_path,index=False)
            # NOTE(review): model.to_json() presumably already returns a JSON
            # string, so json.dump stores it as a quoted string - confirm how
            # model.json is read back before changing this.
            with open("model.json", "w") as outfile:
                json.dump(model.to_json(), outfile)
            clear_output()
            game_state._game.resume()
        # print info
        state = ""
        if t <= OBSERVE:
            state = "observe"
        elif t > OBSERVE and t <= OBSERVE + EXPLORE:
            state = "explore"
        else:
            state = "train"

        print("TIMESTEP", t, "/ STATE", state,             "/ EPSILON", epsilon, "/ ACTION", action_index, "/ REWARD", r_t,             "/ Q_MAX " , np.max(Q_sa), "/ Loss ", loss)
    # Nothing follows the loop: it is only left via an exception, so the old
    # trailing "Episode finished!" prints were unreachable and were removed.


這裡寫下訓練的過程，我們先觀察100次，然後到累積10萬筆之前都是EXPLORE狀態，但仍會訓練神經網路，這是因為epsilon(隨機動作的機率)在10萬次以後才會收斂到最低值，後面的狀態我們稱為訓練狀態。
訓練方式是這樣的：使用當前state預測下一步的action，並依epsilon的機率改採隨機動作後執行；接著觀察St+1與reward並記錄到replay buffer，然後從replay buffer中隨機取出BATCH筆(本notebook設定為16筆)資料出來訓練，得到16組(S,A,r,St+1,terminal)。如果恐龍在此State S採取Action A後陣亡了，那麼目標值就只等於reward；但如果沒有陣亡，則目標值=reward+GAMMA*max(Q_sa)，意思是此State的Action會讓你的恐龍活著，所以再加上下一個狀態折扣後的最大Q值。然後拿其S,A與更新過的目標值進行supervised learning，再以反向傳播法更新參數，如此一來網路就會更像理想的Q-table。

In [None]:
#main function
def playGame(observe=False):
    """Launch the game, build the model and run the training/play loop.

    Parameters
    ----------
    observe : bool
        Forwarded to trainNetwork (True = play only, False = train).

    The browser is always shut down on the way out, whether the loop stopped
    through the expected StopIteration, a KeyboardInterrupt or any other
    error. Previously it was closed only on StopIteration.
    """
    game = Game()
    try:
        dino = DinoAgent(game)
        game_state = Game_sate(dino,game)
        model = buildmodel()
        trainNetwork(model,game_state,observe=observe)
    except StopIteration:
        # Expected stop signal (handled the same way as before); nothing else to do.
        pass
    finally:
        game.end()

In [None]:
# Start training; pass observe=True instead to only play with the saved weights.
playGame(observe=False)

Now we build the model
We finish building the model
fps: 0.215493308272021
TIMESTEP 320001 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.396174 / Loss  3.8856799602508545
fps: 0.9123853754833237
TIMESTEP 320002 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.204363 / Loss  0.020749880000948906
fps: 5.637641770903059
TIMESTEP 320003 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.164232 / Loss  2.414555788040161
fps: 7.135900642252563
TIMESTEP 320004 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.083686 / Loss  0.04089697450399399
fps: 6.101222919966892
TIMESTEP 320005 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.904004 / Loss  0.44432443380355835
fps: 6.535464087158136
TIMESTEP 320006 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.267053 / Loss  0.019154895097017288
fps: 5.99106690111171
TIMESTEP 320007 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.038898 / Loss  0.0298473387956619

TIMESTEP 320061 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.288724 / Loss  0.023942384868860245
fps: 3.8435986818713324
TIMESTEP 320062 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.0591345 / Loss  0.022996719926595688
fps: 5.827024654002073
TIMESTEP 320063 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.033935 / Loss  0.029853232204914093
fps: 7.053008073208751
TIMESTEP 320064 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.934183 / Loss  0.013776391744613647
fps: 7.138147347640362
TIMESTEP 320065 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.105944 / Loss  0.1146804541349411
fps: 6.234380248822034
TIMESTEP 320066 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.899192 / Loss  0.5389309525489807
fps: 5.958063613415893
TIMESTEP 320067 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.770458 / Loss  0.9702993035316467
fps: 6.063893246203112
TIMESTEP 320068 / STATE train / EPSILON 0 / ACTI

TIMESTEP 320122 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.244808 / Loss  0.048689231276512146
fps: 5.951764966312671
TIMESTEP 320123 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.975339 / Loss  0.023605480790138245
fps: 6.01588342022791
TIMESTEP 320124 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.194627 / Loss  0.024496495723724365
fps: 6.132050286770264
TIMESTEP 320125 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.071356 / Loss  0.004357010126113892
fps: 5.995897239428244
TIMESTEP 320126 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.975541 / Loss  0.00271231634542346
fps: 6.06713871390032
TIMESTEP 320127 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.70193 / Loss  0.8360134959220886
fps: 7.223612646778199
TIMESTEP 320128 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.169338 / Loss  0.017084412276744843
fps: 7.1122204662456845
TIMESTEP 320129 / STATE train / EPSILON 0 / ACTION

TIMESTEP 320184 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.238222 / Loss  0.0213266983628273
fps: 6.931049811037648
TIMESTEP 320185 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.235319 / Loss  0.012403502129018307
fps: 7.194655002358592
TIMESTEP 320186 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.276084 / Loss  0.02407602034509182
fps: 6.986978242654127
TIMESTEP 320187 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.327284 / Loss  0.018533051013946533
fps: 7.125571881683987
TIMESTEP 320188 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.365924 / Loss  0.3269802927970886
fps: 7.043757473163785
TIMESTEP 320189 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  -1.6743178 / Loss  0.04758094251155853
fps: 7.115224696895415
TIMESTEP 320190 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.675167 / Loss  0.010274015367031097
fps: 6.999302456753988
TIMESTEP 320191 / STATE train / EPSILON 0 / AC

fps: 5.995931524963368
TIMESTEP 320246 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.917196 / Loss  1.1954554319381714
fps: 5.996480146855936
TIMESTEP 320247 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.419979 / Loss  0.7940719723701477
fps: 6.066217349801857
TIMESTEP 320248 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.457732 / Loss  0.022206194698810577
fps: 6.025822672058961
TIMESTEP 320249 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.106314 / Loss  0.01621951535344124
fps: 7.175245316071111
TIMESTEP 320250 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.944967 / Loss  0.01592765934765339
fps: 6.012649445224921
TIMESTEP 320251 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.115246 / Loss  0.010860549286007881
fps: 6.007215583827811
TIMESTEP 320252 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.693668 / Loss  2.0125315189361572
fps: 5.887494402793901
TIMESTEP 320253 / STATE tr

fps: 5.488841865024059
TIMESTEP 320308 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.276719 / Loss  0.06416332721710205
fps: 5.9968488085808485
TIMESTEP 320309 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.050081 / Loss  0.03512981906533241
fps: 5.99822954936454
TIMESTEP 320310 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.313632 / Loss  0.00692222872748971
fps: 6.574350293112637
TIMESTEP 320311 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.404528 / Loss  0.014294522814452648
fps: 6.1288156198948505
TIMESTEP 320312 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.987148 / Loss  0.007644656579941511
fps: 5.8901649519861365
TIMESTEP 320313 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.444004 / Loss  0.47465819120407104
fps: 5.58069331828935
TIMESTEP 320314 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.425012 / Loss  0.019432321190834045
fps: 5.960366577187121
TIMESTEP 320315 / STATE trai

fps: 5.4408310351358296
TIMESTEP 320369 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.554359 / Loss  0.012441989034414291
fps: 5.4752996249548005
TIMESTEP 320370 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.633799 / Loss  0.01294207014143467
fps: 5.460160252029186
TIMESTEP 320371 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.628633 / Loss  0.011182460933923721
fps: 5.994817445741115
TIMESTEP 320372 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.817447 / Loss  0.028318915516138077
fps: 5.460878260144988
TIMESTEP 320373 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.950455 / Loss  0.030956119298934937
fps: 5.474120536774706
TIMESTEP 320374 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.581973 / Loss  0.03303776681423187
fps: 5.938534814508571
TIMESTEP 320375 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.70982 / Loss  0.09542723745107651
fps: 6.022508823470288
TIMESTEP 320376 / STATE tra

TIMESTEP 320430 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.209075 / Loss  0.12455379217863083
fps: 5.913233743310381
TIMESTEP 320431 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.392644 / Loss  0.012180821970105171
fps: 5.904858977860496
TIMESTEP 320432 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.504889 / Loss  0.027786899358034134
fps: 7.337869163064187
TIMESTEP 320433 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.983331 / Loss  0.021943675354123116
fps: 5.961095090625102
TIMESTEP 320434 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.408553 / Loss  0.05088997632265091
fps: 3.1976189604908454
TIMESTEP 320435 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.175558 / Loss  0.0882398933172226
fps: 5.32980241513788
TIMESTEP 320436 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.530492 / Loss  0.04076152294874191
fps: 5.856535064753726
TIMESTEP 320437 / STATE train / EPSILON 0 / ACTI

fps: 6.002418538887673
TIMESTEP 320491 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.327035 / Loss  0.010789690539240837
fps: 5.9806875153285
TIMESTEP 320492 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.632702 / Loss  0.0028062183409929276
fps: 5.448506640616881
TIMESTEP 320493 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.521413 / Loss  0.06892003118991852
fps: 5.478560839096899
TIMESTEP 320494 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.425583 / Loss  1.8684380054473877
fps: 6.008936821912549
TIMESTEP 320495 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.399794 / Loss  0.011521173641085625
fps: 5.977704237677045
TIMESTEP 320496 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.708059 / Loss  0.05037360265851021
fps: 6.012175421244634
TIMESTEP 320497 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.326656 / Loss  0.0053506153635680676
fps: 5.82555977016124
TIMESTEP 320498 / STATE

TIMESTEP 320552 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.52191 / Loss  0.06426233798265457
fps: 6.856698201268904
TIMESTEP 320553 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.609649 / Loss  0.008776398375630379
fps: 7.4025577036438275
TIMESTEP 320554 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.705151 / Loss  0.07357079535722733
fps: 6.971683122236628
TIMESTEP 320555 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.926958 / Loss  0.015761030837893486
fps: 7.178413606848607
TIMESTEP 320556 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.649506 / Loss  0.02243831939995289
fps: 7.095220292821558
TIMESTEP 320557 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.570925 / Loss  0.05429753288626671
fps: 7.128054355547549
TIMESTEP 320558 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.6302395 / Loss  0.009905362501740456
fps: 6.987176112719603
TIMESTEP 320559 / STATE train / EPSILON 0 / ACTION 

TIMESTEP 320613 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.6242485 / Loss  0.2488485872745514
fps: 6.053469884943338
TIMESTEP 320614 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.039314 / Loss  0.17061962187290192
fps: 5.987970676203325
TIMESTEP 320615 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.130532 / Loss  0.02006770670413971
fps: 7.088085983709061
TIMESTEP 320616 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.73393 / Loss  0.01075937133282423
fps: 6.066884213936912
TIMESTEP 320617 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.02854 / Loss  0.6502959132194519
fps: 6.06321827449325
TIMESTEP 320618 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.18321 / Loss  0.02014477550983429
fps: 7.126964276004568
TIMESTEP 320619 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.7479925 / Loss  0.04672592878341675
fps: 6.028715533956859
TIMESTEP 320620 / STATE train / EPSILON 0 / ACTION 1 / 

fps: 5.4608213813288335
TIMESTEP 320674 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.364081 / Loss  0.008760805241763592
fps: 6.056205717025022
TIMESTEP 320675 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.684466 / Loss  0.07841704040765762
fps: 6.655037326753882
TIMESTEP 320676 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.70474 / Loss  0.014231392182409763
fps: 5.274832296636878
TIMESTEP 320677 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.267665 / Loss  0.03861774876713753
fps: 6.540213001512529
TIMESTEP 320678 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.723692 / Loss  2.03523850440979
fps: 6.105352640803042
TIMESTEP 320679 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.438382 / Loss  0.02165910229086876
fps: 7.107929664727973
TIMESTEP 320680 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.612859 / Loss  0.018806517124176025
fps: 2.1364661824564726
TIMESTEP 320681 / STATE 

TIMESTEP 320735 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.32332 / Loss  0.4537302553653717
fps: 7.13300023468829
TIMESTEP 320736 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.979344 / Loss  0.01777997612953186
fps: 7.310142654222547
TIMESTEP 320737 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.984832 / Loss  0.036513760685920715
fps: 7.2347019739611005
TIMESTEP 320738 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.949509 / Loss  0.007885217666625977
fps: 7.3935409039068
TIMESTEP 320739 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.672334 / Loss  0.17834465205669403
fps: 7.134419804829758
TIMESTEP 320740 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.250458 / Loss  0.24053192138671875
fps: 7.101431019186357
TIMESTEP 320741 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.174245 / Loss  0.040424272418022156
fps: 3.7284856458373152
TIMESTEP 320742 / STATE train / EPSILON 0 / ACTI

TIMESTEP 320797 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.050309 / Loss  0.028233952820301056
fps: 4.30022278747838
TIMESTEP 320798 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.873754 / Loss  0.007725240662693977
fps: 5.791013947683072
TIMESTEP 320799 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.619543 / Loss  0.009158863686025143
fps: 5.8356995375188525
TIMESTEP 320800 / STATE train / EPSILON 0 / ACTION 1 / REWARD -1 / Q_MAX  11.0613785 / Loss  0.006334336940199137
fps: 7.105352853285267
TIMESTEP 320801 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.815128 / Loss  0.01408226415514946
fps: 6.001284874395658
TIMESTEP 320802 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.064813 / Loss  3.09755802154541
fps: 5.966861706592236
TIMESTEP 320803 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.983437 / Loss  0.04910753667354584
fps: 4.872955662105815
TIMESTEP 320804 / STATE train / EPSILON 0 / ACT

fps: 5.983870116344597
TIMESTEP 320858 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.501293 / Loss  0.013227399438619614
fps: 7.017770705858928
TIMESTEP 320859 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.302728 / Loss  0.12045059353113174
fps: 7.215361378422919
TIMESTEP 320860 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.674745 / Loss  0.27282410860061646
fps: 7.285736867429697
TIMESTEP 320861 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.07402 / Loss  0.018029898405075073
fps: 7.031429586862204
TIMESTEP 320862 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.145319 / Loss  0.003307835664600134
fps: 6.177114521647052
TIMESTEP 320863 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.25438 / Loss  0.12371008098125458
fps: 6.215560893707386
TIMESTEP 320864 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.490362 / Loss  0.027715448290109634
fps: 6.0177564032666275
TIMESTEP 320865 / STAT

fps: 6.051906414353839
TIMESTEP 320919 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.691592 / Loss  0.00961601734161377
fps: 6.135889719984376
TIMESTEP 320920 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.5992985 / Loss  0.36342740058898926
fps: 6.955312783979729
TIMESTEP 320921 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.800263 / Loss  0.0139199523255229
fps: 7.144616034531511
TIMESTEP 320922 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.082505 / Loss  0.008238783106207848
fps: 6.901682019081093
TIMESTEP 320923 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  1.2066015 / Loss  0.330270916223526
fps: 7.163285103359702
TIMESTEP 320924 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.709792 / Loss  0.008576136082410812
fps: 7.00394256315031
TIMESTEP 320925 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.034393 / Loss  0.36719754338264465
fps: 6.165999491349262
TIMESTEP 320926 / STATE tr

TIMESTEP 320980 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.236516 / Loss  0.09126710891723633
fps: 6.865452450284731
TIMESTEP 320981 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.465114 / Loss  0.05965903401374817
fps: 6.236419597055981
TIMESTEP 320982 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.661272 / Loss  0.021244674921035767
fps: 6.006492957847328
TIMESTEP 320983 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.06102 / Loss  4.769067287445068
fps: 7.115683397093877
TIMESTEP 320984 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.250191 / Loss  0.013302421197295189
fps: 7.130162788483768
TIMESTEP 320985 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.236896 / Loss  0.01844482496380806
fps: 6.9916136721647595
TIMESTEP 320986 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.2989855 / Loss  0.023108692839741707
fps: 7.095028257953417
TIMESTEP 320987 / STATE train / EPSILON 0 / AC

TIMESTEP 321042 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.532477 / Loss  0.00915894191712141
fps: 6.94266016701565
TIMESTEP 321043 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.765725 / Loss  0.018204357475042343
fps: 7.346364566256113
TIMESTEP 321044 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.074725 / Loss  0.14919039607048035
fps: 6.020511893022368
TIMESTEP 321045 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.593562 / Loss  0.0652528777718544
fps: 5.8687942418904155
TIMESTEP 321046 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.776012 / Loss  0.006007392890751362
fps: 6.241263756867274
TIMESTEP 321047 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.995216 / Loss  0.05211220681667328
fps: 2.9838319898525762
TIMESTEP 321048 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.044346 / Loss  0.0038432697765529156
fps: 5.913192060486696
TIMESTEP 321049 / STATE train / EPSILON 0 / ACTIO

fps: 5.669947535833391
TIMESTEP 321104 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.013409 / Loss  0.007150563411414623
fps: 6.0868790389407215
TIMESTEP 321105 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.919704 / Loss  0.02196520008146763
fps: 6.961685289567856
TIMESTEP 321106 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.140795 / Loss  0.003549950197339058
fps: 7.126201418680712
TIMESTEP 321107 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.932422 / Loss  0.005326302722096443
fps: 7.055250347354221
TIMESTEP 321108 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.171609 / Loss  0.38416439294815063
fps: 7.306780667875665
TIMESTEP 321109 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.958257 / Loss  0.008388122543692589
fps: 7.03497946183121
TIMESTEP 321110 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.064205 / Loss  2.660780906677246
fps: 6.042324901966999
TIMESTEP 321111 / STATE tr

fps: 6.0270782141368855
TIMESTEP 321165 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.945484 / Loss  0.007417688146233559
fps: 6.030136998118055
TIMESTEP 321166 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.89484 / Loss  0.006891242694109678
fps: 6.882813576900103
TIMESTEP 321167 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.903717 / Loss  0.3313286304473877
fps: 7.1876519810879405
TIMESTEP 321168 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.965793 / Loss  0.023357201367616653
fps: 6.78303619944627
TIMESTEP 321169 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.268423 / Loss  0.0047840941697359085
fps: 6.169654677359567
TIMESTEP 321170 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.013921 / Loss  0.009953873232007027
fps: 5.985970951449216
TIMESTEP 321171 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.27265 / Loss  0.008685750886797905
fps: 6.081230888789975
TIMESTEP 321172 / STATE 

fps: 6.074898034853541
TIMESTEP 321226 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.6259 / Loss  0.029276762157678604
fps: 5.930247584737435
TIMESTEP 321227 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.656483 / Loss  0.020840175449848175
fps: 6.931278888563888
TIMESTEP 321228 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.6815 / Loss  0.009914257563650608
fps: 7.15807725521116
TIMESTEP 321229 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.706869 / Loss  0.05466434359550476
fps: 6.469366822296567
TIMESTEP 321230 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.540269 / Loss  0.03829198703169823
fps: 7.38757659607821
TIMESTEP 321231 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.60093 / Loss  0.01179489865899086
fps: 7.045780817177423
TIMESTEP 321232 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.8142605 / Loss  0.01802578568458557
fps: 6.012166803318636
TIMESTEP 321233 / STATE trai

fps: 6.258361782892067
TIMESTEP 321287 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.097365 / Loss  2.597346305847168
fps: 7.020531138167564
TIMESTEP 321288 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.367695 / Loss  0.011723734438419342
fps: 6.000889910107634
TIMESTEP 321289 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.391406 / Loss  0.022399740293622017
fps: 7.258326829462448
TIMESTEP 321290 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.345044 / Loss  0.03095165081322193
fps: 7.274301412437217
TIMESTEP 321291 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.148498 / Loss  0.009470400400459766
fps: 7.123127427487437
TIMESTEP 321292 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.325349 / Loss  0.015493186190724373
fps: 6.132740671423559
TIMESTEP 321293 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.9301577 / Loss  0.025100059807300568
fps: 5.976137079285651
TIMESTEP 321294 / STATE trai

TIMESTEP 321348 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.517658 / Loss  0.005948016420006752
fps: 5.880593624911496
TIMESTEP 321349 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.704332 / Loss  0.021770618855953217
fps: 5.947081334692622
TIMESTEP 321350 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.411741 / Loss  0.11958135664463043
fps: 6.152017448520557
TIMESTEP 321351 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.751983 / Loss  0.004311790224164724
fps: 3.3310651329311565
TIMESTEP 321352 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.532997 / Loss  0.010564200580120087
fps: 5.913267089992443
TIMESTEP 321353 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.449883 / Loss  2.860466957092285
fps: 6.1816391799678705
TIMESTEP 321354 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.798516 / Loss  0.013067584484815598
fps: 6.007172565509814
TIMESTEP 321355 / STATE train / EPSILON 0 / ACTION

fps: 7.057577175087245
TIMESTEP 321410 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.330829 / Loss  0.03176047280430794
fps: 6.173177911221005
TIMESTEP 321411 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.16804 / Loss  0.0311133936047554
fps: 6.090370245600992
TIMESTEP 321412 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.0741825 / Loss  0.238765150308609
fps: 5.979715492242901
TIMESTEP 321413 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.136284 / Loss  0.011413326486945152
fps: 7.198717574650817
TIMESTEP 321414 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.440858 / Loss  0.03403973579406738
fps: 6.820561021221238
TIMESTEP 321415 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.147629 / Loss  0.009009618312120438
fps: 6.142377428618709
TIMESTEP 321416 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.179813 / Loss  0.014798934571444988
fps: 6.0792388892351354
TIMESTEP 321417 / STATE train /

TIMESTEP 321471 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.838995 / Loss  0.008477140218019485
fps: 7.012420480668757
TIMESTEP 321472 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.729172 / Loss  0.010047715157270432
fps: 6.932768261660802
TIMESTEP 321473 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.164932 / Loss  0.31792786717414856
fps: 6.868038100602752
TIMESTEP 321474 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.891316 / Loss  0.005394986364990473
fps: 7.100745744347664
TIMESTEP 321475 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.990203 / Loss  0.004707117099314928
fps: 6.880273222519861
TIMESTEP 321476 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.778492 / Loss  0.02689259499311447
fps: 6.780656644610383
TIMESTEP 321477 / STATE train / EPSILON 0 / ACTION 0 / REWARD -1 / Q_MAX  8.815089 / Loss  0.012895844876766205
fps: 6.877915194925732
TIMESTEP 321478 / STATE train / EPSILON 0 / ACTION 

TIMESTEP 321532 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.107669 / Loss  0.016260165721178055
fps: 5.917404995428946
TIMESTEP 321533 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.458422 / Loss  0.016720285639166832
fps: 6.017980894223788
TIMESTEP 321534 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.155754 / Loss  0.03534160554409027
fps: 6.011020839274889
TIMESTEP 321535 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.279812 / Loss  0.04099089279770851
fps: 5.738586253839471
TIMESTEP 321536 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.693209 / Loss  0.0859622210264206
fps: 5.727685631023944
TIMESTEP 321537 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.373622 / Loss  0.7252106070518494
fps: 5.920386872204288
TIMESTEP 321538 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.589299 / Loss  0.04347376152873039
fps: 6.760775514677912
TIMESTEP 321539 / STATE train / EPSILON 0 / ACTION 0 / 

TIMESTEP 321593 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.812001 / Loss  0.00988334883004427
fps: 5.938408695442877
TIMESTEP 321594 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.986782 / Loss  0.030329443514347076
fps: 5.66653111570153
TIMESTEP 321595 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.749249 / Loss  0.017695197835564613
fps: 6.049279158181367
TIMESTEP 321596 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.44851 / Loss  0.5461704134941101
fps: 6.802375631290606
TIMESTEP 321597 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.842932 / Loss  0.0472625270485878
fps: 7.064614472483013
TIMESTEP 321598 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.018071 / Loss  0.013036565855145454
fps: 5.385563780407162
TIMESTEP 321599 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.806241 / Loss  0.012791695073246956
fps: 6.63075505135538
TIMESTEP 321600 / STATE train / EPSILON 0 / ACTION 0 / 

TIMESTEP 321654 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.463394 / Loss  1.5030335187911987
fps: 5.42968843044962
TIMESTEP 321655 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.295408 / Loss  0.01217880379408598
fps: 6.6316671568115915
TIMESTEP 321656 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.223317 / Loss  0.01488913781940937
fps: 3.5312069049707775
TIMESTEP 321657 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.112934 / Loss  0.02074430137872696
fps: 6.038314749897425
TIMESTEP 321658 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.115793 / Loss  0.01960676908493042
fps: 5.488568927320336
TIMESTEP 321659 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.630449 / Loss  0.03442363813519478
fps: 6.640118671249294
TIMESTEP 321660 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.040299 / Loss  0.00956552941352129
fps: 5.519111532185905
TIMESTEP 321661 / STATE train / EPSILON 0 / ACTION 1 / 

TIMESTEP 321715 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.951093 / Loss  0.013533160090446472
fps: 5.858023751628857
TIMESTEP 321716 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.057567 / Loss  0.015120148658752441
fps: 6.003294845756276
TIMESTEP 321717 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.030573 / Loss  2.3835902214050293
fps: 5.741508480910252
TIMESTEP 321718 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.06735 / Loss  0.023616986349225044
fps: 5.712730675303697
TIMESTEP 321719 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.143124 / Loss  0.007221762556582689
fps: 6.042568640275685
TIMESTEP 321720 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.756121 / Loss  0.011060813441872597
fps: 6.001645539372033
TIMESTEP 321721 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.768738 / Loss  0.00842374563217163
fps: 6.0114860652272055
TIMESTEP 321722 / STATE train / EPSILON 0 / ACTION 

fps: 5.648125978823082
TIMESTEP 321777 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.92784 / Loss  0.011223167181015015
fps: 5.582854929500601
TIMESTEP 321778 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.531993 / Loss  0.028588291257619858
fps: 4.959476892333706
TIMESTEP 321779 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.815865 / Loss  0.3378020226955414
fps: 5.456168444714444
TIMESTEP 321780 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.060706 / Loss  0.029225816950201988
fps: 5.743293130456691
TIMESTEP 321781 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.328923 / Loss  0.011781834065914154
fps: 5.445959842527864
TIMESTEP 321782 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.8052545 / Loss  0.10400828719139099
fps: 5.191439325582633
TIMESTEP 321783 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.192347 / Loss  0.039712484925985336
fps: 5.749591498788204
TIMESTEP 321784 / STATE trai

fps: 5.445549747736378
TIMESTEP 321838 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.86887 / Loss  0.014814765192568302
fps: 5.475771402460916
TIMESTEP 321839 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.747173 / Loss  0.02235698699951172
fps: 4.225475331520627
TIMESTEP 321840 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.7622595 / Loss  0.009622083976864815
fps: 4.680260084872384
TIMESTEP 321841 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.922696 / Loss  0.010282028466463089
fps: 6.285117459042556
TIMESTEP 321842 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.714007 / Loss  0.017718622460961342
fps: 6.285079786556539
TIMESTEP 321843 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.9747505 / Loss  0.11531738936901093
fps: 6.344345637406237
TIMESTEP 321844 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.660619 / Loss  0.015525034628808498
fps: 5.317282069355456
TIMESTEP 321845 / STATE tr

fps: 5.43091174177977
TIMESTEP 321899 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.192845 / Loss  0.03209267929196358
fps: 6.662204340443512
TIMESTEP 321900 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.405071 / Loss  0.022199392318725586
fps: 5.984032323416786
TIMESTEP 321901 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.319756 / Loss  0.025839805603027344
fps: 5.047236495349033
TIMESTEP 321902 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.960024 / Loss  0.033628832548856735
fps: 5.475685618685435
TIMESTEP 321903 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.485857 / Loss  0.023242663592100143
fps: 5.895761937560619
TIMESTEP 321904 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.460728 / Loss  0.024310510605573654
fps: 5.475799997650042
TIMESTEP 321905 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.7611685 / Loss  0.02624307945370674
fps: 4.874790651401023
TIMESTEP 321906 / ST

fps: 4.827697974217311
TIMESTEP 321961 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.685549 / Loss  0.07328654825687408
fps: 4.225228447003619
TIMESTEP 321962 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.446799 / Loss  0.018023721873760223
fps: 6.03767153359273
TIMESTEP 321963 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.712197 / Loss  0.11702088266611099
fps: 3.325576859341625
TIMESTEP 321964 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  4.518715 / Loss  0.01751808449625969
fps: 5.930708777208725
TIMESTEP 321965 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.603803 / Loss  0.024708867073059082
fps: 5.059931839430588
TIMESTEP 321966 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.914771 / Loss  0.011135231703519821
fps: 5.508550505176533
TIMESTEP 321967 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.860208 / Loss  0.00735400291159749
fps: 5.44552146761357
TIMESTEP 321968 / STATE train /

TIMESTEP 322022 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.034988 / Loss  0.020700521767139435
fps: 5.259638523244688
TIMESTEP 322023 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.811502 / Loss  0.009328252635896206
fps: 5.098645686826779
TIMESTEP 322024 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.033963 / Loss  0.0057911342009902
fps: 6.217993515609828
TIMESTEP 322025 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.965794 / Loss  0.026557374745607376
fps: 5.930759093454297
TIMESTEP 322026 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.977094 / Loss  2.2573728561401367
fps: 6.187840609002257
TIMESTEP 322027 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.982518 / Loss  0.18540990352630615
fps: 5.287226613218369
TIMESTEP 322028 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.930535 / Loss  0.048291340470314026
fps: 5.54879917025403
TIMESTEP 322029 / STATE train / EPSILON 0 / ACT

fps: 6.385501668574769
TIMESTEP 322083 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.174108 / Loss  0.05236915126442909
fps: 5.797858237849778
TIMESTEP 322084 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.991291 / Loss  0.04017665982246399
fps: 5.58288465403665
TIMESTEP 322085 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.095093 / Loss  0.026206284761428833
fps: 5.735643469580319
TIMESTEP 322086 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.211075 / Loss  0.09005731344223022
fps: 7.012654968609191
TIMESTEP 322087 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.152605 / Loss  0.009567479602992535
fps: 6.688221355470581
TIMESTEP 322088 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.116445 / Loss  3.247006416320801
fps: 7.030097834303519
TIMESTEP 322089 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.113322 / Loss  0.016747107729315758
fps: 6.89681345586869
TIMESTEP 322090 / STATE tr

fps: 4.02145001404625
TIMESTEP 322145 / STATE train / EPSILON 0 / ACTION 1 / REWARD -1 / Q_MAX  9.997985 / Loss  0.1722695380449295
fps: 5.021777417528013
TIMESTEP 322146 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.34455 / Loss  0.006193498149514198
fps: 5.034423405851463
TIMESTEP 322147 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.110123 / Loss  0.46721887588500977
fps: 6.002014836523491
TIMESTEP 322148 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.972356 / Loss  0.03227897360920906
fps: 4.702740824209399
TIMESTEP 322149 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.8823395 / Loss  0.04609019309282303
fps: 4.584094105920529
TIMESTEP 322150 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.057172 / Loss  0.16240257024765015
fps: 5.82701655869731
TIMESTEP 322151 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.097004 / Loss  0.0256363395601511
fps: 4.874807648500007
TIMESTEP 322152 / STATE train / 

fps: 5.460600987107198
TIMESTEP 322207 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.50576 / Loss  0.2406669706106186
fps: 6.636577963854316
TIMESTEP 322208 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.41697 / Loss  0.009171595796942711
fps: 5.4977631728995915
TIMESTEP 322209 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.559265 / Loss  0.01014508493244648
fps: 5.4712642478665705
TIMESTEP 322210 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.349636 / Loss  0.04827552288770676
fps: 5.4176674976426975
TIMESTEP 322211 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.545901 / Loss  0.041506409645080566
fps: 5.002163392160744
TIMESTEP 322212 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.729902 / Loss  0.03543468937277794
fps: 6.67023531787872
TIMESTEP 322213 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.696737 / Loss  0.11271113902330399
fps: 6.709673291068975
TIMESTEP 322214 / STATE 

fps: 6.6815410716134735
TIMESTEP 322268 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.407912 / Loss  0.03420162945985794
fps: 6.017799573303797
TIMESTEP 322269 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.518921 / Loss  0.024486560374498367
fps: 5.445988127204076
TIMESTEP 322270 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.831728 / Loss  0.11719608306884766
fps: 2.8229700688595933
TIMESTEP 322271 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.372847 / Loss  0.07312405109405518
fps: 5.5518692850600155
TIMESTEP 322272 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.827742 / Loss  0.01122037973254919
fps: 6.020079831323676
TIMESTEP 322273 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.289608 / Loss  0.013335593044757843
fps: 6.053225266414972
TIMESTEP 322274 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.292533 / Loss  0.0806739404797554
fps: 6.638752372224359
TIMESTEP 322275 / STAT

TIMESTEP 322329 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.090647 / Loss  0.015033060684800148
fps: 6.639225258252526
TIMESTEP 322330 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.230861 / Loss  0.06061798706650734
fps: 6.684501336325257
TIMESTEP 322331 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.335688 / Loss  0.023485887795686722
fps: 5.6998668224934095
TIMESTEP 322332 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.082772 / Loss  0.011626409366726875
fps: 6.583854870584985
TIMESTEP 322333 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.96805 / Loss  0.004771010484546423
fps: 6.385559997685897
TIMESTEP 322334 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.068815 / Loss  0.008738002739846706
fps: 5.629824902854305
TIMESTEP 322335 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.874554 / Loss  0.019465574994683266
fps: 7.012854295688928
TIMESTEP 322336 / STATE train / EPSILON 0 / 

fps: 5.804429535402517
TIMESTEP 322390 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.249034 / Loss  1.6877284049987793
fps: 6.001851653108387
TIMESTEP 322391 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.86333 / Loss  0.11364027112722397
fps: 6.740403977096501
TIMESTEP 322392 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.310443 / Loss  0.021716080605983734
fps: 6.798185985146862
TIMESTEP 322393 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.977573 / Loss  0.047968171536922455
fps: 6.55301637197937
TIMESTEP 322394 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.408756 / Loss  0.11457287520170212
fps: 6.704868110384119
TIMESTEP 322395 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.052261 / Loss  0.10230400413274765
fps: 5.509288533655583
TIMESTEP 322396 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.023813 / Loss  0.014158891513943672
fps: 5.881236793065299
TIMESTEP 322397 / STATE train / 

TIMESTEP 322451 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.022798 / Loss  0.028864368796348572
fps: 6.438224996124144
TIMESTEP 322452 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.8399935 / Loss  0.10915786027908325
fps: 6.757377154825197
TIMESTEP 322453 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.280202 / Loss  0.2292119860649109
fps: 6.864789488500593
TIMESTEP 322454 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.092602 / Loss  0.0048414552584290504
fps: 7.095520368114765
TIMESTEP 322455 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.953345 / Loss  0.6815938949584961
fps: 5.856894897721227
TIMESTEP 322456 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.063583 / Loss  0.007237286772578955
fps: 6.147229696852448
TIMESTEP 322457 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.089776 / Loss  0.0102021973580122
fps: 6.797458835731881
TIMESTEP 322458 / STATE train / EPSILON 0 / ACTI

TIMESTEP 322513 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.019316 / Loss  0.019283156841993332
fps: 5.843040612736459
TIMESTEP 322514 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.164261 / Loss  0.01676972769200802
fps: 6.018689013890484
TIMESTEP 322515 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.156956 / Loss  0.012449253350496292
fps: 7.049475529470689
TIMESTEP 322516 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.787965 / Loss  0.02318413369357586
fps: 7.037576322973488
TIMESTEP 322517 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.053903 / Loss  0.020729534327983856
fps: 7.087499091737088
TIMESTEP 322518 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.143825 / Loss  0.008708486333489418
fps: 6.971914893617021
TIMESTEP 322519 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.120323 / Loss  0.01637258194386959
fps: 6.08208626249971
TIMESTEP 322520 / STATE train / EPSILON 0 / AC

fps: 6.988293706992786
TIMESTEP 322575 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.081307 / Loss  0.017024673521518707
fps: 7.088349517844649
TIMESTEP 322576 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.234197 / Loss  1.16123366355896
fps: 6.700508652199473
TIMESTEP 322577 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.059636 / Loss  0.42153480648994446
fps: 6.683020876154785
TIMESTEP 322578 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.298091 / Loss  0.3139808475971222
fps: 3.512608546727958
TIMESTEP 322579 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.179791 / Loss  0.022825658321380615
fps: 5.694202489302083
TIMESTEP 322580 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.5072155 / Loss  0.006788273341953754
fps: 5.820886781271815
TIMESTEP 322581 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.207648 / Loss  0.01686280034482479
fps: 7.058289776150296
TIMESTEP 322582 / STATE t

TIMESTEP 322636 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.43671 / Loss  0.012796180322766304
fps: 5.927724874006111
TIMESTEP 322637 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.401207 / Loss  4.943358421325684
fps: 5.928537606169529
TIMESTEP 322638 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.595468 / Loss  0.012538004666566849
fps: 6.783880959120133
TIMESTEP 322639 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.514201 / Loss  0.06444071978330612
fps: 5.536508741083356
TIMESTEP 322640 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.459137 / Loss  2.211599349975586
fps: 6.0645508306704645
TIMESTEP 322641 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.961955 / Loss  0.0283534936606884
fps: 5.94174845304744
TIMESTEP 322642 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.986747 / Loss  0.034354496747255325
fps: 5.710482891576763
TIMESTEP 322643 / STATE train / EPSILON 0 / ACTION 1 

fps: 6.48918470247682
TIMESTEP 322698 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.796148 / Loss  0.024144170805811882
fps: 6.6400766228934645
TIMESTEP 322699 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.7328205 / Loss  0.015900861471891403
fps: 5.793229567362476
TIMESTEP 322700 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.041321 / Loss  0.01723875105381012
fps: 6.662236087196853
TIMESTEP 322701 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.701301 / Loss  0.06637588143348694
fps: 5.582892085220118
TIMESTEP 322702 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.8743515 / Loss  0.010925337672233582
fps: 6.531566169749237
TIMESTEP 322703 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.901607 / Loss  0.12353770434856415
fps: 6.510326672399395
TIMESTEP 322704 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.829862 / Loss  0.004107932560145855
fps: 6.618095407904848
TIMESTEP 322705 / STATE t

fps: 5.7589463841683015
TIMESTEP 322760 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.742913 / Loss  0.15218980610370636
fps: 7.037139444049233
TIMESTEP 322761 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.410511 / Loss  0.011880068108439445
fps: 5.866783042673068
TIMESTEP 322762 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.415336 / Loss  0.032198499888181686
fps: 5.886585480527537
TIMESTEP 322763 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.51179 / Loss  0.015779629349708557
fps: 6.718002107832616
TIMESTEP 322764 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.531736 / Loss  0.021957136690616608
fps: 5.942977621224272
TIMESTEP 322765 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.581984 / Loss  0.012709380127489567
fps: 5.92368965166597
TIMESTEP 322766 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.804766 / Loss  0.018873989582061768
fps: 6.090343715014644
TIMESTEP 322767 / ST

TIMESTEP 322821 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.11404 / Loss  0.015175078064203262
fps: 6.892314164207261
TIMESTEP 322822 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.163869 / Loss  0.015958355739712715
fps: 7.012912923457112
TIMESTEP 322823 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.872324 / Loss  0.7864671349525452
fps: 5.8717273162523345
TIMESTEP 322824 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.772546 / Loss  0.011617699638009071
fps: 7.010170194061008
TIMESTEP 322825 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.860297 / Loss  0.018012525513768196
fps: 6.9552089810876465
TIMESTEP 322826 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  6.0259504 / Loss  0.4212337136268616
fps: 6.077098504452449
TIMESTEP 322827 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.867697 / Loss  0.16703428328037262
fps: 6.988363568505658
TIMESTEP 322828 / STATE train / EPSILON 0 / ACTIO

fps: 5.437043624121924
TIMESTEP 322882 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.290745 / Loss  0.012833972461521626
fps: 5.446002269652359
TIMESTEP 322883 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.501853 / Loss  0.00629464304074645
fps: 3.506353865278494
TIMESTEP 322884 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.067085 / Loss  0.008613417856395245
fps: 6.074986023040981
TIMESTEP 322885 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.527714 / Loss  0.007188880350440741
fps: 5.452841668443414
TIMESTEP 322886 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.067919 / Loss  0.024402949959039688
fps: 6.645621084453796
TIMESTEP 322887 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.403746 / Loss  0.045760199427604675
fps: 6.722061154463179
TIMESTEP 322888 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.74563 / Loss  0.013035456649959087
fps: 6.639698211653931
TIMESTEP 322889 / ST

TIMESTEP 322942 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.958127 / Loss  0.7221871614456177
fps: 5.590519464767219
TIMESTEP 322943 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.461896 / Loss  0.043054528534412384
fps: 5.475678470158801
TIMESTEP 322944 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.625738 / Loss  0.03812151402235031
fps: 5.464108075715533
TIMESTEP 322945 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.5512 / Loss  0.02225576713681221
fps: 5.429737633484321
TIMESTEP 322946 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.929152 / Loss  0.018433576449751854
fps: 5.434437115266759
TIMESTEP 322947 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.764835 / Loss  0.23969464004039764
fps: 6.68003000539906
TIMESTEP 322948 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.538176 / Loss  0.019836386665701866
fps: 6.665200450035278
TIMESTEP 322949 / STATE train / EPSILON 0 / ACTION

fps: 5.887320859593223
TIMESTEP 323003 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.048822 / Loss  0.011889099143445492
fps: 6.083277131639596
TIMESTEP 323004 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.034643 / Loss  0.13837097585201263
fps: 5.881533687358985
TIMESTEP 323005 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.907616 / Loss  0.003861824283376336
fps: 6.805675853284547
TIMESTEP 323006 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.954194 / Loss  5.630581855773926
fps: 6.100415391596756
TIMESTEP 323007 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.998315 / Loss  0.016751952469348907
fps: 6.964251438742607
TIMESTEP 323008 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.852952 / Loss  0.13222354650497437
fps: 6.964008613890812
TIMESTEP 323009 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.761769 / Loss  0.014826243743300438
fps: 6.044048902235876
TIMESTEP 323010 / STATE trai

TIMESTEP 323064 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.445468 / Loss  0.04880838096141815
fps: 5.982265593911482
TIMESTEP 323065 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.301642 / Loss  0.01678834669291973
fps: 6.96405486493801
TIMESTEP 323066 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.833895 / Loss  0.134463369846344
fps: 6.832148843391589
TIMESTEP 323067 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.769765 / Loss  0.05973371863365173
fps: 5.875255815647443
TIMESTEP 323068 / STATE train / EPSILON 0 / ACTION 1 / REWARD -1 / Q_MAX  10.374562 / Loss  0.03560136631131172
fps: 4.69173066898738
TIMESTEP 323069 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.96519 / Loss  0.0267794169485569
fps: 5.46250691880417
TIMESTEP 323070 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.206115 / Loss  0.03846834972500801
fps: 5.896018859145406
TIMESTEP 323071 / STATE train / EPSILON 0 / ACTION 1 / REWA

TIMESTEP 323125 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.967251 / Loss  0.019265063107013702
fps: 6.634835264266868
TIMESTEP 323126 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.90158 / Loss  0.42423126101493835
fps: 6.721964196996651
TIMESTEP 323127 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.050703 / Loss  0.08229684829711914
fps: 6.680774811768398
TIMESTEP 323128 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.087397 / Loss  0.06398393213748932
fps: 5.404264859362719
TIMESTEP 323129 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.882424 / Loss  0.038523897528648376
fps: 6.660902984008004
TIMESTEP 323130 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.881071 / Loss  0.084603451192379
fps: 6.747311084156712
TIMESTEP 323131 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.115886 / Loss  0.011203834787011147
fps: 5.953961638484164
TIMESTEP 323132 / STATE train / EPSILON 0 / ACTI

TIMESTEP 323186 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.244456 / Loss  0.023274114355444908
fps: 5.7494811585859
TIMESTEP 323187 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.327908 / Loss  0.014611605554819107
fps: 5.881286273366042
TIMESTEP 323188 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.306619 / Loss  0.02115738019347191
fps: 5.886139385017395
TIMESTEP 323189 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.071404 / Loss  0.021953629329800606
fps: 6.7730795254343485
TIMESTEP 323190 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.278191 / Loss  0.0261991024017334
fps: 5.984015248574013
TIMESTEP 323191 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.045427 / Loss  0.23768088221549988
fps: 3.1876867234186794
TIMESTEP 323192 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.474752 / Loss  0.054387740790843964
fps: 6.66218317610937
TIMESTEP 323193 / STATE train / EPSILON 0 / AC

TIMESTEP 323248 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.879255 / Loss  0.531683087348938
fps: 6.964702444111235
TIMESTEP 323249 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.025227 / Loss  0.011969461105763912
fps: 7.152547633299057
TIMESTEP 323250 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.260573 / Loss  0.015007914043962955
fps: 5.895190160215551
TIMESTEP 323251 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.919292 / Loss  0.012845948338508606
fps: 5.981472185460685
TIMESTEP 323252 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.093204 / Loss  0.04381536692380905
fps: 5.802133104621727
TIMESTEP 323253 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.040559 / Loss  0.048860132694244385
fps: 5.78880002208252
TIMESTEP 323254 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.08288 / Loss  0.025923550128936768
fps: 6.911781475658912
TIMESTEP 323255 / STATE train / EPSILON 0 / ACTIO

TIMESTEP 323309 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.649322 / Loss  0.1515345722436905
fps: 5.74326953754558
TIMESTEP 323310 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.746974 / Loss  0.3751031756401062
fps: 5.387106303510741
TIMESTEP 323311 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.790545 / Loss  0.046754561364650726
fps: 6.426564886899737
TIMESTEP 323312 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.794256 / Loss  0.13326919078826904
fps: 6.265410687770936
TIMESTEP 323313 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.986693 / Loss  0.02330399677157402
fps: 6.168692843717184
TIMESTEP 323314 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.882304 / Loss  0.11404956877231598
fps: 6.506943183951324
TIMESTEP 323315 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.80013 / Loss  0.7848001718521118
fps: 5.431178975065522
TIMESTEP 323316 / STATE train / EPSILON 0 / ACTION 1

fps: 4.354393268517028
TIMESTEP 323370 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.381751 / Loss  0.014928176999092102
fps: 5.047151466142975
TIMESTEP 323371 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.580357 / Loss  0.009927552193403244
fps: 5.431143811288553
TIMESTEP 323372 / STATE train / EPSILON 0 / ACTION 0 / REWARD -1 / Q_MAX  9.933996 / Loss  0.027714747935533524
fps: 4.9472158299432305
TIMESTEP 323373 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.483877 / Loss  0.014674143865704536
fps: 4.542411722341653
TIMESTEP 323374 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.896683 / Loss  0.012696728110313416
fps: 5.1645149414078135
TIMESTEP 323375 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.389031 / Loss  0.011222300119698048
fps: 4.922805349671074
TIMESTEP 323376 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.256799 / Loss  0.018853962421417236
fps: 4.713845157925968
TIMESTEP 323377 / STATE 

fps: 3.0985876369847327
TIMESTEP 323431 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.937256 / Loss  0.01833769679069519
fps: 5.4017940280630805
TIMESTEP 323432 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.029164 / Loss  0.005391478072851896
fps: 5.218464111040469
TIMESTEP 323433 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.025368 / Loss  0.02481492981314659
fps: 5.678037712758685
TIMESTEP 323434 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.854076 / Loss  0.24646086990833282
fps: 6.552995895690866
TIMESTEP 323435 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.055225 / Loss  0.013590162619948387
fps: 6.056555522343662
TIMESTEP 323436 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.55995 / Loss  0.0959235280752182
fps: 4.89869751275391
TIMESTEP 323437 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.118809 / Loss  0.029290683567523956
fps: 5.273492624686304
TIMESTEP 323438 / STATE tr

TIMESTEP 323492 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.924111 / Loss  0.18710321187973022
fps: 6.453430656035843
TIMESTEP 323493 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.226772 / Loss  0.03311725705862045
fps: 5.315610845392077
TIMESTEP 323494 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.2393875 / Loss  0.016389794647693634
fps: 5.603380487915664
TIMESTEP 323495 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.0748625 / Loss  0.015240289270877838
fps: 6.245817809267681
TIMESTEP 323496 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.036218 / Loss  0.02398911491036415
fps: 6.574525481885313
TIMESTEP 323497 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.275015 / Loss  0.09417427331209183
fps: 6.348071273654084
TIMESTEP 323498 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.03587 / Loss  0.006606367416679859
fps: 5.475799997650042
TIMESTEP 323499 / STATE train / EPSILON 0 / A

fps: 5.431164909500095
TIMESTEP 323553 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.333713 / Loss  0.9692913889884949
fps: 6.752275145733287
TIMESTEP 323554 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.517201 / Loss  0.015577848069369793
fps: 6.38552111149341
TIMESTEP 323555 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.42686 / Loss  0.023215148597955704
fps: 6.82139296605001
TIMESTEP 323556 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.279368 / Loss  0.014919068664312363
fps: 6.468169628330812
TIMESTEP 323557 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.386311 / Loss  0.009802816435694695
fps: 6.285145713703442
TIMESTEP 323558 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.765444 / Loss  0.021449469029903412
fps: 6.868274278752469
TIMESTEP 323559 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.218666 / Loss  0.040953584015369415
fps: 5.75242238427072
TIMESTEP 323560 / STATE 

fps: 5.614218443392059
TIMESTEP 323614 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.754351 / Loss  0.3601929545402527
fps: 5.677830181016791
TIMESTEP 323615 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.748677 / Loss  0.011448302306234837
fps: 6.226347088495602
TIMESTEP 323616 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.425995 / Loss  0.0821990817785263
fps: 6.531627197879633
TIMESTEP 323617 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.80371 / Loss  0.3414061963558197
fps: 6.706937355285208
TIMESTEP 323618 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.7444725 / Loss  0.06498248875141144
fps: 6.510316567197771
TIMESTEP 323619 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.9643955 / Loss  0.0070771449245512486
fps: 5.567315299052271
TIMESTEP 323620 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.611403 / Loss  0.010455023497343063
fps: 5.661482974263311
TIMESTEP 323621 / STATE 

TIMESTEP 323675 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.281566 / Loss  0.019467070698738098
fps: 6.6620244478894
TIMESTEP 323676 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.274038 / Loss  0.15426768362522125
fps: 5.810057847026751
TIMESTEP 323677 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.161151 / Loss  0.042096491903066635
fps: 6.706958804923836
TIMESTEP 323678 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.048298 / Loss  0.027445457875728607
fps: 6.863958158091978
TIMESTEP 323679 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.137932 / Loss  0.2692607045173645
fps: 5.475785700018147
TIMESTEP 323680 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.355693 / Loss  0.035579320043325424
fps: 5.630051611777418
TIMESTEP 323681 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.264666 / Loss  0.07847879081964493
fps: 6.038045277738589
TIMESTEP 323682 / STATE train / EPSILON 0 / ACT

fps: 5.842129088601988
TIMESTEP 323736 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.097525 / Loss  0.08519035577774048
fps: 5.645936337730132
TIMESTEP 323737 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.172024 / Loss  0.5176764130592346
fps: 6.510326672399395
TIMESTEP 323738 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.020412 / Loss  0.008302183821797371
fps: 6.396944818529969
TIMESTEP 323739 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.199398 / Loss  0.007969139143824577
fps: 3.07014786733882
TIMESTEP 323740 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.000113 / Loss  0.04751845821738243
fps: 6.101995152509725
TIMESTEP 323741 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.178756 / Loss  0.16185298562049866
fps: 6.207043314445203
TIMESTEP 323742 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.887258 / Loss  0.02119820937514305
fps: 5.678022339516794
TIMESTEP 323743 / STATE t

TIMESTEP 323797 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.6006155 / Loss  1.5347483158111572
fps: 5.793229567362476
TIMESTEP 323798 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.456812 / Loss  0.02404697611927986
fps: 6.265410687770936
TIMESTEP 323799 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.328068 / Loss  0.033451102674007416
fps: 6.639466981572619
TIMESTEP 323800 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.495866 / Loss  0.10260175168514252
fps: 6.493193013744013
TIMESTEP 323801 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.227198 / Loss  0.04452196881175041
fps: 6.706937355285208
TIMESTEP 323802 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.020529 / Loss  0.024344665929675102
fps: 6.752242535078851
TIMESTEP 323803 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.370277 / Loss  0.018621455878019333
fps: 5.82701655869731
TIMESTEP 323804 / STATE train / EPSILON 0 / AC

TIMESTEP 323859 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  3.923215 / Loss  0.37127041816711426
fps: 6.574309073581783
TIMESTEP 323860 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.60926 / Loss  0.06692879647016525
fps: 6.618105850461214
TIMESTEP 323861 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.508927 / Loss  0.05618276819586754
fps: 6.640108159110411
TIMESTEP 323862 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.485566 / Loss  0.013719246722757816
fps: 6.662236087196853
TIMESTEP 323863 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.720512 / Loss  2.73079514503479
fps: 6.706905181084508
TIMESTEP 323864 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.512281 / Loss  0.006159675307571888
fps: 6.385511390019289
TIMESTEP 323865 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.43615 / Loss  1.7002837657928467
fps: 6.38552111149341
TIMESTEP 323866 / STATE train / EPSILON 0 / ACTION 0 / 

fps: 6.640055598915251
TIMESTEP 323920 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.099639 / Loss  0.06050656735897064
fps: 6.640087134932496
TIMESTEP 323921 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.876524 / Loss  0.022304479032754898
fps: 6.553006133819123
TIMESTEP 323922 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.233365 / Loss  0.04011055827140808
fps: 6.771406522939423
TIMESTEP 323923 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.91773 / Loss  0.009583353996276855
fps: 6.661484828511757
TIMESTEP 323924 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.973678 / Loss  0.01190030574798584
fps: 5.895745362743389
TIMESTEP 323925 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.936086 / Loss  0.008621500805020332
fps: 6.729567679879345
TIMESTEP 323926 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.175238 / Loss  0.0271805040538311
fps: 6.684480030089136
TIMESTEP 323927 / STATE trai

fps: 3.157312970617438
TIMESTEP 323982 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.458223 / Loss  0.010394640266895294
fps: 5.445973984829244
TIMESTEP 323983 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.815989 / Loss  0.008071649819612503
fps: 5.567322688845853
TIMESTEP 323984 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.699302 / Loss  0.024701528251171112
fps: 5.666064167510976
TIMESTEP 323985 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.830197 / Loss  0.012814812362194061
fps: 5.614233473076656
TIMESTEP 323986 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.678348 / Loss  0.05883587896823883
fps: 5.776498489184656
TIMESTEP 323987 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.619203 / Loss  0.025088869035243988
fps: 5.660130683673717
TIMESTEP 323988 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.991225 / Loss  0.017049072310328484
fps: 5.810098088513767
TIMESTEP 323989 / STATE tr

TIMESTEP 324043 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.439044 / Loss  0.01052150595933199
fps: 5.614218443392059
TIMESTEP 324044 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.602704 / Loss  0.009255513548851013
fps: 6.53159668367188
TIMESTEP 324045 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.560962 / Loss  0.016753770411014557
fps: 5.699262709630431
TIMESTEP 324046 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.495228 / Loss  0.046722762286663055
fps: 6.510346882896755
TIMESTEP 324047 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.193554 / Loss  0.01840957999229431
fps: 5.726825378620309
TIMESTEP 324048 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.624472 / Loss  0.015103339217603207
fps: 5.810057847026751
TIMESTEP 324049 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.537208 / Loss  0.020049743354320526
fps: 6.499521946306125
TIMESTEP 324050 / STATE train / EPSILON 0 / ACTION 

TIMESTEP 324105 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.433365 / Loss  0.010656566359102726
fps: 6.639866389102163
TIMESTEP 324106 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.655634 / Loss  0.008042491041123867
fps: 6.706958804923836
TIMESTEP 324107 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.962318 / Loss  0.0125475088134408
fps: 5.6923632316816635
TIMESTEP 324108 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.468383 / Loss  0.36557379364967346
fps: 6.074959626317123
TIMESTEP 324109 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.62539 / Loss  0.024465832859277725
fps: 4.9594827565820125
TIMESTEP 324110 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.850544 / Loss  0.026150789111852646
fps: 6.304958067701984
TIMESTEP 324111 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  2.265919 / Loss  0.01785273477435112
fps: 6.775114485320842
TIMESTEP 324112 / STATE train / EPSILON 0 / ACTION 

TIMESTEP 324167 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.646849 / Loss  0.22195222973823547
fps: 6.38553083299713
TIMESTEP 324168 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.527929 / Loss  0.060264669358730316
fps: 6.5962488716105065
TIMESTEP 324169 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.367136 / Loss  0.07460756599903107
fps: 5.614233473076656
TIMESTEP 324170 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.581506 / Loss  0.022174809128046036
fps: 5.521196620124711
TIMESTEP 324171 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.818487 / Loss  0.046444687992334366
fps: 5.895778512471043
TIMESTEP 324172 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.594917 / Loss  0.019537299871444702
fps: 5.551854587426768
TIMESTEP 324173 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.498761 / Loss  0.03430020809173584
fps: 5.389425208192257
TIMESTEP 324174 / STATE train / EPSILON 0 / A

fps: 5.4310172254615185
TIMESTEP 324228 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.559002 / Loss  0.5140408277511597
fps: 5.009308469345589
TIMESTEP 324229 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.957687 / Loss  0.06813749670982361
fps: 6.706937355285208
TIMESTEP 324230 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.193483 / Loss  0.10122691839933395
fps: 6.7042357980882965
TIMESTEP 324231 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.176565 / Loss  0.04588682949542999
fps: 5.963112137906522
TIMESTEP 324232 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.462037 / Loss  0.02142065390944481
fps: 5.4484004764731635
TIMESTEP 324233 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.560398 / Loss  0.08498496562242508
fps: 5.490494435942984
TIMESTEP 324234 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.108807 / Loss  0.43007510900497437
fps: 5.417016775520062
TIMESTEP 324235 / STATE 

fps: 6.07499482199991
TIMESTEP 324290 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.702996 / Loss  0.07855391502380371
fps: 5.475799997650042
TIMESTEP 324291 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.5364685 / Loss  2.5545966625213623
fps: 6.008885170418486
TIMESTEP 324292 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.446338 / Loss  0.019997159019112587
fps: 6.596031030865741
TIMESTEP 324293 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.480476 / Loss  0.1494004726409912
fps: 4.615871548526968
TIMESTEP 324294 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.799045 / Loss  0.5739860534667969
fps: 5.954299731976559
TIMESTEP 324295 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.558231 / Loss  0.05287551507353783
fps: 6.682169110032723
TIMESTEP 324296 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.419773 / Loss  0.05792481452226639
fps: 5.490674123179072
TIMESTEP 324297 / STATE train / EP

fps: 5.952930690427461
TIMESTEP 324352 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.9704275 / Loss  0.28533515334129333
fps: 6.775158261330284
TIMESTEP 324353 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.926536 / Loss  0.00998962763696909
fps: 5.4906956864377605
TIMESTEP 324354 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.031427 / Loss  0.024073738604784012
fps: 6.6175942039503886
TIMESTEP 324355 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.819528 / Loss  0.5818974375724792
fps: 5.519031647260224
TIMESTEP 324356 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.988225 / Loss  0.020762000232934952
fps: 5.416471129098702
TIMESTEP 324357 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.818506 / Loss  0.042568787932395935
fps: 3.3366724156584717
TIMESTEP 324358 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.847694 / Loss  0.0603262335062027
fps: 4.96998437068609
TIMESTEP 324359 / STATE tra

fps: 5.694202489302083
TIMESTEP 324413 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.718743 / Loss  0.14876630902290344
fps: 5.8270003681552645
TIMESTEP 324414 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.782956 / Loss  0.07377428561449051
fps: 6.510164992937743
TIMESTEP 324415 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.872124 / Loss  0.28978028893470764
fps: 6.662236087196853
TIMESTEP 324416 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.855379 / Loss  0.015350589528679848
fps: 6.38552111149341
TIMESTEP 324417 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.936622 / Loss  0.08445937186479568
fps: 5.759784676018429
TIMESTEP 324418 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.6990595 / Loss  0.02978823520243168
fps: 5.948422378288839
TIMESTEP 324419 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.794569 / Loss  0.04338931292295456
fps: 5.726825378620309
TIMESTEP 324420 / STATE train 

TIMESTEP 324474 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.560336 / Loss  0.040135644376277924
fps: 6.662214922661008
TIMESTEP 324475 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.519286 / Loss  0.009266531094908714
fps: 6.700283870802841
TIMESTEP 324476 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.643928 / Loss  0.005588204599916935
fps: 6.752264275480142
TIMESTEP 324477 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.708004 / Loss  0.01869356818497181
fps: 6.591304320495836
TIMESTEP 324478 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.648432 / Loss  0.014276166446506977
fps: 6.665338145250615
TIMESTEP 324479 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.709731 / Loss  0.03820912167429924
fps: 6.684533295934109
TIMESTEP 324480 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.6174965 / Loss  0.0809650719165802
fps: 5.536413735752424
TIMESTEP 324481 / STATE train / EPSILON 0 / ACTION 

fps: 5.726841017266662
TIMESTEP 324536 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.7411785 / Loss  0.007342757191509008
fps: 5.751641443922132
TIMESTEP 324537 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.020501 / Loss  0.016443122178316116
fps: 5.793213564032193
TIMESTEP 324538 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.194124 / Loss  0.0368468351662159
fps: 5.661949167643105
TIMESTEP 324539 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.793578 / Loss  0.08954986184835434
fps: 6.552995895690866
TIMESTEP 324540 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.641505 / Loss  0.04071804881095886
fps: 6.382042714286584
TIMESTEP 324541 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.804996 / Loss  0.019744576886296272
fps: 6.662214922661008
TIMESTEP 324542 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.97367 / Loss  0.006608068011701107
fps: 6.637239589514745
TIMESTEP 324543 / STATE tra

TIMESTEP 324597 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.652222 / Loss  0.28166598081588745
fps: 3.2604645630913245
TIMESTEP 324598 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.67313 / Loss  0.06531117856502533
fps: 5.301494650226568
TIMESTEP 324599 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.055057 / Loss  0.026305481791496277
fps: 5.4591013569972695
TIMESTEP 324600 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.726065 / Loss  0.007408273406326771
fps: 6.5745460929429855
TIMESTEP 324601 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.808685 / Loss  0.03544580936431885
fps: 5.6780453994108475
TIMESTEP 324602 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.831232 / Loss  0.08864708244800568
fps: 5.4164361555130265
TIMESTEP 324603 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.1859255 / Loss  0.03817106783390045
fps: 6.659528119145152
TIMESTEP 324604 / STATE train / EPSILON 0 / AC

fps: 5.021759380095926
TIMESTEP 324659 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.013138 / Loss  0.03701881319284439
fps: 5.1645085822641414
TIMESTEP 324660 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.382837 / Loss  0.024310696870088577
fps: 4.8629442448316125
TIMESTEP 324661 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.583943 / Loss  0.06461258232593536
fps: 5.460835600921791
TIMESTEP 324662 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.6200485 / Loss  0.6760887503623962
fps: 6.015288107687939
TIMESTEP 324663 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.786899 / Loss  0.35637444257736206
fps: 5.966174215591724
TIMESTEP 324664 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.6985655 / Loss  0.027112619951367378
fps: 6.0144427938030205
TIMESTEP 324665 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.684235 / Loss  0.16557084023952484
fps: 3.1278017860139076
TIMESTEP 324666 / STATE 

fps: 5.6780453994108475
TIMESTEP 324720 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.819581 / Loss  0.06181570887565613
fps: 5.445945700299934
TIMESTEP 324721 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.020169 / Loss  0.011191156692802906
fps: 5.445959842527864
TIMESTEP 324722 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.101115 / Loss  0.008778701536357403
fps: 5.41644315019403
TIMESTEP 324723 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.067289 / Loss  0.08370748162269592
fps: 6.14974436570879
TIMESTEP 324724 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.021314 / Loss  0.010940290987491608
fps: 5.710467342143026
TIMESTEP 324725 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.100319 / Loss  0.012934079393744469
fps: 5.431164909500095
TIMESTEP 324726 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.01981 / Loss  0.009313082322478294
fps: 5.475807146493988
TIMESTEP 324727 / STATE

fps: 5.150878864874755
TIMESTEP 324781 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.948717 / Loss  0.06039898842573166
fps: 5.475785700018147
TIMESTEP 324782 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.079803 / Loss  0.008379368111491203
fps: 5.6941947588424
TIMESTEP 324783 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.726704 / Loss  0.013009706512093544
fps: 4.811802631729898
TIMESTEP 324784 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.126911 / Loss  0.05959688127040863
fps: 5.38725160905825
TIMESTEP 324785 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.02167 / Loss  0.0173783116042614
fps: 5.079024326448367
TIMESTEP 324786 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.870852 / Loss  0.028194110840559006
fps: 5.1913365175513615
TIMESTEP 324787 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.98672 / Loss  2.416303873062134
fps: 5.273505885429633
TIMESTEP 324788 / STATE train / EPSI

TIMESTEP 324842 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.60545 / Loss  0.013266153633594513
fps: 6.819241418442484
TIMESTEP 324843 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.803271 / Loss  0.008110211230814457
fps: 6.900546541586188
TIMESTEP 324844 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.073656 / Loss  0.04985632374882698
fps: 6.443348270453536
TIMESTEP 324845 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.834489 / Loss  0.01875271089375019
fps: 5.786196630895278
TIMESTEP 324846 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.497128 / Loss  0.03397747874259949
fps: 5.6465444010506065
TIMESTEP 324847 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.066229 / Loss  0.06639128923416138
fps: 5.776538267136579
TIMESTEP 324848 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.37982 / Loss  0.024794436991214752
fps: 5.8652899010776025
TIMESTEP 324849 / STATE train / EPSILON 0 / ACTI

fps: 6.489194742183826
TIMESTEP 324904 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.827107 / Loss  0.012515362352132797
fps: 6.804030860873007
TIMESTEP 324905 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.364139 / Loss  0.032678913325071335
fps: 6.752275145733287
TIMESTEP 324906 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.0953045 / Loss  0.7801380157470703
fps: 6.939868757631813
TIMESTEP 324907 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.258979 / Loss  0.008210957981646061
fps: 5.761572792621765
TIMESTEP 324908 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.61234 / Loss  0.021994173526763916
fps: 5.793229567362476
TIMESTEP 324909 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.495185 / Loss  0.017726216465234756
fps: 6.707226936983182
TIMESTEP 324910 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.737118 / Loss  0.015142615884542465
fps: 6.86826303180558
TIMESTEP 324911 / STA

fps: 5.410573977206029
TIMESTEP 324965 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.451337 / Loss  0.10596923530101776
fps: 5.4458820611831795
TIMESTEP 324966 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.554815 / Loss  0.048201434314250946
fps: 5.511981843580926
TIMESTEP 324967 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.544764 / Loss  0.03127788379788399
fps: 5.443168177034194
TIMESTEP 324968 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.371928 / Loss  0.03974295035004616
fps: 5.389556787985012
TIMESTEP 324969 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.614846 / Loss  1.1724247932434082
fps: 6.060073310880612
TIMESTEP 324970 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.624426 / Loss  0.02187597006559372
fps: 6.684522642697202
TIMESTEP 324971 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.837317 / Loss  0.0234510637819767
fps: 5.458532992188903
TIMESTEP 324972 / STATE train / 

fps: 5.476979809507891
TIMESTEP 325026 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.510754 / Loss  0.020900364965200424
fps: 5.436212087631505
TIMESTEP 325027 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.311498 / Loss  0.04556697607040405
fps: 5.035511348288303
TIMESTEP 325028 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.255504 / Loss  0.007925957441329956
fps: 3.3200540795552054
TIMESTEP 325029 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.480392 / Loss  0.041110560297966
fps: 4.839384240488657
TIMESTEP 325030 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.532362 / Loss  0.06059250980615616
fps: 6.265420046994714
TIMESTEP 325031 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.396963 / Loss  3.9620325565338135
fps: 5.451778523577168
TIMESTEP 325032 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.347471 / Loss  2.395251750946045
fps: 4.989411651347008
TIMESTEP 325033 / STATE tra

fps: 5.659176065336213
TIMESTEP 325088 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.5878725 / Loss  0.01388821005821228
fps: 6.4533909388280115
TIMESTEP 325089 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.617033 / Loss  0.011262794956564903
fps: 6.802364599129088
TIMESTEP 325090 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.702307 / Loss  0.01594487391412258
fps: 5.490846633991035
TIMESTEP 325091 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.394993 / Loss  0.23309321701526642
fps: 5.838916607270136
TIMESTEP 325092 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.690187 / Loss  0.009615754708647728
fps: 5.616827298426894
TIMESTEP 325093 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.303968 / Loss  0.08345017582178116
fps: 6.670383829387957
TIMESTEP 325094 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.846432 / Loss  0.0157451294362545
fps: 6.601533320846836
TIMESTEP 325095 / STATE train

fps: 5.997714913265372
TIMESTEP 325150 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.118577 / Loss  0.0535857118666172
fps: 5.884355678889545
TIMESTEP 325151 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.310186 / Loss  0.09374508261680603
fps: 5.65309698253786
TIMESTEP 325152 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.948825 / Loss  0.020967092365026474
fps: 6.704150070009654
TIMESTEP 325153 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.291025 / Loss  0.0653415322303772
fps: 6.75228601602143
TIMESTEP 325154 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.956738 / Loss  0.02670707367360592
fps: 6.8314255373209205
TIMESTEP 325155 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.012292 / Loss  0.010308736935257912
fps: 5.6910038724243766
TIMESTEP 325156 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.98407 / Loss  0.058153629302978516
fps: 5.8940883515174
TIMESTEP 325157 / STATE train / EP

TIMESTEP 325211 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.557067 / Loss  1.1172138452529907
fps: 6.602468583877858
TIMESTEP 325212 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.746999 / Loss  0.053405147045850754
fps: 6.6366304690083595
TIMESTEP 325213 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.210439 / Loss  0.05300569534301758
fps: 6.648012160193751
TIMESTEP 325214 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.390637 / Loss  0.02209308370947838
fps: 5.808754324396939
TIMESTEP 325215 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.98829 / Loss  0.03360087051987648
fps: 5.82698417770319
TIMESTEP 325216 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.244073 / Loss  0.019950037822127342
fps: 6.760023659979661
TIMESTEP 325217 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.956187 / Loss  0.01438668929040432
fps: 6.668676346199098
TIMESTEP 325218 / STATE train / EPSILON 0 / ACTION

fps: 5.773190881387007
TIMESTEP 325272 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.766859 / Loss  0.015559653751552105
fps: 5.5792605953706405
TIMESTEP 325273 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.548864 / Loss  0.027927890419960022
fps: 5.734874919329257
TIMESTEP 325274 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.035434 / Loss  0.05908900499343872
fps: 6.553047086652064
TIMESTEP 325275 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.512046 / Loss  0.015011962503194809
fps: 6.855588681416392
TIMESTEP 325276 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.940905 / Loss  0.016865704208612442
fps: 6.6399925277794045
TIMESTEP 325277 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.15617 / Loss  0.02591456100344658
fps: 6.977667757991961
TIMESTEP 325278 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.07979 / Loss  0.04153471440076828
fps: 5.748196795002693
TIMESTEP 325279 / STATE t

TIMESTEP 325333 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.191937 / Loss  0.02274092473089695
fps: 5.985885522885005
TIMESTEP 325334 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.407682 / Loss  0.032807305455207825
fps: 3.463893550390465
TIMESTEP 325335 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.647409 / Loss  0.017796451225876808
fps: 6.207024943209986
TIMESTEP 325336 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.30355 / Loss  0.02774672955274582
fps: 5.5213783225915165
TIMESTEP 325337 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.701359 / Loss  0.0686756819486618
fps: 5.4536499358976025
TIMESTEP 325338 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.626213 / Loss  0.01568964123725891
fps: 5.440689882399334
TIMESTEP 325339 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.757647 / Loss  0.02844102308154106
fps: 6.35017607819507
TIMESTEP 325340 / STATE train / EPSILON 0 / ACTION 0 /

fps: 5.795663102099359
TIMESTEP 325395 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.888938 / Loss  0.031938258558511734
fps: 5.764288737775481
TIMESTEP 325396 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.798606 / Loss  0.00884807575494051
fps: 5.724339919804999
TIMESTEP 325397 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.945784 / Loss  0.04769941791892052
fps: 5.654293460017714
TIMESTEP 325398 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.128555 / Loss  0.020900212228298187
fps: 5.690316716298442
TIMESTEP 325399 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.7953205 / Loss  0.35215142369270325
fps: 5.4814462441615195
TIMESTEP 325400 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.0300865 / Loss  1.013998031616211
fps: 5.218451125667967
TIMESTEP 325401 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.923977 / Loss  0.059288375079631805
fps: 5.475406839976711
TIMESTEP 325402 / STATE trai

TIMESTEP 325456 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.266176 / Loss  0.012546725571155548
fps: 6.572712214736915
TIMESTEP 325457 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.028072 / Loss  0.02496735192835331
fps: 5.737440170934325
TIMESTEP 325458 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.672066 / Loss  0.03451279550790787
fps: 5.972248287410348
TIMESTEP 325459 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.479026 / Loss  0.032039061188697815
fps: 5.809381873382937
TIMESTEP 325460 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.390885 / Loss  0.019318712875247
fps: 5.888949262390257
TIMESTEP 325461 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.291674 / Loss  0.04878025874495506
fps: 6.671519100094164
TIMESTEP 325462 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.468028 / Loss  0.48595762252807617
fps: 6.8302908118565515
TIMESTEP 325463 / STATE train / EPSILON 0 / ACTION 0 /

TIMESTEP 325518 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.384106 / Loss  0.01312749832868576
fps: 6.78412235767558
TIMESTEP 325519 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.614405 / Loss  0.01616278849542141
fps: 6.545755321357061
TIMESTEP 325520 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.718761 / Loss  0.012741858139634132
fps: 6.683532040066416
TIMESTEP 325521 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.539274 / Loss  0.03855438902974129
fps: 6.7069266305173425
TIMESTEP 325522 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.552377 / Loss  0.029284058138728142
fps: 6.582057112547529
TIMESTEP 325523 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.8515 / Loss  0.04441424459218979
fps: 7.008518559364233
TIMESTEP 325524 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.410062 / Loss  0.01373688131570816
fps: 6.662214922661008
TIMESTEP 325525 / STATE train / EPSILON 0 / ACTION 0 / 

fps: 5.4597835510232775
TIMESTEP 325579 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.926383 / Loss  0.018128272145986557
fps: 5.433620454012543
TIMESTEP 325580 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.8043165 / Loss  0.00985165685415268
fps: 6.6588197565595095
TIMESTEP 325581 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.831682 / Loss  0.06071551889181137
fps: 5.410490224027595
TIMESTEP 325582 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.670368 / Loss  0.02917908877134323
fps: 6.6838515572213515
TIMESTEP 325583 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.918791 / Loss  0.013178691267967224
fps: 5.453281222777825
TIMESTEP 325584 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.072057 / Loss  0.019010469317436218
fps: 6.0450942297603465
TIMESTEP 325585 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.381292 / Loss  0.02231789566576481
fps: 6.683489440087195
TIMESTEP 325586 / STATE

TIMESTEP 325640 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.202728 / Loss  0.025298120453953743
fps: 5.473256194809787
TIMESTEP 325641 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.60176 / Loss  0.015979036688804626
fps: 6.6674784483101215
TIMESTEP 325642 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.9034 / Loss  0.020000725984573364
fps: 2.87165639568201
TIMESTEP 325643 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.591164 / Loss  0.053094033151865005
fps: 4.9472158299432305
TIMESTEP 325644 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.033917 / Loss  0.0224011093378067
fps: 5.445309376054839
TIMESTEP 325645 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.106618 / Loss  0.01797659695148468
fps: 6.662236087196853
TIMESTEP 325646 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  2.4654965 / Loss  0.008508456870913506
fps: 5.460835600921791
TIMESTEP 325647 / STATE train / EPSILON 0 / ACTION 

TIMESTEP 325701 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.182701 / Loss  0.018027443438768387
fps: 5.449214507319659
TIMESTEP 325702 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.480195 / Loss  0.009800695814192295
fps: 5.43493708939655
TIMESTEP 325703 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.753405 / Loss  0.022629164159297943
fps: 5.500329812275836
TIMESTEP 325704 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  3.2211368 / Loss  0.57369464635849
fps: 6.587101586977519
TIMESTEP 325705 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.793074 / Loss  0.009752467274665833
fps: 6.713453624666072
TIMESTEP 325706 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.722435 / Loss  0.06910170614719391
fps: 6.6607972050182624
TIMESTEP 325707 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.486799 / Loss  0.018798571079969406
fps: 5.465104824944297
TIMESTEP 325708 / STATE train / EPSILON 0 / AC

TIMESTEP 325763 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.902981 / Loss  0.010263707488775253
fps: 5.744134737718591
TIMESTEP 325764 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.730647 / Loss  0.01640954241156578
fps: 5.7365769861492355
TIMESTEP 325765 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.258557 / Loss  0.0139935202896595
fps: 5.7117815681067645
TIMESTEP 325766 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.823225 / Loss  0.014311997219920158
fps: 6.669991349040761
TIMESTEP 325767 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.269488 / Loss  0.01169887837022543
fps: 6.690771265268098
TIMESTEP 325768 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.380165 / Loss  0.020680859684944153
fps: 6.540376176917295
TIMESTEP 325769 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.516867 / Loss  0.00979695562273264
fps: 5.46520451986824
TIMESTEP 325770 / STATE train / EPSILON 0 / A

TIMESTEP 325824 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.598194 / Loss  0.05824873968958855
fps: 6.777698238152412
TIMESTEP 325825 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.170489 / Loss  0.017504233866930008
fps: 6.617959657670849
TIMESTEP 325826 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.440365 / Loss  0.02333351969718933
fps: 5.719632846367942
TIMESTEP 325827 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.356028 / Loss  1.464192271232605
fps: 5.708602024678319
TIMESTEP 325828 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.289755 / Loss  0.0148597601801157
fps: 6.633208870838928
TIMESTEP 325829 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.825784 / Loss  0.03161947801709175
fps: 6.765519321625995
TIMESTEP 325830 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.315984 / Loss  0.030986331403255463
fps: 6.8801603614036875
TIMESTEP 325831 / STATE train / EPSILON 0 / ACTION 0 / 

TIMESTEP 325885 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.969931 / Loss  2.5890607833862305
fps: 6.8315145513417725
TIMESTEP 325886 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.311927 / Loss  0.03096717782318592
fps: 6.733489377141187
TIMESTEP 325887 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.079732 / Loss  0.030685419216752052
fps: 6.8178558308436035
TIMESTEP 325888 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.032978 / Loss  0.009183506481349468
fps: 6.7722265189177495
TIMESTEP 325889 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.235244 / Loss  0.4686571955680847
fps: 6.618137178328039
TIMESTEP 325890 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.0760765 / Loss  0.017246022820472717
fps: 6.821404060018801
TIMESTEP 325891 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.295828 / Loss  0.01986616849899292
fps: 6.729502896813745
TIMESTEP 325892 / STATE train / EPSILON 0 / A

TIMESTEP 325946 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.769214 / Loss  0.007849049754440784
fps: 5.45804997514516
TIMESTEP 325947 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.269023 / Loss  0.030724048614501953
fps: 5.449143712372583
TIMESTEP 325948 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.798234 / Loss  1.6411415338516235
fps: 5.4679546847093485
TIMESTEP 325949 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.997734 / Loss  1.604133129119873
fps: 3.3311153980251427
TIMESTEP 325950 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.940611 / Loss  0.0450313538312912
fps: 6.001997658904035
TIMESTEP 325951 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.75288 / Loss  0.01633615419268608
fps: 5.460800052078247
TIMESTEP 325952 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.704498 / Loss  3.2012414932250977
fps: 6.691304965149985
TIMESTEP 325953 / STATE train / EPSILON 0 / ACTION 0 / RE

fps: 6.827233380374999
TIMESTEP 326007 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.199889 / Loss  0.018135521560907364
fps: 6.429313336562575
TIMESTEP 326008 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.270678 / Loss  0.19787293672561646
fps: 6.991986637171535
TIMESTEP 326009 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.094814 / Loss  4.149619102478027
fps: 6.391028219662339
TIMESTEP 326010 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.576342 / Loss  0.06480265408754349
fps: 6.672325695341483
TIMESTEP 326011 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.850163 / Loss  2.509688377380371
fps: 6.662849917236691
TIMESTEP 326012 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.791603 / Loss  0.01031921710819006
fps: 5.727787314293986
TIMESTEP 326013 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.646613 / Loss  0.04006563127040863
fps: 5.669985859858491
TIMESTEP 326014 / STATE train / EPS

fps: 5.50596173410784
TIMESTEP 326068 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.066726 / Loss  0.02026192843914032
fps: 5.661933881396887
TIMESTEP 326069 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.403303 / Loss  0.008387668058276176
fps: 4.637295571338859
TIMESTEP 326070 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.782711 / Loss  1.569193959236145
fps: 5.278762387327861
TIMESTEP 326071 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.168664 / Loss  0.12914206087589264
fps: 5.445373001787729
TIMESTEP 326072 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.993378 / Loss  1.357567548751831
fps: 4.886660755874578
TIMESTEP 326073 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.003505 / Loss  0.027995606884360313
fps: 6.1221867204593785
TIMESTEP 326074 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.879902 / Loss  0.016908571124076843
fps: 6.75228601602143
TIMESTEP 326075 / STATE train / EP

fps: 6.052954461818208
TIMESTEP 326130 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.19325 / Loss  0.051880091428756714
fps: 7.156416250063983
TIMESTEP 326131 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.280927 / Loss  0.016790373250842094
fps: 6.106561529994817
TIMESTEP 326132 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.12585 / Loss  0.012124182656407356
fps: 5.931723653190439
TIMESTEP 326133 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.599807 / Loss  0.013713082298636436
fps: 6.975857366197651
TIMESTEP 326134 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.047114 / Loss  0.023674286901950836
fps: 5.287259938105473
TIMESTEP 326135 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.927273 / Loss  0.05486578494310379
fps: 5.736357307854848
TIMESTEP 326136 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.62205 / Loss  1.082858920097351
fps: 6.228834413226626
TIMESTEP 326137 / STATE train / 

fps: 3.512611488438701
TIMESTEP 326191 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.333424 / Loss  0.007683651056140661
fps: 5.460849820588802
TIMESTEP 326192 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.988493 / Loss  0.009604542516171932
fps: 6.07682555964608
TIMESTEP 326193 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.731613 / Loss  0.020466402173042297
fps: 5.960891766635354
TIMESTEP 326194 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.82183 / Loss  2.7096049785614014
fps: 6.66027893652868
TIMESTEP 326195 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.606278 / Loss  0.038966938853263855
fps: 6.67813681704919
TIMESTEP 326196 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.114534 / Loss  0.008261344395577908
fps: 6.71269077127932
TIMESTEP 326197 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.889315 / Loss  0.046567026525735855
fps: 6.679859787260134
TIMESTEP 326198 / STATE train / 

TIMESTEP 326252 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.574794 / Loss  0.06467375159263611
fps: 5.635028690347119
TIMESTEP 326253 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.49298 / Loss  0.07176414132118225
fps: 5.460849820588802
TIMESTEP 326254 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.520206 / Loss  0.037453845143318176
fps: 4.532119092950332
TIMESTEP 326255 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.382899 / Loss  0.05615321919322014
fps: 5.0471453927378835
TIMESTEP 326256 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.5477 / Loss  3.708442211151123
fps: 4.344812655188381
TIMESTEP 326257 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.845215 / Loss  0.025411542505025864
fps: 5.412081977937752
TIMESTEP 326258 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.450981 / Loss  0.06047218292951584
fps: 6.112158875467596
TIMESTEP 326259 / STATE train / EPSILON 0 / ACTION

TIMESTEP 326313 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.923327 / Loss  0.015561647713184357
fps: 5.034435491518059
TIMESTEP 326314 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.964818 / Loss  0.04150772467255592
fps: 5.416429160850088
TIMESTEP 326315 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.156992 / Loss  0.07593953609466553
fps: 5.475807146493988
TIMESTEP 326316 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.747471 / Loss  0.2889423072338104
fps: 6.019544160934354
TIMESTEP 326317 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.900773 / Loss  0.2733907997608185
fps: 6.020088471949914
TIMESTEP 326318 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.209249 / Loss  0.1278800517320633
fps: 6.002006247701472
TIMESTEP 326319 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.7428465 / Loss  0.1261533796787262
fps: 6.001971892659207
TIMESTEP 326320 / STATE train / EPSILON 0 / ACTION 0 / 

fps: 5.072764831379683
TIMESTEP 326375 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.391734 / Loss  0.017328090965747833
fps: 5.23210839683801
TIMESTEP 326376 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.20437 / Loss  0.04077565670013428
fps: 5.177910172115382
TIMESTEP 326377 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.423687 / Loss  0.033452875912189484
fps: 5.301414239977148
TIMESTEP 326378 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.350333 / Loss  0.018207630142569542
fps: 5.249060769055563
TIMESTEP 326379 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.396614 / Loss  0.12735949456691742
fps: 5.293993507313108
TIMESTEP 326380 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.129924 / Loss  0.03400934860110283
fps: 6.074959626317123
TIMESTEP 326381 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.509067 / Loss  0.01319310162216425
fps: 6.1016578314935614
TIMESTEP 326382 / STATE

fps: 5.776482578157278
TIMESTEP 326436 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.713698 / Loss  0.04246836155653
fps: 6.018455834135688
TIMESTEP 326437 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.495625 / Loss  0.03376082330942154
fps: 5.82701655869731
TIMESTEP 326438 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.539471 / Loss  0.06392174959182739
fps: 5.8440664174456565
TIMESTEP 326439 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.832264 / Loss  0.017624512314796448
fps: 6.130876093362645
TIMESTEP 326440 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.392469 / Loss  0.03087669610977173
fps: 6.199446908704463
TIMESTEP 326441 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.097241 / Loss  0.031885527074337006
fps: 5.0471393193474094
TIMESTEP 326442 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.783059 / Loss  0.0384625643491745
fps: 4.573542111920443
TIMESTEP 326443 / STATE tr

fps: 4.816073542565883
TIMESTEP 326497 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.481295 / Loss  0.04484076052904129
fps: 2.9920118929916097
TIMESTEP 326498 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.037143 / Loss  0.08349389582872391
fps: 5.759879592360816
TIMESTEP 326499 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.858571 / Loss  0.025272725149989128
fps: 5.330371383292517
TIMESTEP 326500 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.830586 / Loss  0.18050862848758698
fps: 5.360201408324707
TIMESTEP 326501 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.604782 / Loss  0.024200526997447014
fps: 5.440202961673604
TIMESTEP 326502 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.822403 / Loss  0.02032753825187683
fps: 5.313395398684795
TIMESTEP 326503 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.433733 / Loss  0.10645139217376709
fps: 5.2024569098107705
TIMESTEP 326504 / STA

fps: 5.460650751989335
TIMESTEP 326558 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.484295 / Loss  0.034953974187374115
fps: 5.460842710746039
TIMESTEP 326559 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.420521 / Loss  1.9787851572036743
fps: 5.966157242509417
TIMESTEP 326560 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.782 / Loss  0.04646630212664604
fps: 6.017238411128072
TIMESTEP 326561 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.102423 / Loss  0.022942498326301575
fps: 5.009655488722523
TIMESTEP 326562 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.050098 / Loss  0.019041748717427254
fps: 4.975455486898561
TIMESTEP 326563 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.169757 / Loss  0.05378212034702301
fps: 4.8747000022082165
TIMESTEP 326564 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.138956 / Loss  0.017143379896879196
fps: 5.582892085220118
TIMESTEP 326565 / STATE train /

TIMESTEP 326619 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.882582 / Loss  0.009015597403049469
fps: 5.111688514280979
TIMESTEP 326620 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.350212 / Loss  0.022965164855122566
fps: 5.490825069546719
TIMESTEP 326621 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.2032 / Loss  0.009536891244351864
fps: 5.98148071561504
TIMESTEP 326622 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.531573 / Loss  0.012326082214713097
fps: 6.001800123347085
TIMESTEP 326623 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.175504 / Loss  0.018160900101065636
fps: 5.948802028743367
TIMESTEP 326624 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.760272 / Loss  0.023044228553771973
fps: 5.079467191127441
TIMESTEP 326625 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.337776 / Loss  0.02230987325310707
fps: 4.995121951219512
TIMESTEP 326626 / STATE train / EPSILON 0 / ACTI

fps: 4.747426682814746
TIMESTEP 326681 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.1153145 / Loss  0.31865936517715454
fps: 4.98257176628514
TIMESTEP 326682 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.636165 / Loss  1.8237515687942505
fps: 6.2335000334391
TIMESTEP 326683 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.945318 / Loss  0.03566259518265724
fps: 5.475799997650042
TIMESTEP 326684 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.242648 / Loss  0.02339145727455616
fps: 5.205394651763984
TIMESTEP 326685 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.878589 / Loss  0.02288663573563099
fps: 6.098135943390603
TIMESTEP 326686 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.238033 / Loss  0.0296560600399971
fps: 5.3155973720496394
TIMESTEP 326687 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.8950205 / Loss  0.02907368168234825
fps: 5.35818136525355
TIMESTEP 326688 / STATE train / 

fps: 6.058830242263417
TIMESTEP 326742 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.320844 / Loss  0.047943420708179474
fps: 5.445811352797819
TIMESTEP 326743 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.603268 / Loss  0.03607653081417084
fps: 5.34401549320898
TIMESTEP 326744 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.378333 / Loss  0.05863822251558304
fps: 5.352847562103974
TIMESTEP 326745 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.701846 / Loss  0.024233195930719376
fps: 5.191349368332588
TIMESTEP 326746 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.95918 / Loss  0.031888723373413086
fps: 5.223435760381978
TIMESTEP 326747 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.963862 / Loss  0.03295397385954857
fps: 5.245679881636824
TIMESTEP 326748 / STATE train / EPSILON 0 / ACTION 1 / REWARD -1 / Q_MAX  10.085486 / Loss  0.22088336944580078
fps: 5.55183988987134
TIMESTEP 326749 / STATE train /

TIMESTEP 326803 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  -0.5237032 / Loss  0.8146911859512329
fps: 6.093484773203395
TIMESTEP 326804 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.07755 / Loss  0.02953530102968216
fps: 6.285117459042556
TIMESTEP 326805 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.751966 / Loss  0.02102028578519821
fps: 2.8757812864246084
TIMESTEP 326806 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.177665 / Loss  0.02165624313056469
fps: 4.7814083498724935
TIMESTEP 326807 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.645221 / Loss  0.2765713632106781
fps: 4.5321337844248095
TIMESTEP 326808 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.546969 / Loss  0.028465058654546738
fps: 5.171207070009666
TIMESTEP 326809 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.988886 / Loss  0.04148260876536369
fps: 5.431038322689581
TIMESTEP 326810 / STATE train / EPSILON 0 / AC

fps: 5.103403969513204
TIMESTEP 326864 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.72126 / Loss  0.08471592515707016
fps: 6.004549612681652
TIMESTEP 326865 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.734053 / Loss  0.04787001013755798
fps: 6.014943052914469
TIMESTEP 326866 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.420762 / Loss  0.03794579580426216
fps: 6.149726332087544
TIMESTEP 326867 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.747397 / Loss  0.04207267984747887
fps: 5.371122257807988
TIMESTEP 326868 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.445972 / Loss  0.0711631178855896
fps: 6.304891724264407
TIMESTEP 326869 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.909633 / Loss  2.369718551635742
fps: 5.137440456432298
TIMESTEP 326870 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.907071 / Loss  0.026342645287513733
fps: 6.38553083299713
TIMESTEP 326871 / STATE train

TIMESTEP 326925 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.170833 / Loss  0.009696708992123604
fps: 6.285108040878712
TIMESTEP 326926 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.813601 / Loss  0.01791933737695217
fps: 6.324609603541766
TIMESTEP 326927 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.071443 / Loss  0.006783924996852875
fps: 6.14974436570879
TIMESTEP 326928 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.014599 / Loss  0.03462924808263779
fps: 5.124679272577989
TIMESTEP 326929 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.120345 / Loss  0.049727410078048706
fps: 6.301102988675662
TIMESTEP 326930 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.0040865 / Loss  0.06504174321889877
fps: 5.273512515826306
TIMESTEP 326931 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.793702 / Loss  0.03487158939242363
fps: 6.285117459042556
TIMESTEP 326932 / STATE train / EPSILON 0 / A

fps: 6.409614029832956
TIMESTEP 326986 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.943198 / Loss  0.012587470933794975
fps: 6.022629892134677
TIMESTEP 326987 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.032705 / Loss  0.02105640433728695
fps: 6.093467068025709
TIMESTEP 326988 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.909085 / Loss  2.1660802364349365
fps: 5.408676208320814
TIMESTEP 326989 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.517621 / Loss  0.12838727235794067
fps: 5.432515536075558
TIMESTEP 326990 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.895932 / Loss  0.01463925838470459
fps: 6.3837522906177515
TIMESTEP 326991 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.596433 / Loss  0.07060985267162323
fps: 6.187110383384225
TIMESTEP 326992 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.990175 / Loss  0.0561382994055748
fps: 6.149843552516209
TIMESTEP 326993 / STATE train /

TIMESTEP 327047 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.0241165 / Loss  0.016336694359779358
fps: 6.16557348613361
TIMESTEP 327048 / STATE train / EPSILON 0 / ACTION 0 / REWARD -1 / Q_MAX  9.246972 / Loss  0.017852166667580605
fps: 5.358366187250721
TIMESTEP 327049 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.225797 / Loss  0.010723844170570374
fps: 5.21844463300595
TIMESTEP 327050 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.710436 / Loss  0.26344823837280273
fps: 5.460835600921791
TIMESTEP 327051 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.952426 / Loss  0.03423625975847244
fps: 6.48917466280088
TIMESTEP 327052 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.323564 / Loss  0.017298337072134018
fps: 5.344022302095026
TIMESTEP 327053 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.164576 / Loss  0.010576285421848297
fps: 4.61680642299386
TIMESTEP 327054 / STATE train / EPSILON 0 / ACTION 1 /

TIMESTEP 327108 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.827682 / Loss  0.007684915792196989
fps: 4.746019245218116
TIMESTEP 327109 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.152879 / Loss  0.012179836630821228
fps: 6.090476370257861
TIMESTEP 327110 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.874774 / Loss  0.014826909638941288
fps: 5.790574224838576
TIMESTEP 327111 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.956746 / Loss  0.014145168475806713
fps: 6.130867131783625
TIMESTEP 327112 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.470524 / Loss  0.012494385242462158
fps: 2.5014590787019837
TIMESTEP 327113 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.317293 / Loss  0.010387958958745003
fps: 5.948413942162462
TIMESTEP 327114 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.206894 / Loss  0.024652473628520966
fps: 6.0565817593715705
TIMESTEP 327115 / STATE train / EPSILON 0 / AC

fps: 5.984032323416786
TIMESTEP 327169 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.216956 / Loss  1.0304328203201294
fps: 6.1308581702308045
TIMESTEP 327170 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  7.953431 / Loss  0.026125315576791763
fps: 5.372759751031498
TIMESTEP 327171 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.3192215 / Loss  0.015728900209069252
fps: 5.259651714404307
TIMESTEP 327172 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.589072 / Loss  0.13808181881904602
fps: 5.287519886642984
TIMESTEP 327173 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.658408 / Loss  0.02309185266494751
fps: 6.265410687770936
TIMESTEP 327174 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.3248 / Loss  0.01638367772102356
fps: 6.187804093627643
TIMESTEP 327175 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.102124 / Loss  0.023451238870620728
fps: 5.345452552670047
TIMESTEP 327176 / STATE train 

fps: 5.793293581567777
TIMESTEP 327231 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.327386 / Loss  0.016377020627260208
fps: 6.28513629545492
TIMESTEP 327232 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.329896 / Loss  0.02117869257926941
fps: 5.999756822905566
TIMESTEP 327233 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.59607 / Loss  0.015826234593987465
fps: 5.82433824815937
TIMESTEP 327234 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  7.935608 / Loss  0.05853812396526337
fps: 6.190014049752653
TIMESTEP 327235 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.459784 / Loss  0.08581098914146423
fps: 5.984032323416786
TIMESTEP 327236 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.462895 / Loss  0.03978477045893669
fps: 5.809856647952915
TIMESTEP 327237 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.494128 / Loss  0.048725470900535583
fps: 6.226319360015201
TIMESTEP 327238 / STATE train / 

TIMESTEP 327292 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.831953 / Loss  0.025955509394407272
fps: 5.218438140360089
TIMESTEP 327293 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.001095 / Loss  0.01799062266945839
fps: 5.401800984978054
TIMESTEP 327294 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.8261175 / Loss  0.016514819115400314
fps: 5.551722312229317
TIMESTEP 327295 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.840728 / Loss  2.8422913551330566
fps: 5.421932544878946
TIMESTEP 327296 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.616909 / Loss  0.058528803288936615
fps: 5.3583524962951605
TIMESTEP 327297 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.684002 / Loss  0.01911398023366928
fps: 5.098633290908583
TIMESTEP 327298 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.7739 / Loss  0.0038060746155679226
fps: 5.059919631041093
TIMESTEP 327299 / STATE train / EPSILON 0 / ACTION 

fps: 5.567330078659053
TIMESTEP 327353 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.496298 / Loss  1.9440102577209473
fps: 5.551817843684106
TIMESTEP 327354 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.244678 / Loss  0.021728483960032463
fps: 4.91074196705803
TIMESTEP 327355 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.295608 / Loss  0.026354748755693436
fps: 5.431157876744701
TIMESTEP 327356 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.127833 / Loss  0.397085964679718
fps: 6.2294264857679655
TIMESTEP 327357 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.103538 / Loss  0.01649327762424946
fps: 6.056599250849797
TIMESTEP 327358 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.23473 / Loss  0.03957168385386467
fps: 6.285117459042556
TIMESTEP 327359 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.262216 / Loss  0.020241878926753998
fps: 6.14974436570879
TIMESTEP 327360 / STATE train / EP

fps: 5.301501351190792
TIMESTEP 327415 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  11.144623 / Loss  0.12781861424446106
fps: 4.129483497539129
TIMESTEP 327416 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.832276 / Loss  0.07256101071834564
fps: 6.01258911074891
TIMESTEP 327417 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.052602 / Loss  0.03488391637802124
fps: 6.020097112600956
TIMESTEP 327418 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.71659 / Loss  0.04702097177505493
fps: 6.261557065014555
TIMESTEP 327419 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.851219 / Loss  0.031645435839891434
fps: 2.9977107840439574
TIMESTEP 327420 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.352199 / Loss  0.09078458696603775
fps: 5.009188818638469
TIMESTEP 327421 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.594039 / Loss  0.030895959585905075
fps: 4.825998209654519
TIMESTEP 327422 / STATE 

fps: 6.035638480810104
TIMESTEP 327477 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.862286 / Loss  0.07184098660945892
fps: 5.4016201110123765
TIMESTEP 327478 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.012036 / Loss  2.346013069152832
fps: 4.996603651759346
TIMESTEP 327479 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.92006 / Loss  0.042481929063797
fps: 5.052057474130257
TIMESTEP 327480 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.499945 / Loss  0.170575812458992
fps: 5.475771402460916
TIMESTEP 327481 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.088115 / Loss  0.013225343078374863
fps: 5.9661827021690925
TIMESTEP 327482 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.634809 / Loss  0.39787936210632324
fps: 5.885073986041852
TIMESTEP 327483 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.642781 / Loss  0.011862566694617271
fps: 5.460828491116056
TIMESTEP 327484 / STATE train / E

fps: 6.207006572083518
TIMESTEP 327538 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  7.5471344 / Loss  0.08794581145048141
fps: 6.48917466280088
TIMESTEP 327539 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.286808 / Loss  0.02475520223379135
fps: 6.486284528378918
TIMESTEP 327540 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.491883 / Loss  0.015438736416399479
fps: 5.984049398357002
TIMESTEP 327541 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.44846 / Loss  0.01436439249664545
fps: 6.4473397812305935
TIMESTEP 327542 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.279373 / Loss  0.29969102144241333
fps: 6.111936209650156
TIMESTEP 327543 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.391608 / Loss  0.020956875756382942
fps: 6.14974436570879
TIMESTEP 327544 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.616806 / Loss  0.20323117077350616
fps: 6.293057583154913
TIMESTEP 327545 / STATE train /

TIMESTEP 327599 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.055864 / Loss  0.016795851290225983
fps: 5.9879279330725526
TIMESTEP 327600 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.507806 / Loss  0.013213971629738808
fps: 5.85754107243608
TIMESTEP 327601 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.900009 / Loss  0.028041670098900795
fps: 5.151157206983156
TIMESTEP 327602 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.012525 / Loss  0.05984785407781601
fps: 6.035438724735447
TIMESTEP 327603 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.7982025 / Loss  0.013251959346234798
fps: 5.983981099180793
TIMESTEP 327604 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.501394 / Loss  0.022328633815050125
fps: 5.984168925666999
TIMESTEP 327605 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.170707 / Loss  0.022533435374498367
fps: 6.020261289683622
TIMESTEP 327606 / STATE train / EPSILON 0 / A

fps: 6.365200024888344
TIMESTEP 327661 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  11.157446 / Loss  3.9188623428344727
fps: 5.164527659742137
TIMESTEP 327662 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.631569 / Loss  0.05452796816825867
fps: 6.002040603137034
TIMESTEP 327663 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.879679 / Loss  0.012956235557794571
fps: 6.5094880474037655
TIMESTEP 327664 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.923147 / Loss  0.021912069991230965
fps: 5.878269497860628
TIMESTEP 327665 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.678177 / Loss  0.03178504854440689
fps: 5.204864967493668
TIMESTEP 327666 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.694343 / Loss  0.03209047019481659
fps: 5.34403591991917
TIMESTEP 327667 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.405954 / Loss  0.015383065678179264
fps: 5.359112450153262
TIMESTEP 327668 / STATE

fps: 5.984032323416786
TIMESTEP 327722 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.025103 / Loss  0.02448154054582119
fps: 6.662214922661008
TIMESTEP 327723 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.885415 / Loss  0.03664704039692879
fps: 5.4164501448930995
TIMESTEP 327724 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.04932 / Loss  0.020593520253896713
fps: 5.50596896192309
TIMESTEP 327725 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.087954 / Loss  0.21915730834007263
fps: 5.984032323416786
TIMESTEP 327726 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.072494 / Loss  2.0740413665771484
fps: 3.1574532024673627
TIMESTEP 327727 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.128792 / Loss  0.15530383586883545
fps: 5.445959842527864
TIMESTEP 327728 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.751192 / Loss  0.019859880208969116
fps: 6.038271285019147
TIMESTEP 327729 / STATE tr

fps: 6.673790801210552
TIMESTEP 327784 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.508667 / Loss  0.11044050753116608
fps: 6.660670274665244
TIMESTEP 327785 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.062353 / Loss  0.01962697133421898
fps: 5.329138772455661
TIMESTEP 327786 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.308143 / Loss  0.045695580542087555
fps: 5.539440892480034
TIMESTEP 327787 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.824946 / Loss  0.016877584159374237
fps: 6.730658382063149
TIMESTEP 327788 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.353262 / Loss  0.01804034411907196
fps: 6.6099081550962255
TIMESTEP 327789 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.043102 / Loss  0.016998931765556335
fps: 6.706379712832976
TIMESTEP 327790 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.184797 / Loss  0.03458713740110397
fps: 6.757290062315936
TIMESTEP 327791 / STATE trai

TIMESTEP 327845 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.394354 / Loss  0.013051377609372139
fps: 4.913330631236397
TIMESTEP 327846 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.179746 / Loss  0.012815993279218674
fps: 6.920265967100596
TIMESTEP 327847 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.463582 / Loss  0.016202034428715706
fps: 6.670097420255653
TIMESTEP 327848 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.164417 / Loss  0.009704557247459888
fps: 5.965783859130089
TIMESTEP 327849 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.284319 / Loss  0.022024940699338913
fps: 5.467085162514729
TIMESTEP 327850 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.113659 / Loss  0.02179357223212719
fps: 6.6989568975568305
TIMESTEP 327851 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.650162 / Loss  0.023222964257001877
fps: 6.652672851480406
TIMESTEP 327852 / STATE train / EPSILON 0 / ACTI

TIMESTEP 327906 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.419614 / Loss  0.011603547260165215
fps: 7.053067374276499
TIMESTEP 327907 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.365374 / Loss  0.01745704747736454
fps: 6.818742542032859
TIMESTEP 327908 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.313062 / Loss  0.018984399735927582
fps: 6.03828867089537
TIMESTEP 327909 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.123429 / Loss  0.031033754348754883
fps: 7.065661755539758
TIMESTEP 327910 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.394432 / Loss  0.018235957249999046
fps: 7.172533615608108
TIMESTEP 327911 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.972781 / Loss  0.037300340831279755
fps: 7.115804117502354
TIMESTEP 327912 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.929319 / Loss  0.23186545073986053
fps: 7.0128894722322075
TIMESTEP 327913 / STATE train / EPSILON 0 / ACTION

fps: 5.8270003681552645
TIMESTEP 327967 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.283761 / Loss  0.019335560500621796
fps: 6.120221385953111
TIMESTEP 327968 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.063594 / Loss  0.4980577528476715
fps: 6.000967181638429
TIMESTEP 327969 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.369786 / Loss  0.018022820353507996
fps: 7.073824529921593
TIMESTEP 327970 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.161727 / Loss  0.03381047397851944
fps: 6.952580340907019
TIMESTEP 327971 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.4671955 / Loss  3.9541687965393066
fps: 7.043934912981633
TIMESTEP 327972 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.465677 / Loss  0.04114899784326553
fps: 5.885107015874887
TIMESTEP 327973 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  10.301219 / Loss  0.010343346744775772
fps: 5.9860051235576615
TIMESTEP 327974 / STAT

TIMESTEP 328028 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.417657 / Loss  0.08970992267131805
fps: 6.955889523139841
TIMESTEP 328029 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  7.726154 / Loss  0.02046358585357666
fps: 6.821270934774901
TIMESTEP 328030 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.222537 / Loss  0.014648137614130974
fps: 6.92264170250973
TIMESTEP 328031 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.7569275 / Loss  0.03330289572477341
fps: 6.915667616938695
TIMESTEP 328032 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.292272 / Loss  0.01869165524840355
fps: 6.526911052773269
TIMESTEP 328033 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.5974 / Loss  0.015929123386740685
fps: 5.930717163190365
TIMESTEP 328034 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.119756 / Loss  0.010006366297602654
fps: 3.237162463059738
TIMESTEP 328035 / STATE train / EPSILON 0 / ACTION 1 /

fps: 5.551854587426768
TIMESTEP 328090 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.724779 / Loss  0.033263757824897766
fps: 5.475778551230198
TIMESTEP 328091 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.492596 / Loss  0.036240071058273315
fps: 5.878426030261749
TIMESTEP 328092 / STATE train / EPSILON 0 / ACTION 0 / REWARD -1 / Q_MAX  8.346353 / Loss  0.02584092691540718
fps: 5.09852792803744
TIMESTEP 328093 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.4457655 / Loss  0.024449899792671204
fps: 5.445952771404718
TIMESTEP 328094 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.47967 / Loss  3.175877094268799
fps: 5.475614134259011
TIMESTEP 328095 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.604108 / Loss  0.030637620016932487
fps: 5.460714736742061
TIMESTEP 328096 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.604136 / Loss  0.020100850611925125
fps: 5.476872532876482
TIMESTEP 328097 / STATE train /

fps: 4.990147730503963
TIMESTEP 328151 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.055839 / Loss  0.05095546320080757
fps: 4.52187300214865
TIMESTEP 328152 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.815111 / Loss  0.029773470014333725
fps: 6.130902978256898
TIMESTEP 328153 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.678056 / Loss  0.2532873749732971
fps: 6.197898718839124
TIMESTEP 328154 / STATE train / EPSILON 0 / ACTION 0 / REWARD -1 / Q_MAX  8.488436 / Loss  0.019974172115325928
fps: 5.4310172254615185
TIMESTEP 328155 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.753227 / Loss  0.018282415345311165
fps: 5.475799997650042
TIMESTEP 328156 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.416949 / Loss  0.040251225233078
fps: 6.149753382559074
TIMESTEP 328157 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.70991 / Loss  0.02991531416773796
fps: 6.4059723649826115
TIMESTEP 328158 / STATE train / E

fps: 4.827714644502686
TIMESTEP 328212 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.50679 / Loss  0.05503857880830765
fps: 5.913200397004421
TIMESTEP 328213 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.41384 / Loss  0.02455628290772438
fps: 5.301508052171957
TIMESTEP 328214 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.16552 / Loss  0.048033151775598526
fps: 4.816068012557154
TIMESTEP 328215 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.444092 / Loss  0.038490649312734604
fps: 5.965792344596968
TIMESTEP 328216 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.65612 / Loss  0.09934712201356888
fps: 4.81604589264923
TIMESTEP 328217 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.695792 / Loss  0.021446391940116882
fps: 6.1687472791236715
TIMESTEP 328218 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.699825 / Loss  0.1501246690750122
fps: 4.68071971172249
TIMESTEP 328219 / STATE train / EPS

fps: 5.4608213813288335
TIMESTEP 328273 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.749589 / Loss  0.021048303693532944
fps: 5.519031647260224
TIMESTEP 328274 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.791952 / Loss  0.05514974147081375
fps: 6.6549528522672645
TIMESTEP 328275 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.910603 / Loss  2.4827370643615723
fps: 6.614901532798481
TIMESTEP 328276 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.168715 / Loss  0.017405610531568527
fps: 6.684522642697202
TIMESTEP 328277 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.056786 / Loss  0.03846199810504913
fps: 5.984032323416786
TIMESTEP 328278 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.067255 / Loss  0.047774165868759155
fps: 6.075294001770022
TIMESTEP 328279 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.873272 / Loss  0.10851681977510452
fps: 5.981864597743771
TIMESTEP 328280 / STATE trai

fps: 5.9307423212775765
TIMESTEP 328334 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.823469 / Loss  0.024491261690855026
fps: 6.818487588943601
TIMESTEP 328335 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.198823 / Loss  0.015665659680962563
fps: 6.6750653294162205
TIMESTEP 328336 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.284546 / Loss  0.018314940854907036
fps: 6.6259889290498695
TIMESTEP 328337 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.030096 / Loss  0.028350599110126495
fps: 6.002452899043312
TIMESTEP 328338 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.350317 / Loss  0.03652140498161316
fps: 5.487843619568489
TIMESTEP 328339 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.213692 / Loss  0.03748131915926933
fps: 6.676382853413559
TIMESTEP 328340 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.323897 / Loss  0.030664904043078423
fps: 2.705501626473034
TIMESTEP 328341 / STATE 

fps: 6.1238135423708275
TIMESTEP 328396 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.328843 / Loss  0.027854038402438164
fps: 6.119239191805424
TIMESTEP 328397 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.415979 / Loss  0.02318379282951355
fps: 6.012511539630387
TIMESTEP 328398 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.4935055 / Loss  0.006001685280352831
fps: 6.076851972588053
TIMESTEP 328399 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.168416 / Loss  0.014726562425494194
fps: 7.143447885903869
TIMESTEP 328400 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.513528 / Loss  0.015584979206323624
fps: 5.976656535300899
TIMESTEP 328401 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.523064 / Loss  0.024402178823947906
fps: 5.8954470319686045
TIMESTEP 328402 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.686146 / Loss  0.011677365750074387
fps: 7.016620188133504
TIMESTEP 328403 / STATE

TIMESTEP 328458 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.877086 / Loss  0.021054618060588837
fps: 7.047094838217554
TIMESTEP 328459 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.849178 / Loss  0.030609849840402603
fps: 6.0750916122304135
TIMESTEP 328460 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.176626 / Loss  3.0915255546569824
fps: 6.926780288016911
TIMESTEP 328461 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.205678 / Loss  0.018203141167759895
fps: 7.109447727141748
TIMESTEP 328462 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.294186 / Loss  0.03190712630748749
fps: 7.127933219131522
TIMESTEP 328463 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.275695 / Loss  0.03969259187579155
fps: 6.132606168714863
TIMESTEP 328464 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.2219925 / Loss  0.011590568348765373
fps: 7.121930015214084
TIMESTEP 328465 / STATE train / EPSILON 0 / ACTION

TIMESTEP 328519 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.504701 / Loss  2.709818124771118
fps: 5.774470539806402
TIMESTEP 328520 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  3.649351 / Loss  0.07591398060321808
fps: 5.961230647657679
TIMESTEP 328521 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.950388 / Loss  0.16101637482643127
fps: 7.118219386153233
TIMESTEP 328522 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.028447 / Loss  0.058529507368803024
fps: 7.022517642922321
TIMESTEP 328523 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.897658 / Loss  0.05471431463956833
fps: 7.085487628324355
TIMESTEP 328524 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.062754 / Loss  0.024907507002353668
fps: 6.898741901473895
TIMESTEP 328525 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.55923 / Loss  0.018837185576558113
fps: 7.241559464018412
TIMESTEP 328526 / STATE train / EPSILON 0 / ACTION 0 /

TIMESTEP 328580 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.198172 / Loss  0.06400157511234283
fps: 6.828745077033537
TIMESTEP 328581 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.628359 / Loss  0.018880799412727356
fps: 3.1475205129321147
TIMESTEP 328582 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.627112 / Loss  2.5370898246765137
fps: 6.468159653576044
TIMESTEP 328583 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.757796 / Loss  0.015613701194524765
fps: 5.881500697623872
TIMESTEP 328584 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.578674 / Loss  0.752224862575531
fps: 5.414038045253121
TIMESTEP 328585 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.957184 / Loss  1.9673820734024048
fps: 6.945373315736572
TIMESTEP 328586 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.625408 / Loss  0.03356265649199486
fps: 6.834809701662788
TIMESTEP 328587 / STATE train / EPSILON 0 / ACTION 0 / R

fps: 6.047665530949144
TIMESTEP 328642 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.44942 / Loss  0.03147313743829727
fps: 7.128308755423597
TIMESTEP 328643 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.764428 / Loss  0.026075754314661026
fps: 5.978062073840961
TIMESTEP 328644 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.150616 / Loss  0.027944501489400864
fps: 7.039678823247047
TIMESTEP 328645 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.4109 / Loss  0.028557773679494858
fps: 6.826777779586256
TIMESTEP 328646 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.339264 / Loss  0.011121105402708054
fps: 7.001019864830353
TIMESTEP 328647 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.218414 / Loss  0.027347542345523834
fps: 6.8838302439857015
TIMESTEP 328648 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.59862 / Loss  0.04820661246776581
fps: 5.9479668617034855
TIMESTEP 328649 / STATE train

fps: 5.645936337730132
TIMESTEP 328703 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.503337 / Loss  0.12859630584716797
fps: 6.7522316649307035
TIMESTEP 328704 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.702633 / Loss  0.011507796123623848
fps: 6.7564627954328715
TIMESTEP 328705 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.479035 / Loss  0.38563311100006104
fps: 5.953632034294069
TIMESTEP 328706 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.324882 / Loss  0.018550444394350052
fps: 5.940149611808059
TIMESTEP 328707 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  -0.7123381 / Loss  0.06273481249809265
fps: 5.782622782213313
TIMESTEP 328708 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.65547 / Loss  0.027453802525997162
fps: 5.657496718916709
TIMESTEP 328709 / STATE train / EPSILON 0 / ACTION 1 / REWARD -1 / Q_MAX  9.373052 / Loss  2.9836947917938232
fps: 6.875998373749164
TIMESTEP 328710 / STATE trai

fps: 6.0327823580514695
TIMESTEP 328764 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.941799 / Loss  0.0282738134264946
fps: 6.035438724735447
TIMESTEP 328765 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.917949 / Loss  0.08726932108402252
fps: 6.868218044386367
TIMESTEP 328766 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.41081634 / Loss  1.2280999422073364
fps: 5.759863772753177
TIMESTEP 328767 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.60251 / Loss  0.8658413887023926
fps: 6.065278723751929
TIMESTEP 328768 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.636921 / Loss  0.03431637957692146
fps: 5.910084178073119
TIMESTEP 328769 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.439293 / Loss  0.017672354355454445
fps: 5.985765926991507
TIMESTEP 328770 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.0683 / Loss  0.014365678653120995
fps: 7.241109394287292
TIMESTEP 328771 / STATE train / 

fps: 7.039690638594983
TIMESTEP 328825 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.076166 / Loss  0.010966833680868149
fps: 6.61348302205123
TIMESTEP 328826 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.223687 / Loss  0.010310770943760872
fps: 5.502617292761578
TIMESTEP 328827 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.970429 / Loss  0.06821697950363159
fps: 5.615496253929491
TIMESTEP 328828 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.825178 / Loss  0.016460807994008064
fps: 5.842666202333275
TIMESTEP 328829 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.838355 / Loss  0.021174374967813492
fps: 6.426358109433222
TIMESTEP 328830 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.274999 / Loss  0.011716336011886597
fps: 5.762206020340734
TIMESTEP 328831 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.83993 / Loss  0.019302979111671448
fps: 5.989783546568834
TIMESTEP 328832 / STATE trai

fps: 5.460131819951078
TIMESTEP 328886 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.488928 / Loss  0.09720619767904282
fps: 6.6803385478882165
TIMESTEP 328887 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.405768 / Loss  0.011440558359026909
fps: 6.703239344555731
TIMESTEP 328888 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.4694605 / Loss  0.02390488050878048
fps: 6.631719584290573
TIMESTEP 328889 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.310016 / Loss  0.028792202472686768
fps: 6.696646650562244
TIMESTEP 328890 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.606129 / Loss  0.02385488525032997
fps: 3.1524742949912814
TIMESTEP 328891 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.324852 / Loss  0.047013312578201294
fps: 6.596279992891506
TIMESTEP 328892 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.658388 / Loss  0.051212333142757416
fps: 6.79468387732224
TIMESTEP 328893 / STATE tr

fps: 6.729492099757409
TIMESTEP 328947 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.548981 / Loss  0.039097703993320465
fps: 6.661971540163504
TIMESTEP 328948 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.772715 / Loss  0.049183666706085205
fps: 6.68316995755204
TIMESTEP 328949 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.013239 / Loss  0.039074692875146866
fps: 6.662236087196853
TIMESTEP 328950 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.158116 / Loss  0.057972799986600876
fps: 5.460828491116056
TIMESTEP 328951 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.582403 / Loss  0.11376983672380447
fps: 5.455657460532597
TIMESTEP 328952 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.72657 / Loss  0.04401141405105591
fps: 6.043691831520886
TIMESTEP 328953 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.261507 / Loss  0.11471543461084366
fps: 5.401828812817144
TIMESTEP 328954 / STATE train 

fps: 6.035951169038995
TIMESTEP 329008 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.163627 / Loss  0.02729455567896366
fps: 5.40181489886176
TIMESTEP 329009 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.827853 / Loss  0.04017305001616478
fps: 5.460835600921791
TIMESTEP 329010 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.840082 / Loss  0.04094606637954712
fps: 5.990485045582373
TIMESTEP 329011 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.310159 / Loss  0.038483016192913055
fps: 5.504227607068103
TIMESTEP 329012 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.128482 / Loss  0.0470690056681633
fps: 6.018861751284333
TIMESTEP 329013 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  0.3048544 / Loss  0.0998321920633316
fps: 5.999267667521054
TIMESTEP 329014 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.823227 / Loss  0.042463477700948715
fps: 6.012839075747106
TIMESTEP 329015 / STATE train / 

fps: 5.8895694494761
TIMESTEP 329069 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.456388 / Loss  0.021821944043040276
fps: 6.915530787050395
TIMESTEP 329070 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.390409 / Loss  0.01987728849053383
fps: 6.915770242908304
TIMESTEP 329071 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.115768 / Loss  0.0076240855269134045
fps: 7.075471284339443
TIMESTEP 329072 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.313248 / Loss  0.03636735677719116
fps: 6.957793441170762
TIMESTEP 329073 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.316647 / Loss  0.03747059777379036
fps: 6.9829766901358035
TIMESTEP 329074 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.960905 / Loss  0.014271561056375504
fps: 7.005147424784466
TIMESTEP 329075 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.115985 / Loss  0.016257312148809433
fps: 7.104702925014736
TIMESTEP 329076 / STATE tr

fps: 6.636924513224625
TIMESTEP 329131 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.022853 / Loss  0.052940238267183304
fps: 5.963120615774822
TIMESTEP 329132 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.659563 / Loss  0.02603911980986595
fps: 6.9392717105372
TIMESTEP 329133 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.056717 / Loss  0.010797331109642982
fps: 7.121530967245709
TIMESTEP 329134 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.05201 / Loss  0.02387315407395363
fps: 6.915781645981833
TIMESTEP 329135 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  0.1486883 / Loss  0.014612014405429363
fps: 5.97968991695477
TIMESTEP 329136 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.343524 / Loss  0.01981690712273121
fps: 7.083202172429866
TIMESTEP 329137 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.765344 / Loss  0.009922279976308346
fps: 6.016340769301387
TIMESTEP 329138 / STATE train /

TIMESTEP 329192 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.227286 / Loss  0.01970185711979866
fps: 5.722254737844501
TIMESTEP 329193 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.659876 / Loss  0.030539637431502342
fps: 7.037623556376608
TIMESTEP 329194 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.127081 / Loss  0.08434056490659714
fps: 7.006118634356892
TIMESTEP 329195 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.916703 / Loss  0.03488372266292572
fps: 7.083931922166844
TIMESTEP 329196 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.907651 / Loss  0.7204012870788574
fps: 7.082006186597506
TIMESTEP 329197 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.126642 / Loss  0.937972366809845
fps: 6.869534169934127
TIMESTEP 329198 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.923807 / Loss  0.082663394510746
fps: 7.0785998416965805
TIMESTEP 329199 / STATE train / EPSILON 0 / ACTION 0 / REW

TIMESTEP 329253 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.742136 / Loss  0.05662670359015465
fps: 6.866261661894683
TIMESTEP 329254 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.485707 / Loss  0.04385097324848175
fps: 6.072584860177068
TIMESTEP 329255 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.651479 / Loss  0.18960672616958618
fps: 6.981791064154914
TIMESTEP 329256 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.6952715 / Loss  0.08254708349704742
fps: 7.041404283965933
TIMESTEP 329257 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.732044 / Loss  0.03880692273378372
fps: 6.068648600940761
TIMESTEP 329258 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.366049 / Loss  0.0225528571754694
fps: 7.015622595558767
TIMESTEP 329259 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.79946 / Loss  0.01875143125653267
fps: 5.878426030261749
TIMESTEP 329260 / STATE train / EPSILON 0 / ACTION 1 / R

TIMESTEP 329314 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.182977 / Loss  1.4491157531738281
fps: 6.7493088655095725
TIMESTEP 329315 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.009626 / Loss  0.01610943302512169
fps: 6.662225504912122
TIMESTEP 329316 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.17455 / Loss  1.2398459911346436
fps: 6.77513637325485
TIMESTEP 329317 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.761357 / Loss  0.1024729385972023
fps: 6.618105850461214
TIMESTEP 329318 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.955759 / Loss  0.04518784582614899
fps: 6.844736593023901
TIMESTEP 329319 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.799084 / Loss  0.1949532926082611
fps: 7.138135199483995
TIMESTEP 329320 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.966479 / Loss  0.02582971192896366
fps: 7.054763807008839
TIMESTEP 329321 / STATE train / EPSILON 0 / ACTION 0 / REWAR

fps: 6.385501668574769
TIMESTEP 329375 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.183727 / Loss  0.03549101948738098
fps: 6.821381872117305
TIMESTEP 329376 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.2746935 / Loss  0.02990095503628254
fps: 6.82139296605001
TIMESTEP 329377 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.163688 / Loss  0.3901086151599884
fps: 6.9769249194900596
TIMESTEP 329378 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  7.7962303 / Loss  0.029926378279924393
fps: 5.792461507229004
TIMESTEP 329379 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.334028 / Loss  0.2577000558376312
fps: 6.6094081904077555
TIMESTEP 329380 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  7.941103 / Loss  0.08001825213432312
fps: 6.885186267396393
TIMESTEP 329381 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  7.3218365 / Loss  0.02354404702782631
fps: 5.68612017531611
TIMESTEP 329382 / STATE train 

fps: 5.432163745719599
TIMESTEP 329437 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.115196 / Loss  0.12221963703632355
fps: 6.669132296517961
TIMESTEP 329438 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.69208 / Loss  0.10917913168668747
fps: 5.478861409483203
TIMESTEP 329439 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.333254 / Loss  0.049995727837085724
fps: 6.667764632568255
TIMESTEP 329440 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.075493 / Loss  0.5964626669883728
fps: 5.982717845578901
TIMESTEP 329441 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.998692 / Loss  0.02054361253976822
fps: 6.733089435723241
TIMESTEP 329442 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.17122 / Loss  0.022535357624292374
fps: 5.453011810102292
TIMESTEP 329443 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.301723 / Loss  0.08594821393489838
fps: 3.154088700623177
TIMESTEP 329444 / STATE train / E

fps: 5.9821802710755305
TIMESTEP 329498 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.5320215 / Loss  0.03457307815551758
fps: 6.031350973588509
TIMESTEP 329499 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.1568165 / Loss  0.9159005880355835
fps: 4.594211756561667
TIMESTEP 329500 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  7.9768443 / Loss  1.3467546701431274
fps: 5.82701655869731
TIMESTEP 329501 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.887192 / Loss  0.029954319819808006
fps: 5.045724617718425
TIMESTEP 329502 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.811548 / Loss  0.02767830528318882
fps: 5.617857975199638
TIMESTEP 329503 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.686335 / Loss  0.011937260627746582
fps: 5.416044482207376
TIMESTEP 329504 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.142316 / Loss  0.09195385128259659
fps: 5.4172266688020585
TIMESTEP 329505 / STATE tra

fps: 6.650109717271908
TIMESTEP 329560 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.506153 / Loss  0.09608632326126099
fps: 6.708975683725275
TIMESTEP 329561 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.19359 / Loss  0.02761724963784218
fps: 6.646147605405268
TIMESTEP 329562 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.126641 / Loss  0.1401347815990448
fps: 6.673673993810513
TIMESTEP 329563 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.837317 / Loss  0.06985794007778168
fps: 6.674098767593027
TIMESTEP 329564 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.753487 / Loss  0.09353701770305634
fps: 6.682445910206162
TIMESTEP 329565 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.774724 / Loss  0.03012072667479515
fps: 5.4484500192253735
TIMESTEP 329566 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  2.1913137 / Loss  0.045712538063526154
fps: 5.971993182682191
TIMESTEP 329567 / STATE train /

TIMESTEP 329621 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.570655 / Loss  0.03464168310165405
fps: 5.0092067658801485
TIMESTEP 329622 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.971431 / Loss  0.11100155115127563
fps: 6.187804093627643
TIMESTEP 329623 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.867351 / Loss  0.05824463441967964
fps: 6.489204781921897
TIMESTEP 329624 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.777553 / Loss  0.08987722545862198
fps: 6.187594138586752
TIMESTEP 329625 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.822166 / Loss  0.056948527693748474
fps: 6.426564886899737
TIMESTEP 329626 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  10.105602 / Loss  0.026395544409751892
fps: 5.128263314911723
TIMESTEP 329627 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.508002 / Loss  0.009788140654563904
fps: 6.207070871501931
TIMESTEP 329628 / STATE train / EPSILON 0 / ACTION

fps: 5.759871682546135
TIMESTEP 329682 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.755469 / Loss  0.2313215136528015
fps: 6.76651254634096
TIMESTEP 329683 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.830017 / Loss  0.03167812526226044
fps: 6.758237313916223
TIMESTEP 329684 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.598699 / Loss  0.02025444805622101
fps: 6.490459993872113
TIMESTEP 329685 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.690462 / Loss  0.02515817992389202
fps: 6.6622678342527575
TIMESTEP 329686 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.210552 / Loss  0.022273756563663483
fps: 6.056564267994374
TIMESTEP 329687 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.725178 / Loss  0.03735000267624855
fps: 6.029937606026626
TIMESTEP 329688 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.44952 / Loss  0.03188953548669815
fps: 5.940890020764696
TIMESTEP 329689 / STATE train / E

TIMESTEP 329744 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  7.253421 / Loss  0.27666354179382324
fps: 4.552775917409035
TIMESTEP 329745 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.47195 / Loss  0.03338524326682091
fps: 5.034441534373117
TIMESTEP 329746 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.403831 / Loss  0.11454010009765625
fps: 4.984188173333555
TIMESTEP 329747 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.096753 / Loss  0.11116485297679901
fps: 6.056686709756422
TIMESTEP 329748 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.418688 / Loss  0.281655877828598
fps: 6.020079831323676
TIMESTEP 329749 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  -1.077661 / Loss  0.2899520695209503
fps: 2.983082864519249
TIMESTEP 329750 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  7.869724 / Loss  0.025381337851285934
fps: 4.594634504365353
TIMESTEP 329751 / STATE train / EPSILON 0 / ACTION 0 / RE

TIMESTEP 329806 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.0175295 / Loss  0.010509918443858624
fps: 5.796784496591147
TIMESTEP 329807 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.242266 / Loss  0.0173469390720129
fps: 5.825948175876989
TIMESTEP 329808 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.740726 / Loss  0.020515017211437225
fps: 5.972716036021668
TIMESTEP 329809 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.623851 / Loss  0.018679853528738022
fps: 5.93266335212204
TIMESTEP 329810 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.110831 / Loss  0.024295629933476448
fps: 5.9787096958120705
TIMESTEP 329811 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.578623 / Loss  0.12401561439037323
fps: 6.857213857957731
TIMESTEP 329812 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.942357 / Loss  0.07416520267724991
fps: 5.977593487546158
TIMESTEP 329813 / STATE train / EPSILON 0 / ACTION 

fps: 5.952499826149648
TIMESTEP 329868 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.580694 / Loss  0.0160076841711998
fps: 6.727862141756079
TIMESTEP 329869 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.759207 / Loss  0.022044626995921135
fps: 6.699224390181572
TIMESTEP 329870 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.831505 / Loss  1.6220299005508423
fps: 6.653749323806608
TIMESTEP 329871 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.464618 / Loss  0.05617610737681389
fps: 6.679115184888523
TIMESTEP 329872 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.468681 / Loss  0.022602057084441185
fps: 6.687400250320873
TIMESTEP 329873 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.753059 / Loss  0.11386553943157196
fps: 6.66101934477726
TIMESTEP 329874 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.217034 / Loss  0.0786297619342804
fps: 5.967303142352688
TIMESTEP 329875 / STATE train / EP

TIMESTEP 329929 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.850386 / Loss  0.16094323992729187
fps: 6.11400317193768
TIMESTEP 329930 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.609514 / Loss  0.018130136653780937
fps: 5.870831472878416
TIMESTEP 329931 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.710766 / Loss  0.023564569652080536
fps: 5.664396030618474
TIMESTEP 329932 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.594354 / Loss  0.020272523164749146
fps: 5.921105644688826
TIMESTEP 329933 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.4964485 / Loss  0.06499744206666946
fps: 5.990296822402144
TIMESTEP 329934 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.180013 / Loss  0.031621064990758896
fps: 6.091210500404454
TIMESTEP 329935 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.420242 / Loss  0.033473748713731766
fps: 5.676685186062406
TIMESTEP 329936 / STATE train / EPSILON 0 / ACTION

TIMESTEP 329990 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.363484 / Loss  0.050767671316862106
fps: 6.729535288190634
TIMESTEP 329991 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.908962 / Loss  3.173123359680176
fps: 7.159079116257476
TIMESTEP 329992 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.079171 / Loss  0.02672068029642105
fps: 5.979374506388087
TIMESTEP 329993 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.938095 / Loss  0.023913543671369553
fps: 7.075387734859194
TIMESTEP 329994 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.156763 / Loss  0.026138639077544212
fps: 6.933203737773965
TIMESTEP 329995 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  9.306889 / Loss  0.04849879443645477
fps: 5.975856099732859
TIMESTEP 329996 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.732051 / Loss  0.3516833782196045
fps: 6.91582725865199
TIMESTEP 329997 / STATE train / EPSILON 0 / ACTION 0 / R

TIMESTEP 330052 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.819834 / Loss  0.021684784442186356
fps: 6.723300301677017
TIMESTEP 330053 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  6.826252 / Loss  0.05277600884437561
fps: 5.967922960467154
TIMESTEP 330054 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.351079 / Loss  0.04468538612127304
fps: 6.040227824221085
TIMESTEP 330055 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.771182 / Loss  0.03191152215003967
fps: 6.02754592904279
TIMESTEP 330056 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.177843 / Loss  0.01789683662354946
fps: 5.969723710354172
TIMESTEP 330057 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.101183 / Loss  0.19233955442905426
fps: 3.1476386166403008
TIMESTEP 330058 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.220584 / Loss  0.019538328051567078
fps: 4.947204159407794
TIMESTEP 330059 / STATE train / EPSILON 0 / ACTION 1 

fps: 6.672898921022403
TIMESTEP 330114 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.913259 / Loss  4.658478736877441
fps: 5.4164501448930995
TIMESTEP 330115 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.360769 / Loss  0.04071692377328873
fps: 5.50596896192309
TIMESTEP 330116 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.563083 / Loss  0.015464022755622864
fps: 6.6916679297058845
TIMESTEP 330117 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.696931 / Loss  1.268826961517334
fps: 6.686472750692671
TIMESTEP 330118 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.52865 / Loss  0.20418670773506165
fps: 6.654298247701546
TIMESTEP 330119 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  7.8499675 / Loss  0.04401964321732521
fps: 6.682467203477053
TIMESTEP 330120 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.427872 / Loss  0.6630389094352722
fps: 5.485245575765185
TIMESTEP 330121 / STATE train / EPS

TIMESTEP 330175 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  7.8447356 / Loss  0.037387602031230927
fps: 6.826922231789276
TIMESTEP 330176 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  7.943412 / Loss  0.011704158037900925
fps: 7.159824585062418
TIMESTEP 330177 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.21617 / Loss  0.40238305926322937
fps: 7.049368896956252
TIMESTEP 330178 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.067004 / Loss  0.5738698840141296
fps: 7.191891618469853
TIMESTEP 330179 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.006933 / Loss  0.030372820794582367
fps: 6.003423735994458
TIMESTEP 330180 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.067943 / Loss  0.10649637132883072
fps: 5.973473093547721
TIMESTEP 330181 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.3510895 / Loss  0.02828269451856613
fps: 5.970768983291885
TIMESTEP 330182 / STATE train / EPSILON 0 / ACTION 1

fps: 5.989330226075831
TIMESTEP 330237 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.593185 / Loss  0.29272520542144775
fps: 7.109363373329785
TIMESTEP 330238 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.726121 / Loss  0.059877660125494
fps: 6.116695881956617
TIMESTEP 330239 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.005333 / Loss  0.07673390209674835
fps: 6.018049971734146
TIMESTEP 330240 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.744798 / Loss  0.08198775351047516
fps: 6.1436550193714705
TIMESTEP 330241 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  8.7127495 / Loss  0.041050516068935394
fps: 5.996763069250874
TIMESTEP 330242 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.300002 / Loss  0.05103690177202225
fps: 7.1302112728731615
TIMESTEP 330243 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.643298 / Loss  0.020849719643592834
fps: 5.997534811493155
TIMESTEP 330244 / STATE train

fps: 5.983998173828683
TIMESTEP 330299 / STATE train / EPSILON 0 / ACTION 0 / REWARD -1 / Q_MAX  8.602729 / Loss  0.03061351738870144
fps: 7.002001619325059
TIMESTEP 330300 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.4819145 / Loss  0.03552568703889847
fps: 7.195593061576705
TIMESTEP 330301 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.201715 / Loss  0.06982887536287308
fps: 7.241609475219917
TIMESTEP 330302 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.099216 / Loss  0.4117426872253418
fps: 6.007723246513653
TIMESTEP 330303 / STATE train / EPSILON 0 / ACTION 1 / REWARD 0.1 / Q_MAX  9.12439 / Loss  0.0450669601559639
fps: 7.002328932013549
TIMESTEP 330304 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.199802 / Loss  0.10721258819103241
fps: 6.899479533356582
TIMESTEP 330305 / STATE train / EPSILON 0 / ACTION 0 / REWARD 0.1 / Q_MAX  8.702607 / Loss  0.013387154787778854
fps: 7.032773636220351
TIMESTEP 330306 / STATE train / EP