In [1]:
import numpy as np
from PIL import Image
import cv2 #opencv
import io
import time
import pandas as pd
import numpy as np
from IPython.display import clear_output
from random import randint
import os

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys

#keras imports
from keras.models import model_from_json
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.optimizers import SGD , Adam
from keras.callbacks import TensorBoard
from collections import deque
import random
import pickle
from io import BytesIO
import base64
import json

Using TensorFlow backend.


In [2]:
# ---- paths ----
game_url = "chrome://dino"               # address of the dino game page
chrome_driver_path = "./chromedriver"    # chromedriver binary handed to Selenium
# CSV logs written during training
loss_file_path = "./objects/loss_df.csv"
actions_file_path = "./objects/actions_df.csv"
q_value_file_path = "./objects/q_values.csv"
scores_file_path = "./objects/scores_df.csv"

# ---- JavaScript snippets executed in the browser ----
# Give the game canvas an id so later lookups can use getElementById.
init_script = "document.getElementsByClassName('runner-canvas')[0].id = 'runner-canvas'"

# Return the canvas content as a base64 string; substring(22) drops the
# leading "data:image/png;base64," part of the data URL (22 characters).
getbase64Script = (
    "canvasRunner = document.getElementById('runner-canvas'); "
    "return canvasRunner.toDataURL().substring(22)"
)

In [3]:
class Game:
    '''
    Game class: Selenium interface between Python and the browser.

    * __init__(): launch the browser window using the attributes in chrome_options
    * get_crashed(): return the game's `crashed` JavaScript flag (true once the agent has hit an obstacle)
    * get_playing(): return the game's `playing` JavaScript flag (true while the game is in progress)
    * restart(): tell the browser-side game to restart
    * press_up(): send a single UP-arrow key press to the page
    * press_down(): send a single DOWN-arrow key press to the page
    * get_score(): read the current score from the game's JavaScript variables
    * pause(): pause the game
    * resume(): resume a paused game
    * end(): shut down the browser and the driver, ending the game
    '''
    def __init__(self,custom_config=True):
        # NOTE(review): custom_config is accepted but not used anywhere in this
        # class; it is kept so existing callers keep working.
        chrome_options = Options()
        chrome_options.add_argument("disable-infobars")
        chrome_options.add_argument("--mute-audio")
        self._driver = webdriver.Chrome(executable_path = chrome_driver_path,options=chrome_options)
        self._driver.set_window_position(x=-10,y=0)
        self._driver.get('chrome://dino')
        # Zero the game's acceleration setting before play starts.
        self._driver.execute_script("Runner.config.ACCELERATION=0")
        # Tag the canvas with an id so screenshots can be grabbed by id.
        self._driver.execute_script(init_script)
    def get_crashed(self):
        return self._driver.execute_script("return Runner.instance_.crashed")
    def get_playing(self):
        return self._driver.execute_script("return Runner.instance_.playing")
    def restart(self):
        self._driver.execute_script("Runner.instance_.restart()")
    def press_up(self):
        self._driver.find_element_by_tag_name("body").send_keys(Keys.ARROW_UP)
    def press_down(self):
        # Needed by DinoAgent.duck(), which calls press_down() on the game.
        self._driver.find_element_by_tag_name("body").send_keys(Keys.ARROW_DOWN)
    def get_score(self):
        """Return the score as an int; 0 when the game exposes no digits yet."""
        score_array = self._driver.execute_script("return Runner.instance_.distanceMeter.digits")
        # The JavaScript array holds one entry per digit, e.g. [1,0,0] for 100.
        # str() makes the join work whether the entries arrive as strings or numbers.
        score = ''.join(str(digit) for digit in (score_array or []))
        return int(score) if score else 0
    def pause(self):
        return self._driver.execute_script("return Runner.instance_.stop()")
    def resume(self):
        return self._driver.execute_script("return Runner.instance_.play()")
    def end(self):
        # quit() also stops the chromedriver process; close() only closes the window.
        self._driver.quit()

In [4]:
class DinoAgent:
    """Thin wrapper that turns agent decisions into key presses on a game object."""

    def __init__(self, game):
        # Keep the game handle; every action is forwarded to it.
        self._game = game
        # One jump is needed to start the game.
        self.jump()

    def is_running(self):
        """Return the game's `playing` flag."""
        playing = self._game.get_playing()
        return playing

    def is_crashed(self):
        """Return the game's `crashed` flag."""
        crashed = self._game.get_crashed()
        return crashed

    def jump(self):
        """Forward an UP key press to the game."""
        self._game.press_up()

    def duck(self):
        """Forward a DOWN key press to the game."""
        self._game.press_down()

In [5]:
class Game_sate:
    """Environment wrapper: applies an action, grabs the next frame and computes the reward.

    Relies on the notebook-level `actions_df` / `scores_df` log frames and on the
    `show_img` / `grab_screen` helpers.
    """
    def __init__(self,agent,game):
        self._agent = agent
        self._game = game
        self._display = show_img() #display the processed image on screen using openCV, implemented using python coroutine
        next(self._display) # prime the display coroutine so it can receive frames
    def get_state(self,actions):
        """Apply `actions` and return the experience tuple (image, reward, is_over).

        * actions => indexable pair; actions[1] == 1 means "jump", anything else does nothing
        * reward is 0.1 for a surviving step and -1 when the agent crashed
        * is_over is True when the agent crashed (the game is restarted before returning)
        """
        actions_df.loc[len(actions_df)] = actions[1] # storing actions in a dataframe
        score = self._game.get_score()
        reward = 0.1
        is_over = False #game over
        if actions[1] == 1:
            self._agent.jump()
        image = grab_screen(self._game._driver)
        self._display.send(image) #display the image on screen
        if self._agent.is_crashed():
            # Append at the end of scores_df itself; indexing by another frame's
            # length would overwrite or scatter the score rows.
            scores_df.loc[len(scores_df)] = score # log the score when game is over
            self._game.restart()
            reward = -1
            is_over = True
        return image, reward, is_over #return the Experience tuple

In [6]:
def save_obj(obj, name ):
    """Pickle `obj` to objects/<name>.pkl.

    The objects folder is created when missing, and the data is written to a
    temporary file first and then moved into place, so an interrupted save
    cannot leave a truncated checkpoint behind.
    """
    os.makedirs('objects', exist_ok=True)
    target_path = 'objects/'+ name + '.pkl' #dump files into objects folder
    tmp_path = target_path + '.tmp'
    try:
        with open(tmp_path, 'wb') as f:
            pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)
        os.replace(tmp_path, target_path)
    except BaseException:
        # Do not leave a half-written temporary file around.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
        raise
def load_obj(name ):
    """Read objects/<name>.pkl and return the unpickled object."""
    pickle_path = 'objects/' + name + '.pkl'
    # NOTE: pickle.load executes arbitrary code from the file; only load
    # checkpoints this notebook wrote itself.
    with open(pickle_path, 'rb') as handle:
        restored = pickle.load(handle)
    return restored

def grab_screen(_driver):
    """Fetch the current game canvas from the browser and return it as a processed frame.

    The canvas arrives as a base64 string (see getbase64Script), is decoded into
    an array and passed through process_img.
    """
    encoded_canvas = _driver.execute_script(getbase64Script)
    raw_bytes = base64.b64decode(encoded_canvas)
    frame = np.array(Image.open(BytesIO(raw_bytes)))
    #processing image as required
    return process_img(frame)

def process_img(image):
    """Convert a captured frame to an 80x80 grayscale image.

    * image => array produced by grab_screen from a PIL image, i.e. channels in
      RGB(A) order, so the RGB conversion code is used rather than the BGR one
    """
    image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) #RGB to Grey Scale
    image = image[:300, :500] #Crop Region of Interest(ROI)
    image = cv2.resize(image, (80,80))
    return  image

def show_img(graphs = False):
    """
    Show images in new window

    Coroutine: prime it once with next(), then send() each frame to display.
    The window is titled "logs" when graphs is true, otherwise "game_play".
    Pressing 'q' in the window closes all OpenCV windows and ends the
    coroutine, so the next send() raises StopIteration.
    """
    window_title = "logs" if graphs else "game_play"
    while True:
        screen = (yield)
        cv2.namedWindow(window_title, cv2.WINDOW_NORMAL)
        # The frame is shown as received; the former 800x400 resize was
        # computed on every frame but never displayed, so it was dropped.
        cv2.imshow(window_title, screen)
        if (cv2.waitKey(1) & 0xFF == ord('q')):
            cv2.destroyAllWindows()
            break

In [7]:
#Initialize log structures from file if exists else create new
def _load_or_create_log(file_path, column):
    """Return the CSV at file_path as a DataFrame, or an empty frame with one column."""
    if os.path.isfile(file_path):
        return pd.read_csv(file_path)
    return pd.DataFrame(columns=[column])

# Each log is checked against and read from its own file; previously the scores
# log tested the loss file and the q-values log read the actions file.
loss_df = _load_or_create_log(loss_file_path, 'loss')
scores_df = _load_or_create_log(scores_file_path, 'scores')
actions_df = _load_or_create_log(actions_file_path, 'actions')
q_values_df = _load_or_create_log(q_value_file_path, 'qvalues')

In [8]:
# ---- game / training hyper-parameters ----
ACTIONS = 2                 # number of possible actions: do nothing, jump
GAMMA = 0.99                # discount applied to the next state's best Q-value
OBSERVATION = 100.0         # timesteps to observe before training starts
EXPLORE = 100000            # frames over which epsilon is annealed
FINAL_EPSILON = 0.0001      # epsilon value at the end of annealing
INITIAL_EPSILON = 0.1       # epsilon value at the start
REPLAY_MEMORY = 50000       # maximum number of stored transitions
BATCH = 16                  # minibatch size
FRAME_PER_ACTION = 1        # an action is chosen every this many timesteps
LEARNING_RATE = 1e-4        # optimizer learning rate

# ---- network input geometry ----
img_rows = img_cols = 80    # processed frame size
img_channels = 4            # four frames are stacked per input

In [9]:
# Training variables are checkpointed to the filesystem so that training can
# resume from the same step.
def init_cache():
    """Write the starting checkpoints: epsilon, the time step and an empty replay memory.

    Meant to be run only once, before the first training session.
    """
    starting_values = (
        (INITIAL_EPSILON, "epsilon"),   # exploration rate
        (0, "time"),                    # time step counter
        (deque(), "D"),                 # empty replay memory
    )
    for value, checkpoint_name in starting_values:
        save_obj(value, checkpoint_name)

In [10]:
'''
Call only once to init file structure
'''
#init_cache()

'\nCall only once to init file structure\n'

In [11]:
def buildmodel():
    """Build and compile the Q-network and make sure a weights file exists.

    The network is three Conv2D + MaxPooling2D + ReLU stages followed by a
    512-unit dense layer and a final dense layer with one output per action.
    It is compiled with MSE loss and the Adam optimizer.

    Returns the compiled Keras model. When 'model.h5' does not exist yet, the
    freshly initialised weights are written to it so later load_weights calls
    succeed; an existing weights file is never overwritten.
    """
    print("Now we build the model")
    model = Sequential()
    model.add(Conv2D(32, (8, 8), padding='same',strides=(4, 4),input_shape=(img_cols,img_rows,img_channels)))  #80*80*4
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Activation('relu'))
    model.add(Conv2D(64, (4, 4),strides=(2, 2),  padding='same'))
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Activation('relu'))
    model.add(Conv2D(64, (3, 3),strides=(1, 1),  padding='same'))
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Activation('relu'))
    model.add(Flatten())
    model.add(Dense(512))
    model.add(Activation('relu'))
    model.add(Dense(ACTIONS))
    adam = Adam(lr=LEARNING_RATE)
    model.compile(loss='mse',optimizer=adam)

    #create model file if not present
    # Test the weights file itself: keying this on the loss log could overwrite
    # trained weights, or skip creating the file when it is actually missing.
    weights_path = 'model.h5'
    if not os.path.isfile(weights_path):
        model.save_weights(weights_path)
    print("We finish building the model")
    return model

In [12]:
def trainNetwork(model,game_state,observe=False):
    '''
    Main training module: plays the game with an epsilon-greedy policy and, in
    training mode, fits the model on minibatches sampled from the replay memory.

    Parameters:
    * model => Keras model to be trained
    * game_state => game state module with access to the game environment and dino
    * observe => play-only flag: when True the saved weights are loaded and the
      model is never trained; when False (default) weights are updated

    The loop never returns on its own; it ends when an exception propagates
    (for example StopIteration from the display coroutine). Progress (weights,
    replay memory, time step, epsilon and the CSV logs) is saved every 1000 steps.
    '''
    last_time = time.time()
    # store the previous observations in replay memory
    D = load_obj("D") #load from file system
    # get the first state by doing nothing
    do_nothing = np.zeros(ACTIONS)
    do_nothing[0] =1 #0 => do nothing,
                     #1=> jump

    x_t, r_0, terminal = game_state.get_state(do_nothing) # get next step after performing the action

    s_t = np.stack((x_t, x_t, x_t, x_t), axis=2) # stack 4 images to create placeholder input
    s_t = s_t.reshape(1, s_t.shape[0], s_t.shape[1], s_t.shape[2])  # 1 x rows x cols x 4

    initial_state = s_t

    if observe :
        OBSERVE = 999999999    # We keep observe, never train
        epsilon = FINAL_EPSILON
        print ("Now we load weight")
    else:                       # We go to training mode
        OBSERVE = OBSERVATION
        epsilon = load_obj("epsilon")
    model.load_weights("model.h5")
    adam = Adam(lr=LEARNING_RATE)
    model.compile(loss='mse',optimizer=adam)
    if observe :
        print ("Weight load successfully")

    t = load_obj("time") # resume from the previous time step stored in file system
    while (True): #endless running

        loss = 0
        Q_sa = 0
        action_index = 0
        r_t = 0 #reward at t
        a_t = np.zeros([ACTIONS]) # action at t

        #choose an action epsilon greedy
        if t % FRAME_PER_ACTION == 0: #parameter to skip frames for actions
            if  random.random() <= epsilon: #randomly explore an action
                print("----------Random Action----------")
                action_index = random.randrange(ACTIONS)
                a_t[action_index] = 1
            else: # predict the output
                q = model.predict(s_t)       #input a stack of 4 images, get the prediction
                max_Q = np.argmax(q)         # choosing index with maximum q value
                action_index = max_Q
                a_t[action_index] = 1        # 0=> do nothing, 1=> jump

        #We reduced the epsilon (exploration parameter) gradually
        if epsilon > FINAL_EPSILON and t > OBSERVE:
            epsilon -= (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE

        #run the selected action and observed next state and reward
        x_t1, r_t, terminal = game_state.get_state(a_t)
        print('fps: {0}'.format(1 / (time.time()-last_time))) # helpful for measuring frame rate
        last_time = time.time()
        x_t1 = x_t1.reshape(1, x_t1.shape[0], x_t1.shape[1], 1) # 1 x rows x cols x 1
        s_t1 = np.append(x_t1, s_t[:, :, :, :3], axis=3) # put the new image first and drop the oldest one

        # store the transition in D
        D.append((s_t, action_index, r_t, s_t1, terminal))
        if len(D) > REPLAY_MEMORY:
            D.popleft()

        #only train if done observing (and once a full minibatch can be sampled)
        if t > OBSERVE and len(D) >= BATCH:

            #sample a minibatch to train on
            minibatch = random.sample(D, BATCH)
            inputs = np.zeros((BATCH, s_t.shape[1], s_t.shape[2], s_t.shape[3]))   # BATCH x rows x cols x 4
            targets = np.zeros((inputs.shape[0], ACTIONS))                         # BATCH x ACTIONS

            #Now we do the experience replay
            # Each transition is (state, action index, reward, next state, terminal).
            # The sampled flag gets its own name so it cannot clobber `terminal`
            # of the step just played, which decides the state reset below.
            for i, (state_t, action_t, reward_t, state_t1, terminal_t) in enumerate(minibatch):
                inputs[i:i + 1] = state_t

                targets[i] = model.predict(state_t)  # predicted q values
                Q_sa = model.predict(state_t1)      #predict q values for next step

                if terminal_t:
                    targets[i, action_t] = reward_t # if terminated, only equals reward
                else:
                    targets[i, action_t] = reward_t + GAMMA * np.max(Q_sa)

            loss += model.train_on_batch(inputs, targets)
            loss_df.loc[len(loss_df)] = loss
            q_values_df.loc[len(q_values_df)] = np.max(Q_sa)
        s_t = initial_state if terminal else s_t1 #reset game to initial frame if terminate
        t = t + 1

        # save progress every 1000 iterations
        if t % 1000 == 0:
            print("Now we save model")
            game_state._game.pause() #pause game while saving to filesystem
            model.save_weights("model.h5", overwrite=True)
            save_obj(D,"D") #saving episodes
            save_obj(t,"time") #caching time steps
            save_obj(epsilon,"epsilon") #cache epsilon to avoid repeated randomness in actions
            loss_df.to_csv("./objects/loss_df.csv",index=False)
            scores_df.to_csv("./objects/scores_df.csv",index=False)
            actions_df.to_csv("./objects/actions_df.csv",index=False)
            q_values_df.to_csv(q_value_file_path,index=False)
            # NOTE: to_json() already returns a string, so the file holds a
            # JSON-encoded string; kept as is for compatibility with existing files.
            with open("model.json", "w") as outfile:
                json.dump(model.to_json(), outfile)
            clear_output()
            game_state._game.resume()
        # print info
        state = ""
        if t <= OBSERVE:
            state = "observe"
        elif t > OBSERVE and t <= OBSERVE + EXPLORE:
            state = "explore"
        else:
            state = "train"

        print("TIMESTEP", t, "/ STATE", state,             "/ EPSILON", epsilon, "/ ACTION", action_index, "/ REWARD", r_t,             "/ Q_MAX " , np.max(Q_sa), "/ Loss ", loss)


In [13]:
#main function
def playGame(observe=False):
    """Launch the game, build the model and run the training/play loop.

    * observe => forwarded to trainNetwork; True plays without training

    The browser is shut down however the run ends. StopIteration from the
    training loop (raised when the display coroutine has finished, i.e. 'q' was
    pressed in the image window) is treated as a normal stop; any other
    exception, including KeyboardInterrupt, propagates after the cleanup.
    """
    game = Game()
    try:
        dino = DinoAgent(game)
        game_state = Game_sate(dino,game)
        model = buildmodel()
        try:
            trainNetwork(model,game_state,observe=observe)
        except StopIteration:
            pass
    finally:
        game.end()

In [14]:
# Start a training run (observe=False selects training mode in trainNetwork).
# The training loop is endless; the log below was cut off by a manual interrupt.
playGame(observe=False);

TIMESTEP 117000 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  3.1781793 / Loss  0.9954526424407959
fps: 0.03157483888758658
TIMESTEP 117001 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  3.4843738 / Loss  2.4800281524658203
fps: 4.58738309528575
TIMESTEP 117002 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  2.6385088 / Loss  0.023736415430903435
fps: 4.064992522855445
TIMESTEP 117003 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  2.8857243 / Loss  0.0227796733379364
fps: 4.03229059613778
TIMESTEP 117004 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  2.5637517 / Loss  0.028962697833776474
fps: 4.273491803111659
TIMESTEP 117005 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  2.874481 / Loss  1.0951833724975586
fps: 3.860949610619143
TIMESTEP 117006 / STATE train / EPSILON 9.999999987391849e-05 / ACT

TIMESTEP 117053 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  1.053773 / Loss  0.38766008615493774
fps: 4.255074980876883
TIMESTEP 117054 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  1.0182716 / Loss  0.13573750853538513
fps: 4.273718232190565
TIMESTEP 117055 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  0.94282615 / Loss  0.20001649856567383
fps: 4.347915867602392
TIMESTEP 117056 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  0.9356717 / Loss  0.008775477297604084
fps: 4.255273559561395
TIMESTEP 117057 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  0.9120282 / Loss  61.94590377807617
fps: 4.34838015399656
TIMESTEP 117058 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  0.81299454 / Loss  1.0630332231521606
fps: 4.132433209980177
TIMESTEP 117059 / STATE train / EPSILON 9.999999987391849e-05 / AC

TIMESTEP 117106 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  2.5868032 / Loss  0.389534056186676
fps: 4.237304162642496
TIMESTEP 117107 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  2.7785964 / Loss  0.5640149712562561
fps: 4.92606620932972
TIMESTEP 117108 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  2.380361 / Loss  10.750003814697266
fps: 4.975248922344769
TIMESTEP 117109 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  2.347094 / Loss  73.23584747314453
fps: 4.694829122396988
TIMESTEP 117110 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  47.486233 / Loss  1.7251907587051392
fps: 4.877976844663864
TIMESTEP 117111 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  2.9333708 / Loss  2.5126047134399414
fps: 4.926205065143576
TIMESTEP 117112 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / 

TIMESTEP 117159 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  3.636935 / Loss  1.2037943601608276
fps: 4.672949549394978
TIMESTEP 117160 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  4.886354 / Loss  5.631412506103516
fps: 4.761909303215361
TIMESTEP 117161 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  4.9880967 / Loss  3.5234482288360596
fps: 5.127799362800353
TIMESTEP 117162 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  3.4989347 / Loss  1.3448907136917114
fps: 5.025291173788908
TIMESTEP 117163 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  3.960067 / Loss  1.0968605279922485
fps: 4.901978063075961
TIMESTEP 117164 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  3.4649787 / Loss  0.2512286305427551
fps: 4.629581921876777
TIMESTEP 117165 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 /

TIMESTEP 117212 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  4.0795207 / Loss  2.182645320892334
fps: 5.1019947913308865
TIMESTEP 117213 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  2.8374934 / Loss  0.06854578852653503
fps: 4.926124064967508
TIMESTEP 117214 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  3.2481837 / Loss  1.465163230895996
fps: 5.050567033771563
TIMESTEP 117215 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  2.572492 / Loss  1.020922064781189
fps: 4.347812205541038
TIMESTEP 117216 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  2.9515965 / Loss  0.6344274878501892
fps: 5.347415284543724
TIMESTEP 117217 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  2.5360327 / Loss  1.98656165599823
fps: 5.025315257567679
TIMESTEP 117218 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 /

TIMESTEP 117265 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  1.7975826 / Loss  0.7435237765312195
fps: 4.201593765182592
TIMESTEP 117266 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  2.2044127 / Loss  0.06229454278945923
fps: 2.949522794206584
TIMESTEP 117267 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  1.8264774 / Loss  0.5718636512756348
fps: 4.310014971951995
TIMESTEP 117268 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  1.8151991 / Loss  4.447569370269775
fps: 5.102044440768147
TIMESTEP 117269 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  1.9516002 / Loss  0.24614375829696655
fps: 5.290821455917432
TIMESTEP 117270 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  2.229322 / Loss  0.8052999973297119
fps: 4.949860625615147
TIMESTEP 117271 / STATE train / EPSILON 9.999999987391849e-05 / ACTION

TIMESTEP 117318 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.2465285 / Loss  0.12405282258987427
fps: 5.076153730714676
TIMESTEP 117319 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  0.42096 / Loss  0.06301457434892654
fps: 5.208335713381
TIMESTEP 117320 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  0.5438718 / Loss  0.09226743131875992
fps: 5.075815865555087
TIMESTEP 117321 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  0.666982 / Loss  0.26884567737579346
fps: 5.0764056230975445
TIMESTEP 117322 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  0.6745812 / Loss  0.7595751881599426
fps: 4.7479748423116455
TIMESTEP 117323 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  0.94087404 / Loss  0.11383199691772461
fps: 4.694708258573042
TIMESTEP 117324 / STATE train / EPSILON 9.999999987391849e-05 / ACT

TIMESTEP 117371 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  2.0031433 / Loss  0.4432913362979889
fps: 5.1812554430739395
TIMESTEP 117372 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  2.5703397 / Loss  1.2213233709335327
fps: 4.808106930783869
TIMESTEP 117373 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  2.4135578 / Loss  1.1486916542053223
fps: 4.901874942295533
TIMESTEP 117374 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD -1 / Q_MAX  2.725194 / Loss  0.5292084217071533
fps: 5.319257894289631
TIMESTEP 117375 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  2.581546 / Loss  0.16263088583946228
fps: 4.761855240556168
TIMESTEP 117376 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  2.6932197 / Loss  0.4567800462245941
fps: 5.2627006463091615
TIMESTEP 117377 / STATE train / EPSILON 9.999999987391849e-05 / ACTION

TIMESTEP 117424 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD -1 / Q_MAX  0.12870401 / Loss  67.290771484375
fps: 5.10153557984103
TIMESTEP 117425 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  0.32252222 / Loss  74.798095703125
fps: 4.975101386735685
TIMESTEP 117426 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  0.58609647 / Loss  0.11094088107347488
fps: 4.8309898088937215
TIMESTEP 117427 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  0.6999704 / Loss  0.3355821669101715
fps: 5.1813002465695215
TIMESTEP 117428 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  1.1161551 / Loss  0.35283198952674866
fps: 5.025044328365362
TIMESTEP 117429 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  1.4509952 / Loss  0.5067245364189148
fps: 5.00008225576297
TIMESTEP 117430 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0

TIMESTEP 117477 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  0.11687238 / Loss  0.24053096771240234
fps: 5.154683367580694
TIMESTEP 117478 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  0.55958855 / Loss  0.15085190534591675
fps: 5.347558456791697
TIMESTEP 117479 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.36920872 / Loss  0.4212420582771301
fps: 5.10209409117173
TIMESTEP 117480 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  1.0919818 / Loss  0.3538542687892914
fps: 2.97616543839557
TIMESTEP 117481 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  1.3645811 / Loss  0.33191221952438354
fps: 5.051236102279515
TIMESTEP 117482 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  1.5584089 / Loss  0.4152595102787018
fps: 5.101988585219172
TIMESTEP 117483 / STATE train / EPSILON 9.999999987391849e-05 / A

TIMESTEP 117530 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  3.8302412 / Loss  0.7763190865516663
fps: 4.367115112100946
TIMESTEP 117531 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  3.72515 / Loss  0.41753315925598145
fps: 4.255126782225126
TIMESTEP 117532 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  3.2937326 / Loss  1.9977500438690186
fps: 4.425010787423921
TIMESTEP 117533 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  3.4545877 / Loss  1.434325933456421
fps: 4.4247913831481895
TIMESTEP 117534 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  3.8208675 / Loss  46.267269134521484
fps: 4.328969585877476
TIMESTEP 117535 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  2.8345695 / Loss  0.981634259223938
fps: 4.310307300374992
TIMESTEP 117536 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1

TIMESTEP 117583 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  1.3731993 / Loss  100.01722717285156
fps: 4.545237220562402
TIMESTEP 117584 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  1.9138227 / Loss  14.37942123413086
fps: 4.367028719930699
TIMESTEP 117585 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  2.3167884 / Loss  0.8891080617904663
fps: 2.906967830917389
TIMESTEP 117586 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD -1 / Q_MAX  2.6297479 / Loss  0.5239698886871338
fps: 4.4444557472799024
TIMESTEP 117587 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  3.3656769 / Loss  2.356640100479126
fps: 4.761866052989801
TIMESTEP 117588 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  4.3179774 / Loss  1.3258345127105713
fps: 4.672928724619113
TIMESTEP 117589 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0

TIMESTEP 117636 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  3.3721673 / Loss  1.1946818828582764
fps: 4.695044590735739
TIMESTEP 117637 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  3.1399894 / Loss  0.125543013215065
fps: 5.3476880001937985
TIMESTEP 117638 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  3.8667305 / Loss  0.7740833163261414
fps: 5.434563860567025
TIMESTEP 117639 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  3.3031883 / Loss  2.197507858276367
fps: 3.2786416745031
TIMESTEP 117640 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  3.5223665 / Loss  1.2533297538757324
fps: 5.025170758357604
TIMESTEP 117641 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  3.4282877 / Loss  0.9014254212379456
fps: 4.586766050330477
TIMESTEP 117642 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 

TIMESTEP 117690 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  0.62491775 / Loss  16.472219467163086
fps: 4.048638051543516
TIMESTEP 117691 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  0.2631381 / Loss  0.07948188483715057
fps: 4.032313855483655
TIMESTEP 117692 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  0.41755977 / Loss  0.5717567801475525
fps: 3.984011915074697
TIMESTEP 117693 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  -0.022731274 / Loss  0.184667706489563
fps: 3.9999923706200207
TIMESTEP 117694 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  -0.33959404 / Loss  0.0855201929807663
fps: 4.291879889282845
TIMESTEP 117695 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  0.018250741 / Loss  0.11912336200475693
fps: 4.524848724147715
TIMESTEP 117696 / STATE train / EPSILON 9.999999987391849e-

fps: 4.854445477093516
TIMESTEP 117744 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.59621537 / Loss  0.509475827217102
fps: 4.716893328789969
TIMESTEP 117745 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.5018595 / Loss  0.3871350884437561
fps: 4.975184005598752
TIMESTEP 117746 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.3019548 / Loss  3.6167659759521484
fps: 5.347640272947032
TIMESTEP 117747 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD -1 / Q_MAX  -1.3917954 / Loss  0.6147438287734985
fps: 4.80766603241558
TIMESTEP 117748 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.879751 / Loss  1.545200228691101
fps: 4.5038587132182
TIMESTEP 117749 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  -1.3683856 / Loss  2.269169330596924
fps: 4.7847028371892995
TIMESTEP 117750 / STATE train / EPSILON 9.99999

fps: 4.504434301669978
TIMESTEP 117797 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  2.301961 / Loss  1.1257524490356445
fps: 4.950550021245456
TIMESTEP 117798 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  2.210495 / Loss  0.40522345900535583
fps: 4.975148597178581
TIMESTEP 117799 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  2.785865 / Loss  0.8032054305076599
fps: 5.0504940552022095
TIMESTEP 117800 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  2.6407092 / Loss  6.484313488006592
fps: 4.950018351742117
TIMESTEP 117801 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  2.9219184 / Loss  1.1828060150146484
fps: 5.128269585109778
TIMESTEP 117802 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  3.7525163 / Loss  41.37150573730469
fps: 5.025550086508922
TIMESTEP 117803 / STATE train / EPSILON 9.99999998

TIMESTEP 117850 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD -1 / Q_MAX  100.456055 / Loss  9.45758056640625
fps: 4.2016737357324745
TIMESTEP 117851 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  3.5559556 / Loss  50.115447998046875
fps: 4.366792295679334
TIMESTEP 117852 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  3.3171082 / Loss  0.8195439577102661
fps: 4.310404751994985
TIMESTEP 117853 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  3.8288913 / Loss  0.1994471698999405
fps: 4.854361201257366
TIMESTEP 117854 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  2.854169 / Loss  50.34355926513672
fps: 4.901932231082355
TIMESTEP 117855 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  3.6681292 / Loss  64.2820816040039
fps: 4.854394911240686
TIMESTEP 117856 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / 

fps: 4.975095485493322
TIMESTEP 117904 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  0.61637604 / Loss  0.09026771783828735
fps: 4.5248926575614385
TIMESTEP 117905 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  74.412155 / Loss  58.21738815307617
fps: 4.9505149625964595
TIMESTEP 117906 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  0.75997674 / Loss  0.2807171046733856
fps: 4.650839729530137
TIMESTEP 117907 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  1.020465 / Loss  0.46889278292655945
fps: 4.901926502143414
TIMESTEP 117908 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  1.2690991 / Loss  0.39852017164230347
fps: 5.000064373845145
TIMESTEP 117909 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  118.41024 / Loss  0.5029116272926331
fps: 5.050433241338785
TIMESTEP 117910 / STATE train / EPSILON 9.

TIMESTEP 117957 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  3.7458806 / Loss  0.8164671659469604
fps: 4.38598927528715
TIMESTEP 117958 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  4.650501 / Loss  0.8194429278373718
fps: 4.366851399342209
TIMESTEP 117959 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  3.7821486 / Loss  0.9046391248703003
fps: 4.424772711448507
TIMESTEP 117960 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  3.833394 / Loss  0.8374088406562805
fps: 4.291822797607232
TIMESTEP 117961 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  3.3626919 / Loss  0.9956800937652588
fps: 4.385970929624595
TIMESTEP 117962 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  4.1753197 / Loss  0.3482927978038788
fps: 4.184103156504442
TIMESTEP 117963 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 

KeyboardInterrupt: 