In [77]:
!pip install tensorflow
!pip install gym
!pip install keras
!pip install keras-rl2
!pip install pygame

Collecting pygame
  Downloading pygame-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.7/13.7 MB[0m [31m50.1 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: pygame
Successfully installed pygame-2.1.3


In [1]:
from gym import Env
from gym.spaces import Discrete, Box
import numpy as np
import random
from matplotlib import pyplot as plt
import matplotlib.animation
from IPython.display import display, clear_output
import random
import pygame

In [2]:
code2action = {
    0:np.array([0, 0, 0, 0]),
    1:np.array([0, 0, 1, 0]),
    2:np.array([0, 0, -1, 0]),
    3:np.array([0, 0, 0, 1]),
    4:np.array([0, 0, 0, -1]),
    5:np.array([0, 0, 1, 1]),
    6:np.array([0, 0, 1, -1]),
    7:np.array([0, 0, -1, 1]),
    8:np.array([0, 0, -1, -1])
}

class ShrinkingCircleEnv(Env):
    width = height = d = 10
    time = 60
    terminated_penalty = 1
    window = None
    window_size = 512
    clock = None
    metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 15}
    
    def __init__(self):
        self.action_space = Discrete(9)
        self.bounds = np.array([self.height, self.width, self.height, self.width]) - 1
        self.observation_space = Box(np.zeros(4), self.bounds)
        
        
        self.state = (np.random.random_sample(size=4)*self.bounds).astype(int)
        
        self.circle_radius = 0
        
        self.timestamp = self.time
        
        # for rendering
        self.log_circles = []
        self.log_hero = []

        
        
    def get_reward(self):
        circle, hero = self.state[:2], self.state[2:]
        return - np.linalg.norm(hero - circle)
        
    def step(self, action):
        new_state = self.state + code2action[action]
        terminated = not ((0 <= new_state).all() and (new_state < self.bounds).all())
        
        self.timestamp -= 1 
        self.truncated = self.timestamp<=0
        
        reward = self.get_reward()
        
        # move center of circle with probability 0.2
        if np.random.binomial(1, 0.2):
            self.state = np.hstack((
                new_state[:2] + random.choice(list(code2action.values()))[2:],
                new_state[2:]
            ))
        
        # to stay on the playing field
        self.state = np.clip(self.state, np.zeros(4), self.bounds)
            
        info = {}
        return self.state, reward, self.truncated, info
    
    def render(self, mode):
        self.render_mode = mode
        circle, hero = self.state[:2], self.state[2:]
        self.log_circles.append(circle)
        self.log_hero.append(hero)
        
        return self._render_frame()
    
    # inspired by https://www.gymlibrary.dev/content/environment_creation/
    def _render_frame(self):
        if self.window is None and self.render_mode == "human":
            pygame.init()
            pygame.display.init()
            self.window = pygame.display.set_mode((self.window_size, self.window_size))
        if self.clock is None and self.render_mode == "human":
            self.clock = pygame.time.Clock()
        
        
        circle, hero = self.state[:2], self.state[2:]
        
        canvas = pygame.Surface((self.window_size, self.window_size))
        canvas.fill((255, 255, 255))
        pix_square_size = (
            self.window_size / self.d
        )  # The size of a single grid square in pixels

        # First we draw the hero
        pygame.draw.rect(
            canvas,
            (255, 0, 0),
            pygame.Rect(
                pix_square_size * hero,
                (pix_square_size, pix_square_size),
            ),
        )
        # Now we draw the circle
        pygame.draw.circle(
            canvas,
            (0, 0, 255),
            (circle + 0.5) * pix_square_size,
            pix_square_size / 3,
        )

        # Finally, add some gridlines
        for x in range(self.d + 1):
            pygame.draw.line(
                canvas,
                0,
                (0, pix_square_size * x),
                (self.window_size, pix_square_size * x),
                width=3,
            )
            pygame.draw.line(
                canvas,
                0,
                (pix_square_size * x, 0),
                (pix_square_size * x, self.window_size),
                width=3,
            )
        
        font = pygame.font.Font(None, 25)
        text = font.render(f"Time remaining: {str(self.timestamp)}", True,(0, 0, 128))
        text_rect = text.get_rect(center=(self.window_size/6, self.window_size/15))
        
        if self.render_mode == "human":
            # The following line copies our drawings from `canvas` to the visible window
            self.window.blit(canvas, canvas.get_rect())
            self.window.blit(text, text_rect)
            pygame.event.pump()
            pygame.display.update()

            # We need to ensure that human-rendering occurs at the predefined framerate.
            # The following line will automatically add a delay to keep the framerate stable.
            self.clock.tick(self.metadata["render_fps"])
        else:  # rgb_array
            return np.transpose(
                np.array(pygame.surfarray.pixels3d(canvas)), axes=(1, 0, 2)
            )
        
    def close(self):
        if self.window is not None:
            pygame.display.quit()
            pygame.quit()
    
    def reset(self):
        
        self.state = (np.random.random_sample(size=4)*self.bounds).astype(int)
        self.circle_radius = 0 #min(self.height, self.width)
        self.timestamp = self.time
        
        self.log_circles = []
        self.log_hero = []
        self.window = None
        self.clock = None
        return self.state
    

In [3]:
env = ShrinkingCircleEnv()

  logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


In [4]:
episodes = 3
for episode in range(1, episodes+1):
    state = env.reset()
    done = False
    score = 0 
    
    while not done:
        action = env.action_space.sample()
        n_state, reward, done, info = env.step(action)
#         env.render('human')
        score+=reward
    print('Episode:{} Score:{}'.format(episode, score))
env.close()

Episode:1 Score:-225.69925692169008
Episode:2 Score:-287.00561184937595
Episode:3 Score:-515.7849374229106


### Let's learn the model

In [5]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, CategoryEncoding
from tensorflow.keras.optimizers.legacy import Adam

2023-02-25 00:35:01.669567: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-02-25 00:35:01.994562: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /homes/vladtom/miniconda3/lib/python3.10/site-packages/cv2/../../lib64:
2023-02-25 00:35:01.994598: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2023-02-25 00:35:03.101111: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfe

In [6]:
def get_model(states, n_actions=9, compact=False):
    model = Sequential() 
    model.add(keras.Input(shape=(1,states[0])))
    if not compact:
        model.add(Dense(1024, activation='relu'))
        model.add(Dense(100, activation='relu'))
    model.add(Flatten()) 
    model.add(Dense(n_actions, activation='linear'))
    return model

In [7]:
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

In [8]:
def get_agent(model, actions):
    policy = BoltzmannQPolicy()
    memory = SequentialMemory(limit=50000, window_length=1)
    dqn = DQNAgent(model=model, memory=memory, policy=policy, 
                  nb_actions=actions, nb_steps_warmup=10, target_model_update=1e-2)
    return dqn

In [9]:
n_states = env.observation_space.shape
n_actions = env.action_space.n

model = get_model(n_states, n_actions)
dqn = get_agent(model, n_actions)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

  super().__init__(name, **kwargs)
2023-02-25 00:35:05.185222: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /homes/vladtom/miniconda3/lib/python3.10/site-packages/cv2/../../lib64:
2023-02-25 00:35:05.185367: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublas.so.11'; dlerror: libcublas.so.11: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /homes/vladtom/miniconda3/lib/python3.10/site-packages/cv2/../../lib64:
2023-02-25 00:35:05.185460: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublasLt.so.11'; dlerror: libcublasLt.so.11: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /homes/vladtom/miniconda3/lib/python3.10/s

Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


In [10]:
scores = dqn.test(env, nb_episodes=100, visualize=False)
print(np.mean(scores.history['episode_reward']))

Testing for 100 episodes ...
Episode 1: reward: -254.775, steps: 60


  updates=self.state_updates,
2023-02-25 00:35:12.302804: W tensorflow/c/c_api.cc:291] Operation '{name:'dense_2/BiasAdd' id:117 op device:{requested: '', assigned: ''} def:{{{node dense_2/BiasAdd}} = BiasAdd[T=DT_FLOAT, _has_manual_control_dependencies=true, data_format="NHWC"](dense_2/MatMul, dense_2/BiasAdd/ReadVariableOp)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.
2023-02-25 00:35:12.326201: W tensorflow/c/c_api.cc:291] Operation '{name:'total_2/Assign' id:382 op device:{requested: '', assigned: ''} def:{{{node total_2/Assign}} = AssignVariableOp[_has_manual_control_dependencies=true, dtype=DT_FLOAT, validate_shape=false](total_2, total_2/Initializer/zeros)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modi

Episode 2: reward: -515.735, steps: 60
Episode 3: reward: -412.104, steps: 60
Episode 4: reward: -411.073, steps: 60
Episode 5: reward: -495.668, steps: 60
Episode 6: reward: -413.858, steps: 60
Episode 7: reward: -444.638, steps: 60
Episode 8: reward: -481.286, steps: 60
Episode 9: reward: -443.374, steps: 60
Episode 10: reward: -194.967, steps: 60
Episode 11: reward: -354.313, steps: 60
Episode 12: reward: -288.513, steps: 60
Episode 13: reward: -192.722, steps: 60
Episode 14: reward: -370.973, steps: 60
Episode 15: reward: -304.645, steps: 60
Episode 16: reward: -400.049, steps: 60
Episode 17: reward: -181.095, steps: 60
Episode 18: reward: -458.646, steps: 60
Episode 19: reward: -405.603, steps: 60
Episode 20: reward: -329.403, steps: 60
Episode 21: reward: -335.037, steps: 60
Episode 22: reward: -183.614, steps: 60
Episode 23: reward: -231.216, steps: 60
Episode 24: reward: -276.245, steps: 60
Episode 25: reward: -579.682, steps: 60
Episode 26: reward: -545.108, steps: 60
Episode 

In [56]:
dqn.fit(env, nb_steps=20000, visualize=False, verbose=1)

Training for 20000 steps ...
Interval 1 (0 steps performed)


2023-02-24 20:00:38.557947: W tensorflow/c/c_api.cc:291] Operation '{name:'dense_11/BiasAdd' id:2594 op device:{requested: '', assigned: ''} def:{{{node dense_11/BiasAdd}} = BiasAdd[T=DT_FLOAT, _has_manual_control_dependencies=true, data_format="NHWC"](dense_11/MatMul, dense_11/BiasAdd/ReadVariableOp)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.
2023-02-24 20:00:38.637886: W tensorflow/c/c_api.cc:291] Operation '{name:'total_12/Assign' id:2839 op device:{requested: '', assigned: ''} def:{{{node total_12/Assign}} = AssignVariableOp[_has_manual_control_dependencies=true, dtype=DT_FLOAT, validate_shape=false](total_12, total_12/Initializer/zeros)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after runni

    1/10000 [..............................] - ETA: 35:38 - reward: -3.6056

2023-02-24 20:00:38.788645: W tensorflow/c/c_api.cc:291] Operation '{name:'dense_11_1/BiasAdd' id:2710 op device:{requested: '', assigned: ''} def:{{{node dense_11_1/BiasAdd}} = BiasAdd[T=DT_FLOAT, _has_manual_control_dependencies=true, data_format="NHWC"](dense_11_1/MatMul, dense_11_1/BiasAdd/ReadVariableOp)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.
2023-02-24 20:00:39.104717: W tensorflow/c/c_api.cc:291] Operation '{name:'loss_15/AddN' id:2970 op device:{requested: '', assigned: ''} def:{{{node loss_15/AddN}} = AddN[N=2, T=DT_FLOAT, _has_manual_control_dependencies=true](loss_15/mul, loss_15/mul_1)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.
2023-02-

  510/10000 [>.............................] - ETA: 2:01 - reward: -4.2579done, took 6.800 seconds


<keras.callbacks.History at 0x7f85986bafe0>

In [87]:
scores = dqn.test(env, nb_episodes=100, visualize=False)
print(np.mean(scores.history['episode_reward']))

Testing for 10 episodes ...


  updates=self.state_updates,
2023-02-24 20:08:00.256401: W tensorflow/c/c_api.cc:291] Operation '{name:'dense_17/BiasAdd' id:3921 op device:{requested: '', assigned: ''} def:{{{node dense_17/BiasAdd}} = BiasAdd[T=DT_FLOAT, _has_manual_control_dependencies=true, data_format="NHWC"](dense_17/MatMul, dense_17/BiasAdd/ReadVariableOp)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.
2023-02-24 20:08:00.397395: W tensorflow/c/c_api.cc:291] Operation '{name:'count_22/Assign' id:4191 op device:{requested: '', assigned: ''} def:{{{node count_22/Assign}} = AssignVariableOp[_has_manual_control_dependencies=true, dtype=DT_FLOAT, validate_shape=false](count_22, count_22/Initializer/zeros)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either 

human


error: No available video device

In [12]:
# https://stackoverflow.com/questions/25333732/matplotlib-animation-not-working-in-ipython-notebook-blank-plot

In [14]:
scores_log = {}
for n_steps in [1000, 5000, 10000, 15000]:
    model = get_model(n_states, n_actions, compact=True)
    dqn = get_agent(model, n_actions)
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])
    dqn.fit(env, nb_steps=n_steps, visualize=False, verbose=1)
    scores = dqn.test(env, nb_episodes=100, visualize=False)
    scores_log[n_steps] = np.mean(scores.history['episode_reward'])

  super().__init__(name, **kwargs)
2023-02-25 01:32:44.383959: W tensorflow/c/c_api.cc:291] Operation '{name:'dense_33_1/bias/Assign' id:8802 op device:{requested: '', assigned: ''} def:{{{node dense_33_1/bias/Assign}} = AssignVariableOp[_has_manual_control_dependencies=true, dtype=DT_FLOAT, validate_shape=false](dense_33_1/bias, dense_33_1/bias/Initializer/zeros)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.


Training for 1000 steps ...
Interval 1 (0 steps performed)


  updates=self.state_updates,
2023-02-25 01:32:45.106672: W tensorflow/c/c_api.cc:291] Operation '{name:'dense_33/BiasAdd' id:8781 op device:{requested: '', assigned: ''} def:{{{node dense_33/BiasAdd}} = BiasAdd[T=DT_FLOAT, _has_manual_control_dependencies=true, data_format="NHWC"](dense_33/MatMul, dense_33/BiasAdd/ReadVariableOp)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.
2023-02-25 01:32:45.351855: W tensorflow/c/c_api.cc:291] Operation '{name:'count_44/Assign' id:8893 op device:{requested: '', assigned: ''} def:{{{node count_44/Assign}} = AssignVariableOp[_has_manual_control_dependencies=true, dtype=DT_FLOAT, validate_shape=false](count_44, count_44/Initializer/zeros)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either 

    1/10000 [..............................] - ETA: 1:35:52 - reward: -3.1623

2023-02-25 01:32:45.690468: W tensorflow/c/c_api.cc:291] Operation '{name:'dense_33_1/BiasAdd' id:8807 op device:{requested: '', assigned: ''} def:{{{node dense_33_1/BiasAdd}} = BiasAdd[T=DT_FLOAT, _has_manual_control_dependencies=true, data_format="NHWC"](dense_33_1/MatMul, dense_33_1/BiasAdd/ReadVariableOp)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.
2023-02-25 01:32:46.054133: W tensorflow/c/c_api.cc:291] Operation '{name:'loss_47/AddN' id:9019 op device:{requested: '', assigned: ''} def:{{{node loss_47/AddN}} = AddN[N=2, T=DT_FLOAT, _has_manual_control_dependencies=true](loss_47/mul, loss_47/mul_1)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.
2023-02-

  997/10000 [=>............................] - ETA: 1:30 - reward: -6.1817done, took 10.667 seconds
Testing for 100 episodes ...
Episode 1: reward: -112.000, steps: 60
Episode 2: reward: -294.498, steps: 60
Episode 3: reward: -460.022, steps: 60
Episode 4: reward: -262.560, steps: 60
Episode 5: reward: -383.005, steps: 60
Episode 6: reward: -277.662, steps: 60
Episode 7: reward: -317.106, steps: 60
Episode 8: reward: -488.024, steps: 60
Episode 9: reward: -259.566, steps: 60
Episode 10: reward: -518.755, steps: 60
Episode 11: reward: -331.904, steps: 60
Episode 12: reward: -329.018, steps: 60
Episode 13: reward: -357.307, steps: 60
Episode 14: reward: -279.401, steps: 60
Episode 15: reward: -431.188, steps: 60
Episode 16: reward: -288.292, steps: 60
Episode 17: reward: -374.036, steps: 60
Episode 18: reward: -416.121, steps: 60
Episode 19: reward: -392.670, steps: 60
Episode 20: reward: -295.328, steps: 60
Episode 21: reward: -97.008, steps: 60
Episode 22: reward: -406.642, steps: 60
E

2023-02-25 01:33:01.371434: W tensorflow/c/c_api.cc:291] Operation '{name:'dense_34_1/bias/Assign' id:9305 op device:{requested: '', assigned: ''} def:{{{node dense_34_1/bias/Assign}} = AssignVariableOp[_has_manual_control_dependencies=true, dtype=DT_FLOAT, validate_shape=false](dense_34_1/bias, dense_34_1/bias/Initializer/zeros)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.


Training for 5000 steps ...
Interval 1 (0 steps performed)


2023-02-25 01:33:02.119396: W tensorflow/c/c_api.cc:291] Operation '{name:'dense_34/BiasAdd' id:9284 op device:{requested: '', assigned: ''} def:{{{node dense_34/BiasAdd}} = BiasAdd[T=DT_FLOAT, _has_manual_control_dependencies=true, data_format="NHWC"](dense_34/MatMul, dense_34/BiasAdd/ReadVariableOp)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.
2023-02-25 01:33:02.408084: W tensorflow/c/c_api.cc:291] Operation '{name:'count_48/Assign' id:9396 op device:{requested: '', assigned: ''} def:{{{node count_48/Assign}} = AssignVariableOp[_has_manual_control_dependencies=true, dtype=DT_FLOAT, validate_shape=false](count_48, count_48/Initializer/zeros)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after runni

    1/10000 [..............................] - ETA: 1:47:17 - reward: -3.6056

2023-02-25 01:33:02.772604: W tensorflow/c/c_api.cc:291] Operation '{name:'dense_34_1/BiasAdd' id:9310 op device:{requested: '', assigned: ''} def:{{{node dense_34_1/BiasAdd}} = BiasAdd[T=DT_FLOAT, _has_manual_control_dependencies=true, data_format="NHWC"](dense_34_1/MatMul, dense_34_1/BiasAdd/ReadVariableOp)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.
2023-02-25 01:33:03.168100: W tensorflow/c/c_api.cc:291] Operation '{name:'loss_51/AddN' id:9522 op device:{requested: '', assigned: ''} def:{{{node loss_51/AddN}} = AddN[N=2, T=DT_FLOAT, _has_manual_control_dependencies=true](loss_51/mul, loss_51/mul_1)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.
2023-02-

Testing for 100 episodes ...
Episode 1: reward: -256.288, steps: 60
Episode 2: reward: -249.211, steps: 60
Episode 3: reward: -183.886, steps: 60
Episode 4: reward: -129.235, steps: 60
Episode 5: reward: -508.187, steps: 60
Episode 6: reward: -452.140, steps: 60
Episode 7: reward: -196.511, steps: 60
Episode 8: reward: -365.287, steps: 60
Episode 9: reward: -267.529, steps: 60
Episode 10: reward: -418.087, steps: 60
Episode 11: reward: -155.106, steps: 60
Episode 12: reward: -257.382, steps: 60
Episode 13: reward: -323.810, steps: 60
Episode 14: reward: -163.233, steps: 60
Episode 15: reward: -294.320, steps: 60
Episode 16: reward: -184.933, steps: 60
Episode 17: reward: -108.507, steps: 60
Episode 18: reward: -352.627, steps: 60
Episode 19: reward: -170.124, steps: 60
Episode 20: reward: -160.156, steps: 60
Episode 21: reward: -189.340, steps: 60
Episode 22: reward: -544.382, steps: 60
Episode 23: reward: -273.472, steps: 60
Episode 24: reward: -329.808, steps: 60
Episode 25: reward: 

2023-02-25 01:33:52.907691: W tensorflow/c/c_api.cc:291] Operation '{name:'dense_35_1/bias/Assign' id:9808 op device:{requested: '', assigned: ''} def:{{{node dense_35_1/bias/Assign}} = AssignVariableOp[_has_manual_control_dependencies=true, dtype=DT_FLOAT, validate_shape=false](dense_35_1/bias, dense_35_1/bias/Initializer/zeros)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.


Training for 10000 steps ...
Interval 1 (0 steps performed)


2023-02-25 01:33:53.702633: W tensorflow/c/c_api.cc:291] Operation '{name:'dense_35/BiasAdd' id:9787 op device:{requested: '', assigned: ''} def:{{{node dense_35/BiasAdd}} = BiasAdd[T=DT_FLOAT, _has_manual_control_dependencies=true, data_format="NHWC"](dense_35/MatMul, dense_35/BiasAdd/ReadVariableOp)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.
2023-02-25 01:33:54.010626: W tensorflow/c/c_api.cc:291] Operation '{name:'count_52/Assign' id:9899 op device:{requested: '', assigned: ''} def:{{{node count_52/Assign}} = AssignVariableOp[_has_manual_control_dependencies=true, dtype=DT_FLOAT, validate_shape=false](count_52, count_52/Initializer/zeros)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after runni

    1/10000 [..............................] - ETA: 1:55:47 - reward: -2.2361

2023-02-25 01:33:54.406555: W tensorflow/c/c_api.cc:291] Operation '{name:'dense_35_1/BiasAdd' id:9813 op device:{requested: '', assigned: ''} def:{{{node dense_35_1/BiasAdd}} = BiasAdd[T=DT_FLOAT, _has_manual_control_dependencies=true, data_format="NHWC"](dense_35_1/MatMul, dense_35_1/BiasAdd/ReadVariableOp)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.
2023-02-25 01:33:54.821821: W tensorflow/c/c_api.cc:291] Operation '{name:'loss_55/AddN' id:10025 op device:{requested: '', assigned: ''} def:{{{node loss_55/AddN}} = AddN[N=2, T=DT_FLOAT, _has_manual_control_dependencies=true](loss_55/mul, loss_55/mul_1)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.
2023-02

done, took 86.693 seconds
Testing for 100 episodes ...
Episode 1: reward: -139.351, steps: 60
Episode 2: reward: -332.048, steps: 60
Episode 3: reward: -92.006, steps: 60
Episode 4: reward: -313.736, steps: 60
Episode 5: reward: -334.136, steps: 60
Episode 6: reward: -281.411, steps: 60
Episode 7: reward: -310.364, steps: 60
Episode 8: reward: -282.835, steps: 60
Episode 9: reward: -208.411, steps: 60
Episode 10: reward: -315.280, steps: 60
Episode 11: reward: -171.734, steps: 60
Episode 12: reward: -254.972, steps: 60
Episode 13: reward: -247.485, steps: 60
Episode 14: reward: -279.719, steps: 60
Episode 15: reward: -480.154, steps: 60
Episode 16: reward: -131.490, steps: 60
Episode 17: reward: -387.048, steps: 60
Episode 18: reward: -479.140, steps: 60
Episode 19: reward: -167.459, steps: 60
Episode 20: reward: -405.495, steps: 60
Episode 21: reward: -398.035, steps: 60
Episode 22: reward: -472.366, steps: 60
Episode 23: reward: -146.537, steps: 60
Episode 24: reward: -262.397, steps

2023-02-25 01:35:25.864848: W tensorflow/c/c_api.cc:291] Operation '{name:'dense_36_1/bias/Assign' id:10311 op device:{requested: '', assigned: ''} def:{{{node dense_36_1/bias/Assign}} = AssignVariableOp[_has_manual_control_dependencies=true, dtype=DT_FLOAT, validate_shape=false](dense_36_1/bias, dense_36_1/bias/Initializer/zeros)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.


Training for 15000 steps ...
Interval 1 (0 steps performed)


2023-02-25 01:35:26.724006: W tensorflow/c/c_api.cc:291] Operation '{name:'dense_36/BiasAdd' id:10290 op device:{requested: '', assigned: ''} def:{{{node dense_36/BiasAdd}} = BiasAdd[T=DT_FLOAT, _has_manual_control_dependencies=true, data_format="NHWC"](dense_36/MatMul, dense_36/BiasAdd/ReadVariableOp)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.
2023-02-25 01:35:27.058612: W tensorflow/c/c_api.cc:291] Operation '{name:'count_57/Assign' id:10412 op device:{requested: '', assigned: ''} def:{{{node count_57/Assign}} = AssignVariableOp[_has_manual_control_dependencies=true, dtype=DT_FLOAT, validate_shape=false](count_57, count_57/Initializer/zeros)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after run

    1/10000 [..............................] - ETA: 2:08:18 - reward: -6.3246

2023-02-25 01:35:27.503688: W tensorflow/c/c_api.cc:291] Operation '{name:'dense_36_1/BiasAdd' id:10316 op device:{requested: '', assigned: ''} def:{{{node dense_36_1/BiasAdd}} = BiasAdd[T=DT_FLOAT, _has_manual_control_dependencies=true, data_format="NHWC"](dense_36_1/MatMul, dense_36_1/BiasAdd/ReadVariableOp)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.
2023-02-25 01:35:27.951155: W tensorflow/c/c_api.cc:291] Operation '{name:'loss_59/AddN' id:10528 op device:{requested: '', assigned: ''} def:{{{node loss_59/AddN}} = AddN[N=2, T=DT_FLOAT, _has_manual_control_dependencies=true](loss_59/mul, loss_59/mul_1)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.
2023-0

166 episodes - episode_reward: -308.798 [-574.726, -67.997] - loss: 21.084 - mae: 35.311 - mean_q: -37.765

Interval 2 (10000 steps performed)
Testing for 100 episodes ...
Episode 1: reward: -318.966, steps: 60
Episode 2: reward: -361.902, steps: 60
Episode 3: reward: -273.567, steps: 60
Episode 4: reward: -255.433, steps: 60
Episode 5: reward: -179.865, steps: 60
Episode 6: reward: -428.808, steps: 60
Episode 7: reward: -216.606, steps: 60
Episode 8: reward: -184.364, steps: 60
Episode 9: reward: -164.103, steps: 60
Episode 10: reward: -435.612, steps: 60
Episode 11: reward: -349.788, steps: 60
Episode 12: reward: -390.604, steps: 60
Episode 13: reward: -290.370, steps: 60
Episode 14: reward: -212.644, steps: 60
Episode 15: reward: -213.657, steps: 60
Episode 16: reward: -402.042, steps: 60
Episode 17: reward: -220.162, steps: 60
Episode 18: reward: -337.833, steps: 60
Episode 19: reward: -351.669, steps: 60
Episode 20: reward: -358.305, steps: 60
Episode 21: reward: -434.217, steps: 

In [13]:
scores_log

{1000: -308.8078342331893,
 5000: -143.2580174170538,
 10000: -155.78609127837154,
 15000: -149.57824776422228,
 20000: -228.72297844445487,
 25000: -204.61390880288442,
 35000: -159.23455552933467,
 45000: -157.56615966927995}

In [15]:
scores_log

{1000: -333.8648923996682,
 5000: -281.39773406147486,
 10000: -293.11126799543183,
 15000: -312.95515351060504}