In [1]:
from environment.models.simple_control_fixed import SimpleControlledFixedEnv

import matplotlib.pyplot as plt
import numpy as np
from tensorflow import keras
import tensorflow as tf
import random
from collections import namedtuple, deque


2024-07-08 10:25:31.457768: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-07-08 10:25:31.525548: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


In [2]:
class Agent(object):
    ''' Base agent class, used as a parent class

        Args:
            n_actions (int): number of actions

        Attributes:
            n_actions (int): where we store the number of actions
            last_action (np.array): last action taken by the agent
    '''
    def __init__(self, n_actions: int):
        self.n_actions = n_actions
        self.last_action = None

    def forward(self, state: np.ndarray):
        ''' Performs a forward computation '''
        pass

    def backward(self):
        ''' Performs a backward pass on the network '''
        pass

In [3]:
class RandomAgent(Agent):
    ''' Agent taking actions uniformly at random, child of the class Agent'''
    def __init__(self, n_actions: int):
        super(RandomAgent, self).__init__(n_actions)

    def forward(self, state: np.ndarray) -> int:
        ''' Compute an action uniformly at random across n_actions possible
            choices

            Returns:
                action np.array(int): the random action for each angle
        '''
        action = []
        for i in range(self.n_actions):
            action.append(np.random.randint(-2*np.pi, 2*np.pi))
        self.last_action = np.array(action)
        return self.last_action

Create a class for the experience replay buffer

In [4]:
Experience = namedtuple('Experience',
                        ['state', 'action', 'reward', 'next_state', 'done'])
class ExperienceReplayBuffer(object):
    """ Class used to store a buffer containing experiences of the RL agent.
    """
    def __init__(self, maximum_length):
        # Create buffer of maximum length
        self.buffer = deque(maxlen=maximum_length)
        self.latest_experience = None

    def append(self, experience):
        # Append experience to the buffer
        if(self.latest_experience is not None):
            self.buffer.append(self.latest_experience)

        self.latest_experience = experience

    def __len__(self):
        # overload len operator
        return len(self.buffer)

    def sample_batch(self, n):
        """ Function used to sample experiences from the buffer.
            returns 5 lists, each of size n. Returns a list of state, actions,
            rewards, next states and done variables.
        """
        # If we try to sample more elements that what are available from the
        # buffer we raise an error
        if n > len(self.buffer):
            raise IndexError('Tried to sample too many elements from the buffer!')
        
        # combined experience replay
        # # inclued latest experience in the sampled batch
                    
        batch = random.sample(self.buffer, n - 1)
        batch.append(self.latest_experience)

        # batch is a list of size n, where each element is an Experience tuple
        # of 5 elements. To convert a list of tuples into
        # a tuple of list we do zip(*batch). In this case this will return a
        # tuple of 5 elements where each element is a list of n elements.

        return zip(*batch)

In [5]:
class QNetwork_Actor(keras.models.Model):
    def __init__(self, input_size, output_size):
        super(QNetwork_Actor, self).__init__()

        self.input_layer = keras.layers.keras.layers.Dense(64, activation='relu')
        self.hidden_layer1 = keras.layers.keras.layers.Dense(16, activation='relu')
        
        self.hidden_value_layer1 = keras.layers.keras.layers.Dense(128, activation='relu')
        self.hidden_advantage_layer1 = keras.layers.keras.layers.Dense(128, activation='relu')
        self.value_layer = keras.layers.keras.layers.Dense(1)
        self.advantage_layer = keras.layers.keras.layers.Dense(output_size)

    def call(self, x):
        _in = keras.layers.ReLU()(self.input_layer(x))
        l1 = keras.layers.ReLU()(self.hidden_layer1(_in))

        v1 = keras.layers.ReLU()(self.hidden_value_layer1(l1))
        v2 = self.value_layer(v1)

        a1 = keras.layers.ReLU()(self.hidden_advantage_layer1(l1))
        a2 = self.advantage_layer(a1)

        q = v2 + a2 - tf.reduce_mean(a2, axis=-1, keepdims=True)
        return q
    
    def compute_q_values(self, states, actions):
        q_values = self(states)
        selected_q_values = tf.gather(q_values, actions, axis=1)
        return selected_q_values

    def update(self, optimizer, loss_function, predicted_q_values, target_values):
        with tf.GradientTape() as tape:
            loss = loss_function(predicted_q_values, target_values)
        gradients = tape.gradient(loss, self.trainable_variables)
        optimizer.apply_gradients(zip(gradients, self.trainable_variables))
        return loss

def epsilon_decay(epsilon_min, epsilon_max, decay_step, k):
    decayed_epsilon = max(epsilon_min, epsilon_max * (epsilon_min / epsilon_max) ** ((k - 1)/(decay_step - 1)))
    return decayed_epsilon

In [6]:
class DDPGAgent(Agent):
    def __init__(self, state_size, action_size, replay_length=5000, batch_size=64, gamma=0.99, learning_rate=1e-3, n_episodes=800, tau=0.001):
        super(DDPGAgent, self).__init__(action_size)
        
        self.learning_rate = learning_rate
        self.n_episodes = n_episodes
        self.episode = 0
        self.epsilon = 1
        self.Z = 0.9*self.n_episodes
        self.epsilon_max = 0.99
        self.epsilon_min = 0.05
        self.tau = tau
        
        # env specific
        self.min_action = -2*np.pi
        self.max_action = 2*np.pi
        
        # step 1:
        ### Create critic network
        self.optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
        self.critic_q_network = self._build_network(state_size, 1)
        self.critic_target_network = self._build_network(state_size, 1)
        self.critic_target_network.set_weights(self.critic_q_network.get_weights())
        ### Create actor network
        self.actor_q_network = self._build_network(state_size, action_size)
        self.actor_target_network = self._build_network(state_size, action_size)
        self.actor_target_network.set_weights(self.actor_q_network.get_weights())
        
        # step 2:
        ### Create Experience replay buffer
        self.buffer = ExperienceReplayBuffer(maximum_length=replay_length)
        self.batch_size = batch_size
        self.gamma = gamma
        
        ### Agent init
        self.state_size = state_size
        self.action_size = action_size
        
        ### Steps
        self.target_update_rate = int(replay_length/batch_size) # suggested as tip
        self.steps = 0  # Counter for steps taken
        
    def _build_network(self, state_size, action_size):
        # model = keras.models.Sequential()
        # model.add(keras.layers.Dense(24, input_dim=state_size, activation='relu'))
        # model.add(keras.layers.Dense(24, activation='relu'))
        # model.add(keras.layers.Dense(action_size, activation='linear'))
        # model.compile(loss='mse', optimizer=self.optimizer)
        model = QNetwork_Actor(state_size, action_size)
        return model

    def forward(self, state):
        # step 7:
        # take noisy continuous action a_t at s_t   
        # print(f"state passed to actor q network: {state}")
        q = self.actor_q_network.predict(state, verbose=0)
        # loop over the actions and add noise
        for i in range(len(q)):
            # assign noise
            noise = np.random.uniform(self.min_action, self.max_action)
            q[i] = q[i] + noise
        # print(f"q: {q}")
        return q
     
    def sample(self, state):
        if not isinstance(state[0], np.float32):
            state = np.array(state[0])
        q = self.q_network.predict(state)
        return np.argmax(q)

    def backward(self):
        if len(self.buffer.buffer) < self.batch_size:
            return
        
        # step 9:
        # Sample a batch of experiences from the buffer
        batch = Experience(*self.buffer.sample_batch(self.batch_size))        
        batch_mask = ~np.array(batch.done, dtype=bool)

        next_state = np.stack(batch.next_state)
        states = np.stack(batch.state).reshape(self.batch_size, self.state_size)
        actions = np.concatenate(batch.action)
        rewards = np.concatenate(batch.reward)        

        # step 10:
        # Compute target values for each experience in the batch
        target_values = tf.where(batch_mask, rewards, 0)
        target_values = tf.where(~batch_mask, target_values + self.gamma * self.critic_target_network.predict(next_state, verbose=0), target_values)


        # step 11:
        # Compute predicted Q-values for the states and actions in the batch
        # print(states.shape)
        # predicted_q_values = self.critic_q_network.predict(states)
        # print(predicted_q_values.shape)
        # predicted_q_values = predicted_q_values[tf.range(len(actions)), actions]

        # Update critic Q-network weights using the computed values (backward pass SGD on the MSE loss)
        with tf.GradientTape() as tape:
            predicted_q_values = self.critic_q_network(states, training=True)
            # ENSURE TYPE
            # Ensure both tensors are of the same type, float32 in this case
            predicted_q_values = tf.cast(predicted_q_values, tf.float32)
            target_values = tf.cast(target_values, tf.float32)
            loss = tf.reduce_mean(tf.square(target_values - predicted_q_values))
        # Compute the gradients of the loss with respect to the model's weights
        gradients = tape.gradient(loss, self.critic_q_network.trainable_variables)
        # print(gradients)
        # Apply the gradients to the model's weights
        self.optimizer.apply_gradients(zip(gradients, self.critic_q_network.trainable_variables))
        # self.critic_q_network.fit(np.stack(states, actions), target_values, epochs=1, verbose=0)
        # def loss_function_MSE(predicted_q_values, target_values):
        #     _ret = target_values - predicted_q_values
        #     return tf.reduce_mean(tf.square(_ret))
        # self.critic_q_network.update(self.optimizer, loss_function_MSE, predicted_q_values, target_values)
        

        # step 12:
        if self.steps % self.target_update_rate == self.target_update_rate - 1:
            # step 13:
            # update critic
            # def loss_function_J(states, qvalues):
                # selected_q_values = -tf.gather(qvalues, self.actor_q_network.predict(states), axis=1)
                # # _ret = -self.critic_q_network.compute_q_values(states, self.actor_q_network.predict(states))
                # return tf.reduce_mean(selected_q_values)
            # self.actor_q_network.update(self.optimizer, loss_function_J, states, target_values)
            with tf.GradientTape() as tape:
                # loss = loss_function_J(states, _q_values)
                actions = self.actor_q_network(states, training=True)
                q_values = self.critic_q_network(tf.concat([states, actions], axis=1), training=True)
                # _ret = -self.critic_q_network.compute_q_values(states, self.actor_q_network.predict(states))
                loss = -tf.reduce_mean(q_values)
            gradients = tape.gradient(loss, self.actor_q_network.trainable_variables)
            self.optimizer.apply_gradients(zip(gradients, self.actor_q_network.trainable_variables))

            # step 14:
            # stof update target networks
            # TODO: soft update target networks
            # Soft update the critic target network
            for target_param, param in zip(self.critic_target_network.variables, self.critic_q_network.variables):
                target_param.assign(self.tau * param + (1 - self.tau) * target_param)

            # Soft update the actor target network
            for target_param, param in zip(self.actor_target_network.variables, self.actor_q_network.variables):
                target_param.assign(self.tau * param + (1 - self.tau) * target_param)
                        
            pass
        
        # step 16:
        # Increment steps counter
        self.steps += 1

In [7]:
def running_average(x, N):
    ''' Function used to compute the running average
        of the last N elements of a vector x
    '''
    if len(x) >= N:
        y = np.copy(x)
        y[N-1:] = np.convolve(x, np.ones((N, )) / N, mode='valid')
    else:
        y = np.zeros_like(x)
    return y


def faster_running_average(x, N, last_average):
    if len(x) > N:
        return last_average + (1./N)*(x[-1] - x[-N - 1])
    else:
        return sum(x)/len(x)

In [8]:
# Parameters
N_episodes = 1000                 # Number of episodes # 100 - 1000
discount_factor = 0.99            # Value of the discount factor
n_ep_running_average = 50         # Running average of 50 episodes
n_actions = 4                     # Number of available actions (nr of angles)
dim_state = 2                     # State dimensionality
replay_size = 5000                # 5000 - 30000
batch_size = 16                   # 4 - 128
learning_rate = 1e-4              # 1e-3 - 1e-4
target_reward = 0                 # specified in lab
max_env_steps = 1000              # to stop the episode
# Used to update target networks
tau = 0.005

# We will use these variables to compute the average episodic reward and
# the average number of steps per episode
episode_reward_list = []       # this list contains the total reward per episode
episode_number_of_steps = []   # this list contains the number of steps per episode

# agent initialization
# agent = RandomAgent(n_actions) # random
# step 1 & 2:
agent = DDPGAgent(dim_state, n_actions, replay_size, batch_size=batch_size, gamma=discount_factor, learning_rate=learning_rate, n_episodes=N_episodes, tau=tau)


2024-07-08 10:25:32.664360: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [9]:
env = SimpleControlledFixedEnv()

In [10]:
### Training process
from tqdm import trange
# trange is an alternative to range in python, from the tqdm library
# It shows a nice progression bar that you can update with useful information
EPISODES = trange(N_episodes, desc='Episode: ', leave=True)
actual_episodes = 0

Episode:   0%|          | 0/1000 [00:00<?, ?it/s]

To facilitate getting higher-quality training data, you may reduce the scale of the noise over the course of training. (We do not do this in our implementation, and keep noise scale fixed throughout.)

At test time, to see how well the policy exploits what it has learned, we do not add noise to the actions.

Our DDPG implementation uses a trick to improve exploration at the start of training. For a fixed number of steps at the beginning (set with the start_steps keyword argument), the agent takes actions which are sampled from a uniform random distribution over valid actions. After that, it returns to normal DDPG exploration.

In [11]:
avg_reward = 0.
avg_steps = 0.
# step 3:
# episode loop
for i in EPISODES:
    # Reset environment data and initialize variables
    done = False
    # step 4:
    state = env.reset()
    state = np.array(state, dtype=np.float32)
 
    total_episode_reward = 0.
    # step 5:
    t = 0
    # step 6: 
    # environment loop
    while not done:
        # necessary for lunar lander. It doesn't implement a default
        # max-timesteps and rover hovers forever

        # step 7:
        # noisy actor action
        state = np.array(state).reshape(1, dim_state)
        # print(f"state passed to agent.forward: {state}")

        action = agent.forward(state)

        # step 8:
        # Execute action in the environment and append
        next_state, reward, done = env.step(action)
        # append to buffer
        agent.buffer.append(Experience(state, action, np.array([reward]), next_state, done))
        
        # step 9-15: see function definition
        agent.backward()
        
        # Update episode reward
        total_episode_reward += reward

        # step 16: Update state for next iteration
        state = next_state
        t += 1

    # Append episode reward and total number of steps
    episode_reward_list.append(total_episode_reward)
    episode_number_of_steps.append(t)

    avg_reward = faster_running_average(episode_reward_list, n_ep_running_average, avg_reward)
    avg_steps = faster_running_average(episode_number_of_steps, n_ep_running_average, avg_steps)

    agent.episode += 1

    # Updates the tqdm update bar with fresh information
    # (episode number, total reward of the last episode, total number of Steps
    # of the last episode, average reward, average number of steps)
    EPISODES.set_description(
        "Episode {} - Reward/Steps: {:.1f}/{} - Avg. Reward/Steps: {:.1f}/{:.1f}".format(
        i, total_episode_reward, t,
        avg_reward,
        avg_steps)
        )

    actual_episodes += 1
    
    # stop if we hit reward threshold
    if avg_reward >= target_reward:
        break

Episode:   0%|          | 0/1000 [00:25<?, ?it/s]


ValueError: No gradients provided for any variable: (['dense_6/kernel:0', 'dense_6/bias:0', 'dense_7/kernel:0', 'dense_7/bias:0', 'dense_8/kernel:0', 'dense_8/bias:0'],). Provided `grads_and_vars` is ((None, <tf.Variable 'dense_6/kernel:0' shape=(2, 24) dtype=float32, numpy=
array([[ 0.46713018,  0.07335341, -0.31621027,  0.14695662, -0.26830322,
         0.17733127,  0.31251323,  0.30520093, -0.4788874 , -0.47912714,
         0.10030419, -0.34102076, -0.11341074,  0.01132408, -0.05056155,
         0.32461846, -0.4759035 ,  0.00794685, -0.15827361, -0.01987347,
         0.44855207, -0.15865237,  0.27278554, -0.25145754],
       [ 0.14509296,  0.03135055, -0.03097269,  0.01196054, -0.26576394,
         0.13782346, -0.17710218,  0.27114856, -0.14365238, -0.45348582,
        -0.22688562,  0.33387166, -0.12117422,  0.34624094, -0.02886394,
        -0.07182831, -0.13947079, -0.13867593,  0.03760493,  0.1427604 ,
         0.2035662 , -0.3678884 ,  0.4324237 , -0.19486803]],
      dtype=float32)>), (None, <tf.Variable 'dense_6/bias:0' shape=(24,) dtype=float32, numpy=
array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0.], dtype=float32)>), (None, <tf.Variable 'dense_7/kernel:0' shape=(24, 24) dtype=float32, numpy=
array([[ 0.08333975,  0.06303248, -0.14032362,  0.21493855,  0.09855062,
         0.3192682 , -0.12236789, -0.00937986, -0.11860704, -0.13438311,
        -0.32082298, -0.20007218, -0.05600727, -0.07576266,  0.22516838,
         0.01250091, -0.3176613 ,  0.34652588, -0.159886  , -0.07825238,
         0.07551062,  0.27925035, -0.1711426 ,  0.10263541],
       [-0.03360638,  0.18009534,  0.06068605,  0.22033158,  0.13452786,
         0.27748373, -0.27669674, -0.12128244, -0.28824085, -0.01995888,
        -0.21093123,  0.05250874,  0.2451599 ,  0.17403361,  0.3053464 ,
        -0.13963099,  0.3466846 ,  0.33395526, -0.09473532,  0.23875353,
         0.15241835, -0.34307525,  0.17392859,  0.2110469 ],
       [-0.25183618,  0.14839259, -0.34970933,  0.05598289,  0.14447358,
        -0.26069477,  0.06353721, -0.26189798, -0.14977498,  0.2958242 ,
         0.31688234, -0.22126867,  0.09729624,  0.1337187 , -0.14565504,
         0.02131838, -0.02332076,  0.23418924,  0.26519313, -0.11462915,
        -0.2700201 ,  0.1978772 ,  0.08797169,  0.280638  ],
       [ 0.04186842,  0.23152652,  0.10971659,  0.04911211,  0.07928976,
         0.05986354, -0.27222505,  0.31034324, -0.27358776,  0.03493005,
        -0.08326128,  0.19272646, -0.04331237,  0.13464081, -0.07129139,
        -0.14184117, -0.14268115, -0.07016438, -0.14242987,  0.1266351 ,
         0.0719938 , -0.06188381, -0.09320658,  0.13608676],
       [-0.11749816, -0.20936489,  0.2717946 , -0.24733937,  0.33987334,
         0.00093085,  0.19268665, -0.2522156 , -0.05417192,  0.23105958,
         0.35076872,  0.2270566 ,  0.26512948,  0.13422373, -0.22352673,
         0.12042543,  0.1074518 , -0.07135403,  0.3151798 , -0.10931551,
        -0.20612489, -0.29253587, -0.35329705,  0.07516503],
       [-0.22830188,  0.2431902 , -0.1801589 , -0.1331666 ,  0.06747425,
         0.27641663,  0.22340873, -0.29959294, -0.3250076 ,  0.292173  ,
         0.08798704,  0.30035022,  0.10785657,  0.10774192,  0.01508081,
        -0.19504896, -0.11368042, -0.07398921,  0.32137433, -0.2089473 ,
         0.16957507, -0.24074246, -0.01807392,  0.13508108],
       [ 0.10676715,  0.21265462,  0.30510548, -0.33462784,  0.11069018,
         0.32842794, -0.16315863, -0.17365353, -0.16439302,  0.2542918 ,
        -0.2744038 ,  0.10640889,  0.31729612,  0.0187771 , -0.00634563,
         0.22208735, -0.27701834, -0.06578964,  0.08938351,  0.20599797,
        -0.29770267, -0.09154254,  0.13844573,  0.32429597],
       [-0.1483102 , -0.35025564,  0.12648007, -0.12538797, -0.28168467,
        -0.29097587, -0.05864245, -0.1112617 ,  0.24358532,  0.2220268 ,
        -0.34092283, -0.23521397,  0.0032894 ,  0.11289978,  0.2661325 ,
         0.07312149,  0.31355044,  0.17003188, -0.2296017 , -0.28652522,
        -0.3209281 , -0.2881291 , -0.00427648,  0.25957808],
       [-0.26512027,  0.11592531, -0.18108898,  0.145396  ,  0.34372535,
         0.3170404 ,  0.22375658,  0.20468548,  0.07003313, -0.20909077,
        -0.06818619,  0.234871  , -0.1641538 ,  0.07452902,  0.27350155,
        -0.03129175,  0.03961653, -0.18051292,  0.14065254,  0.2512569 ,
        -0.24603829,  0.16144636,  0.00548744, -0.18835603],
       [-0.2546054 , -0.3238724 , -0.17534336, -0.23711176, -0.30105814,
         0.06486106,  0.20236048,  0.18337747,  0.00927535,  0.10390303,
        -0.24800688, -0.03596273,  0.11442649,  0.00052869, -0.15945753,
         0.19898632, -0.35082287,  0.3138738 ,  0.05398077, -0.3452978 ,
         0.11860216,  0.03714696,  0.1020838 ,  0.19419995],
       [ 0.20940551,  0.12355906,  0.03007221,  0.01913694,  0.19016382,
        -0.20076668,  0.08204231, -0.24089967, -0.32462877, -0.25284106,
        -0.19914614,  0.31236902, -0.34526765,  0.33472583, -0.19177912,
         0.05001348,  0.23894116,  0.20617262, -0.2865674 , -0.03064024,
         0.00216231, -0.02613738, -0.09269422, -0.15601078],
       [-0.12029739, -0.12242141,  0.30514672,  0.3022729 ,  0.13914934,
         0.2611498 ,  0.22950229, -0.28785866, -0.02738753,  0.18812236,
         0.10288376, -0.25740784, -0.14639725, -0.10173979,  0.21734735,
         0.07794005,  0.09299964, -0.28405052,  0.02832058, -0.21727273,
        -0.27811044,  0.3119792 , -0.2980407 ,  0.09170446],
       [ 0.28999874, -0.33822685,  0.09031826,  0.07790154,  0.3208271 ,
        -0.19466576, -0.00495741, -0.16131108,  0.29570767, -0.05485782,
        -0.13178173, -0.2332296 , -0.1014778 , -0.04049873,  0.12117422,
         0.32439247, -0.11137103,  0.25885138,  0.05133167,  0.06382608,
        -0.14171751, -0.07299075, -0.31195688, -0.015674  ],
       [-0.24101034, -0.3060211 ,  0.01263487, -0.0892252 , -0.14464259,
        -0.29471916,  0.32795396, -0.14124757,  0.17667285, -0.24580918,
         0.2648159 , -0.20777915,  0.20731488,  0.07836348,  0.02110243,
         0.02668807, -0.01711753, -0.09245121,  0.08398375,  0.21516928,
        -0.13493896, -0.07065564, -0.03912053, -0.03942788],
       [-0.34932622, -0.0452137 , -0.1893163 ,  0.0691188 ,  0.3466777 ,
        -0.2032149 , -0.15852095, -0.35352397, -0.04934645,  0.14170116,
        -0.06229219,  0.03529504,  0.09203491,  0.15567926,  0.16357031,
        -0.12469245, -0.1938659 ,  0.35244992,  0.24586955,  0.00204244,
         0.19446507, -0.26656002,  0.2726933 ,  0.06777248],
       [-0.09761623,  0.11395293, -0.3116349 , -0.07431912, -0.27281815,
         0.25977185, -0.18525174,  0.06640127, -0.00162357, -0.1595168 ,
         0.31478176,  0.34131965,  0.28229645, -0.32564932,  0.09276775,
         0.26381198,  0.17396119,  0.08528045,  0.28792956,  0.0706715 ,
        -0.24932829, -0.21226755, -0.03786322, -0.20063189],
       [ 0.02404132,  0.01786739,  0.03000897,  0.18182978,  0.25345662,
         0.18963519,  0.06878281,  0.14281198, -0.04677922, -0.21096621,
        -0.22145927, -0.17760143,  0.28211793,  0.3531789 ,  0.28899464,
         0.11841789,  0.0644798 ,  0.1800367 , -0.04330876, -0.3261403 ,
         0.18506637,  0.15446177,  0.33059397, -0.16272654],
       [-0.03227395,  0.02135262,  0.33501473,  0.25454178, -0.13084337,
         0.3014998 , -0.33768207,  0.00284642,  0.0220916 , -0.22621697,
         0.04491791,  0.10341522,  0.23697749,  0.253962  , -0.02034226,
         0.08682537, -0.02621156,  0.3222488 ,  0.20594433,  0.34800795,
        -0.29680318, -0.28986907, -0.16145067,  0.10984734],
       [ 0.15834948, -0.25549772,  0.02922681, -0.22124423, -0.30500197,
         0.16544923, -0.24058998, -0.27381247, -0.03295404, -0.25990707,
         0.25379637,  0.04482242, -0.08076212, -0.3016193 , -0.04600102,
         0.10878083,  0.09367821,  0.26416138,  0.20116445, -0.08390096,
         0.01330215,  0.1549426 , -0.11574687,  0.34467408],
       [-0.10280511,  0.315711  ,  0.27334872,  0.11543339, -0.31531498,
         0.23555872,  0.31586376,  0.31715968,  0.3246226 ,  0.2378023 ,
        -0.2146662 ,  0.08694491, -0.3023979 ,  0.10612711,  0.21437868,
        -0.03082865,  0.20387045,  0.00409719, -0.15189733, -0.1472251 ,
        -0.26348716, -0.13715309,  0.27208123, -0.3147075 ],
       [-0.29543668,  0.2266039 , -0.13373566,  0.11453548, -0.03771907,
        -0.29492348, -0.21508548,  0.30387202, -0.1987089 , -0.30211097,
         0.25349805, -0.2807023 , -0.170668  , -0.16952398,  0.15448025,
        -0.084254  , -0.28307325,  0.21271792, -0.27659923,  0.13983244,
        -0.17374592, -0.04296374, -0.33465365, -0.03102237],
       [ 0.0298785 , -0.13635163, -0.2934046 , -0.31586298, -0.09122339,
         0.2945986 , -0.10046661,  0.2584829 , -0.22927961, -0.10857272,
        -0.09143448,  0.108055  ,  0.26424673, -0.32061547, -0.04619977,
         0.26959214,  0.2313911 ,  0.12014633,  0.326642  ,  0.22689739,
         0.27124473, -0.29370326, -0.18846805,  0.2127147 ],
       [ 0.22612491, -0.35157156,  0.04531562, -0.10002753,  0.16818944,
         0.28728285,  0.14618129, -0.27747646, -0.07150322,  0.15984496,
         0.06509799,  0.18223462, -0.08202618,  0.06404433, -0.30315006,
         0.01691824, -0.33063948,  0.3247262 , -0.11340697, -0.07309318,
         0.03368858, -0.30588436,  0.1438236 ,  0.23550454],
       [-0.05994749, -0.09435281, -0.0505988 , -0.26509735,  0.09898314,
         0.08794707,  0.12323681,  0.2679557 , -0.16853893, -0.22466385,
         0.267875  , -0.23206079,  0.09269458, -0.12309821, -0.14844947,
        -0.25065303, -0.16102093, -0.08982345, -0.12485228, -0.21132666,
        -0.1258322 ,  0.01546991,  0.0439578 ,  0.10053092]],
      dtype=float32)>), (None, <tf.Variable 'dense_7/bias:0' shape=(24,) dtype=float32, numpy=
array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0.], dtype=float32)>), (None, <tf.Variable 'dense_8/kernel:0' shape=(24, 4) dtype=float32, numpy=
array([[ 3.52355957e-01,  8.70729685e-02,  1.07887328e-01,
         4.25787210e-01],
       [ 1.40169561e-02, -7.03971684e-02,  2.68490255e-01,
         1.17168903e-01],
       [ 3.78427029e-01, -2.81290948e-01, -1.82254583e-01,
        -1.93703324e-01],
       [-2.64704764e-01,  4.45892215e-02, -3.50191891e-01,
         1.30508542e-01],
       [ 1.99812651e-01, -9.01079476e-02, -2.53358424e-01,
        -1.18511945e-01],
       [ 2.42854357e-01,  2.66723275e-01,  1.49052322e-01,
        -1.43114030e-01],
       [ 2.19202936e-01,  1.72325969e-02,  6.08152151e-03,
         3.19135249e-01],
       [ 1.23509467e-01,  2.36151874e-01,  2.00604200e-01,
         4.58027363e-01],
       [ 1.18080735e-01,  9.51446295e-02, -2.23746166e-01,
        -8.21788907e-02],
       [ 2.76280105e-01, -3.37042302e-01,  3.36730838e-01,
         2.85709798e-02],
       [-4.01576042e-01, -2.19215944e-01, -4.93269265e-02,
         2.53732800e-01],
       [-3.71926665e-01,  3.90169680e-01, -2.67645121e-02,
         4.09781933e-04],
       [ 2.24769890e-01, -1.55871302e-01, -3.71965289e-01,
         2.55265236e-02],
       [ 3.09684455e-01, -8.40362608e-02, -1.28479809e-01,
         3.74023080e-01],
       [ 1.00040257e-01,  2.41596580e-01,  1.72085464e-02,
        -2.39659548e-02],
       [ 1.08409226e-01, -1.41943723e-01, -3.37419420e-01,
        -3.25466871e-01],
       [ 3.27681124e-01,  3.48701954e-01, -2.21850291e-01,
        -1.31646216e-01],
       [ 2.48454511e-01,  3.28459680e-01, -2.38772318e-01,
        -2.42148310e-01],
       [-1.89810038e-01,  3.14486742e-01,  8.23264718e-02,
         3.16635132e-01],
       [ 3.75294089e-02, -2.05379725e-02,  6.27508759e-02,
        -9.37405527e-02],
       [-3.48178715e-01,  3.41482341e-01, -3.76362294e-01,
        -4.08458501e-01],
       [ 4.28670645e-02, -1.22772098e-01,  7.97885656e-02,
        -1.52821094e-01],
       [-2.26624727e-01,  3.69507015e-01,  4.38303351e-02,
         3.39017212e-01],
       [-2.91272491e-01, -4.00184453e-01, -2.21973896e-01,
         3.96605790e-01]], dtype=float32)>), (None, <tf.Variable 'dense_8/bias:0' shape=(4,) dtype=float32, numpy=array([0., 0., 0., 0.], dtype=float32)>)).