In [8]:
import numpy as np
import random

class GridEnvironment:
    def __init__(self, grid_size, start, goal):
        self.grid_size = grid_size
        self.start = start
        self.goal = goal
        self.state = start

    def reset(self):
        self.state = self.start
        return self.state

    def step(self, action):
        x, y = self.state
        if action == 0:  # up
            x = max(0, x - 1)
        elif action == 1:  # down
            x = min(self.grid_size - 1, x + 1)
        elif action == 2:  # left
            y = max(0, y - 1)
        elif action == 3:  # right
            y = min(self.grid_size - 1, y + 1)
        
        self.state = (x, y)
        reward = -1
        done = False
        if self.state == self.goal:
            reward = 0
            done = True
        return self.state, reward, done

def q_learning(env, num_episodes, alpha, gamma, epsilon):
    q_table = np.zeros((env.grid_size, env.grid_size, 4))
    for episode in range(num_episodes):
        state = env.reset()
        done = False
        while not done:
            if random.uniform(0, 1) < epsilon:
                action = random.choice([0, 1, 2, 3])
            else:
                action = np.argmax(q_table[state[0], state[1]])
            
            next_state, reward, done = env.step(action)
            old_value = q_table[state[0], state[1], action]
            next_max = np.max(q_table[next_state[0], next_state[1]])
            
            new_value = old_value + alpha * (reward + gamma * next_max - old_value)
            q_table[state[0], state[1], action] = new_value
            
            state = next_state
    return q_table

grid_size = 5
start = (0, 0)
goal = (4, 4)
num_episodes = 1000
alpha = 0.1
gamma = 0.6
epsilon = 0.1

env = GridEnvironment(grid_size, start, goal)
q_table = q_learning(env, num_episodes, alpha, gamma, epsilon)

print("Trained Q-table:")

Trained Q-table:


In [9]:
import tensorflow as tf
from tensorflow.keras.layers import Layer
from tensorflow.keras.models import Sequential

model = tf.keras.Sequential([
    tf.keras.layers.Dense(128, activation='relu', input_shape=(2,)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(4)
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [13]:
import tensorflow as tf
from tensorflow.keras.layers import Layer
# Defining a custom layer
class CustomDenseLayer(Layer):
    def __init__(self, units=32):
        super(CustomDenseLayer, self).__init__()
        self.units = units

    def build(self, input_shape):
        self.w = self.add_weight(shape=(input_shape[-1], self.units),
                                 initializer='random_normal',
                                 trainable=True)
        self.b = self.add_weight(shape=(self.units,),
                                 initializer='zeros',
                                 trainable=True)
    def call(self, inputs):
        return tf.nn.relu(tf.matmul(inputs, self.w) + self.b)

In [15]:
model.compile(optimizer='adam', loss='categorical_crossentropy')
print("Model summary before building:")
model.summary()

# Build the model to show parameters
model.build()
print("\nModel summary after building:")
model.summary()

Model summary before building:



Model summary after building:
