In [4]:
import torch
import torch.nn as nn

# Define the object detection network
class ObjectDetectionNet(nn.Module):
    def __init__(self):
        super(ObjectDetectionNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(32 * 8 * 8, 64)
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(64, 5)  # 5 classes (background + 4 object classes)
        
    def forward(self, x):
        x = self.pool1(self.relu1(self.conv1(x)))
        x = self.pool2(self.relu2(self.conv2(x)))
        x = x.view(-1, 32 * 8 * 8)
        x = self.relu3(self.fc1(x))
        x = self.fc2(x)
        return x


In [5]:
import numpy as np
import torch.optim as optim

# Define the training loop
def train(model, criterion, optimizer, train_loader):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    train_loss = running_loss / len(train_loader)
    train_acc = 100. * correct / total
    return train_loss, train_acc


In [2]:
import numpy as np
import random

# Define the reward matrix for the maze
R = np.array([[-1, -1, -1, -1, 0, -1],
              [-1, -1, -1, 0, -1, 100],
              [-1, -1, -1, 0, -1, -1],
              [-1, 0, 0, -1, 0, -1],
              [0, -1, -1, 0, -1, 100],
              [-1, 0, -1, -1, 0, 100]])

# Define the Q matrix for the maze
Q = np.zeros_like(R)

# Define the discount factor for future rewards
gamma = 0.8

# Define the number of episodes for training
num_episodes = 1000

# Define the maximum number of steps per episode
max_steps = 100

# Define the starting state for the agent
start_state = 0

# Define the goal state for the agent
goal_state = 5

# Define the exploration rate for the agent
epsilon = 0.1

# Define the learning rate for the agent
alpha = 0.8

# Define the function to choose the next action for the agent
def choose_action(state):
    if np.random.uniform(0, 1) < epsilon:
        action = np.random.choice(np.where(R[state, :] != -1)[0])
    else:
        action = np.argmax(Q[state, :])
    return action

# Define the function to update the Q matrix for the agent
def update_q_matrix(state, action, reward, next_state):
    max_q_next_state = np.max(Q[next_state, :])
    Q[state, action] = (1 - alpha) * Q[state, action] + alpha * (reward + gamma * max_q_next_state)

# Train the agent for the given number of episodes
for episode in range(num_episodes):
    state = start_state
    for step in range(max_steps):
        action = choose_action(state)
        next_state = action
        reward = R[state, action]
        update_q_matrix(state, action, reward, next_state)
        state = next_state
        if state == goal_state:
            break

# Test the agent on the maze
current_state = start_state
steps = [current_state]
while current_state != goal_state:
    next_state = np.argmax(Q[current_state, :])
    steps.append(next_state)
    current_state = next_state

# Print the results
print("Q-matrix:")
print(Q)
print("Path taken by agent:")
print(steps)


Q-matrix:
[[ 0  0  0  0 78  0]
 [49  0  0  0  0  0]
 [49  0  0  0  0  0]
 [61  0  0  0  0  0]
 [62  0  0 48  0 99]
 [ 0  0  0  0  0  0]]
Path taken by agent:
[0, 4, 5]


In [3]:
import numpy as np
import random

# Define the maze
maze = np.array([
    [0, 0, 0],
    [0, 1, 0],
    [0, 0, 2]
])

# Define the actions
actions = ["up", "down", "left", "right"]

# Define the rewards
rewards = {
    "1_1": -1,
    "3_2": 10,
    "2_1": -1,
    "2_3": -1
}

# Define the Q-table
q_table = np.zeros([3, 3, len(actions)])

# Define the hyperparameters
alpha = 0.1
gamma = 0.6
epsilon = 0.1

# Define the function to choose the action based on the epsilon-greedy policy
def choose_action(state):
    if random.uniform(0, 1) < epsilon:
        return random.choice(actions)
    else:
        return actions[np.argmax(q_table[state[0], state[1], :])]

# Define the function to update the Q-table
def update_q_table(state, action, reward, next_state):
    q_value = q_table[state[0], state[1], actions.index(action)]
    max_q_value = np.max(q_table[next_state[0], next_state[1], :])
    q_table[state[0], state[1], actions.index(action)] = q_value + alpha * (reward + gamma * max_q_value - q_value)

# Train the agent
for i in range(1000):
    state = [0, 0]
    while state != [2, 2]:
        action = choose_action(state)
        next_state = state.copy()
        if action == "up":
            next_state[0] -= 1
        elif action == "down":
            next_state[0] += 1
        elif action == "left":
            next_state[1] -= 1
        elif action == "right":
            next_state[1] += 1
        if next_state[0] < 0 or next_state[0] > 2 or next_state[1] < 0 or next_state[1] > 2:
            next_state = state
        reward = rewards.get(str(state[0]) + "_" + str(state[1]), -0.1)
        update_q_table(state, action, reward, next_state)
        state = next_state.copy()

# Print the learned Q-table
print(q_table)


[[[-0.2298676  -0.23446521 -0.22936158 -0.2176    ]
  [-0.21700691 -0.64635976 -0.22852472 -0.196     ]
  [-0.1936281  -0.16       -0.21226303 -0.19388627]]

 [[-0.23009504 -0.23073399 -0.23081727 -0.26459878]
  [-1.02116478 -1.04637897 -1.01382255 -1.01299208]
  [-0.19130069 -0.1        -0.61704993 -0.15666312]]

 [[-0.23043426 -0.23001624 -0.22992495 -0.32632668]
  [-0.81933908 -0.76866501 -0.75111902 -0.74581342]
  [ 0.          0.          0.          0.        ]]]


In [14]:
import numpy as np

# Define the environment
class Maze:
    def __init__(self):
        self.maze = np.zeros((2, 3))
        self.maze[0, 2] = 1  # Goal
        self.maze[1, 2] = -1  # Hole
        self.agent_pos = [0, 0]
        
    def get_state(self):
        return tuple(self.agent_pos)
    
    def move_agent(self, action):
        if action == 0:  # Move right
            if self.agent_pos[1] < 2:
                self.agent_pos[1] += 1
        elif action == 1:  # Move down
            if self.agent_pos[0] < 1:
                self.agent_pos[0] += 1
        elif action == 2:  # Move left
            if self.agent_pos[1] > 0:
                self.agent_pos[1] -= 1
        elif action == 3:  # Move up
            if self.agent_pos[0] > 0:
                self.agent_pos[0] -= 1
    
    def get_reward(self):
        if tuple(self.agent_pos) == (0, 2):
            return 1.0  # Reach the goal
        elif tuple(self.agent_pos) == (1, 2):
            return -1.0  # Fall into the hole
        else:
            return 0.0
    
    def reset(self):
        self.agent_pos = [0, 0]

# Define the Q-learning agent
class QLearningAgent:
    def __init__(self, n_actions, alpha, gamma, epsilon):
        self.n_actions = n_actions
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon
        self.q_table = {}
    
    def act(self, state):
        if np.random.uniform() < self.epsilon:
            return np.random.choice(self.n_actions)
        elif state in self.q_table:
            return np.argmax(self.q_table[state])
        else:
            return np.random.choice(self.n_actions)
        
    def learn(self, state, action, reward, next_state):
        if state not in self.q_table:
            self.q_table[state] = np.zeros(self.n_actions)
        if next_state not in self.q_table:
            self.q_table[next_state] = np.zeros(self.n_actions)
        td_target = reward + self.gamma * np.max(self.q_table[next_state])
        td_error = td_target - self.q_table[state][action]
        self.q_table[state][action] += self.alpha * td_error

# Initialize the environment and agent
env = Maze()
agent = QLearningAgent(n_actions=4, alpha=0.1, gamma=0.9, epsilon=0.1)

# Train the agent
for i_episode in range(1000):
    env.reset()
    state = env.get_state()
    done = False
    while not done:
        action = agent.act(state)
        env.move_agent(action)
        next_state = env.get_state()
        reward = env.get_reward()
        agent.learn(state, action, reward, next_state)
        state = next_state
        if reward != 0:  # Stop if the agent reaches the goal or falls into the hole
            done = True

# Test the agent
    env.reset()
    state = env.get_state()
    done = False
    steps = 0
    while not done:
        action = agent.act(state)
        env.move_agent(action)
        next_state = env.get_state()
        reward = env.get_reward()
        agent.learn(state, action, reward, next_state)
        state = next_state
        steps += 1
        if reward != 0 or steps >= 20:  # Stop if the agent reaches the goal or takes more than 20 steps
            done = True
    if reward == 1.0:
        print("Success! The agent reached the goal in", steps, "steps.")
    else:
        print("Failure. The agent could not reach the goal.")


Success! The agent reached the goal in 2 steps.
Success! The agent reached the goal in 2 steps.
Success! The agent reached the goal in 2 steps.
Success! The agent reached the goal in 2 steps.
Success! The agent reached the goal in 3 steps.
Success! The agent reached the goal in 3 steps.
Success! The agent reached the goal in 2 steps.
Success! The agent reached the goal in 2 steps.
Success! The agent reached the goal in 2 steps.
Success! The agent reached the goal in 2 steps.
Success! The agent reached the goal in 3 steps.
Success! The agent reached the goal in 2 steps.
Success! The agent reached the goal in 2 steps.
Success! The agent reached the goal in 2 steps.
Success! The agent reached the goal in 2 steps.
Success! The agent reached the goal in 2 steps.
Success! The agent reached the goal in 2 steps.
Success! The agent reached the goal in 2 steps.
Success! The agent reached the goal in 2 steps.
Success! The agent reached the goal in 2 steps.
Success! The agent reached the goal in 2