In [None]:
import numpy as np
import pandas as pd

In [None]:

class RlAgent:
    """Tabular Q-learning agent for the Roomba environment.

    The agent keeps a Q-table indexed as ``q[state, action]``, picks actions
    with an epsilon-greedy policy and updates the table after every step
    with the one-step Q-learning rule.
    """

    def __init__(self, start_pos=None):
        """Create an agent with a zero-initialised Q-table.

        Args:
            start_pos: Optional value describing where the agent starts
                (for example the charging-base coordinates).  It is only
                stored on the instance; no method of this class reads it.
        """
        # Q-table dimensions are placeholders -- to be corrected.
        self.q = np.zeros((16 * 16, 4), dtype="float64")
        self.start_pos = start_pos
        self.state = 0
        self.next_state = 0
        self.reward = 0
        self.action = 0
        self.turn = 0
        self.epsilon = 1  # probability of taking a random action
        self.alpha = 0.1  # learning rate used by update_q
        self.gamma = 0.9  # discount factor used by update_q
        self.number_of_states = 11  # to be corrected
        self.number_of_actions = 4  # to be corrected
        # One generator for the agent's lifetime instead of one per action.
        self.rng = np.random.default_rng()

    def get_number_of_states(self):
        """Return the number of states the agent is configured for."""
        return self.number_of_states

    def get_number_of_actions(self):
        """Return the number of actions the agent is configured for."""
        return self.number_of_actions

    def e_greedy(self, actions):
        """Pick an action index with an epsilon-greedy policy.

        Args:
            actions: 1-D NumPy array of Q-values, one entry per action.

        Returns:
            The index of the largest Q-value when the random draw is at
            least ``epsilon``; otherwise a uniformly random index.
        """
        a_star_idx = np.argmax(actions)
        if self.epsilon <= self.rng.random():
            return a_star_idx
        return self.rng.integers(low=0, high=actions.size)

    def select_action(self, state):
        """Record ``state`` as the current state and choose an action for it.

        Args:
            state: Row index of the Q-table for the current state.

        Returns:
            The chosen action index, which is also stored in ``self.action``.
        """
        self.turn += 1
        self.state = state
        action = self.e_greedy(self.q[state])
        self.action = action
        return action

    def update_q(self, new_state, reward):
        """Apply one Q-learning update for the last (state, action) pair.

        Uses ``Q(s, a) += alpha * (reward + gamma * max Q(s', .) - Q(s, a))``
        where ``s`` and ``a`` are the values stored by ``select_action``.

        Args:
            new_state: State reached after executing the last action.
            reward: Reward received for that transition.
        """
        self.next_state = new_state
        td_target = reward + self.gamma * np.max(self.q[new_state])
        td_error = td_target - self.q[self.state, self.action]
        # Move the estimate only a fraction (alpha) of the way to the target.
        self.q[self.state, self.action] += self.alpha * td_error

In [None]:
import random

class RoombaEnv:
    """Deterministic grid-world environment driven by integer states.

    States are numbered 1..NUMBER_OF_STATES on a 3-row grid
    ({1,2,3,4}, {5,6,null,7}, {8,9,10,11}); actions are
    'up'=0, 'down'=1, 'left'=2, 'right'=3.  The map and charging location
    passed to the constructor are stored but are not used by the
    transition or reward logic in this class.
    """

    TERMINAL_STATE_GOOD = 4  # to be corrected
    TERMINAL_STATE_BAD = 7  # to be corrected
    START_STATE = 8  # to be corrected
    NUMBER_OF_STATES = 11  # to be corrected
    NUMBER_OF_ACTIONS = 4  # to be corrected

    # state -> (next state for up, down, left, right).  A move that would
    # leave the grid or enter the missing cell keeps the agent in place;
    # terminal states map to themselves for every action.
    _TRANSITIONS = {
        1: (1, 5, 1, 2), 2: (2, 6, 1, 3), 3: (3, 3, 2, 4), 4: (4, 4, 4, 4),
        5: (1, 8, 5, 6), 6: (2, 9, 5, 6), 7: (7, 7, 7, 7), 8: (5, 8, 8, 9),
        9: (6, 9, 8, 10), 10: (10, 10, 9, 11), 11: (7, 11, 10, 11),
    }

    def __init__(self, map, charging_location):
        """Store the map and charging location and start at START_STATE.

        Args:
            map: Room map object; kept on ``self.map`` as given.
            charging_location: Charging-base location; kept on
                ``self.charge_loc`` as given.
        """
        self.terminal_states = (RoombaEnv.TERMINAL_STATE_GOOD, RoombaEnv.TERMINAL_STATE_BAD)
        self.current_state = RoombaEnv.START_STATE
        self.map = map
        self.charge_loc = charging_location

    def get_number_of_states(self) -> int:
        """Return the environment's number of states."""
        return RoombaEnv.NUMBER_OF_STATES

    def get_number_of_actions(self) -> int:
        """Return the environment's number of actions."""
        return RoombaEnv.NUMBER_OF_ACTIONS

    def reset(self, start_state: int = START_STATE, es_flag: bool = False) -> int:
        """Reset the game and return the state the agent should act on.

        Args:
            start_state: State to start from when ``es_flag`` is False.
            es_flag: When True, ignore ``start_state`` and start from a
                random non-terminal state (exploring starts).

        Returns:
            The state the environment was reset to.
        """
        if es_flag:
            # Starting inside a terminal state would hand out its reward on a
            # self-transition, so only non-terminal states are candidates.
            candidates = [
                state
                for state in range(1, RoombaEnv.NUMBER_OF_STATES + 1)
                if state not in self.terminal_states
            ]
            start_state = random.choice(candidates)
        self.set_state(start_state)
        return self.get_state()

    def get_state(self) -> int:
        """Return the current environment state."""
        return self.current_state

    def set_state(self, state: int):
        """Set the current environment state."""
        self.current_state = state

    def execute_action(self, action: int) -> "tuple[int, float, bool]":
        """Apply ``action`` to the current state.

        Args:
            action: Action index (0..3).

        Returns:
            ``(next_state, reward, done)`` where ``reward`` is the reward for
            arriving in ``next_state`` and ``done`` tells whether
            ``next_state`` is terminal.
        """
        next_state = RoombaEnv.get_next_state(self.get_state(), action)
        reward = RoombaEnv.get_reward(next_state)
        self.set_state(next_state)
        return next_state, reward, self.get_terminal_flag()

    @staticmethod
    def get_next_state(state: int, action: int) -> int:
        """Return the state reached by taking ``action`` in ``state``.

        Args:
            state: Current grid state (1..11).
            action: 'up'=0, 'down'=1, 'left'=2, 'right'=3.

        Raises:
            KeyError: If ``state`` is not a known grid state.
            IndexError: If ``action`` is outside the transition tuple.
        """
        return RoombaEnv._TRANSITIONS[state][action]

    @staticmethod
    def get_reward(next_state: int) -> float:
        """Return the deterministic reward for arriving in ``next_state``.

        The reward is 25.0 for the good terminal state, -25.0 for the bad
        terminal state and -1.0 for every other state.
        """
        if next_state == RoombaEnv.TERMINAL_STATE_GOOD:
            return 25.0
        if next_state == RoombaEnv.TERMINAL_STATE_BAD:
            return -25.0
        return -1.0

    def get_terminal_flag(self) -> bool:
        """Return True when the current state is one of the terminal states."""
        return self.get_state() in self.terminal_states


In [16]:
import random

def generate_roomba_map(dirt_probability=0.2, size=16):
    """Generate a square tile map for a Roomba simulation.

    The outermost ring of tiles is wall ('#').  Every interior tile is
    either dirty ('*') or clean ('.'), except for one charging station
    ('@') placed on the interior ring next to a randomly chosen wall.

    Args:
        dirt_probability: Probability that an interior tile is dirty
            (float between 0 and 1).
        size: Side length of the map in tiles, walls included.  Defaults
            to 16.

    Returns:
        A ``(grid, start_pos)`` tuple: ``grid`` is a list of ``size`` rows of
        ``size`` one-character strings and ``start_pos`` is the
        ``(row, col)`` of the charging station.

    Raises:
        ValueError: If ``size`` is smaller than 3, which leaves no interior.
    """
    if size < 3:
        raise ValueError("size must be at least 3 to leave room inside the walls")

    inner_min, inner_max = 1, size - 2

    # Start from solid wall; the interior is overwritten below.
    grid = [['#'] * size for _ in range(size)]

    # Pick the wall the station sits next to, then a position along it.
    side = random.randint(0, 3)
    rnd_pos = random.randint(inner_min, inner_max)
    if side == 0:  # Top
        start_pos = (inner_min, rnd_pos)
    elif side == 1:  # Right
        start_pos = (rnd_pos, inner_max)
    elif side == 2:  # Bottom
        start_pos = (inner_max, rnd_pos)
    else:  # Left
        start_pos = (rnd_pos, inner_min)

    for row in range(inner_min, inner_max + 1):
        for col in range(inner_min, inner_max + 1):
            if (row, col) == start_pos:
                # No random draw is spent on the charging tile.
                continue
            grid[row][col] = '*' if random.random() < dirt_probability else '.'

    station_row, station_col = start_pos
    grid[station_row][station_col] = '@'

    return grid, start_pos

# Demo: build a sample room in which roughly 30% of the floor is dirty.
room_map, charging_base = generate_roomba_map(dirt_probability=0.3)

# Show the room, one text line per grid row with tiles separated by spaces.
for tiles in room_map:
    print(*tiles, sep=' ')

# # # # # # # # # # # # # # # #
# . * . . . . . . . . . . . . #
# * . . . . . . * . . . . . * #
# . . . . . . * . * . . * . . #
# . * . . . . . . . * . . . * #
# . * * * . . . . . * * . . * #
# * . * * * * . . * . . . . . #
# . . * * . . . . . . * . * . #
# . * . * . . * . * * . . . . #
# . . . . * . . . . . . . . . #
# * * . . . . . . . . . . * * #
# * . . * . . . . . * . . . * #
# . . . * . . . . . . * . * . #
# . . . . . . . . . . * . . * #
# . * . . * . * . . . . . @ * #
# # # # # # # # # # # # # # # #


In [None]:
def main():
    """Train the agent for 100 games and write its Q-table to Project1.txt.

    Uses the module-level ``room_map`` and ``charging_base`` to build the
    environment.  If the environment and agent disagree on the number of
    states or actions, a message is printed and nothing is trained.
    """
    environment = RoombaEnv(room_map, charging_base)
    # The agent is built with its defaults; it does not need the charging
    # location, which is already given to the environment.
    agent = RlAgent()

    # Check that the environment parameters match before training.
    states_match = environment.get_number_of_states() == agent.get_number_of_states()
    actions_match = environment.get_number_of_actions() == agent.get_number_of_actions()
    if not (states_match and actions_match):
        print("Environment and Agent parameters do not match. Terminating program.")
        return

    # Play 100 games
    for _ in range(100):
        # reset the game and observe the current state
        current_state = environment.reset()
        game_end = False
        # Do until the game ends:
        while not game_end:
            action = agent.select_action(current_state)
            new_state, reward, game_end = environment.execute_action(action)
            agent.update_q(new_state, reward)
            current_state = new_state

    # NumPy abbreviates arrays above its print threshold with "..."; raise
    # the threshold so the complete table reaches the file.
    with np.printoptions(threshold=agent.q.size + 1), open('Project1.txt', 'wt') as f:
        print(agent.q, file=f)
    print("\nProgram completed successfully.")