In [1]:
import numpy as np
import pandas as pd

In [19]:

class RlAgent:
    """Tabular Q-learning agent for the Roomba.

    The agent keeps a ``number_of_states x number_of_actions`` table of action
    values, picks actions epsilon-greedily and updates the table one
    transition at a time.

    Args:
        epsilon: Exploration rate; a random action is taken whenever a uniform
            draw in [0, 1) is below it, so the default of 1 always explores.
        alpha: Learning rate applied to the temporal-difference error.
        gamma: Discount factor applied to the best next-state value.
        seed: Optional seed for the agent's random generator; ``None`` seeds
            from OS entropy.
    """

    def __init__(self, epsilon=1, alpha=0.1, gamma=0.9, seed=None):
        self.number_of_states = 62209  # 16x16 * 3_batt_lvl * 3^4 1_step peek around + dead_battery
        self.number_of_actions = 8  # 4 cardinal directions for power on and off
        # Size the table from the two counts above so they cannot drift apart.
        self.q = np.zeros((self.number_of_states, self.number_of_actions), dtype="float64")
        self.state = 0
        self.next_state = 0
        self.reward = 0
        self.action = 0
        self.turn = 0
        self.epsilon = epsilon
        self.alpha = alpha
        self.gamma = gamma
        # One generator for the agent's lifetime instead of a new one per action.
        self._rng = np.random.default_rng(seed)

    def get_number_of_states(self):
        """Return the number of rows (states) in the Q-table."""
        return self.number_of_states

    def get_number_of_actions(self):
        """Return the number of columns (actions) in the Q-table."""
        return self.number_of_actions

    def e_greedy(self, actions):
        """Pick an action index epsilon-greedily.

        Args:
            actions: 1-D array of action values for the current state.

        Returns:
            The index of the largest value (first one on ties) when the
            uniform draw is at least ``epsilon``; otherwise a uniformly random
            index in ``[0, actions.size)``.
        """
        if self.epsilon <= self._rng.random():
            return int(np.argmax(actions))
        return int(self._rng.integers(low=0, high=actions.size))

    def select_action(self, state):
        """Record ``state`` as the current state and choose an action for it.

        Increments the turn counter and remembers the chosen action so that
        the following ``update_q`` call knows which table entry to adjust.
        """
        self.turn += 1
        self.state = state
        action = self.e_greedy(self.q[state])
        self.action = action
        return action

    def update_q(self, new_state, reward):
        """Apply one Q-learning update for the last (state, action) pair.

        Q(s, a) <- Q(s, a) + alpha * (reward + gamma * max_a' Q(s', a') - Q(s, a))

        Args:
            new_state: State index observed after the last action.
            reward: Reward received for that transition.
        """
        self.next_state = new_state
        td_target = reward + self.gamma * np.max(self.q[new_state])
        td_error = td_target - self.q[self.state, self.action]
        # Move only a fraction alpha towards the target rather than
        # overwriting the old estimate.
        self.q[self.state, self.action] += self.alpha * td_error

In [3]:
import random

def generate_roomba_map(dirt_probability=0.2, size=16, rng=None):
    """
    Generates a square tile map for a Roomba simulation.

    The outermost ring of tiles is wall ('#'). One interior tile that touches
    a randomly chosen wall holds the charging station ('@'). Every other
    interior tile is dirty ('*') with probability ``dirt_probability`` and
    clean ('.') otherwise.

    Args:
    dirt_probability: The probability of a tile being dirty (float between 0 and 1).
    size: Number of tiles per side, walls included (default 16).
    rng: Optional object providing ``randint(a, b)`` and ``random()``, e.g. a
        seeded ``random.Random``; defaults to the global ``random`` module.

    Returns:
    A ``(grid, start_pos)`` tuple: ``grid`` is a 2D list of single-character
    strings and ``start_pos`` is the ``(row, col)`` of the charging station.

    Raises:
    ValueError: If ``size`` is smaller than 3 (no interior tile would exist).
    """
    if size < 3:
        raise ValueError("size must be at least 3 to leave room inside the walls")
    if rng is None:
        rng = random

    last = size - 2  # index of the last interior row/column

    # Start from a grid that is wall everywhere; the interior is filled in below.
    grid = [['#'] * size for _ in range(size)]

    # Choose a random side for the charging station, then a spot along it.
    side = rng.randint(0, 3)
    offset = rng.randint(1, last)
    if side == 0:  # Top
        start_pos = (1, offset)
    elif side == 1:  # Right
        start_pos = (offset, last)
    elif side == 2:  # Bottom
        start_pos = (last, offset)
    else:  # Left
        start_pos = (offset, 1)
    grid[start_pos[0]][start_pos[1]] = '@'

    for row in range(1, size - 1):
        for col in range(1, size - 1):
            if (row, col) == start_pos:
                continue
            # Add a clear tile with a chance of dirt
            grid[row][col] = '*' if rng.random() < dirt_probability else '.'

    return grid, start_pos

# Demo: build one room in which roughly 30% of the floor tiles are dirty,
# keeping both the grid and the charging-station position for later cells.
room_map, charging_base = generate_roomba_map(dirt_probability=0.3)

# Show the room, one text line per grid row, tiles separated by a space.
for row in room_map:
    print(*row, sep=' ')

# # # # # # # # # # # # # # # #
# . . * . . . . . * * . . . . #
# * . . * . * . . . . . * * . #
# . . . . . . . . * . . . * * #
# . . . . . . . . * * . . * * #
# * . * * . . . . . . * . . . #
# . . * . . . . . . * . . . . #
# . . . . * * . . . . . . * . #
# . . . . * . . * . . * * . * #
# . * . . * . . * . . . * * . #
# . . . * . . * . * . * * * . #
# . * * * . * * * * . . . * . #
# * . * . . . . * . . * . * . #
# . . . . * . . * * . . . . . #
# . @ . . . * . . . * . * * . #
# # # # # # # # # # # # # # # #


In [20]:
import random

class RoombaEnv():
    """Grid-world environment for a battery-powered Roomba on a walled tile map.

    State ids are integers in ``[0, NUMBER_OF_STATES)``. Id 0 is the
    dead-battery state; every other id is ``_encode_state(...) + 1`` and
    combines the robot's row/column, a three-level battery bucket and the tile
    type one step to the north, south, east and west.

    Actions are integers 0-7: ``action % 4`` selects the direction
    (0=N, 1=S, 2=E, 3=W) and actions 4-7 repeat those moves with the vacuum
    powered on, which drains the battery faster.
    """

    GRID_SIZE = 16  # tiles per side, perimeter walls included
    TERMINAL_STATE_GOOD = -1  # placeholder id: no success state is encoded yet, so it is never produced
    TERMINAL_STATE_BAD = 0  # dead battery
    START_STATE = 0  # legacy constant; reset() now derives the real start state from the map
    NUMBER_OF_STATES = 62209  # 16x16 * 3_batt_lvl * 3^4 1_step peek around + dead_battery
    NUMBER_OF_ACTIONS = 8  # N_off, S_off, E_off, W_off, N_on, S_on, E_on, W_on
    NUMBER_OF_BATTLVL = 3  # High, Medium, Low (a dead battery is the separate state 0)
    NUM_CLEANLINESS_LEVELS = 3  # Clean, Dirty, Impassable
    FULL_BATTERY = 100  # battery level at the start of every episode
    _MOVES = ((-1, 0), (1, 0), (0, 1), (0, -1))  # (row, col) step for N, S, E, W
    _TILE_TYPES = ('.', '*', '#')  # tuple index == cleanliness level used in the state id

    def __init__(self, map, charging_location, verbose=True):
        """
        Set class properties with environment constants

        Args:
            map: 2D list of tile characters ('#' wall, '.' clean, '*' dirty,
                '@' charging station).
            charging_location: ``(row, col)`` of the charging station, which is
                also where the robot starts.
            verbose: When True (default) every step prints the battery level
                and the attempted move.
        """
        self.terminal_states = (RoombaEnv.TERMINAL_STATE_GOOD, RoombaEnv.TERMINAL_STATE_BAD)
        self.map = map
        self.charge_loc = charging_location
        self.roomba_loc = np.array([charging_location[0], charging_location[1]])
        self.battery_lvl = RoombaEnv.FULL_BATTERY
        self.total_states = RoombaEnv.NUMBER_OF_STATES
        self.verbose = verbose
        # Start from the real encoded observation so the start state does not
        # alias the dead-battery id 0.
        self.current_state = self._observe()

    def get_number_of_states(self) -> int:
        # Environment constant pass thru
        return RoombaEnv.NUMBER_OF_STATES

    def get_number_of_actions(self) -> int:
        # Environment constant pass thru
        return RoombaEnv.NUMBER_OF_ACTIONS

    def reset(self, start_state: int = None, es_flag: bool = False) -> int:
        """
        Start a new episode and return the state for the agent to act on.

        The battery is refilled and the robot is put back on the charging
        station. With the defaults the returned id is the encoded observation
        of that configuration. An explicit ``start_state`` is stored as given.
        With ``es_flag`` True a random non-dead id in
        ``[1, NUMBER_OF_STATES - 1]`` is stored instead.

        NOTE(review): an explicit or random id only relabels the current
        state; the robot itself still starts on the charging station with a
        full battery, because ids are never decoded back into a position.
        """
        self.battery_lvl = RoombaEnv.FULL_BATTERY
        self.roomba_loc = np.array([self.charge_loc[0], self.charge_loc[1]])
        if es_flag:
            # random.randint includes both ends; the largest valid id is
            # NUMBER_OF_STATES - 1.
            start_state = random.randint(1, RoombaEnv.NUMBER_OF_STATES - 1)
        elif start_state is None:
            start_state = self._observe()
        self.set_state(start_state)
        return self.get_state()

    def get_state(self) -> int:
        """
        Return current environment state.
        """
        return self.current_state

    def set_state(self, state: int):
        """
        Set the current environment state resulting from agent action.
        """
        self.current_state = state

    def execute_action(self, action: int) -> "tuple[int, float, bool]":
        """
        Given an action, determine the resulting next_state.
        Based on next_state determine the resulting reward for getting there.
        Update current environment state and find if it is an end state.

        Returns:
            ``(next_state, reward, done)``.
        """
        current_state = self.get_state()
        next_state = self.get_next_state(current_state, action)
        reward = RoombaEnv.get_reward(next_state)
        self.set_state(next_state)
        done = self.get_terminal_flag()
        return next_state, reward, done

    def get_next_state(self, state: int, action: int) -> int:
        """
        Drain the battery, try to move one tile and return the new state id.

        action: 'N_off'=0, 'S_off'=1, 'E_off'=2, 'W_off'=3, 'N_on'=4, 'S_on'=5, 'E_on'=6, 'W_on'=7

        ``state`` is accepted for interface compatibility but not read: the
        transition depends on the stored position and battery level. Moving
        into a wall ('#') leaves the robot where it is; the battery is drained
        either way.
        """
        ## Find battery level
        power_on = action > 3
        self.battery_lvl -= 0.09  # default power reduction
        if power_on:
            self.battery_lvl -= 0.1  # power is on, reduce power more
        if self.verbose:
            print(self.battery_lvl)

        ## Check map movement
        # East increases the column and west decreases it, matching the
        # east/west tiles peeked at in _observe().
        step = RoombaEnv._MOVES[int(action) % 4]
        new_loc = self.roomba_loc + np.array(step)
        new_loc_tile = self._interpret_loc(new_loc)

        if self.verbose:
            print("Current spot: {} ({},{})".format(self._interpret_loc(self.roomba_loc), self.roomba_loc[0], self.roomba_loc[1]))
            print("Next spot: {} ({},{})".format(new_loc_tile, new_loc[0], new_loc[1]))
        if new_loc_tile != '#':
            self.roomba_loc = new_loc  # not a wall so update
        elif self.verbose:
            print('Hit a wall, revert')

        return self._observe()

    @staticmethod
    def get_reward(next_state: int) -> float:
        """
        Static function, given a state returns a deterministic reward.
        Default is -1 unless given a terminal state providing 25 or -25.
        """
        if next_state == RoombaEnv.TERMINAL_STATE_BAD:
            return -25.0
        if next_state == RoombaEnv.TERMINAL_STATE_GOOD:
            return 25.0
        return -1.0

    def get_terminal_flag(self) -> bool:
        """
        Return if current state is in a list of set terminal states
        """
        return self.get_state() in self.terminal_states

    def _interpret_loc(self, loc):
        """Return the map character at ``loc`` given as ``(row, col)``."""
        return self.map[loc[0]][loc[1]]

    def _battery_bucket(self) -> int:
        """Map the battery level to 2 (>66), 1 (>33), 0 (>0) or -1 (dead)."""
        if self.battery_lvl > 66:
            return 2
        if self.battery_lvl > 33:
            return 1
        if self.battery_lvl > 0:
            return 0
        return -1

    def _observe(self) -> int:
        """Encode the current position, battery bucket and 1-step peek as a state id."""
        battery_state = self._battery_bucket()
        if battery_state == -1:
            return RoombaEnv.TERMINAL_STATE_BAD

        row, col = int(self.roomba_loc[0]), int(self.roomba_loc[1])
        ## Check 1 step peek ahead (north, south, east, west)
        neighbours = ((row - 1, col), (row + 1, col), (row, col + 1), (row, col - 1))
        # Tiles outside _TILE_TYPES (e.g. the '@' station) count as clean (0).
        peek_vals = [
            RoombaEnv._TILE_TYPES.index(symbol) if symbol in RoombaEnv._TILE_TYPES else 0
            for symbol in (self._interpret_loc(spot) for spot in neighbours)
        ]
        # +1 keeps id 0 free for the dead-battery state.
        return self._encode_state(row, col, battery_state, *peek_vals) + 1

    def _encode_state(self, y, x, battery, north, south, east, west):
        """Encodes the state variables into a single integer.

        Args:
            y: y-coordinate / row (0-15).
            x: x-coordinate / column (0-15).
            battery: Battery level (0-2).
            north: Cleanliness level of north (0-2).
            south: Cleanliness level of south (0-2).
            east: Cleanliness level of east (0-2).
            west: Cleanliness level of west (0-2).

        Returns:
            An integer representing the encoded state.
        """
        cells = RoombaEnv.GRID_SIZE * RoombaEnv.GRID_SIZE
        levels = RoombaEnv.NUM_CLEANLINESS_LEVELS
        peek_base = cells * RoombaEnv.NUMBER_OF_BATTLVL  # weight of the first peek digit

        state = x + \
                y * RoombaEnv.GRID_SIZE + \
                battery * cells + \
                north * peek_base + \
                south * peek_base * levels + \
                east * peek_base * levels**2 + \
                west * peek_base * levels**3

        return int(state)

In [21]:
def main(num_games: int = 1, output_path: str = 'Project1.txt'):
    """Train an RlAgent on the module-level room map and write out its Q-table.

    Prints ``room_map``, builds a ``RoombaEnv`` from ``room_map`` and
    ``charging_base``, and plays ``num_games`` episodes, updating the agent
    after every step. The resulting Q-table is then printed into
    ``output_path``.

    Args:
        num_games: Number of episodes to play (default 1).
        output_path: Text file that receives the printed Q-table.
    """
    # show map
    for row in room_map:
        print(' '.join(row))

    environment = RoombaEnv(room_map, charging_base)
    agent = RlAgent()

    # Check that the environment parameters match
    states_match = environment.get_number_of_states() == agent.get_number_of_states()
    actions_match = environment.get_number_of_actions() == agent.get_number_of_actions()
    if not (states_match and actions_match):
        print("Environment and Agent parameters do not match. Terminating program.")
        return

    # Play num_games games
    for _ in range(num_games):
        # reset the game and observe the current state
        current_state = environment.reset()
        game_end = False
        # Do until the game ends:
        while not game_end:
            action = agent.select_action(current_state)
            new_state, reward, game_end = environment.execute_action(action)
            agent.update_q(new_state, reward)
            current_state = new_state

    # NOTE(review): print() writes NumPy's abbreviated repr of the table, so
    # most rows are elided in the file — confirm whether a full dump
    # (e.g. np.savetxt) is wanted.
    with open(output_path, 'wt') as f:
        print(agent.q, file=f)
    print("\nProgram completed successfully.")


main()

# # # # # # # # # # # # # # # #
# . . * . . . . . * * . . . . #
# * . . * . * . . . . . * * . #
# . . . . . . . . * . . . * * #
# . . . . . . . . * * . . * * #
# * . * * . . . . . . * . . . #
# . . * . . . . . . * . . . . #
# . . . . * * . . . . . . * . #
# . . . . * . . * . . * * . * #
# . * . . * . . * . . . * * . #
# . . . * . . * . * . * * * . #
# . * * * . * * * * . . . * . #
# * . * . . . . * . . * . * . #
# . . . . * . . * * . . . . . #
# . @ . . . * . . . * . * * . #
# # # # # # # # # # # # # # # #
99.91
Current spot: @ (14,2)
Next spot: . (14,3)
99.72
Current spot: . (14,3)
Next spot: . (13,3)
99.53
Current spot: . (13,3)
Next spot: . (14,3)
99.34
Current spot: . (14,3)
Next spot: # (15,3)
Hit a wall, revert
99.25
Current spot: . (14,3)
Next spot: . (13,3)
99.16
Current spot: . (13,3)
Next spot: . (13,2)
98.97
Current spot: . (13,2)
Next spot: . (12,2)
98.78
Current spot: . (12,2)
Next spot: . (13,2)
98.69
Current spot: . (13,2)
Next spot: . (13,1)
98.5
Current spot: . (13,1)
