In [10]:
import random

In [11]:
class FrozenLake:
    """Square grid world with start, goal, frozen and hole tiles.

    Tile types are "S" (start, always at the top-left corner), "G" (goal,
    always at the bottom-right corner), "F" (frozen, walkable) and "H"
    (hole). The map is stored as ``self.map[y][x]``. States are integers
    numbered row by row (``state = map_size * y + x``).

    Actions are integers: 0 moves to ``y - 1``, 1 to ``y + 1``, 2 to
    ``x - 1`` and 3 to ``x + 1``.
    """

    # Tile types that modify() is allowed to write.
    _EDITABLE_TYPES = ("H", "F")
    # Tile types that modify() must never overwrite.
    _PROTECTED_TYPES = ("S", "G")
    # (dx, dy) offset applied by each action id.
    _ACTION_MOVES = {0: (0, -1), 1: (0, 1), 2: (-1, 0), 3: (1, 0)}
    # Reward returned for a state located on each tile type.
    _TILE_REWARDS = {"S": 0, "F": 0, "H": 0, "G": 1}
    # 4-neighbourhood used by the reachability search in is_valid().
    _NEIGHBOR_OFFSETS = ((0, 1), (0, -1), (-1, 0), (1, 0))

    def __init__(self, map_size, frozen_ratio=0.1, random_next_probability=0.1):
        """Create an environment with a freshly generated random map.

        Args:
            map_size: number of tiles along each side of the square map.
            frozen_ratio: per-tile probability of generating a frozen ("F")
                tile rather than a hole ("H").
            random_next_probability: probability that step() replaces the
                requested action with a uniformly random one.

        Raises:
            ValueError: propagated from generate_random_map() when no valid
                map can exist for the given arguments.
        """
        self.map_size = map_size
        self.frozen_ratio = frozen_ratio
        self.random_next_probability = random_next_probability

        self.state_list = list(range(map_size * map_size))
        self.action_list = list(range(4))
        self.map = self.generate_random_map(self.map_size, self.frozen_ratio)

    def get_type(self, state):
        """Return the tile type ("S", "F", "H" or "G") at ``state``."""
        x, y = self.state_to_coordinate(state)
        return self.map[y][x]

    def modify(self, state, _type):
        """Change the tile at ``state`` to "H" or "F" if the map stays solvable.

        The start and goal tiles are never modified: overwriting them would
        erase the "S"/"G" markers the rest of the class relies on.

        Returns:
            True if the tile was changed; False (after printing a message)
            if ``_type`` is not "H"/"F", the tile is the start or goal, or
            the change would leave no path from start to goal. In every
            False case the map is left unchanged.
        """
        if _type not in self._EDITABLE_TYPES:
            print(f"{_type} is an invalid state type")
            return False

        x, y = self.state_to_coordinate(state)
        origin_type = self.map[y][x]
        if origin_type in self._PROTECTED_TYPES:
            print(f"Cannot modify the {origin_type} tile")
            return False

        self.map[y][x] = _type
        if self.is_valid(self.map):
            return True

        # Roll back: the edit disconnected the start from the goal.
        self.map[y][x] = origin_type
        print(f"Invalid path after applying {_type}")
        return False

    def get_map(self):
        """Return the current map (the internal list, not a copy)."""
        return self.map

    def get_reward_map(self):
        """Return a ``map_size`` x ``map_size`` grid of per-tile rewards."""
        return [
            [self.reward(self.coordinate_to_state(x, y)) for x in range(self.map_size)]
            for y in range(self.map_size)
        ]

    def get_states(self):
        """Return the list of all state ids."""
        return self.state_list

    def get_actions(self):
        """Return the list of all action ids."""
        return self.action_list

    def generate_random_map(self, size=10, p=0.8):
        """
        Generates a random valid map (one that has a path from start to goal).

        Boards are sampled repeatedly until one is valid, so small values of
        ``p`` can require many attempts.

        Args:
            size: size of each side of the grid
            p: probability that a tile is frozen

        Returns:
            A random valid map

        Raises:
            ValueError: if ``size`` is smaller than 1, or if ``p`` is not
                positive while ``size`` is greater than 1 (no tile could
                ever be frozen, so no valid map exists and sampling would
                never terminate).
        """
        if size < 1:
            raise ValueError(f"size must be at least 1, got {size}")
        if size > 1 and p <= 0:
            raise ValueError(f"p must be positive to generate a valid map, got {p}")

        while True:
            # Row-major sampling order (y outer, x inner).
            board = [
                ["F" if random.random() < p else "H" for _ in range(size)]
                for _ in range(size)
            ]
            board[0][0] = "S"
            board[size - 1][size - 1] = "G"
            if self.is_valid(board):
                return board

    def is_valid(self, board):
        """Return True if a "G" tile is reachable from the top-left corner.

        Performs a depth-first search over 4-connected tiles, never stepping
        onto "H" tiles. The search always starts at ``board[0][0]``
        regardless of that tile's type. An empty board is not valid.
        """
        if not board:
            return False

        max_size = len(board)
        frontier = [(0, 0)]
        discovered = set()

        while frontier:
            r, c = frontier.pop()
            if (r, c) in discovered:
                continue
            discovered.add((r, c))

            # Checking on pop (rather than on push) also covers the 1x1
            # board, where the start tile itself is the goal.
            if board[r][c] == "G":
                return True

            for dr, dc in self._NEIGHBOR_OFFSETS:
                r_new, c_new = r + dr, c + dc
                if not (0 <= r_new < max_size and 0 <= c_new < max_size):
                    continue
                if board[r_new][c_new] != "H":
                    frontier.append((r_new, c_new))
        return False

    def step(self, state, action):
        """Apply ``action`` from ``state`` and return the transition.

        With probability ``random_next_probability`` the action is replaced
        by one drawn uniformly from the action list.

        Returns:
            A ``(next_state, reward, finished)`` tuple.
        """
        if random.random() < self.random_next_probability:
            action = random.choice(self.action_list)

        next_state = self.get_next_state(state, action)
        # get_next_state() never moves onto a hole, so this only triggers
        # when the agent stays on a tile that is itself a hole (e.g. one
        # turned into "H" by modify()); the agent is sent back to state 0.
        if self.get_type(next_state) == "H":
            next_state = 0

        reward = self.reward(next_state)
        finished = self.is_done(next_state)
        return next_state, reward, finished

    def is_done(self, state):
        """Return True if ``state`` is located on the goal tile."""
        return self.get_type(state) == "G"

    def get_next_state(self, state, action):
        """Return the state reached by ``action`` with no randomness.

        Moves that would leave the map or enter a hole keep the agent in
        place, i.e. ``state`` is returned unchanged.
        """
        x, y = self.state_to_coordinate(state)
        dx, dy = self._ACTION_MOVES[action]
        next_x, next_y = x + dx, y + dy

        if self.is_out(next_x, next_y) or self.map[next_y][next_x] == "H":
            return state
        return self.coordinate_to_state(next_x, next_y)

    def state_to_coordinate(self, state):
        """Convert a state id into its ``(x, y)`` map coordinate."""
        y, x = divmod(state, self.map_size)
        return x, y

    def coordinate_to_state(self, x, y):
        """Convert an ``(x, y)`` map coordinate into its state id."""
        return self.map_size * y + x

    def is_out(self, x, y):
        """Return True if ``(x, y)`` lies outside the map."""
        return not (0 <= x < self.map_size and 0 <= y < self.map_size)

    def reward(self, state):
        """Return the reward for ``state``: 1 on the goal tile, 0 elsewhere.

        The state id -1 is accepted and yields a reward of 0.
        """
        if state == -1:
            return 0
        return self._TILE_REWARDS[self.get_type(state)]

    def new_map(self):
        """Replace the current map with a newly generated random one."""
        self.map = self.generate_random_map(self.map_size, self.frozen_ratio)

# Module-level 5x5 environment. frozen_ratio is forwarded to
# generate_random_map() as the per-tile probability of a frozen ("F") tile.
env = FrozenLake(
    map_size=5,
    frozen_ratio=0.1,
    random_next_probability=0.1,
)

In [12]:
class ChangingFrozenLake1(FrozenLake):
    """5x5 FrozenLake that steps through a fixed sequence of predefined maps.

    The environment starts on the first map of ``map_list``; next_map()
    advances to the following one.
    """

    def __init__(self):
        """Initialise the parent as a 5x5 lake and load the first fixed map."""
        # The parent generates a random map during construction; it is
        # replaced by the first predefined layout below.
        super().__init__(5, 1)

        # One string per row, one character per tile.
        layouts = (
            ("SFFFF", "FFFFF", "FFFFF", "FFFFF", "FFFFG"),
            ("SFHFF", "FFFFF", "HFFFF", "FFFFF", "FFFFG"),
            ("SFFFF", "FFHFF", "FHHFF", "FFFFF", "FFFFG"),
            ("SFFHF", "FFFHF", "FFFFF", "HHFFF", "FFFFG"),
            ("SFFFF", "FFFFF", "FHHHF", "FFFFF", "FFFFG"),
            ("SFFFF", "FFHFF", "FFHFF", "FFHFF", "FFFFG"),
            ("SFFFF", "FFFHF", "FFHFF", "FHFFF", "FFFFG"),
        )
        # Expand into the nested list-of-lists form used by self.map.
        self.map_list = [[list(row) for row in layout] for layout in layouts]

        self.map_idx = 0
        self.map = self.map_list[self.map_idx]

    def next_map(self):
        """Advance to the next predefined map.

        Returns:
            True if the environment switched to a new map; False if the
            sequence is exhausted, in which case the current map is kept.
            Calling this again after exhaustion keeps returning False.
        """
        total = len(self.map_list)
        # Clamp the index so repeated calls after the last map cannot run
        # past the end of map_list and raise IndexError.
        self.map_idx = min(self.map_idx + 1, total)
        if self.map_idx >= total:
            return False
        self.map = self.map_list[self.map_idx]
        return True