In [26]:
import numpy as np

def read_grid_world(file_path):
    """Load a whitespace-separated grid world from a text file.

    Each non-blank line of the file is one grid row; cells are separated by
    whitespace (e.g. ``G . x .``).

    Args:
        file_path: Path of the text file to read.

    Returns:
        A NumPy array of cell symbols, one array row per non-blank file line.

    Raises:
        ValueError: If the rows do not all contain the same number of cells.
    """
    with open(file_path, 'r') as file:
        # Skip blank / whitespace-only lines (typically a trailing newline);
        # they would otherwise become empty rows and make the grid ragged.
        rows = [line.split() for line in file if line.strip()]

    # Fail with a clear message instead of letting NumPy build (or reject)
    # a ragged array further down the line.
    widths = {len(cells) for cells in rows}
    if len(widths) > 1:
        raise ValueError(
            f"Grid in {file_path!r} is not rectangular: row lengths {sorted(widths)}"
        )
    return np.array(rows)

def get_neighbors(grid, row, col):
    """Return the cell reached from (row, col) by each of the four moves.

    Args:
        grid: 2-D NumPy array of cell symbols; 'x' marks a wall.
        row: Row index of the current cell.
        col: Column index of the current cell.

    Returns:
        A dict with keys 'up', 'down', 'left', 'right' (in that order) whose
        values are (row, col) tuples. A move that would leave the grid or
        enter a wall maps back to the current cell.
    """
    n_rows, n_cols = grid.shape[0], grid.shape[1]
    offsets = (
        ('up', -1, 0),
        ('down', 1, 0),
        ('left', 0, -1),
        ('right', 0, 1),
    )

    reached = {}
    for name, d_row, d_col in offsets:
        r, c = row + d_row, col + d_col
        inside = 0 <= r < n_rows and 0 <= c < n_cols
        # Blocked moves (off-grid or into a wall) leave the agent in place.
        reached[name] = (r, c) if inside and grid[r, c] != 'x' else (row, col)
    return reached

def calculate_rewards_and_transitions(grid):
    """Build per-state reward and transition tables for a grid world.

    Goal cells ('G') loop onto themselves with reward 10 for every action.
    Free cells ('.') move deterministically to the cell given by
    ``get_neighbors``; the reward is 10 when that cell is a goal, else -1.
    Cells holding any other symbol (e.g. walls 'x') get no entry.

    Args:
        grid: 2-D NumPy array of cell symbols.

    Returns:
        A ``(rewards, transitions)`` pair where
        ``rewards[state][action]`` is a number and
        ``transitions[state][action]`` is ``{next_state: 1.0}``;
        states are (row, col) tuples.
    """
    action_names = ['up', 'down', 'left', 'right']
    rewards, transitions = {}, {}

    n_rows, n_cols = grid.shape[0], grid.shape[1]
    for state in ((r, c) for r in range(n_rows) for c in range(n_cols)):
        symbol = grid[state]

        if symbol == 'G':
            # NOTE(review): the original author was unsure whether the goal
            # reward should be 1; it is currently 10 for every action.
            rewards[state] = dict.fromkeys(action_names, 10)
            transitions[state] = {name: {state: 1.0} for name in action_names}
            continue

        if symbol != '.':
            continue

        moves = get_neighbors(grid, state[0], state[1])
        state_rewards, state_transitions = {}, {}
        for name, (r, c) in moves.items():
            state_rewards[name] = 10 if grid[r, c] == 'G' else -1
            state_transitions[name] = {(r, c): 1.0}
        rewards[state] = state_rewards
        transitions[state] = state_transitions

    return rewards, transitions

def print_rewards_and_transitions(rewards, transitions):
    """Print, for every state, the reward and transition of each action.

    Args:
        rewards: Mapping ``state -> {action: reward}``.
        transitions: Mapping ``state -> {action: transition}``; must contain
            an entry for every (state, action) present in ``rewards``.
    """
    for state in rewards:
        print(f"State {state}:")
        state_rewards = rewards[state]
        for action in state_rewards:
            outcome = transitions[state][action]
            print(f"  Action {action}: Reward = {state_rewards[action]}, Transition = {outcome}")

# Print each state's reward vector: one entry per action, in action order (not a true one-hot encoding)
def print_one_hot_rewards(rewards, transitions):
    """Print the reward vector of every state.

    The vector lists the state's rewards in the iteration order of its
    action dict (one entry per action).

    Args:
        rewards: Mapping ``state -> {action: reward}``.
        transitions: Unused; kept so the signature matches the other
            reporting helpers.
    """
    # No lookup of rewards[(0, 0)] here: that state is absent whenever the
    # top-left cell is a wall, and the value was never used anyway.
    for state, actions in rewards.items():
        reward_vector = list(actions.values())
        print("State", state, reward_vector)

def group_states_by_one_hot_reward(rewards, transitions):
    """Bucket states that share the same per-action reward vector.

    Args:
        rewards: Mapping ``state -> {action: reward}``.
        transitions: Unused by this function.

    Returns:
        A dict mapping each reward tuple (rewards in action-dict order) to
        the list of states having it, both in first-seen order.
    """
    grouped = {}
    for state in rewards:
        signature = tuple(rewards[state].values())
        grouped.setdefault(signature, []).append(state)
    return grouped

# Demo: load the first grid world and group its states by reward vector
file_path = 'custom_envs/grid_world.txt'
grid = read_grid_world(file_path)
print(grid)

rewards, transitions = calculate_rewards_and_transitions(grid)
# Optional diagnostics, left disabled:
# print_rewards_and_transitions(rewards, transitions)
# print_one_hot_rewards(rewards, transitions)

one_hot_rewards = group_states_by_one_hot_reward(rewards, transitions)

# Label the groups c1, c2, c3, ... as candidate bisimulation classes
# (states are grouped by reward vector only; transitions are not compared).
for label, (reward_vector, members) in enumerate(one_hot_rewards.items(), start=1):
    print(f"c{label} with rewards: ", reward_vector)
    print(f"States in c{label}: ", members)



[['G' '.' '.' 'G']
 ['.' '.' '.' '.']
 ['.' '.' '.' '.']
 ['G' '.' '.' 'G']]
c1 with rewards:  (10, 10, 10, 10)
States in c1:  [(0, 0), (0, 3), (3, 0), (3, 3)]
c2 with rewards:  (-1, -1, 10, -1)
States in c2:  [(0, 1), (3, 1)]
c3 with rewards:  (-1, -1, -1, 10)
States in c3:  [(0, 2), (3, 2)]
c4 with rewards:  (10, -1, -1, -1)
States in c4:  [(1, 0), (1, 3)]
c5 with rewards:  (-1, -1, -1, -1)
States in c5:  [(1, 1), (1, 2), (2, 1), (2, 2)]
c6 with rewards:  (-1, 10, -1, -1)
States in c6:  [(2, 0), (2, 3)]


In [27]:
# Demo: same grouping for the second grid world (contains walls)
file_path = 'custom_envs/grid_world2.txt'
grid = read_grid_world(file_path)
print(grid)

rewards, transitions = calculate_rewards_and_transitions(grid)
# Optional diagnostics, left disabled:
# print_rewards_and_transitions(rewards, transitions)
# print_one_hot_rewards(rewards, transitions)

one_hot_rewards = group_states_by_one_hot_reward(rewards, transitions)

# Label the groups c1, c2, c3, ... as candidate bisimulation classes
# (states are grouped by reward vector only; transitions are not compared).
for label, (reward_vector, members) in enumerate(one_hot_rewards.items(), start=1):
    print(f"c{label} with rewards: ", reward_vector)
    print(f"States in c{label}: ", members)


[['G' '.' '.' 'G']
 ['.' 'x' '.' '.']
 ['.' '.' 'x' '.']
 ['G' '.' '.' 'G']]
c1 with rewards:  (10, 10, 10, 10)
States in c1:  [(0, 0), (0, 3), (3, 0), (3, 3)]
c2 with rewards:  (-1, -1, 10, -1)
States in c2:  [(0, 1), (3, 1)]
c3 with rewards:  (-1, -1, -1, 10)
States in c3:  [(0, 2), (3, 2)]
c4 with rewards:  (10, -1, -1, -1)
States in c4:  [(1, 0), (1, 3)]
c5 with rewards:  (-1, -1, -1, -1)
States in c5:  [(1, 2), (2, 1)]
c6 with rewards:  (-1, 10, -1, -1)
States in c6:  [(2, 0), (2, 3)]


In [28]:
# Demo: same grouping for the third grid world
file_path = 'custom_envs/grid_world3.txt'
grid = read_grid_world(file_path)
print(grid)

rewards, transitions = calculate_rewards_and_transitions(grid)
# Optional diagnostics, left disabled:
# print_rewards_and_transitions(rewards, transitions)
# print_one_hot_rewards(rewards, transitions)

one_hot_rewards = group_states_by_one_hot_reward(rewards, transitions)

# Label the groups c1, c2, c3, ... as candidate bisimulation classes
# (states are grouped by reward vector only; transitions are not compared).
for label, (reward_vector, members) in enumerate(one_hot_rewards.items(), start=1):
    print(f"c{label} with rewards: ", reward_vector)
    print(f"States in c{label}: ", members)


[['.' '.' '.' '.' '.' 'G']
 ['.' '.' 'x' '.' '.' 'G']
 ['.' '.' 'x' '.' '.' 'G']
 ['.' '.' 'x' '.' '.' 'G']
 ['.' '.' '.' '.' '.' 'G']
 ['.' '.' 'x' '.' '.' 'G']
 ['.' '.' 'x' '.' '.' 'G']
 ['.' '.' 'x' '.' '.' 'G']
 ['.' '.' '.' '.' '.' 'G']]
c1 with rewards:  (-1, -1, -1, -1)
States in c1:  [(0, 0), (0, 1), (0, 2), (0, 3), (1, 0), (1, 1), (1, 3), (2, 0), (2, 1), (2, 3), (3, 0), (3, 1), (3, 3), (4, 0), (4, 1), (4, 2), (4, 3), (5, 0), (5, 1), (5, 3), (6, 0), (6, 1), (6, 3), (7, 0), (7, 1), (7, 3), (8, 0), (8, 1), (8, 2), (8, 3)]
c2 with rewards:  (-1, -1, -1, 10)
States in c2:  [(0, 4), (1, 4), (2, 4), (3, 4), (4, 4), (5, 4), (6, 4), (7, 4), (8, 4)]
c3 with rewards:  (10, 10, 10, 10)
States in c3:  [(0, 5), (1, 5), (2, 5), (3, 5), (4, 5), (5, 5), (6, 5), (7, 5), (8, 5)]


In [29]:
import numpy as np

# Grid world layout, one string per row: 'G' and '.' are enterable cells
# (an 'x' would be a blocked cell).
grid = [list(row) for row in ("G..G", "....", "....", "G..G")]

# Action labels: up, down, left, right
actions = list("UDLR")

# Reward associated with each cell symbol
rewards = {
    'G': 1,
    '.': 0,
    'x': -1,
}

# (row, column) offset applied by each action
transitions = dict(U=(-1, 0), D=(1, 0), L=(0, -1), R=(0, 1))

def is_valid(state, grid):
    """Tell whether ``state`` is an enterable cell of ``grid``.

    Args:
        state: (row, col) pair.
        grid: Non-empty list of equally long rows of cell symbols.

    Returns:
        True when the position lies inside the grid and is not a wall ('x').
    """
    n_rows = len(grid)
    n_cols = len(grid[0])
    if not 0 <= state[0] < n_rows:
        return False
    if not 0 <= state[1] < n_cols:
        return False
    return grid[state[0]][state[1]] != 'x'

def get_next_state(state, action, grid):
    """Return the state reached by taking ``action`` in ``state``.

    The offset for ``action`` is read from the module-level ``transitions``
    table. If the resulting cell is rejected by ``is_valid``, the agent
    stays where it is.

    Args:
        state: (row, col) pair of the current cell.
        action: Key of the ``transitions`` table.
        grid: Grid passed through to ``is_valid``.

    Returns:
        The (row, col) of the next state.
    """
    d_row, d_col = transitions[action]
    candidate = (state[0] + d_row, state[1] + d_col)
    return candidate if is_valid(candidate, grid) else state

def get_bisimulation_groups(grid):
    """Partition the non-wall cells of ``grid`` into bisimulation classes.

    Two states end up in the same class when they carry the same reward and,
    for every action, their successors lie in the same class. The partition
    is computed by refinement: start from the classes induced by the reward
    alone and keep splitting classes whose members disagree on the class of
    some successor, until nothing splits any more. Starting coarse and
    splitting is what yields the *coarsest* such partition; merging upwards
    from singleton classes cannot join states whose successors are distinct
    but equivalent (e.g. two isolated cells, or two identical corridors
    separated by a wall).

    Uses the module-level ``actions`` list, the ``rewards`` table (reward per
    cell symbol) and ``get_next_state`` for the deterministic dynamics.

    Args:
        grid: List of rows of cell symbols; 'x' cells are not states.

    Returns:
        A dict mapping a representative state (the first member in row-major
        order) to the list of states in its class, also in row-major order.
        An empty grid yields an empty dict.
    """
    if not grid or not grid[0]:
        return {}

    n_rows, n_cols = len(grid), len(grid[0])
    states = [(i, j) for i in range(n_rows) for j in range(n_cols) if grid[i][j] != 'x']

    # The dynamics are fixed, so look every successor up exactly once.
    successors = {
        state: tuple(get_next_state(state, a, grid) for a in actions)
        for state in states
    }

    # Coarsest starting point: states are told apart by reward only.
    block_of = {state: rewards[grid[state[0]][state[1]]] for state in states}
    n_blocks = len(set(block_of.values()))

    while True:
        # A state's signature is its own block plus the block reached by each
        # action; including the own block guarantees each pass only splits.
        block_ids = {}
        refined = {}
        for state in states:
            signature = (block_of[state],) + tuple(block_of[nxt] for nxt in successors[state])
            refined[state] = block_ids.setdefault(signature, len(block_ids))
        if len(block_ids) == n_blocks:
            break  # no block was split: the partition is stable
        block_of, n_blocks = refined, len(block_ids)

    groups = {}
    representative = {}
    for state in states:
        root = representative.setdefault(block_of[state], state)
        groups.setdefault(root, []).append(state)

    return groups

# Compute the bisimulation partition of the grid defined in this cell
bisimulation_groups = get_bisimulation_groups(grid)

# Report every class, keyed by its representative state
for representative in bisimulation_groups:
    print(f"Bisimulation Group {representative}: {bisimulation_groups[representative]}")


Bisimulation Group (0, 0): [(0, 0)]
Bisimulation Group (0, 1): [(0, 1)]
Bisimulation Group (0, 2): [(0, 2)]
Bisimulation Group (0, 3): [(0, 3)]
Bisimulation Group (1, 0): [(1, 0)]
Bisimulation Group (1, 1): [(1, 1)]
Bisimulation Group (1, 2): [(1, 2)]
Bisimulation Group (1, 3): [(1, 3)]
Bisimulation Group (2, 0): [(2, 0)]
Bisimulation Group (2, 1): [(2, 1)]
Bisimulation Group (2, 2): [(2, 2)]
Bisimulation Group (2, 3): [(2, 3)]
Bisimulation Group (3, 0): [(3, 0)]
Bisimulation Group (3, 1): [(3, 1)]
Bisimulation Group (3, 2): [(3, 2)]
Bisimulation Group (3, 3): [(3, 3)]
