In [1]:
import numpy as np

# ---------- Environment ----------
# Grid-world layout, one string per row.  '#' is a wall and 'G' is the
# terminal goal cell (see is_wall / is_terminal).  'S' presumably marks the
# start cell and '-' an ordinary free cell; neither symbol is tested for
# explicitly by the code in this file.
GRID = [list(row) for row in ("S---", "-#-G")]
ROWS, COLS = len(GRID), len(GRID[0])
ACTIONS = ["up", "down", "left", "right"]
GAMMA = 0.99         # discount factor applied to the successor state's value
REWARD_MOVE = -0.01  # reward for a move that ends on a non-goal cell
REWARD_GOAL = 1.0    # reward for a move that ends on the goal cell

def is_terminal(state):
    """Return True if `state` (a ``(row, col)`` pair) is the goal cell 'G'."""
    row, col = state
    cell = GRID[row][col]
    return cell == 'G'

def is_wall(state):
    """Return True if `state` (a ``(row, col)`` pair) is a wall cell '#'."""
    row, col = state
    cell = GRID[row][col]
    return cell == '#'

def step(state, action):
    """Apply `action` in `state` and return ``(next_state, reward)``.

    Transitions are deterministic:

    * From the goal cell or from a wall cell nothing happens: the state is
      returned unchanged with reward 0.
    * A move that would leave the grid is clamped to the boundary, so the
      agent stays where it is and receives ``REWARD_MOVE``.
    * A move into a wall is blocked: the agent stays put and receives -1.
    * A move onto the goal cell yields ``REWARD_GOAL``; every other move
      yields ``REWARD_MOVE``.

    Args:
        state: ``(row, col)`` position in ``GRID``.
        action: one of 'up', 'down', 'left', 'right'.

    Returns:
        Tuple ``(next_state, reward)`` where ``next_state`` is a
        ``(row, col)`` pair.

    Raises:
        ValueError: if `action` is not one of the four supported moves.
            Previously such an action was silently treated as a
            stay-in-place move rewarded with ``REWARD_MOVE``.
    """
    # Reject typos / unsupported actions up front instead of letting them
    # fall through the move chain as an unintended no-op.
    if action not in ('up', 'down', 'left', 'right'):
        raise ValueError(
            "unknown action {!r}; expected 'up', 'down', 'left' or 'right'"
            .format(action)
        )

    i, j = state
    # Goal and wall cells are absorbing: no movement, no reward.
    if is_terminal(state) or is_wall(state):
        return state, 0

    # Only the axis being moved along is changed, clamped to the grid edge.
    ni, nj = i, j
    if action == 'up':
        ni = max(0, i - 1)
    elif action == 'down':
        ni = min(ROWS - 1, i + 1)
    elif action == 'left':
        nj = max(0, j - 1)
    else:  # 'right'
        nj = min(COLS - 1, j + 1)

    target = (ni, nj)
    if is_wall(target):
        # Bumping into a wall: stay in place with a fixed penalty of -1.
        return state, -1
    if is_terminal(target):
        return target, REWARD_GOAL
    return target, REWARD_MOVE

# ---------- Synchronous execution ----------
def synchronous_value_iteration(iterations=30):
    """Run `iterations` sweeps of value iteration with synchronous updates.

    Every sweep reads only the value table produced by the previous sweep
    and writes into a fresh copy, which replaces the old table once the
    sweep is complete.

    Args:
        iterations: number of full sweeps over the grid.

    Returns:
        A ``(ROWS, COLS)`` numpy array of state values.  Wall and goal cells
        are never updated and keep their initial value 0.
    """
    values = np.zeros((ROWS, COLS))
    for _ in range(iterations):
        updated = values.copy()
        # np.ndindex walks the grid in row-major order, like nested
        # row/column loops.
        for cell in np.ndindex(ROWS, COLS):
            if is_wall(cell) or is_terminal(cell):
                continue
            outcomes = (step(cell, action) for action in ACTIONS)
            # Best one-step return, bootstrapped from the previous sweep.
            updated[cell] = max(
                reward + GAMMA * values[successor]
                for successor, reward in outcomes
            )
        values = updated
    return values

# ---------- Asynchronous execution ----------
def asynchronous_value_iteration(iterations=30):
    """Run `iterations` sweeps of value iteration with in-place updates.

    A single value table is updated cell by cell in row-major order, so a
    cell visited later in a sweep already sees the values written earlier in
    that same sweep.

    Args:
        iterations: number of full sweeps over the grid.

    Returns:
        A ``(ROWS, COLS)`` numpy array of state values.  Wall and goal cells
        are never updated and keep their initial value 0.
    """
    values = np.zeros((ROWS, COLS))
    for _ in range(iterations):
        # Row-major traversal; the visiting order matters because updates
        # are written straight back into `values`.
        for cell in np.ndindex(ROWS, COLS):
            if is_wall(cell) or is_terminal(cell):
                continue
            outcomes = (step(cell, action) for action in ACTIONS)
            values[cell] = max(
                reward + GAMMA * values[successor]
                for successor, reward in outcomes
            )
    return values

# ---------- Run ----------
# Compute both value tables with the default number of sweeps.
V_sync = synchronous_value_iteration()
V_async = asynchronous_value_iteration()

# Print each table, rounded to two decimals, under its own header line.
print("\n Synchronous Value Iteration:", np.round(V_sync, 2), sep="\n")

print("\n Asynchronous Value Iteration:", np.round(V_async, 2), sep="\n")



 Synchronous Value Iteration:
[[0.94 0.96 0.98 1.  ]
 [0.92 0.   1.   0.  ]]

 Asynchronous Value Iteration:
[[0.94 0.96 0.98 1.  ]
 [0.92 0.   1.   0.  ]]
