In [9]:
# imports 

from grid_world import standard_grid 
import numpy as np 

In [2]:
# define functions 

def print_values(V, g):
  """Print the values in V as a table with g.rows rows and g.cols columns.

  V is looked up with (row, col) keys; positions missing from V are shown
  as 0. Every value is rendered with two decimals and followed by '|'.
  """
  divider = "---------------------------"
  for row in range(g.rows):
    print(divider)
    cells = []
    for col in range(g.cols):
      value = V.get((row, col), 0)
      # non-negative numbers get a leading space so they line up with
      # negative ones, whose minus sign takes up that column
      template = " %.2f|" if value >= 0 else "%.2f|"
      cells.append(template % value)
    print("".join(cells))


def print_policy(P, g):
  """Print the entries of P as a table with g.rows rows and g.cols columns.

  P is looked up with (row, col) keys; positions missing from P are shown
  as a single blank. Each entry is padded with two spaces on both sides
  and followed by '|'.
  """
  divider = "---------------------------"
  for row in range(g.rows):
    print(divider)
    cells = ["  %s  |" % P.get((row, col), ' ') for col in range(g.cols)]
    print("".join(cells))

In [3]:
# create the grid object that the rest of the notebook works with
# (standard_grid is imported from the local grid_world module)

grid = standard_grid() 

In [5]:
# display the state the grid currently reports
grid.current_state()

(2, 0)

In [17]:
# Iterative policy evaluation of the uniformly random policy.
#
# states will be positions (i,j)
# simpler than tic-tac-toe because we only have one "game piece"
# that can only be at one position at a time
states = grid.all_states()

# The sweeps below reposition the agent via grid.set_state() / grid.move().
# Remember where it was so it can be put back afterwards; otherwise a later
# grid.current_state() would return whichever successor was visited last.
start_state = grid.current_state()

### uniformly random actions ###

# V maps each state to its value estimate; every state starts at zero
V = {s: 0 for s in states}

gamma = 1         # discount factor (1 means future rewards are not discounted)
threshold = 0.01  # stop when the largest per-state change in a sweep is below this

try:
    while True:
        delta = 0  # largest change to any V[s] during this sweep

        for s in states:
            # states with no entry in grid.actions are never updated,
            # so they keep their initial value of zero
            if s not in grid.actions:
                continue

            # save away the old value so the size of the update can be measured
            old_v = V[s]

            # each action has the same probability
            p_a = 1 / len(grid.actions[s])

            # expected one-step return, averaged over all available actions
            new_v = 0
            for a in grid.actions[s]:
                grid.set_state(s)  # start every action from s
                r = grid.move(a)   # value returned by the move, used as the reward
                new_v += p_a * (r + gamma * V[grid.current_state()])

            # update in place, so later states in this sweep already see the new value
            V[s] = new_v
            delta = max(delta, np.abs(old_v - new_v))

        if delta < threshold:
            break
finally:
    # leave the grid where it was found, even if the loop is interrupted
    grid.set_state(start_state)

In [18]:
# show the value estimates in V as a table, then leave some blank lines
print("values for uniformly random actions:")
print_values(V, grid)
print("\n\n")

values for uniformly random actions:
---------------------------
 0.01| 0.13| 0.24| 0.00|
---------------------------
-0.11| 0.00|-0.42| 0.00|
---------------------------
-0.24|-0.37|-0.52|-0.76|



