## Value Iteration - Table Representation

#### Imports

In [2]:
import numpy as np
# Seed NumPy's global RNG so the randomly drawn transition matrix below is
# the same on every run.
np.random.seed(1337)

#### Parameters

In [None]:
# Problem size and solver settings used by the cells below.
n_states = 10     # how many states the table holds
gamma = 0.9       # discount factor applied to successor-state values
tolerance = 1e-5  # convergence threshold on the mean absolute value change

#### Set state rewards

In [None]:
# Every state yields zero reward except the two special states set below.
rewards = np.zeros(n_states)
rewards[[8, 9]] = (-1, 1)  # state 8: penalty state, state 9: goal state

#### Define transition probabilities

In [None]:
# Draw a random non-negative matrix and divide each row by its sum, so that
# row i is a probability distribution over the successor states of state i.
transition_prob = np.random.random([n_states, n_states])
# keepdims=True keeps the sums as a column vector, so the division broadcasts
# row-wise for any n_states (no hard-coded size needed).
row_sums = transition_prob.sum(axis=-1, keepdims=True)
transition_prob = transition_prob / row_sums  # Normalization
# An all-zero row has no successors, so that state's value is just its reward.
transition_prob[8] = 0 # Make penalty state terminal
transition_prob[9] = 0 # Make goal state terminal

#### Initialize state values

In [None]:
# Two zero-initialized tables: the current value estimate, and the estimate
# produced by the next Bellman sweep.
state_values = np.zeros(n_states)
estimated_state_values = np.zeros_like(state_values)

#### Value Iteration through Bellman updates until convergence

In [17]:
# Repeat synchronous Bellman sweeps until the mean absolute change between
# two successive value tables falls below `tolerance`.
k = 0  # Sweep counter; must be initialized here so the cell runs on a fresh kernel
while True:
    k += 1
    # Bellman update for all states at once: row s of transition_prob dotted
    # with the previous values gives the expected successor value of state s.
    estimated_state_values = rewards + gamma*np.dot(transition_prob, state_values)
    if np.abs(state_values - estimated_state_values).mean() < tolerance:
        # Converged: state_values keeps the previous sweep's table, which is
        # within `tolerance` (mean absolute difference) of the newest one.
        break
    state_values = estimated_state_values.copy()
    print(state_values)  # Function-call form works on both Python 2 and 3

print(state_values)

[ 0.  0.  0.  0.  0.  0.  0.  0. -1.  1.]
[ 0.01099568  0.05100129  0.09972812  0.16620632 -0.11941359  0.07652025
 -0.00835717 -0.16466717 -1.          1.        ]
[ 0.0321968   0.0711343   0.13001374  0.12646647 -0.11399018  0.11503669
  0.0198571  -0.1704604  -1.          1.        ]
[ 0.04265429  0.08482579  0.13869135  0.1332452  -0.10814536  0.12108797
  0.03088385 -0.15707057 -1.          1.        ]
[ 0.04925874  0.09116212  0.14567394  0.14037227 -0.10170985  0.12835814
  0.03870479 -0.1510823  -1.          1.        ]
[ 0.05429988  0.09568022  0.15095448  0.14496013 -0.09707606  0.13329825
  0.04450458 -0.14645513 -1.          1.        ]
[ 0.05790992  0.0989873   0.1547073   0.14836523 -0.09373552  0.13685888
  0.04865646 -0.14310042 -1.          1.        ]
[ 0.06051827  0.10136571  0.15742536  0.15082078 -0.09131906  0.13943684
  0.05165775 -0.1406849  -1.          1.        ]
[ 0.06240263  0.10308446  0.15938827  0.15259354 -0.08957411  0.1412978
  0.0538256  -0.13893917 