In [10]:
import numpy as np

In [11]:
class gridWorld():
    """Square gridworld whose state values are estimated by repeated sweeps.

    States are ``(row, col)`` tuples indexing ``grid_values``. Each action in
    ``actions`` is weighted equally when a state's value is re-estimated.

    Dynamics implemented by ``get_new_state_reward``:

    * From a special state, every action jumps to a fixed target state and
      earns a fixed reward (see ``_SPECIAL_TRANSITIONS``).
    * A move that would leave the grid keeps the agent in place, reward -1.
    * Any other move shifts the agent one cell, reward 0.

    NOTE: the special-state targets are hard-coded for the default 5x5 grid;
    a ``grid_size`` between 2 and 4 puts the target ``(4, 1)`` outside
    ``grid_values``.
    """

    # (row delta, col delta) for each supported action.
    _MOVES = {'N': (-1, 0), 'S': (1, 0), 'E': (0, 1), 'W': (0, -1)}

    # state -> (next state, reward), applied regardless of the action taken.
    _SPECIAL_TRANSITIONS = {
        (0, 1): ((4, 1), 10),
        (0, 3): ((2, 3), 5),
    }

    def __init__(self, grid_size=5, actions=['N', 'S', 'E', 'W'], gamma=0.9):
        """
        grid_size: number of rows (and columns) of the square grid
        actions:   iterable of action chars, each one of 'N', 'S', 'E', 'W'
        gamma:     discount factor applied to the successor state's value
        """
        self.grid_size = grid_size
        # Copy so instances never share (or mutate) the default list object.
        self.actions = list(actions)
        self.grid_values = np.zeros((grid_size, grid_size))
        # Snapshot of grid_values taken at the start of the latest sweep.
        self.prev_values = np.zeros((grid_size, grid_size))
        self.gamma = gamma

    def get_new_state_reward(self, state, action):
        """
        input state tuple and action char

        Returns state, reward

        Raises ValueError if action is not one of 'N', 'S', 'E', 'W'.
        """
        x, y = state
        if action not in self._MOVES:
            raise ValueError(
                "Unknown state or action: {} {}".format(state, action))

        # Must be checked before the border test: the special states sit on
        # the top row, so 'N' would otherwise be scored as a wall bump (-1)
        # instead of triggering the jump.
        special = self._SPECIAL_TRANSITIONS.get((x, y))
        if special is not None:
            return special

        dx, dy = self._MOVES[action]
        new_x, new_y = x + dx, y + dy
        inside = (0 <= new_x < self.grid_size) and (0 <= new_y < self.grid_size)
        if not inside:
            # Bumped into the edge: stay put and pay the penalty.
            return (x, y), -1
        return (new_x, new_y), 0

    def get_new_value_estimate(self, i, j):
        """
        Return the updated value estimate of state (i, j): the average over
        all actions of ``reward + gamma * value(next state)``, using the
        current contents of ``grid_values``.
        """
        if not self.actions:
            return 0

        # Equal weight per action; equals the former constant 0.25 for the
        # default four actions.
        prob = 1.0 / len(self.actions)
        estimate = 0.0
        for action in self.actions:
            (next_x, next_y), reward = self.get_new_state_reward((i, j), action)
            estimate += prob * (reward + self.gamma * self.grid_values[next_x, next_y])

        return estimate

    def value_iteration(self):
        """
        Perform one in-place sweep over every state, replacing its value with
        ``get_new_value_estimate``. Values updated earlier in the sweep are
        used by states visited later in the same sweep.

        Note that the update averages over actions (it does not take a max
        over them), so the sweep evaluates the equal-weight policy.

        Returns the largest absolute change made to any state value.
        """
        self.prev_values = self.grid_values.copy()
        for i in range(self.grid_size):
            for j in range(self.grid_size):
                self.grid_values[i, j] = self.get_new_value_estimate(i, j)

        # initial=0.0 keeps this well-defined for an empty (size 0) grid.
        return float(np.abs(self.grid_values - self.prev_values).max(initial=0.0))

    def get_grid(self, iterations=10000, tol=None, verbose=True):
        """
        Run repeated sweeps and return the resulting ``grid_values`` array.

        iterations: maximum number of sweeps to perform
        tol:        if given, stop as soon as a sweep changes no value by
                    ``tol`` or more; None (default) always runs every sweep
        verbose:    print the iteration index and grid after each sweep
        """
        for i in range(iterations):
            delta = self.value_iteration()
            if verbose:
                print("Iteration:", i)
                print(self.grid_values, "\n")
            if tol is not None and delta < tol:
                break

        return self.grid_values
            

In [25]:
"""
Q2. 

We attempt to solve the system of 25 linear equations (1 equation per state)
using Ax = b, where A is a matrix of coefficients of the equations, x is a
vector of all state values, and b is a vector of constants obtained from the
system of equations.

This can be done using the numpy library.
"""
import numpy as np
from numpy import linalg

# Load the linear system A x = b from disk.
#   variable_coeffs.csv: 25 rows of 25 comma-separated coefficients (A)
#   constants.csv:       25 constants, one per row (b)
# np.loadtxt replaces the manual line parsing, which relied on assigning a
# size-1 array to a scalar slot (b[i] = array([v])) -- deprecated by NumPy.
# float32 is kept so the printed solution matches the recorded output.
A = np.loadtxt('variable_coeffs.csv', delimiter=',', dtype=np.float32, ndmin=2)
b = np.loadtxt('constants.csv', delimiter=',', dtype=np.float32, ndmin=1).reshape(-1)

# Fail loudly on malformed input instead of solving a partially filled system.
if A.shape != (25, 25):
    raise ValueError(
        "variable_coeffs.csv must hold a 25x25 matrix, got shape {}".format(A.shape))
if b.shape != (25,):
    raise ValueError(
        "constants.csv must hold 25 values, got shape {}".format(b.shape))

# Solve for the 25 state values and lay them out as the 5x5 grid.
x = linalg.solve(A, b)
x = np.reshape(x, (5, 5))
print(x)

[[ 3.3089962   8.789292    4.427619    5.3223677   1.4921786 ]
 [ 1.521588    2.9923177   2.2501397   1.9075716   0.54740256]
 [ 0.05082239  0.73817044  0.67311317  0.35818613 -0.40314123]
 [-0.97359234 -0.43549547 -0.3548823  -0.58560514 -1.1830751 ]
 [-1.8577005  -1.3452313  -1.2292674  -1.4229182  -1.975179  ]]
