In [1]:
import numpy as np

In [15]:
# Generate the reward matrix for the grid environment.
# A grid position (i, j) is given as (col, row); both coordinates are 1-based.
# The state index is computed by the formula:
# index = (j - 1) * num_of_columns + (i - 1), where (i, j) denotes the i-th column and j-th row of the grid.
# Example:
# Grid environment with four rows and five columns
# Goal state is at (5, 4), whose state index is (4 - 1) * 5 + (5 - 1) = 19
# Start state is at (1, 1), whose state index is (1 - 1) * 5 + (1 - 1) = 0

# Dimensions of the example grid environment: five columns by four rows.
num_of_col, num_of_row = 5, 4
# Total number of cells in the grid (one state per cell).
num_of_element = num_of_row * num_of_col

class GridStateMatrix:
    """Square (state x state) matrix attached to a rectangular grid.

    Grid cells are addressed with 1-based ``(col_index, row_index)``
    coordinates. Each cell maps to a 0-based state index via
    ``(row_index - 1) * num_of_col + (col_index - 1)``. The matrix has one row
    and one column per state and is filled with ``default_value`` on creation.
    """

    def __init__(self, num_of_col, num_of_row, default_value=0):
        """Create the matrix for a ``num_of_col`` x ``num_of_row`` grid.

        num_of_col    -- number of columns of the grid environment
        num_of_row    -- number of rows of the grid environment
        default_value -- initial value of every matrix entry
        """
        self.num_of_col = num_of_col
        self.num_of_row = num_of_row
        # One state per grid cell; the matrix is num_of_state x num_of_state.
        self.num_of_state = self.num_of_col * self.num_of_row
        self.default_value = default_value
        flat_values = [self.default_value] * (self.num_of_state * self.num_of_state)
        self.gridStateMatrix = np.matrix(flat_values).reshape(
            (self.num_of_state, self.num_of_state))

    def get_state_index(self, col_index, row_index):
        """Return the 0-based state index of the 1-based cell (col, row)."""
        # Use this instance's width, not a module-level global, so grids of
        # any size are indexed correctly.
        return (row_index - 1) * self.num_of_col + (col_index - 1)

    def get_grid_position(self, state_index):
        """Return the 1-based position of ``state_index``.

        NOTE: the result is ordered ``(row_index, col_index)``, i.e. the
        reverse of the ``(col_index, row_index)`` argument order used by the
        other methods. Returns ``(-1, -1)`` when ``state_index`` is outside
        ``0 .. num_of_state - 1``.
        """
        if state_index < 0 or state_index >= self.num_of_state:
            return -1, -1
        row_index = state_index // self.num_of_col + 1
        col_index = state_index % self.num_of_col + 1
        return row_index, col_index

    def get_neighbors(self, col_index, row_index):
        """Return the in-grid 4-neighbors of the 1-based cell (col, row).

        The result is a list of ``(col_index, row_index)`` tuples in the order
        up, down, left, right; directions that would leave the grid are
        omitted.
        """
        neighbors = list()
        # Valid coordinates run from 1 to num_of_row / num_of_col inclusive.
        if row_index < self.num_of_row:  # up
            neighbors.append((col_index, row_index + 1))
        if row_index > 1:  # down
            neighbors.append((col_index, row_index - 1))
        if col_index > 1:  # left
            neighbors.append((col_index - 1, row_index))
        if col_index < self.num_of_col:  # right
            neighbors.append((col_index + 1, row_index))
        return neighbors

    def set_element_value(self, value, state_index, action_index):
        """Store ``value`` at ``[state_index, action_index]``.

        Returns True on success and False (leaving the matrix untouched) when
        either index is outside ``0 .. num_of_state - 1``.
        """
        # Negative indices are rejected explicitly: NumPy would otherwise wrap
        # them around and silently overwrite a different entry.
        if state_index < 0 or state_index >= self.num_of_state:
            return False
        if action_index < 0 or action_index >= self.num_of_state:
            return False
        self.gridStateMatrix[state_index, action_index] = value
        return True

    def get_state_matrix(self):
        """Return the underlying ``numpy.matrix`` (not a copy)."""
        return self.gridStateMatrix
        
# Reward matrix for the grid defined above: every entry starts at -1.
Reward = GridStateMatrix(num_of_col, num_of_row, -1)

# Goal cell as a 1-based (col, row) position. Its state index is the same on
# every loop iteration, so it is computed once up front.
goal_col, goal_row = 5, 4
a_index = Reward.get_state_index(goal_col, goal_row)

# Entering the goal from any of its neighboring cells is rewarded with 100.
relations = Reward.get_neighbors(goal_col, goal_row)
for i, j in relations:
    # Single-argument print(...) emits the same "col row" text on Python 2 and 3.
    print("%d %d" % (i, j))
    s_index = Reward.get_state_index(i, j)
    Reward.set_element_value(100, s_index, a_index)

print(Reward.get_state_matrix())

5 3
4 4
[[ -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1
   -1  -1]
 [ -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1
   -1  -1]
 [ -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1
   -1  -1]
 [ -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1
   -1  -1]
 [ -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1
   -1  -1]
 [ -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1
   -1  -1]
 [ -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1
   -1  -1]
 [ -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1
   -1  -1]
 [ -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1
   -1  -1]
 [ -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1
   -1  -1]
 [ -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1
   -1  -1]
 [ -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1

In [None]:
# Initialize the Q matrices; every entry starts at the class default of 0.
Q_current = GridStateMatrix(5,4)
Q_prev = GridStateMatrix(5,4)
# Print the matrix itself: printing the wrapper object would only show its
# default object repr. Single-argument print(...) works on Python 2 and 3.
print(Q_current.get_state_matrix())