In [None]:
# The grid is defined by its length and width. The total numbers of states and actions are calculated.

import numpy as np
import matplotlib.pyplot as plt

L = 8 # grid length: number of cells along y
W = 8 # grid width: number of cells along x

# State space: every [x, y, dirc] triple. x varies slowest and the heading
# index dirc (0..11) varies fastest, so S holds W * L * 12 entries.
S = [[x, y, dirc] for x in range(W) for y in range(L) for dirc in range(12)]
            
# Individual action components: a translation step and a post-move turn.
SIT = 0
FWD = 1
BWD = -1
NO_TURN = 0
L_TURN = -1
R_TURN = 1

# Action space: sitting still is a single action (no turn is paired with it);
# every moving step is combined with each of the three turn choices.
A = [[SIT, NO_TURN]] + [
    [step, turn]
    for step in (FWD, BWD)
    for turn in (NO_TURN, L_TURN, R_TURN)
]

In [None]:
# This function returns possible next states and their corresponding probabilities based on the current state,  
# next action and the (constant) pre-rotate error probability. 

def next_state_distr(s, a, p_e):
    """Enumerate the possible next states for state `s` under action `a`.

    Args:
        s: current state, indexed as s[0]=x, s[1]=y, s[2]=heading index.
        a: action, indexed as a[0]=step and a[1]=turn.
        p_e: probability of each of the two pre-rotation errors.

    Returns:
        A dict mapping a running index (0, 1, ...) to [x, y, dirc, p].
        A zero step yields the single entry [x, y, dirc, 1]. Any other step
        yields three entries, one per pre-rotation case in the order
        NO_TURN, L_TURN, R_TURN, with probabilities 1 - 2*p_e, p_e, p_e.
        Entries may describe the same state more than once.

    Reads the module-level names NO_TURN, L_TURN, R_TURN, W and L.
    """
    x, y, dirc = s[0], s[1], s[2]
    step, turn = a[0], a[1]

    # A zero step leaves the state untouched; the requested turn is ignored.
    if step == 0:
        return {0: [x, y, dirc, 1]}

    outcomes = {}
    for idx, turn_error in enumerate([NO_TURN, L_TURN, R_TURN]):
        # Pre-rotation: the error-free case keeps the remaining probability mass.
        weight = 1 - 2*p_e if turn_error == 0 else p_e
        heading = (dirc + turn_error) % 12

        # Translate along the axis selected by the (possibly perturbed) heading.
        if heading in (11, 0, 1):
            dx, dy = 0, step
        elif heading in (2, 3, 4):
            dx, dy = step, 0
        elif heading in (5, 6, 7):
            dx, dy = 0, -step
        else:
            dx, dy = -step, 0
        cand_x, cand_y = x + dx, y + dy

        # The chosen turn is applied after the move, whether or not it succeeded.
        final_dirc = (heading + turn) % 12

        if 0 <= cand_x < W and 0 <= cand_y < L:
            outcomes[idx] = [cand_x, cand_y, final_dirc, weight]
        else:
            # Moving off the grid keeps the position; only the heading changes.
            outcomes[idx] = [x, y, final_dirc, weight]

    return outcomes

In [None]:
# This function returns the probability of the next state based on the current state, 
# next action and the (constant) pre-rotate error probability. 

def next_state_p(s, a, s_prime, p_e):
    """Return the probability of reaching `s_prime` from `s` under action `a`.

    Args:
        s: current state [x, y, dirc].
        a: action [step, turn].
        s_prime: candidate next state; any length-3 sequence (list, tuple,
            array) of x, y, dirc.
        p_e: pre-rotate error probability passed to next_state_distr.

    Returns:
        The summed probability of every outcome in the distribution that
        equals `s_prime`; 0 when no outcome matches.
    """
    # Outcomes are stored as lists, and a list never compares equal to a
    # tuple, so normalise the query once instead of silently returning 0.
    target = list(s_prime)

    # Sum over all outcomes because the same state can appear more than once
    # (e.g. several pre-rotation cases that all bump into the same wall).
    return sum(
        outcome[3]
        for outcome in next_state_distr(s, a, p_e).values()
        if outcome[0:3] == target
    )

In [None]:
# This function samples and returns a next state based on the current state, the action
# and the (constant) pre-rotate error probability.

import random 

def next_state(s, a, p_e):
    """Sample one next state for state `s` under action `a`.

    Args:
        s: current state [x, y, dirc].
        a: action [step, turn].
        p_e: pre-rotate error probability passed to next_state_distr.

    Returns:
        A list [x, y, dirc] drawn according to the probabilities reported by
        next_state_distr. None is returned only if no outcome has a positive
        probability.
    """
    S_next_distr = next_state_distr(s, a, p_e)
    draw = random.random()
    cumulative = 0
    fallback = None

    # Compare the random draw with the running total of outcome probabilities.
    for outcome in S_next_distr.values():
        if outcome[3] > 0:
            fallback = outcome[0:3]
        cumulative += outcome[3]
        if draw < cumulative:
            return outcome[0:3]

    # Floating-point rounding can leave the running total slightly below 1,
    # so a draw very close to 1 may fall through the loop. Return the last
    # outcome with positive probability rather than None in that case.
    return fallback

In [None]:
# This function returns the reward given the current state.

import numpy

def reward(s):
    """Return the reward of the grid cell occupied by state `s`.

    Args:
        s: state whose first two entries are the x and y cell indices; any
            further entries (such as the heading) are ignored.

    Returns:
        The reward stored for cell (x, y): -100 on the border cells, -10 on
        the lane markers, 1 on the goal square and 0 elsewhere.

    The map is sized from the module-level W and L. The lane-marker and goal
    coordinates are fixed, so the grid must be large enough to contain them.
    """
    x = s[0]
    y = s[1]

    # Rewards all over the map, indexed as R[x, y].
    R = numpy.zeros((W, L))

    # Lane markers.
    for i in (4, 5, 6):
        R[3, i] = -10

    # Walls: the whole outer border. Slicing with index -1 follows W and L
    # instead of assuming that the last row/column index is 7.
    R[0, :] = -100
    R[-1, :] = -100
    R[:, 0] = -100
    R[:, -1] = -100

    # Goal square.
    R[5, 6] = 1

    return R[x][y]

In [None]:
print('1.1 For a grid of %d x %d, there are %d states.' % (L, W, len(S)))

In [None]:
print('1.2 The robot can choose from %d actions.' % len(A))

In [None]:
# This function returns the names of an action's two components (a list of two strings),
# given an action array. It is used in the tests below.
def action_to_words(a):
    """Translate an action [step, turn] into a two-element list of names.

    The first name is 'SIT', 'FWD' or 'BWD' (any step that is neither SIT
    nor FWD is reported as 'BWD'). The second is 'NO_TURN', 'L_TURN' or
    'R_TURN' (any turn that is neither NO_TURN nor L_TURN is reported as
    'R_TURN').
    """
    step_word = 'SIT' if a[0] == SIT else ('FWD' if a[0] == FWD else 'BWD')
    turn_word = 'NO_TURN' if a[1] == NO_TURN else ('L_TURN' if a[1] == L_TURN else 'R_TURN')
    return [step_word, turn_word]

In [None]:
# Four cases are tested for 1(c). 

print('1.3 Test cases: ')

# Test fixtures: the i-th entries of the four lists together form case i+1.
states = [[1,1,0], [2,3,5], [7,7,6], [5,6,2]]
next_states = [[1,1,0], [2,2,4], [7,7,8], [4,6,3]]
actions = [[SIT, NO_TURN], [FWD, L_TURN], [BWD, R_TURN], [BWD, NO_TURN]]
p_errors = [0, 0.1, 0.2, 0.25]

# Report the transition probability of each (state, action, next state) case.
for i, (s, a, s_prime, p_e) in enumerate(zip(states, actions, next_states, p_errors)):
    p_sa = next_state_p(s, a, s_prime, p_e)
    case_line = 'Case %s: current state:%s, action:%s, next state:%s, p_e:%s' % (
        i+1, s, action_to_words(a), s_prime, p_e)
    print(case_line)
    print('p_sa = %s' % p_sa)

In [None]:
# Four cases are tested for 1(d). Distribution of next states generated are displayed. 
# Test cases are defined in 1(c).

S_SIZE = 100  # samples drawn per run
RUN = 5       # independent runs per test case
print('1.4 Counts of next states generated with %d run and the sample size of %d:' % (RUN, S_SIZE))


# Reuse the cases from 1(c); tally how often each sampled next state occurs.
for i, (s, a, p_e) in enumerate(zip(states, actions, p_errors)):
    print('Case %s: current state:%s, action:%s, p_e:%s' % (i+1, s, action_to_words(a), p_e))
    for r in range(RUN):
        next_state_counter = {}
        for n in range(S_SIZE):
            # States are lists (unhashable), so their string form is the key.
            s_prime = str(next_state(s, a, p_e))
            next_state_counter[s_prime] = next_state_counter.get(s_prime, 0) + 1
        print('Run %s: p_prime and counts: %s' % (r+1, next_state_counter))

In [None]:
# Map for rewards is displayed, assuming the origin is at the bottom left 
# and x axis to the right. 

print('2. Reward Map')
# Print rows from the top (largest y) down so that the origin ends up at the
# bottom left. The top row and last column are derived from L and W rather
# than the literal 7, so the printout follows the grid constants.
for j in range(L):
    for i in range(W):
        r = int(reward([i, L - 1 - j, 0]))
        if i == W - 1:
            # Last column: finish the row with a newline.
            print(r)
        else:
            # Pad each entry to a 6-character column.
            print(r, end = ' ' * (6 - len(str(r))))