## System Setup
Set up the system: state enumeration, action definitions, and transition probability calculation

In [138]:
import numpy as np

class myStates:
    """Enumerate every (x, y, heading) state of a Length-by-Width grid.

    Attributes:
        L: number of x positions (the ``Length`` argument).
        W: number of y positions (the ``Width`` argument).
        stateMatrix: list of ``(x, y, h)`` tuples ordered with x varying
            slowest, then y, then the heading h running over 0..11.

    Raises:
        Exception: if either dimension is zero or negative.
    """

    def __init__(self,Length,Width):
        # The creation message is emitted before the dimensions are checked.
        print("new state created with size %d X %d..." % (Length,Width))
        self.L, self.W = Length, Width
        if min(self.L, self.W) <= 0:
            raise Exception('Dimension of state should be positive integers')

        # Twelve discrete headings are attached to every grid cell.
        headings = np.arange(12)
        self.stateMatrix = [
            (x, y, h)
            for x in range(self.L)
            for y in range(self.W)
            for h in headings
        ]
        
# Creating Actions
class myActions:
    """Hold a single action as the pair ``(act, turn)`` in ``actionMat``.

    Action pairs listed for this model:
        ( 0,  0)  stay still
        ( 1,  0)  forward only
        ( 1,  1)  forward clockwise
        ( 1, -1)  forward counter-clockwise
        (-1,  0)  backward only
        (-1,  1)  backward clockwise
        (-1, -1)  backward counter-clockwise

    Constructing an instance prints a short confirmation message.
    """

    def __init__(self,act,turn):
        chosen = (act, turn)
        self.actionMat = chosen
        print('action done...')
        
# Creating Probability Space functions
def transitionProbability(pe,s,a,s_next,myStates):
    """Return the probability of reaching ``s_next`` from ``s`` under action ``a``.

    Model implemented here: before moving, the heading slips by +1 with
    probability ``pe``, by -1 with probability ``pe``, or stays as it is with
    probability ``1 - 2*pe``.  The robot then takes one step along the axis
    tied to its (possibly slipped) heading, scaled by ``a[0]`` (1 = forward,
    -1 = backward), and finally ``a[1]`` is added to the heading modulo 12.
    A step that would leave the grid keeps that coordinate unchanged.  The
    action ``(0, 0)`` keeps the state as it is with probability 1.

    Args:
        pe: error probability; must lie in [0, 0.5].
        s: current state ``(x, y, h)`` with heading ``h`` in 0..11.
        a: action ``(move, turn)``.
        s_next: candidate next state ``(x', y', h')``.
        myStates: object exposing the grid extents as ``L`` (x) and ``W`` (y).

    Returns:
        The transition probability; ``0.0`` when ``s_next`` cannot be reached.
        The value is also printed.

    Raises:
        Exception: if ``pe`` is outside [0, 0.5].
        KeyError: if the heading of ``s`` is not one of 0..11.
    """
    L = myStates.L
    W = myStates.W

    # pe threshold
    if pe > 0.5 or pe < 0.0:
        raise Exception('Error probability should lie between 0 and 0.5')

    # States are used as dictionary keys below, so accept lists as well.
    s = tuple(s)
    s_next = tuple(s_next)

    # Unit step taken by a robot facing each of the twelve headings.
    pos_x, pos_y, neg_x, neg_y = (1, 0), (0, 1), (-1, 0), (0, -1)
    step_for_heading = {
        11: pos_y, 0: pos_y, 1: pos_y,
        2: pos_x, 3: pos_x, 4: pos_x,
        5: neg_y, 6: neg_y, 7: neg_y,
        8: neg_x, 9: neg_x, 10: neg_x,
    }

    h = s[2]
    if h not in step_for_heading:
        raise KeyError(h)

    # Map every reachable next state to its probability.
    if a[0] == 0 and a[1] == 0:
        # Staying still involves no motion and therefore no heading error.
        transProb = {s: 1}
    else:
        transProb = {}
        # NOTE: each slipped heading uses its OWN step direction.  The earlier
        # hand-written table paired the h-1 outcome with the step of h+2,
        # contradicting the step it used when h-1 was the starting heading.
        outcomes = ((h, 1 - 2*pe), ((h + 1) % 12, pe), ((h - 1) % 12, pe))
        for h_slipped, prob in outcomes:
            dx, dy = step_for_heading[h_slipped]
            x_new = s[0] + a[0]*dx      # a[0] selects forward or backward
            y_new = s[1] + a[0]*dy
            xd = x_new if 0 <= x_new <= L-1 else s[0]   # off-grid: stay put
            yd = y_new if 0 <= y_new <= W-1 else s[1]   # off-grid: stay put
            hd = (h_slipped + a[1]) % 12                # apply commanded turn
            transProb[(xd, yd, hd)] = prob

    # match with the keys in transProb dictionary
    if s_next in transProb:
        print("p = %f" %(transProb[s_next]))
        return transProb[s_next]
    print("p = 0")
    return 0.0
    
        

# update state based on action and current state
def stateUpdate(pe,s,a,myStates):
    """Sample the next state reached from ``s`` when action ``a`` is taken.

    Every state in ``myStates.stateMatrix`` is scored with
    ``transitionProbability``; the states with non-zero probability form a
    discrete distribution from which one state is drawn with
    ``np.random.choice``.

    Args:
        pe: error probability forwarded to ``transitionProbability``.
        s: current state ``(x, y, h)``.
        a: action ``(move, turn)``.
        myStates: object providing ``stateMatrix`` plus whatever
            ``transitionProbability`` reads from it.

    Returns:
        The sampled next state, taken from ``myStates.stateMatrix``.

    Raises:
        ValueError: if no state in ``myStates.stateMatrix`` has a non-zero
            transition probability (for example when ``s`` is not on the
            grid), or if numpy rejects the collected probabilities.
    """
    stateList = myStates.stateMatrix
    P = []
    # search for the probability of transferring to each state s_next given
    # the current state and action
    for s_next in stateList:
        pt = transitionProbability(pe,s,a,s_next,myStates)
        if pt != 0:
            P.append((s_next,pt))
            print(P)

    # Without this guard numpy fails with an unrelated-looking message.
    if not P:
        raise ValueError(
            'No reachable next state found for state %s and action %s' % (s, a)
        )

    # Build the probability vector in one step instead of repeated np.append.
    prob = np.array([pt for _, pt in P], dtype=float)

    # return a choice given discrete pdf
    state_id = np.random.choice(np.arange(len(P)),p=prob)
    return P[state_id][0]
    


    
    

## Environment Setup

In [165]:
# Create reward map for state input
def rewardFun(s,myStates):
    """Return (and print) the reward attached to state ``s``.

    Rewards, checked in this order:
        -100  any cell on the outer border of the grid
        -1    cells (2,2), (2,3), (2,4), (4,2), (4,3), (4,4)
        +1    cell (3,4)
         0    every other cell
    The heading only takes part in the range check, not in the reward.

    Args:
        s: state ``(x, y, h)``.
        myStates: object providing ``stateMatrix``, ``L`` and ``W``.

    Raises:
        Exception: if x, y or h lies outside the grid / heading range.
    """
    # The state list is read here but is not needed for the lookup itself.
    _states = myStates.stateMatrix
    length = myStates.L
    width = myStates.W

    x_pos, y_pos, h = s[0], s[1], s[2]

    out_of_range = (
        x_pos < 0 or x_pos >= length
        or y_pos < 0 or y_pos >= width
        or h < 0 or h >= 12
    )
    if out_of_range:
        raise Exception('Invalid state definition: [x,y,h] should be within range')

    cell = (x_pos, y_pos)
    penalty_cells = ((2, 2), (2, 3), (2, 4), (4, 2), (4, 3), (4, 4))
    goal_cell = (3, 4)

    if x_pos in (0, length - 1) or y_pos in (0, width - 1):
        r = -100
    elif cell in penalty_cells:
        r = -1
    elif cell == goal_cell:
        r = 1
    else:
        r = 0

    print("reward for state (%d,%d,%d) is %d" %(s[0],s[1],s[2],r))
    return r
        
        

In [167]:
# testing & debugging:
# Build grids of several sizes in turn; S ends up bound to the last one (6 X 6).
S = [myStates(length, width) for length, width in ((4, 3), (2, 7), (1, 1), (6, 6))][-1]

myState = S.stateMatrix
print(myState)

# Probability of a heading change while staying still.
p = transitionProbability(pe=0.2, s=(0,0,0), a=(0,0), s_next=(0,0,1), myStates=S)

# Draw one successor of (4,1,1) for a forward-clockwise action.
s_next = stateUpdate(pe=0.2, s=(4,1,1), a=(1,1), myStates=S)

# Reward of the corner cell.
r = rewardFun(s=(0,0,0), myStates=S)

new state created with size 4 X 3...
new state created with size 2 X 7...
new state created with size 1 X 1...
new state created with size 6 X 6...
[(0, 0, 0), (0, 0, 1), (0, 0, 2), (0, 0, 3), (0, 0, 4), (0, 0, 5), (0, 0, 6), (0, 0, 7), (0, 0, 8), (0, 0, 9), (0, 0, 10), (0, 0, 11), (0, 1, 0), (0, 1, 1), (0, 1, 2), (0, 1, 3), (0, 1, 4), (0, 1, 5), (0, 1, 6), (0, 1, 7), (0, 1, 8), (0, 1, 9), (0, 1, 10), (0, 1, 11), (0, 2, 0), (0, 2, 1), (0, 2, 2), (0, 2, 3), (0, 2, 4), (0, 2, 5), (0, 2, 6), (0, 2, 7), (0, 2, 8), (0, 2, 9), (0, 2, 10), (0, 2, 11), (0, 3, 0), (0, 3, 1), (0, 3, 2), (0, 3, 3), (0, 3, 4), (0, 3, 5), (0, 3, 6), (0, 3, 7), (0, 3, 8), (0, 3, 9), (0, 3, 10), (0, 3, 11), (0, 4, 0), (0, 4, 1), (0, 4, 2), (0, 4, 3), (0, 4, 4), (0, 4, 5), (0, 4, 6), (0, 4, 7), (0, 4, 8), (0, 4, 9), (0, 4, 10), (0, 4, 11), (0, 5, 0), (0, 5, 1), (0, 5, 2), (0, 5, 3), (0, 5, 4), (0, 5, 5), (0, 5, 6), (0, 5, 7), (0, 5, 8), (0, 5, 9), (0, 5, 10), (0, 5, 11), (1, 0, 0), (1, 0, 1), (1, 0, 2), (1, 0, 3), (1,

In [124]:
# Scratch cell: np.arange(n) yields the integer indices 0..n-1 (here [0, 1]).
np.arange(2)

array([0, 1])