In [1]:
import numpy as np
import random
import pygame

env_y = 5
env_x = 5
env_z = env_y * env_x
env_state_space = np.ones((env_z, env_y, env_x))
env_state_space = np.negative(env_state_space)
print("State Space: ", env_state_space.shape)

y_bound = env_y - 1
x_bound = env_x - 1

env_term_state = list()
for y in range(env_y):
    for x in range(env_x):
        env_term_state.append((y,x))
        

print("Terminal State Co-ordinates", env_term_state)

for state in range(env_z):
    env_state_space[state][env_term_state[state]] = 50

print(env_state_space[24])

pygame 1.9.6
Hello from the pygame community. https://www.pygame.org/contribute.html
State Space:  (25, 5, 5)
Terminal State Co-ordinates [(0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (1, 0), (1, 1), (1, 2), (1, 3), (1, 4), (2, 0), (2, 1), (2, 2), (2, 3), (2, 4), (3, 0), (3, 1), (3, 2), (3, 3), (3, 4), (4, 0), (4, 1), (4, 2), (4, 3), (4, 4)]
[[-1. -1. -1. -1. -1.]
 [-1. -1. -1. -1. -1.]
 [-1. -1. -1. -1. -1.]
 [-1. -1. -1. -1. -1.]
 [-1. -1. -1. -1. 50.]]


# Movement Code of the Environment

In [2]:
def move(state,action):
    # (y,x)
    
    switch = {
        0: up,
        1: right,
        2: down,
        3: left,
        4: pickup
    }

    func = switch.get(action, lambda: "Invalid action")
    new_state = func(state)
    return new_state

def valid_move(state):
    if state[0] > y_bound:
        state = (state[0]-1, state[1])
    elif state[0] < 0:
        state = (state[0]+1, state[1])
        
    if state[1] > x_bound:
        state = (state[0], state[1] - 1)
    elif state[1] < 0:
        state = (state[0], state[1] + 1)
    
    return state
    
def up(state):
    new_state = (state[0] - 1, state[1])
    new_state = valid_move(new_state)
    return new_state

def right(state):
    new_state = (state[0], state[1] + 1)
    new_state = valid_move(new_state)
    return new_state
    
def down(state):
    new_state = (state[0] + 1, state[1])
    new_state = valid_move(new_state)
    return new_state
    
def left(state):
    new_state = (state[0], state[1] - 1)
    new_state = valid_move(new_state)
    return new_state
    
def pickup(state):
    return state

# Actual transition of the environment based on action

In [3]:
def step(state, action):
    
    state_prime = move(state,action)
    
    reward = env_state_space[(24, state_prime[0], state_prime[1])]
    
    #insert compare to grand dictionary of terminal states
    
    if state_prime == env_term_state[24]:
        done = True
        #done = False
    else:
        done = False
    
    return state_prime, reward, done

# Best action selection function

In [4]:
def best_action_value(state):
    best_action = None
    best_value  = float('-inf')

    for action in ACTION_SPACE:
        state_prime, reward, done = step(state, action) 
    
        v = reward + gamma * V[24, state_prime[0], state_prime[1]] 
        
        if v > best_value:
            best_value = v
            best_action = action
    return best_action

In [6]:
ACTION_SPACE = list(range(0,4))

print("Action Space:")
print(ACTION_SPACE)
print(["Up", "Right", "Down", "Left"])

Reward = env_state_space
V = np.zeros((env_z, env_y, env_x)) 
Pi = np.zeros((env_z, env_y, env_x))  

print("V - ", V.shape)
print("Pi - ", Pi.shape)
print(Reward[24])

print(V[24])
print(Pi[24])


gamma = 0.9 
significant_improvement = 0.01 

Action Space:
[0, 1, 2, 3]
['Up', 'Right', 'Down', 'Left']
V -  (25, 5, 5)
Pi -  (25, 5, 5)
[[-1. -1. -1. -1. -1.]
 [-1. -1. -1. -1. -1.]
 [-1. -1. -1. -1. -1.]
 [-1. -1. -1. -1. -1.]
 [-1. -1. -1. -1. 50.]]
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]


In [7]:
#for one slice of state space
for iteration in range(20):
    z = 24
    for y in range(env_y):
        for x in range(env_x):
            s = (y, x)
            print("Current State - ", s)
            #old_v = V[(24, s[0], s[1])]
            action = best_action_value(s)
            print("Action: ", action)
            s_new, rew, done = step(s,action) 
            V[24,s[0],s[1]] = rew + gamma * V[(24,s_new[0],s_new[1])] 
            Pi[24,s[0], s[1]] = action
print(V[24])
print(Pi[24])

Current State -  (0, 0)
Action:  0
Current State -  (0, 1)
Action:  0
Current State -  (0, 2)
Action:  0
Current State -  (0, 3)
Action:  0
Current State -  (0, 4)
Action:  0
Current State -  (1, 0)
Action:  1
Current State -  (1, 1)
Action:  1
Current State -  (1, 2)
Action:  1
Current State -  (1, 3)
Action:  1
Current State -  (1, 4)
Action:  1
Current State -  (2, 0)
Action:  1
Current State -  (2, 1)
Action:  1
Current State -  (2, 2)
Action:  1
Current State -  (2, 3)
Action:  1
Current State -  (2, 4)
Action:  1
Current State -  (3, 0)
Action:  1
Current State -  (3, 1)
Action:  1
Current State -  (3, 2)
Action:  1
Current State -  (3, 3)
Action:  1
Current State -  (3, 4)
Action:  2
Current State -  (4, 0)
Action:  1
Current State -  (4, 1)
Action:  1
Current State -  (4, 2)
Action:  1
Current State -  (4, 3)
Action:  1
Current State -  (4, 4)
Action:  1
Current State -  (0, 0)
Action:  0
Current State -  (0, 1)
Action:  0
Current State -  (0, 2)
Action:  0
Current State -  (0,

Current State -  (1, 0)
Action:  1
Current State -  (1, 1)
Action:  1
Current State -  (1, 2)
Action:  1
Current State -  (1, 3)
Action:  1
Current State -  (1, 4)
Action:  2
Current State -  (2, 0)
Action:  1
Current State -  (2, 1)
Action:  1
Current State -  (2, 2)
Action:  1
Current State -  (2, 3)
Action:  1
Current State -  (2, 4)
Action:  2
Current State -  (3, 0)
Action:  1
Current State -  (3, 1)
Action:  1
Current State -  (3, 2)
Action:  1
Current State -  (3, 3)
Action:  1
Current State -  (3, 4)
Action:  2
Current State -  (4, 0)
Action:  1
Current State -  (4, 1)
Action:  1
Current State -  (4, 2)
Action:  1
Current State -  (4, 3)
Action:  1
Current State -  (4, 4)
Action:  1
Current State -  (0, 0)
Action:  1
Current State -  (0, 1)
Action:  1
Current State -  (0, 2)
Action:  1
Current State -  (0, 3)
Action:  1
Current State -  (0, 4)
Action:  2
Current State -  (1, 0)
Action:  1
Current State -  (1, 1)
Action:  1
Current State -  (1, 2)
Action:  1
Current State -  (1,

In [34]:
done = False
reward_total = 0
iteration = 0

start_y = np.random.randint(low=0, high=env_y)
start_x = np.random.randint(low=0, high=env_x)
current_state = (start_y, start_x)

BLACK = (0, 0, 0)
WHITE = (255, 255, 255)
GREEN = (0, 255, 0)
RED = (255, 0, 0)
 
pygame.init()

CONSTANT_SIZE = 50
height = env_state_space.shape[1]
width = env_state_space.shape[2]

size_y = (CONSTANT_SIZE * height) + height + 1
size_x = (CONSTANT_SIZE * width) + width + 1
screen_size = (size_y, size_x)

screen = pygame.display.set_mode(screen_size)

pygame.display.set_caption("Badworld")
 
session = True

clock = pygame.time.Clock()

print("Starting position: ", current_state)

screen.fill(BLACK)
    
for y in range(height):
    for x in range(width):                
        if y == current_state[0] and x == current_state[1]:
            colour = GREEN
        elif env_state_space[24,y,x] == 50:
            colour = RED
        else:
            colour = WHITE
            
        rect_pos_x = x*(CONSTANT_SIZE+1)+1
        rect_pos_y = y*(CONSTANT_SIZE+1)+1
        rect = pygame.Rect(rect_pos_x, rect_pos_y , CONSTANT_SIZE, CONSTANT_SIZE)
        pygame.draw.rect(screen, colour, rect) 

pygame.display.flip()
pygame.time.wait(1000)    
            
while session:
    
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            session = False
 
    if done == True:
        break

    screen.fill(BLACK)
    
    action = Pi[24,current_state[0],current_state[1]]
    print(action)
    obs, rew, done = step(current_state, action) 
    print(obs)
    current_state = obs
    reward_total += rew
    
    for y in range(height):
        for x in range(width):                
            if y == current_state[0] and x == current_state[1]:
                colour = GREEN
            elif env_state_space[24,y,x] == 50:
                colour = RED
            else:
                colour = WHITE
            
            rect_pos_x = x*(CONSTANT_SIZE+1)+1
            rect_pos_y = y*(CONSTANT_SIZE+1)+1
            rect = pygame.Rect(rect_pos_x, rect_pos_y , CONSTANT_SIZE, CONSTANT_SIZE)
            pygame.draw.rect(screen, colour, rect)    
    
    print("Reward: " ,reward_total)
    
    pygame.display.flip()
    pygame.time.wait(1000)
    clock.tick(60)

pygame.quit()

print("Ending position: ", current_state)

Starting position:  (0, 4)
2.0
(1, 4)
Iteration 0 - Reward: -1.0
2.0
(2, 4)
Iteration 0 - Reward: -2.0
2.0
(3, 4)
Iteration 0 - Reward: -3.0
2.0
(4, 4)
Iteration 0 - Reward: 47.0
Ending position:  (4, 4)


In [30]:
pygame.quit()

In [None]:
#texting dreams

#font = pygame.font.Font('freesansbold.ttf', 10) 
#draw_text = True

#if draw_text == True:
#        text_pos_x = current_state[1]*(CONSTANT_SIZE+1)+1
#        text_pos_y = current_state[0]*(CONSTANT_SIZE+1)+1
#        rect = pygame.Rect(text_pos_x, text_pos_y , CONSTANT_SIZE, CONSTANT_SIZE)
#        text_word = "(" +  str(current_state[0]) + "; " + str(current_state[1]) + ")"
#        text_pos_x = current_state[1]*(CONSTANT_SIZE+1)+1
#        text_pos_y = current_state[0]*(CONSTANT_SIZE+1)+1
#        text = font.render(text_word, True, BLACK, WHITE) 
#        textRect = text.get_rect()  
#        textRect.center = (text_pos_x+(CONSTANT_SIZE/2), text_pos_y+(CONSTANT_SIZE/2)) 
#        screen.blit(text, textRect)