In [57]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt

In [81]:
class state:
    '''
    One cell of the grid world: its reward, utility estimate and policy.

    The policy is stored as a move index laid out like a compass:
        3
       1 2
        0
    i.e. 0 = down, 1 = left, 2 = right, 3 = up. A policy of None means no
    move has been chosen for this cell.
    '''
    def __init__(self):
        # A fresh cell has no reward and no chosen move; utility starts at 0.
        self.reward = self.policy = None
        self.utility = 0

    def getPolicy(self):
        '''Return the stored move index, or None if none has been set.'''
        return self.policy

    def getUtility(self):
        '''Return the current utility estimate of this cell.'''
        return self.utility

    def getReward(self):
        '''Return the reward of this cell (None until setReward is called).'''
        return self.reward

    def setReward(self, reward):
        '''Store `reward` on this cell; the utility is left untouched.'''
        self.reward = reward

class grid:
    '''
    Rectangular grid world made of `state` cells, solved by value iteration.

    Cells live in a flat, row-major list: the cell in row r and column c is
    self.grid[r*width + c]. Row 0 is the BOTTOM row, which is why every
    display helper walks the rows in reverse. A cell whose reward (and hence
    utility) is None is treated as a wall.
    '''
    def __init__(self, width, height):
        self.height = height
        self.width = width
        self.size = self.width*self.height
        self.grid = [state() for i in range(self.size)]

    def _rowsTopDown(self):
        '''Yield each row as a list of cells, top row first (display order).'''
        for row in reversed(range(self.height)):
            start = row*self.width
            yield self.grid[start:start+self.width]

    def _printRows(self, formatCell):
        '''Print one line per row, joining formatCell(cell) with single spaces.

        Each line is built as one string and printed with a single-argument
        print(...), which behaves the same under Python 2 and Python 3 and
        reproduces what the former comma-terminated print statements emitted.
        '''
        for cells in self._rowsTopDown():
            print(" ".join(formatCell(cell) for cell in cells))

    # Enter rewards as a flat list read as rewards[row*width + col], where
    # row 0 is the bottom row of the grid. Use None for a wall cell.
    def setGridRewards(self, rewards):
        '''Assign rewards[i] to cell i and use it as that cell's initial utility.'''
        for i in range(len(rewards)):
            self.grid[i].setReward(rewards[i])
            self.grid[i].utility = rewards[i]

    def printGridRewards(self):
        '''Print the reward of every cell, top row first; walls show as X.'''
        def formatCell(cell):
            if cell.reward is None:
                return "\t X"
            return "\t" + str(cell.reward)
        self._printRows(formatCell)

    def getUtilities(self):
        '''Return all utilities as a flat list in display order (top row first).

        Cells whose utility is None are reported as 0.
        '''
        utilities = []
        for cells in self._rowsTopDown():
            for cell in cells:
                utilities.append(0 if cell.utility is None else cell.utility)
        return utilities

    def printGridUtility(self):
        '''Print every utility rounded to 3 decimals, top row first; walls show as X.'''
        def formatCell(cell):
            if cell.utility is None:
                return "\t X"
            # float() keeps integer utilities printing as e.g. 1.0 on Python 3,
            # where round(int, 3) would otherwise stay an int.
            return "\t" + str(round(float(cell.utility), 3))
        self._printRows(formatCell)

    def printGridPolicy(self):
        '''Print the chosen move of every cell as an arrow, top row first.

        Cells without a policy show their reward instead, or X for a wall.
        '''
        moves = ['v ', '< ', '> ', '^ ']

        def formatCell(cell):
            if cell.policy is not None:
                return moves[cell.policy]
            # Test for a wall BEFORE comparing with 0: None > 0 is only
            # tolerated by Python 2 and raises TypeError on Python 3.
            if cell.reward is None:
                return 'X '
            if cell.reward > 0:
                return str(cell.reward) + ' '
            return str(cell.reward)
        self._printRows(formatCell)

    def valueIteration(self, gamma, empty, numIterations = 50):
        '''Run value iteration sweeps, updating utilities and policies in place.

        gamma         -- discount factor applied to the best expected utility.
        empty         -- reward value that identifies the cells to update; cells
                         with any other reward (including None) are left as is.
        numIterations -- number of full sweeps over the grid.

        A move goes in the intended direction with probability 0.8 and in
        each of the two perpendicular directions with probability 0.1.
        Moving off the grid or into a wall leaves the agent where it is.
        Updates are written back immediately, so cells later in a sweep see
        the new utilities of cells earlier in the same sweep.
        '''
        for i in range(numIterations):
            for j in range(self.size):
                cell = self.grid[j]
                if cell.reward != empty:
                    continue

                here = cell.utility
                # Column within the row; derived from the actual width so the
                # edge tests are correct for grids of any size.
                col = j % self.width

                # find utility of neighbors, falling back to this cell at an edge
                if j >= self.width:
                    bottomUtility = self.grid[j-self.width].utility
                else:
                    bottomUtility = here
                if col != 0:
                    leftUtility = self.grid[j-1].utility
                else:
                    leftUtility = here
                if col != self.width-1:
                    rightUtility = self.grid[j+1].utility
                else:
                    rightUtility = here
                if j < (self.height-1)*self.width:
                    topUtility = self.grid[j+self.width].utility
                else:
                    topUtility = here

                #if you hit a wall stay in the same state
                if bottomUtility is None:
                    bottomUtility = here
                if leftUtility is None:
                    leftUtility = here
                if rightUtility is None:
                    rightUtility = here
                if topUtility is None:
                    topUtility = here

                # Order matches the policy encoding: 0 down, 1 left, 2 right, 3 up.
                expectedUtility = [
                    0.8*bottomUtility+0.1*leftUtility+0.1*rightUtility,
                    0.8*leftUtility+0.1*topUtility+0.1*bottomUtility,
                    0.8*rightUtility+0.1*topUtility+0.1*bottomUtility,
                    0.8*topUtility+0.1*leftUtility+0.1*rightUtility,
                ]

                best = max(expectedUtility)
                # index() returns the first maximum, so ties favor the lower index.
                cell.policy = expectedUtility.index(best)
                cell.utility = cell.reward+gamma*best
                    
                         

# Reward of an ordinary cell; valueIteration only updates cells carrying it.
empty = -0.4
wall = None       # shown as X in the printed grids
discount = 0.99

# Flat, row-major reward list; the first row below is the BOTTOM row of the grid.
rewards = [
    1,     -1,    empty, wall,  -1,    -1,
    empty, empty, empty, empty, empty, empty,
    empty, empty, empty, wall,  empty, empty,
    empty, empty, empty, wall,  empty, 3,
    empty, empty, empty, wall,  -1,    empty,
    empty, -1,    empty, empty, empty, empty,
]

newgrid = grid(6, 6)
newgrid.setGridRewards(rewards)
newgrid.valueIteration(discount, empty)
newgrid.printGridUtility()
newgrid.printGridPolicy()

	-1.455 	-1.0 	-0.34 	0.229 	0.737 	1.468
	-1.201 	-1.307 	-0.887 	 X 	-1.0 	2.083
	-0.698 	-1.067 	-0.901 	 X 	2.058 	3.0
	-0.155 	-0.643 	-0.387 	 X 	1.829 	2.394
	0.409 	-0.239 	0.146 	0.79 	1.306 	1.804
	1.0 	-1.0 	-0.426 	 X 	-1.0 	-1.0
>  -1 >  >  >  v 
v  >  ^  X  -1 v 
v  v  v  X  >  3 
v  v  v  X  >  ^ 
v  <  >  >  ^  ^ 
1  -1 ^  X  -1 -1


In [9]:
# Scratch check of how a list renders through str(). The parenthesized,
# single-argument form prints the same text under Python 2 and Python 3.
print(str([1, 2, 3]))

[1, 2, 3]
