### Question3: MDP
- Note that each iteration is taking around 4sec if size = (10, 5, 5) and 7 minute if size = (20, 10, 10).
- Method:
> Indefinitte Value Iteration. 

In [441]:
import itertools
import numpy as np
from tqdm import tqdm, tqdm_notebook
import copy

def poisson(lambda1, n):
    return (np.power(lambda1, n)/np.math.factorial(n))*(np.exp(-lambda1))

##########CONSTANTS###########
requestLambda = [3, 2, 2]
returnLambda = [3, 1, 1]

#Number of maximum cars allowed at loc1, loc2, loc3
ml1, ml2, ml3 = 10, 5, 5

gamma = 0.9
numIter = 10
tol = 0.01
Size = (ml1, ml2, ml3)

n1 = np.arange(ml1)
n2 = np.arange(ml2)
n3 = np.arange(ml3)
n4 = np.arange(-5, 6)
#=============================#

#================PreCalculations of Probability and Rewards============#
JointProbLocation1 = np.ones((ml1, ml1, ml1, ml1))*0
JointProbLocation2 = np.ones((ml2, ml2, ml2, ml2))*0
JointProbLocation3 = np.ones((ml3, ml3, ml3, ml3))*0
JointRewardLocation1 = np.ones((ml1, ml1, ml1, ml1))*0
JointRewardLocation2 = np.ones((ml2, ml2, ml2, ml2))*0
JointRewardLocation3 = np.ones((ml3, ml3, ml3, ml3))*0

ProbRequestLocation1 = np.asarray([poisson(requestLambda[0], i) for i in range(ml1)])
ProbRequestLocation2 = np.asarray([poisson(requestLambda[1], i) for i in range(ml2)])
ProbRequestLocation3 = np.asarray([poisson(requestLambda[2], i) for i in range(ml3)])

RewardRequestLocation1 = np.asarray([poisson(requestLambda[0], i)*i*10 for i in range(ml1)])
RewardRequestLocation2 = np.asarray([poisson(requestLambda[1], i)*i*10 for i in range(ml2)])
RewardRequestLocation3 = np.asarray([poisson(requestLambda[2], i)*i*10 for i in range(ml3)])

ProbReturnLocation1 = np.asarray([poisson(returnLambda[0], i) for i in range(ml1)])
ProbReturnLocation2 = np.asarray([poisson(returnLambda[1], i) for i in range(ml2)])
ProbReturnLocation3 = np.asarray([poisson(returnLambda[2], i) for i in range(ml3)])


for (reqStart, reqEnd, retStart, retEnd) in itertools.product(n1, n1, n1, n1):  #start, End inclusive
    if(reqEnd - reqStart + 1 != retEnd - retStart + 1):
        continue
        
    JointProbLocation1[reqStart, reqEnd, retStart, retEnd] = np.sum(
        ProbRequestLocation1[reqStart:reqEnd+1]*ProbReturnLocation1[retStart:retEnd+1])
    JointRewardLocation1[reqStart, reqEnd, retStart, retEnd] = np.sum(
        RewardRequestLocation1[reqStart:reqEnd+1]*ProbReturnLocation1[retStart:retEnd+1])
    
for (reqStart, reqEnd, retStart, retEnd) in itertools.product(n2, n2, n2, n2):
    if(reqEnd - reqStart + 1 != retEnd - retStart + 1):
        continue
    
    JointProbLocation2[reqStart, reqEnd, retStart, retEnd] = np.sum(
        ProbRequestLocation2[reqStart:reqEnd+1]*ProbReturnLocation2[retStart:retEnd+1])
    JointRewardLocation2[reqStart, reqEnd, retStart, retEnd] = np.sum(
        RewardRequestLocation2[reqStart:reqEnd+1]*ProbReturnLocation2[retStart:retEnd+1])
    
for (reqStart, reqEnd, retStart, retEnd) in itertools.product(n3, n3, n3, n3):
    if(reqEnd - reqStart + 1 != retEnd - retStart + 1):
        continue
    
    JointProbLocation3[reqStart, reqEnd, retStart, retEnd] = np.sum(
        ProbRequestLocation3[reqStart:reqEnd+1]*ProbReturnLocation3[retStart:retEnd+1])
    JointRewardLocation3[reqStart, reqEnd, retStart, retEnd] = np.sum(
        RewardRequestLocation3[reqStart:reqEnd+1]*ProbReturnLocation3[retStart:retEnd+1])

#=========================_________________________========================#

class State:
    def __init__(self, car1, car2, car3):
        self.car1 = car1
        self.car2 = car2
        self.car3 = car3
        
def normInfinity(currValue, optimalValue):
    maxDiff = np.max(np.abs(currValue - optimalValue))
    return maxDiff
        

def isValidState(car1, car2, car3):
     return car1 >= 0 and car2 >=0 and car3 >=0 and car1 <ml1 and car2 < ml2 and car3 < ml3
    
def normalize(car1, car2, car3):
    car1 = min(car1, ml1 - 1)
    car2 = min(car2, ml2 - 1)
    car3 = min(car3, ml3 - 1)
    
    return car1, car2, car3

def getCost(mv1, mv2, mv3):
    cost = np.abs(mv1)*2
    return cost

MOVES = []
for mv1, mv2, mv3 in itertools.product(n4, n4, n4):
    if(mv1 + mv2 + mv3 != 0):
        continue
    MOVES.append((mv1, mv2, mv3))


#================================ Value Iteration ===========================#
def ValueIteration(V, Policy):
    for iter1 in (range(numIter)):
        oldV = V.copy()
        print("iter: ", iter1)
        a = time.time()
        #State value
        for i, j, k in itertools.product(n1, n2, n3):
            maxReward = -10000.0
            for (mv1, mv2, mv3) in MOVES:
#                 print("move1 = ", mv1, mv2, mv3)
                reward = 0.0
                car1 = i + mv1
                car2 = j + mv2
                car3 = k + mv3
#                 print(i, j, k, car1, car2, car3)
                
                if(not isValidState(car1, car2, car3)):
                    continue
                
                reward-=getCost(mv1, mv2, mv3)
#                 print("Move2 = ", mv1, mv2, mv3)
                
                for i1, j1, k1 in itertools.product(n1, n2, n3):
                    reqStart = max(0, car1 - i1)
                    retStart = max(0, i1 - car1)
                    retEnd = i1
                    p1_sum = JointProbLocation1[reqStart, car1, retStart, retEnd]
                    r1_sum = JointRewardLocation1[reqStart, car1, retStart, retEnd]
                    reqStart = max(0, car2 - j1)
                    retStart = max(0, j1 - car2)
                    retEnd = j1
                    p2_sum = JointProbLocation2[reqStart, car2, retStart, retEnd]
                    r2_sum = JointRewardLocation2[reqStart, car2, retStart, retEnd]
                    reqStart = max(0, car3 - k1)
                    retStart = max(0, k1 - car3)
                    retEnd = k1
                    p3_sum = JointProbLocation3[reqStart, car3, retStart, retEnd]
                    r3_sum = JointRewardLocation3[reqStart, car3, retStart, retEnd]

                    immediateReward = p1_sum*p2_sum*r3_sum + p1_sum*r2_sum*p3_sum + r1_sum*p2_sum*p3_sum
                    reward+= immediateReward + gamma*p1_sum*p2_sum*p3_sum*oldV[i1, j1, k1] 
                if(reward > maxReward):
                    maxReward = reward
#                     print("here")
                    Policy[i, j, k] = np.array([mv1, mv2, mv3])
            V[i, j, k] = maxReward 
            
        if(normInfinity(V, oldV) <= 0.1):
            print("Converged in {} iteration".format(iter1 + 2))
            break
        print(time.time() - a)
    return V, Policy
#============================ _________________________ ===================#

In [442]:
import time
Size = (ml1, ml2, ml3)
Value = np.zeros(Size)
Policy = np.zeros(Size + tuple([3]))
a = time.time()
V, P = ValueIteration(Value, Policy)
print("Time Taken = ",time.time() - a)
np.savetxt("Value.txt", np.ravel(V).reshape(50, 5), fmt = "%.2f", header = 'values')
np.savetxt("Policy.txt", np.ravel(P).reshape(250, 3), fmt = "%i")
print("Value.txt Saved")
print("Policy.txt Saved")

iter:  0
6.4685938358306885
iter:  1
7.132618427276611
iter:  2
6.022885799407959
iter:  3
6.0798728466033936
iter:  4
5.814639568328857
iter:  5
5.9112160205841064
iter:  6
Converged in 8 iteration
Time Taken =  43.431660413742065
Value.txt Saved
Policy.txt Saved


#### Appproach2 for PreCalculation

In [501]:
##########CONSTANTS###########
requestLambda = [3, 2, 2]
returnLambda = [3, 1, 1]

#Number of maximum cars allowed at loc1, loc2, loc3
ml1, ml2, ml3 = 10, 5, 5

gamma = 0.9
numIter = 2
tol = 0.01
Size = (ml1, ml2, ml3)

n1 = np.arange(ml1)
n2 = np.arange(ml2)
n3 = np.arange(ml3)
n4 = np.arange(-5, 6)
#=============================#

#================PreCalculations of Probability and Rewards============#
JointProbLocation1 = np.ones((ml1, ml1))*0
JointProbLocation2 = np.ones((ml2, ml2))*0
JointProbLocation3 = np.ones((ml3, ml3))*0
JointRewardLocation1 = np.ones((ml1, ml1))*0
JointRewardLocation2 = np.ones((ml2, ml2))*0
JointRewardLocation3 = np.ones((ml3, ml3))*0

ProbRequestLocation1 = np.asarray([poisson(requestLambda[0], i) for i in range(ml1)])
ProbRequestLocation2 = np.asarray([poisson(requestLambda[1], i) for i in range(ml2)])
ProbRequestLocation3 = np.asarray([poisson(requestLambda[2], i) for i in range(ml3)])

RewardRequestLocation1 = np.asarray([poisson(requestLambda[0], i)*i*10 for i in range(ml1)])
RewardRequestLocation2 = np.asarray([poisson(requestLambda[1], i)*i*10 for i in range(ml2)])
RewardRequestLocation3 = np.asarray([poisson(requestLambda[2], i)*i*10 for i in range(ml3)])

ProbReturnLocation1 = np.asarray([poisson(returnLambda[0], i) for i in range(ml1)])
ProbReturnLocation2 = np.asarray([poisson(returnLambda[1], i) for i in range(ml2)])
ProbReturnLocation3 = np.asarray([poisson(returnLambda[2], i) for i in range(ml3)])


for (currState, nextState) in itertools.product(n1, n1):  
    for request, ret in itertools.product(n1, n1):
        if(request > currState or currState-request+ret != nextState):
            continue
        JointProbLocation1[currState, nextState] += (ProbRequestLocation1[request]
                            *ProbReturnLocation1[ret])
        JointRewardLocation1[currState, nextState] += (ProbRequestLocation1[request]
                            *ProbReturnLocation1[ret])*request*10
            
for (currState, nextState) in itertools.product(n2, n2):  
    for request, ret in itertools.product(n2, n2):
        if(request > currState or currState-request+ret != nextState):
            continue
        JointProbLocation2[currState, nextState] += (ProbRequestLocation2[request]
                            *ProbReturnLocation2[ret])
        JointRewardLocation2[currState, nextState] += (ProbRequestLocation3[request]
                            *ProbReturnLocation2[ret])*request*10

for (currState, nextState) in itertools.product(n3, n3):  
    for request, ret in itertools.product(n3, n3):
        if(request > currState or currState-request+ret != nextState):
            continue
        JointProbLocation3[currState, nextState] += (ProbRequestLocation3[request]
                            *ProbReturnLocation3[ret])
        JointRewardLocation3[currState, nextState] += (ProbRequestLocation3[request]
                            *ProbReturnLocation3[ret])*request*10
#=========================_________________________========================#
        
def normInfinity(currValue, optimalValue):
    maxDiff = np.max(np.abs(currValue - optimalValue))
    return maxDiff
        

def isValidState(car1, car2, car3):
     return car1 >= 0 and car2 >=0 and car3 >=0 and car1 <ml1 and car2 < ml2 and car3 < ml3

def getCost(mv1, mv2, mv3):
    cost = np.abs(mv1)*2
    return cost

MOVES = []
for mv1, mv2, mv3 in itertools.product(n4, n4, n4):
    if(mv1 + mv2 + mv3 != 0):
        continue
    MOVES.append((mv1, mv2, mv3))


#================================ Value Iteration ===========================#
def ValueIteration(V, Policy):
    for iter1 in (range(numIter)):
        oldV = V.copy()
        if(iter1 == 1):
            print(V)
            break
        print("iter: ", iter1)
        a = time.time()
        #State value
        for i, j, k in itertools.product(n1, n2, n3):
            maxReward = -10000.0
            for (mv1, mv2, mv3) in MOVES:
#                 print("move1 = ", mv1, mv2, mv3)
                reward = 0.0
                car1 = i + mv1
                car2 = j + mv2
                car3 = k + mv3
#                 print(i, j, k, car1, car2, car3)
                
                if(not isValidState(car1, car2, car3)):
                    continue
                
                reward-=getCost(mv1, mv2, mv3)
                x = 0
                y = 0
                for i1, j1, k1 in itertools.product(n1, n2, n3):
                    p1_sum = JointProbLocation1[car1, i1]
                    r1_sum = JointRewardLocation1[car1, i1]
                    
                    p2_sum = JointProbLocation2[car2, j1]
                    r2_sum = JointRewardLocation2[car2, j1]
                    
                    p3_sum = JointProbLocation3[car3, k1]
                    r3_sum = JointRewardLocation3[car3, k1]

                    immediateReward = p1_sum*p2_sum*r3_sum + p1_sum*r2_sum*p3_sum + r1_sum*p2_sum*p3_sum
                    x+=immediateReward
                    reward+= immediateReward + gamma*p1_sum*p2_sum*p3_sum*oldV[i1, j1, k1]
                   
                    y+=gamma*p1_sum*p2_sum*p3_sum*oldV[i1, j1, k1]
#                 print(reward)
                print("x = ", i, j, k, x)
                print("y = ", i, j, k, y)
                print("reward = ", i, j, k, reward)
                if(reward > maxReward):
                    maxReward = reward
                    Policy[i, j, k] = np.array([mv1, mv2, mv3])
            V[i, j, k] = maxReward 
            print("maxReward = ", i, j, k, maxReward)
            
        if(normInfinity(V, oldV) <= 0.1):
            print("Converged in {} iteration".format(iter1 + 2))
            break
        print(time.time() - a)
    return V, Policy

## np.max(JointProbLocation1)

In [502]:
import time
Size = (ml1, ml2, ml3)
Value = np.zeros(Size)
Policy = np.zeros(Size + tuple([3]))
a = time.time()
V, P = ValueIteration(Value, Policy)
print("Time Taken = ",time.time() - a)
np.savetxt("Value1.txt", np.ravel(V).reshape(50, 5), fmt = "%.2f", header = 'values')
np.savetxt("Policy1.txt", np.ravel(P).reshape(250, 3), fmt = "%i")
print("Value1.txt Saved")
print("Policy1.txt Saved")

iter:  0
x =  0 0 0 0.0
y =  0 0 0 0.0
reward =  0 0 0 0.0
maxReward =  0 0 0 0.0
x =  0 0 1 0.018084429787316194
y =  0 0 1 0.0
reward =  0 0 1 0.018084429787316194
x =  0 0 1 0.018084429787316194
y =  0 0 1 0.0
reward =  0 0 1 0.018084429787316194
x =  0 0 1 0.027126644680974303
y =  0 0 1 0.0
reward =  0 0 1 -1.9728733553190254
maxReward =  0 0 1 0.018084429787316194
x =  0 0 2 0.05397506736522065
y =  0 0 2 0.0
reward =  0 0 2 0.05397506736522065
x =  0 0 2 0.10795013473044131
y =  0 0 2 0.0
reward =  0 0 2 0.10795013473044131
x =  0 0 2 0.05397506736522067
y =  0 0 2 0.0
reward =  0 0 2 0.05397506736522067
x =  0 0 2 0.15325142922163504
y =  0 0 2 0.0
reward =  0 0 2 -1.8467485707783653
x =  0 0 2 0.153251429221635
y =  0 0 2 0.0
reward =  0 0 2 -1.8467485707783653
x =  0 0 2 0.10843324226070605
y =  0 0 2 0.0
reward =  0 0 2 -3.891566757739296
maxReward =  0 0 2 0.10795013473044131
x =  0 0 3 0.08847459495948537
y =  0 0 3 0.0
reward =  0 0 3 0.08847459495948537
x =  0 0 3 0.2495

reward =  0 2 3 0.42033208553869283
x =  0 2 3 0.7506058509057559
y =  0 2 3 0.0
reward =  0 2 3 0.7506058509057559
x =  0 2 3 0.7506058509057562
y =  0 2 3 0.0
reward =  0 2 3 0.7506058509057562
x =  0 2 3 0.42033208553869283
y =  0 2 3 0.0
reward =  0 2 3 0.42033208553869283
x =  0 2 3 0.579434070567614
y =  0 2 3 0.0
reward =  0 2 3 -1.4205659294323862
x =  0 2 3 1.9577553303048019
y =  0 2 3 0.0
reward =  0 2 3 -0.04224466969519812
x =  0 2 3 2.7603407318616533
y =  0 2 3 0.0
reward =  0 2 3 0.7603407318616534
x =  0 2 3 1.9577553303048016
y =  0 2 3 0.0
reward =  0 2 3 -0.04224466969519801
x =  0 2 3 0.579434070567614
y =  0 2 3 0.0
reward =  0 2 3 -1.4205659294323862
x =  0 2 3 1.3887256888539021
y =  0 2 3 0.0
reward =  0 2 3 -2.6112743111461025
x =  0 2 3 3.6999220374117545
y =  0 2 3 0.0
reward =  0 2 3 -0.3000779625882462
x =  0 2 3 3.699922037411753
y =  0 2 3 0.0
reward =  0 2 3 -0.30007796258824637
x =  0 2 3 1.3887256888539021
y =  0 2 3 0.0
reward =  0 2 3 -2.61127431114

reward =  1 0 1 -1.8915667577392945
maxReward =  1 0 1 0.15325142922163504
x =  1 0 2 0.08847459495948537
y =  1 0 2 0.0
reward =  1 0 2 -1.9115254050405173
x =  1 0 2 0.24956941140337482
y =  1 0 2 0.0
reward =  1 0 2 -1.7504305885966274
x =  1 0 2 0.24956941140337485
y =  1 0 2 0.0
reward =  1 0 2 -1.7504305885966274
x =  1 0 2 0.08847459495948537
y =  1 0 2 0.0
reward =  1 0 2 -1.9115254050405173
x =  1 0 2 0.3484662411425819
y =  1 0 2 0.0
reward =  1 0 2 0.3484662411425819
x =  1 0 2 0.6731509220725411
y =  1 0 2 0.0
reward =  1 0 2 0.6731509220725411
x =  1 0 2 0.34846624114258185
y =  1 0 2 0.0
reward =  1 0 2 0.34846624114258185
x =  1 0 2 0.47700693941899264
y =  1 0 2 0.0
reward =  1 0 2 -1.5229930605810067
x =  1 0 2 0.47700693941899264
y =  1 0 2 0.0
reward =  1 0 2 -1.5229930605810067
x =  1 0 2 0.2300631245459432
y =  1 0 2 0.0
reward =  1 0 2 -3.7699368754540528
maxReward =  1 0 2 0.6731509220725411
x =  1 0 3 0.10646628408122569
y =  1 0 3 0.0
reward =  1 0 3 -1.8935337

reward =  1 2 1 -1.6294710786550792
x =  1 2 1 0.10646628408122566
y =  1 2 1 0.0
reward =  1 2 1 -1.8935337159187755
x =  1 2 1 0.5133586162644487
y =  1 2 1 0.0
reward =  1 2 1 0.5133586162644487
x =  1 2 1 1.3936968944758548
y =  1 2 1 0.0
reward =  1 2 1 1.3936968944758548
x =  1 2 1 1.3936968944758545
y =  1 2 1 0.0
reward =  1 2 1 1.3936968944758545
x =  1 2 1 0.5133586162644486
y =  1 2 1 0.0
reward =  1 2 1 0.5133586162644486
x =  1 2 1 0.988255412515588
y =  1 2 1 0.0
reward =  1 2 1 -1.0117445874844098
x =  1 2 1 1.881448877259896
y =  1 2 1 0.0
reward =  1 2 1 -0.11855112274010346
x =  1 2 1 0.9882554125155879
y =  1 2 1 0.0
reward =  1 2 1 -1.0117445874844093
x =  1 2 1 0.9203541353869856
y =  1 2 1 0.0
reward =  1 2 1 -3.0796458646130125
x =  1 2 1 0.920354135386986
y =  1 2 1 0.0
reward =  1 2 1 -3.0796458646130125
x =  1 2 1 0.3505562916517171
y =  1 2 1 0.0
reward =  1 2 1 -5.649443708348297
maxReward =  1 2 1 1.3936968944758548
x =  1 2 2 0.42033208553869283
y =  1 2 2

maxReward =  1 3 2 5.074384748644443
x =  1 3 3 1.1285958776945546
y =  1 3 3 0.0
reward =  1 3 3 -0.871404122305447
x =  1 3 3 1.1285958776945542
y =  1 3 3 0.0
reward =  1 3 3 -0.8714041223054465
x =  1 3 3 4.05848416916622
y =  1 3 3 0.0
reward =  1 3 3 4.05848416916622
x =  1 3 3 5.10428944489899
y =  1 3 3 0.0
reward =  1 3 3 5.10428944489899
x =  1 3 3 4.058484169166221
y =  1 3 3 0.0
reward =  1 3 3 4.058484169166221
x =  1 3 3 5.400439810972953
y =  1 3 3 0.0
reward =  1 3 3 3.4004398109729492
x =  1 3 3 9.489027668250978
y =  1 3 3 0.0
reward =  1 3 3 7.489027668250972
x =  1 3 3 9.489027668250978
y =  1 3 3 0.0
reward =  1 3 3 7.489027668250972
x =  1 3 3 5.400439810972953
y =  1 3 3 0.0
reward =  1 3 3 3.400439810972949
x =  1 3 3 2.6807293895312534
y =  1 3 3 0.0
reward =  1 3 3 -1.3192706104687502
x =  1 3 3 8.830758427750085
y =  1 3 3 0.0
reward =  1 3 3 4.830758427750085
x =  1 3 3 12.331422895310675
y =  1 3 3 0.0
reward =  1 3 3 8.331422895310682
x =  1 3 3 8.83075842

y =  1 4 2 0.0
reward =  1 4 2 -8.237229845076373
x =  1 4 2 1.7627701549236179
y =  1 4 2 0.0
reward =  1 4 2 -8.237229845076373
maxReward =  1 4 2 8.331422895310682
x =  1 4 3 1.2077098481419033
y =  1 4 3 0.0
reward =  1 4 3 -0.7922901518580979
x =  1 4 3 5.4171147554325465
y =  1 4 3 0.0
reward =  1 4 3 5.4171147554325465
x =  1 4 3 5.417114755432546
y =  1 4 3 0.0
reward =  1 4 3 5.417114755432546
x =  1 4 3 10.022684387001107
y =  1 4 3 0.0
reward =  1 4 3 8.022684387001114
x =  1 4 3 12.593179795011554
y =  1 4 3 0.0
reward =  1 4 3 10.59317979501156
x =  1 4 3 10.022684387001107
y =  1 4 3 0.0
reward =  1 4 3 8.02268438700111
x =  1 4 3 9.326468429496009
y =  1 4 3 0.0
reward =  1 4 3 5.3264684294960105
x =  1 4 3 16.326296009791566
y =  1 4 3 0.0
reward =  1 4 3 12.326296009791578
x =  1 4 3 16.326296009791562
y =  1 4 3 0.0
reward =  1 4 3 12.326296009791578
x =  1 4 3 9.326468429496009
y =  1 4 3 0.0
reward =  1 4 3 5.3264684294960105
x =  1 4 3 3.7060039420798896
y =  1 4 3

x =  2 1 0 0.47700693941899264
y =  2 1 0 0.0
reward =  2 1 0 0.47700693941899264
x =  2 1 0 0.47700693941899264
y =  2 1 0 0.0
reward =  2 1 0 0.47700693941899264
x =  2 1 0 0.2300631245459432
y =  2 1 0 0.0
reward =  2 1 0 -1.769936875454053
maxReward =  2 1 0 0.47700693941899264
x =  2 1 1 0.10646628408122569
y =  2 1 1 0.0
reward =  2 1 1 -3.893533715918773
x =  2 1 1 0.37052892134492066
y =  2 1 1 0.0
reward =  2 1 1 -3.6294710786550777
x =  2 1 1 0.528125274527391
y =  2 1 1 0.0
reward =  2 1 1 -3.4718747254726146
x =  2 1 1 0.37052892134492055
y =  2 1 1 0.0
reward =  2 1 1 -3.6294710786550777
x =  2 1 1 0.10646628408122566
y =  2 1 1 0.0
reward =  2 1 1 -3.893533715918773
x =  2 1 1 0.5133586162644487
y =  2 1 1 0.0
reward =  2 1 1 -1.4866413837355485
x =  2 1 1 1.3936968944758548
y =  2 1 1 0.0
reward =  2 1 1 -0.6063031055241439
x =  2 1 1 1.3936968944758545
y =  2 1 1 0.0
reward =  2 1 1 -0.6063031055241433
x =  2 1 1 0.5133586162644486
y =  2 1 1 0.0
reward =  2 1 1 -1.4866

reward =  2 2 2 4.584475985842607
x =  2 2 2 6.584475985842605
y =  2 2 2 0.0
reward =  2 2 2 4.584475985842609
x =  2 2 2 2.497775735739667
y =  2 2 2 0.0
reward =  2 2 2 0.4977757357396664
x =  2 2 2 2.585859702793697
y =  2 2 2 0.0
reward =  2 2 2 -1.414140297206306
x =  2 2 2 4.864391475580774
y =  2 2 2 0.0
reward =  2 2 2 0.8643914755807796
x =  2 2 2 2.5858597027936967
y =  2 2 2 0.0
reward =  2 2 2 -1.414140297206306
x =  2 2 2 1.627491485576247
y =  2 2 2 0.0
reward =  2 2 2 -4.372508514423754
x =  2 2 2 1.6274914855762475
y =  2 2 2 0.0
reward =  2 2 2 -4.372508514423754
x =  2 2 2 0.48186869210630195
y =  2 2 2 0.0
reward =  2 2 2 -7.5181313078937135
maxReward =  2 2 2 7.074384748644441
x =  2 2 3 1.1285958776945546
y =  2 2 3 0.0
reward =  2 2 3 -2.8714041223054445
x =  2 2 3 1.1285958776945542
y =  2 2 3 0.0
reward =  2 2 3 -2.8714041223054445
x =  2 2 3 4.05848416916622
y =  2 2 3 0.0
reward =  2 2 3 2.0584841691662166
x =  2 2 3 5.10428944489899
y =  2 2 3 0.0
reward =  

y =  2 4 1 0.0
reward =  2 4 1 -0.508768446058428
x =  2 4 1 9.145240396042906
y =  2 4 1 0.0
reward =  2 4 1 5.145240396042904
x =  2 4 1 9.145240396042905
y =  2 4 1 0.0
reward =  2 4 1 5.1452403960429045
x =  2 4 1 3.491231553941568
y =  2 4 1 0.0
reward =  2 4 1 -0.5087684460584277
x =  2 4 1 3.099960758158446
y =  2 4 1 0.0
reward =  2 4 1 -2.9000392418415566
x =  2 4 1 5.816231267398045
y =  2 4 1 0.0
reward =  2 4 1 -0.18376873260195803
x =  2 4 1 3.0999607581584456
y =  2 4 1 0.0
reward =  2 4 1 -2.900039241841556
x =  2 4 1 1.7627701549236179
y =  2 4 1 0.0
reward =  2 4 1 -6.237229845076388
x =  2 4 1 1.7627701549236179
y =  2 4 1 0.0
reward =  2 4 1 -6.237229845076388
x =  2 4 1 0.48686616464798504
y =  2 4 1 0.0
reward =  2 4 1 -9.513133835352033
maxReward =  2 4 1 10.331422895310675
x =  2 4 2 1.2077098481419033
y =  2 4 2 0.0
reward =  2 4 2 -2.7922901518580994
x =  2 4 2 5.4171147554325465
y =  2 4 2 0.0
reward =  2 4 2 3.417114755432551
x =  2 4 2 5.417114755432546
y = 

x =  3 1 3 4.05848416916622
y =  3 1 3 0.0
reward =  3 1 3 0.05848416916621835
x =  3 1 3 5.10428944489899
y =  3 1 3 0.0
reward =  3 1 3 1.1042894448989973
x =  3 1 3 4.058484169166221
y =  3 1 3 0.0
reward =  3 1 3 0.05848416916621797
x =  3 1 3 5.400439810972953
y =  3 1 3 0.0
reward =  3 1 3 3.4004398109729492
x =  3 1 3 9.489027668250978
y =  3 1 3 0.0
reward =  3 1 3 7.489027668250972
x =  3 1 3 9.489027668250978
y =  3 1 3 0.0
reward =  3 1 3 7.489027668250972
x =  3 1 3 5.400439810972953
y =  3 1 3 0.0
reward =  3 1 3 3.400439810972949
x =  3 1 3 2.6807293895312534
y =  3 1 3 0.0
reward =  3 1 3 2.6807293895312534
x =  3 1 3 8.830758427750085
y =  3 1 3 0.0
reward =  3 1 3 8.830758427750085
x =  3 1 3 12.331422895310675
y =  3 1 3 0.0
reward =  3 1 3 12.331422895310675
x =  3 1 3 8.830758427750085
y =  3 1 3 0.0
reward =  3 1 3 8.830758427750085
x =  3 1 3 2.6807293895312525
y =  3 1 3 0.0
reward =  3 1 3 2.6807293895312525
x =  3 1 3 3.4912315539415686
y =  3 1 3 0.0
reward = 

reward =  3 2 3 -2.673811260754719
x =  3 2 3 6.229929754097247
y =  3 2 3 0.0
reward =  3 2 3 0.2299297540972495
x =  3 2 3 3.326188739245282
y =  3 2 3 0.0
reward =  3 2 3 -2.6738112607547198
x =  3 2 3 1.758363486590568
y =  3 2 3 0.0
reward =  3 2 3 -6.241636513409428
x =  3 2 3 1.7583634865905675
y =  3 2 3 0.0
reward =  3 2 3 -6.241636513409428
x =  3 2 3 0.457882862000255
y =  3 2 3 0.0
reward =  3 2 3 -9.542117137999755
maxReward =  3 2 3 16.326296009791566
x =  3 2 4 5.700218373850307
y =  3 2 4 0.0
reward =  3 2 4 1.7002183738503078
x =  3 2 4 13.192387746031605
y =  3 2 4 0.0
reward =  3 2 4 11.192387746031608
x =  3 2 4 13.192387746031608
y =  3 2 4 0.0
reward =  3 2 4 11.192387746031605
x =  3 2 4 17.071127117907082
y =  3 2 4 0.0
reward =  3 2 4 17.071127117907082
x =  3 2 4 21.435977391552044
y =  3 2 4 0.0
reward =  3 2 4 21.435977391552044
x =  3 2 4 17.07112711790709
y =  3 2 4 0.0
reward =  3 2 4 17.07112711790709
x =  3 2 4 12.715873510493067
y =  3 2 4 0.0
reward =

reward =  3 3 3 15.856682707711967
x =  3 3 3 14.27205533416515
y =  3 3 3 0.0
reward =  3 3 3 10.272055334165156
x =  3 3 3 4.373547347395737
y =  3 3 3 0.0
reward =  3 3 3 0.3735473473957364
x =  3 3 3 4.42478020130335
y =  3 3 3 0.0
reward =  3 3 3 -1.575219798696654
x =  3 3 3 11.515326804907044
y =  3 3 3 0.0
reward =  3 3 3 5.515326804907044
x =  3 3 3 11.515326804907042
y =  3 3 3 0.0
reward =  3 3 3 5.515326804907043
x =  3 3 3 4.424780201303349
y =  3 3 3 0.0
reward =  3 3 3 -1.575219798696654
x =  3 3 3 3.2929685719501403
y =  3 3 3 0.0
reward =  3 3 3 -4.707031428049863
x =  3 3 3 6.15910820334261
y =  3 3 3 0.0
reward =  3 3 3 -1.8408917966573874
x =  3 3 3 3.2929685719501403
y =  3 3 3 0.0
reward =  3 3 3 -4.707031428049863
x =  3 3 3 1.632632627617168
y =  3 3 3 0.0
reward =  3 3 3 -8.367367372382835
x =  3 3 3 1.6326326276171688
y =  3 3 3 0.0
reward =  3 3 3 -8.367367372382835
maxReward =  3 3 3 21.435977391552044
x =  3 3 4 13.73089538026746
y =  3 3 4 0.0
reward =  3 

reward =  3 4 3 -1.3560939582214007
x =  3 4 3 15.117112727363121
y =  3 4 3 0.0
reward =  3 4 3 9.117112727363113
x =  3 4 3 21.01210718576151
y =  3 4 3 0.0
reward =  3 4 3 15.012107185761502
x =  3 4 3 15.117112727363123
y =  3 4 3 0.0
reward =  3 4 3 9.117112727363109
x =  3 4 3 4.643906041778605
y =  3 4 3 0.0
reward =  3 4 3 -1.3560939582214016
x =  3 4 3 4.359671062810508
y =  3 4 3 0.0
reward =  3 4 3 -3.640328937189502
x =  3 4 3 11.321647226190477
y =  3 4 3 0.0
reward =  3 4 3 3.321647226190478
x =  3 4 3 11.321647226190478
y =  3 4 3 0.0
reward =  3 4 3 3.3216472261904757
x =  3 4 3 4.359671062810508
y =  3 4 3 0.0
reward =  3 4 3 -3.640328937189502
x =  3 4 3 3.034096283445243
y =  3 4 3 0.0
reward =  3 4 3 -6.965903716554765
x =  3 4 3 5.666772893316768
y =  3 4 3 0.0
reward =  3 4 3 -4.333227106683243
x =  3 4 3 3.034096283445243
y =  3 4 3 0.0
reward =  3 4 3 -6.965903716554765
maxReward =  3 4 3 26.957124321834453
x =  3 4 4 23.008134613105263
y =  3 4 4 0.0
reward =  

x =  4 0 4 3.326188739245282
y =  4 0 4 0.0
reward =  4 0 4 -0.6738112607547194
x =  4 0 4 1.758363486590568
y =  4 0 4 0.0
reward =  4 0 4 -4.241636513409428
x =  4 0 4 1.7583634865905675
y =  4 0 4 0.0
reward =  4 0 4 -4.241636513409428
x =  4 0 4 0.457882862000255
y =  4 0 4 0.0
reward =  4 0 4 -7.5421171379997585
maxReward =  4 0 4 16.911191876095405
x =  4 1 0 0.42033208553869283
y =  4 1 0 0.0
reward =  4 1 0 -7.579667914461305
x =  4 1 0 0.7506058509057559
y =  4 1 0 0.0
reward =  4 1 0 -7.249394149094246
x =  4 1 0 0.7506058509057562
y =  4 1 0 0.0
reward =  4 1 0 -7.249394149094246
x =  4 1 0 0.42033208553869283
y =  4 1 0 0.0
reward =  4 1 0 -7.579667914461305
x =  4 1 0 0.579434070567614
y =  4 1 0 0.0
reward =  4 1 0 -5.420565929432386
x =  4 1 0 1.9577553303048019
y =  4 1 0 0.0
reward =  4 1 0 -4.042244669695194
x =  4 1 0 2.7603407318616533
y =  4 1 0 0.0
reward =  4 1 0 -3.2396592681383423
x =  4 1 0 1.9577553303048016
y =  4 1 0 0.0
reward =  4 1 0 -4.042244669695194
x

x =  4 1 4 6.15910820334261
y =  4 1 4 0.0
reward =  4 1 4 0.15910820334261203
x =  4 1 4 3.2929685719501403
y =  4 1 4 0.0
reward =  4 1 4 -2.7070314280498615
x =  4 1 4 1.632632627617168
y =  4 1 4 0.0
reward =  4 1 4 -6.367367372382828
x =  4 1 4 1.6326326276171688
y =  4 1 4 0.0
reward =  4 1 4 -6.367367372382828
x =  4 1 4 0.3990425900092249
y =  4 1 4 0.0
reward =  4 1 4 -9.600957409990793
maxReward =  4 1 4 22.206914211190536
x =  4 2 0 0.8270013308688381
y =  4 2 0 0.0
reward =  4 2 0 -7.172998669131165
x =  4 2 0 1.0417316411639923
y =  4 2 0 0.0
reward =  4 2 0 -6.9582683588360075
x =  4 2 0 0.827001330868838
y =  4 2 0 0.0
reward =  4 2 0 -7.172998669131165
x =  4 2 0 2.1393952463106554
y =  4 2 0 0.0
reward =  4 2 0 -3.860604753689344
x =  4 2 0 3.7816928832191503
y =  4 2 0 0.0
reward =  4 2 0 -2.2183071167808497
x =  4 2 0 3.7816928832191508
y =  4 2 0 0.0
reward =  4 2 0 -2.2183071167808506
x =  4 2 0 2.139395246310656
y =  4 2 0 0.0
reward =  4 2 0 -3.860604753689345
x 

reward =  4 2 4 0.6439060417786041
x =  4 2 4 4.359671062810508
y =  4 2 4 0.0
reward =  4 2 4 -1.6403289371894978
x =  4 2 4 11.321647226190477
y =  4 2 4 0.0
reward =  4 2 4 5.321647226190474
x =  4 2 4 11.321647226190478
y =  4 2 4 0.0
reward =  4 2 4 5.321647226190474
x =  4 2 4 4.359671062810508
y =  4 2 4 0.0
reward =  4 2 4 -1.6403289371894967
x =  4 2 4 3.034096283445243
y =  4 2 4 0.0
reward =  4 2 4 -4.965903716554763
x =  4 2 4 5.666772893316768
y =  4 2 4 0.0
reward =  4 2 4 -2.333227106683228
x =  4 2 4 3.034096283445243
y =  4 2 4 0.0
reward =  4 2 4 -4.965903716554762
x =  4 2 4 1.4027073875093916
y =  4 2 4 0.0
reward =  4 2 4 -8.597292612490618
x =  4 2 4 1.4027073875093914
y =  4 2 4 0.0
reward =  4 2 4 -8.597292612490618
maxReward =  4 2 4 28.957124321834456
x =  4 3 0 1.1285958776945546
y =  4 3 0 0.0
reward =  4 3 0 -6.871404122305449
x =  4 3 0 1.1285958776945542
y =  4 3 0 0.0
reward =  4 3 0 -6.871404122305449
x =  4 3 0 4.05848416916622
y =  4 3 0 0.0
reward = 

x =  4 4 0 5.417114755432546
y =  4 4 0 0.0
reward =  4 4 0 -0.5828852445674498
x =  4 4 0 10.022684387001107
y =  4 4 0 0.0
reward =  4 4 0 6.022684387001117
x =  4 4 0 12.593179795011554
y =  4 4 0 0.0
reward =  4 4 0 8.593179795011556
x =  4 4 0 10.022684387001107
y =  4 4 0 0.0
reward =  4 4 0 6.022684387001114
x =  4 4 0 9.326468429496009
y =  4 4 0 0.0
reward =  4 4 0 7.3264684294960105
x =  4 4 0 16.326296009791566
y =  4 4 0 0.0
reward =  4 4 0 14.326296009791575
x =  4 4 0 16.326296009791562
y =  4 4 0 0.0
reward =  4 4 0 14.326296009791578
x =  4 4 0 9.326468429496009
y =  4 4 0 0.0
reward =  4 4 0 7.326468429496011
x =  4 4 0 3.7060039420798896
y =  4 4 0 0.0
reward =  4 4 0 3.7060039420798896
x =  4 4 0 12.137703971962942
y =  4 4 0 0.0
reward =  4 4 0 12.137703971962942
x =  4 4 0 16.911191876095405
y =  4 4 0 0.0
reward =  4 4 0 16.911191876095405
x =  4 4 0 12.13770397196294
y =  4 4 0 0.0
reward =  4 4 0 12.13770397196294
x =  4 4 0 3.7060039420798887
y =  4 4 0 0.0
rew

reward =  5 0 0 -0.6619512063071848
x =  5 0 0 0.4376596386830735
y =  5 0 0 0.0
reward =  5 0 0 0.4376596386830735
maxReward =  5 0 0 0.4376596386830735
x =  5 0 1 0.8270013308688381
y =  5 0 1 0.0
reward =  5 0 1 -9.172998669131164
x =  5 0 1 1.0417316411639923
y =  5 0 1 0.0
reward =  5 0 1 -8.958268358836023
x =  5 0 1 0.827001330868838
y =  5 0 1 0.0
reward =  5 0 1 -9.172998669131164
x =  5 0 1 2.1393952463106554
y =  5 0 1 0.0
reward =  5 0 1 -5.860604753689343
x =  5 0 1 3.7816928832191503
y =  5 0 1 0.0
reward =  5 0 1 -4.218307116780844
x =  5 0 1 3.7816928832191508
y =  5 0 1 0.0
reward =  5 0 1 -4.218307116780846
x =  5 0 1 2.139395246310656
y =  5 0 1 0.0
reward =  5 0 1 -5.860604753689343
x =  5 0 1 1.517959712441896
y =  5 0 1 0.0
reward =  5 0 1 -4.482040287558103
x =  5 0 1 5.047760661019481
y =  5 0 1 0.0
reward =  5 0 1 -0.952239338980522
x =  5 0 1 7.074384748644441
y =  5 0 1 0.0
reward =  5 0 1 1.0743847486444333
x =  5 0 1 5.047760661019479
y =  5 0 1 0.0
reward 

x =  5 1 4 15.117112727363123
y =  5 1 4 0.0
reward =  5 1 4 13.117112727363116
x =  5 1 4 4.643906041778605
y =  5 1 4 0.0
reward =  5 1 4 2.6439060417786013
x =  5 1 4 4.359671062810508
y =  5 1 4 0.0
reward =  5 1 4 0.3596710628105117
x =  5 1 4 11.321647226190477
y =  5 1 4 0.0
reward =  5 1 4 7.321647226190472
x =  5 1 4 11.321647226190478
y =  5 1 4 0.0
reward =  5 1 4 7.321647226190472
x =  5 1 4 4.359671062810508
y =  5 1 4 0.0
reward =  5 1 4 0.3596710628105119
x =  5 1 4 3.034096283445243
y =  5 1 4 0.0
reward =  5 1 4 -2.9659037165547635
x =  5 1 4 5.666772893316768
y =  5 1 4 0.0
reward =  5 1 4 -0.33322710668322725
x =  5 1 4 3.034096283445243
y =  5 1 4 0.0
reward =  5 1 4 -2.9659037165547626
x =  5 1 4 1.4027073875093916
y =  5 1 4 0.0
reward =  5 1 4 -6.597292612490608
x =  5 1 4 1.4027073875093914
y =  5 1 4 0.0
reward =  5 1 4 -6.597292612490608
maxReward =  5 1 4 26.957124321834453
x =  5 2 0 1.1285958776945546
y =  5 2 0 0.0
reward =  5 2 0 -8.871404122305442
x =  5

y =  5 2 4 0.0
reward =  5 2 4 0.5584974110335057
x =  5 2 4 3.997131347805949
y =  5 2 4 0.0
reward =  5 2 4 -2.0028686521940537
x =  5 2 4 10.357103201082044
y =  5 2 4 0.0
reward =  5 2 4 4.357103201082042
x =  5 2 4 10.357103201082046
y =  5 2 4 0.0
reward =  5 2 4 4.357103201082044
x =  5 2 4 3.997131347805948
y =  5 2 4 0.0
reward =  5 2 4 -2.0028686521940546
x =  5 2 4 2.5841381241176147
y =  5 2 4 0.0
reward =  5 2 4 -5.415861875882384
x =  5 2 4 4.818441040331286
y =  5 2 4 0.0
reward =  5 2 4 -3.1815589596687084
x =  5 2 4 2.5841381241176147
y =  5 2 4 0.0
reward =  5 2 4 -5.415861875882384
maxReward =  5 2 4 33.67376823982527
x =  5 3 0 1.2077098481419033
y =  5 3 0 0.0
reward =  5 3 0 -8.792290151858092
x =  5 3 0 5.4171147554325465
y =  5 3 0 0.0
reward =  5 3 0 -2.5828852445674486
x =  5 3 0 5.417114755432546
y =  5 3 0 0.0
reward =  5 3 0 -2.5828852445674473
x =  5 3 0 10.022684387001107
y =  5 3 0 0.0
reward =  5 3 0 4.02268438700111
x =  5 3 0 12.593179795011554
y =  5

reward =  5 4 0 20.206914211190533
x =  5 4 0 12.71587351049307
y =  5 4 0 0.0
reward =  5 4 0 10.715873510493074
x =  5 4 0 4.373547347395739
y =  5 4 0 0.0
reward =  5 4 0 4.373547347395739
x =  5 4 0 14.272055334165154
y =  5 4 0 0.0
reward =  5 4 0 14.272055334165154
x =  5 4 0 19.85668270771195
y =  5 4 0 0.0
reward =  5 4 0 19.85668270771195
x =  5 4 0 14.27205533416515
y =  5 4 0 0.0
reward =  5 4 0 14.27205533416515
x =  5 4 0 4.373547347395737
y =  5 4 0 0.0
reward =  5 4 0 4.373547347395737
x =  5 4 0 4.42478020130335
y =  5 4 0 0.0
reward =  5 4 0 2.424780201303347
x =  5 4 0 11.515326804907044
y =  5 4 0 0.0
reward =  5 4 0 9.51532680490704
x =  5 4 0 11.515326804907042
y =  5 4 0 0.0
reward =  5 4 0 9.515326804907042
x =  5 4 0 4.424780201303349
y =  5 4 0 0.0
reward =  5 4 0 2.424780201303347
x =  5 4 0 3.2929685719501403
y =  5 4 0 0.0
reward =  5 4 0 -0.7070314280498625
x =  5 4 0 6.15910820334261
y =  5 4 0 0.0
reward =  5 4 0 2.15910820334261
x =  5 4 0 3.292968571950

x =  6 0 1 3.099960758158446
y =  6 0 1 0.0
reward =  6 0 1 1.0999607581584474
x =  6 0 1 5.816231267398045
y =  6 0 1 0.0
reward =  6 0 1 3.8162312673980527
x =  6 0 1 3.0999607581584456
y =  6 0 1 0.0
reward =  6 0 1 1.0999607581584472
x =  6 0 1 1.7627701549236179
y =  6 0 1 0.0
reward =  6 0 1 1.7627701549236179
x =  6 0 1 1.7627701549236179
y =  6 0 1 0.0
reward =  6 0 1 1.7627701549236179
x =  6 0 1 0.48686616464798504
y =  6 0 1 0.0
reward =  6 0 1 -1.5131338353520147
maxReward =  6 0 1 6.331422895310678
x =  6 0 2 5.4171147554325465
y =  6 0 2 0.0
reward =  6 0 2 -4.582885244567457
x =  6 0 2 5.417114755432546
y =  6 0 2 0.0
reward =  6 0 2 -4.582885244567457
x =  6 0 2 10.022684387001107
y =  6 0 2 0.0
reward =  6 0 2 2.02268438700111
x =  6 0 2 12.593179795011554
y =  6 0 2 0.0
reward =  6 0 2 4.593179795011564
x =  6 0 2 10.022684387001107
y =  6 0 2 0.0
reward =  6 0 2 2.0226843870011098
x =  6 0 2 9.326468429496009
y =  6 0 2 0.0
reward =  6 0 2 3.3264684294960105
x =  6 0

y =  6 1 2 0.0
reward =  6 1 2 8.715873510493067
x =  6 1 2 22.206914211190533
y =  6 1 2 0.0
reward =  6 1 2 18.20691421119053
x =  6 1 2 22.206914211190536
y =  6 1 2 0.0
reward =  6 1 2 18.206914211190522
x =  6 1 2 12.71587351049307
y =  6 1 2 0.0
reward =  6 1 2 8.715873510493065
x =  6 1 2 4.373547347395739
y =  6 1 2 0.0
reward =  6 1 2 2.3735473473957365
x =  6 1 2 14.272055334165154
y =  6 1 2 0.0
reward =  6 1 2 12.272055334165154
x =  6 1 2 19.85668270771195
y =  6 1 2 0.0
reward =  6 1 2 17.85668270771196
x =  6 1 2 14.27205533416515
y =  6 1 2 0.0
reward =  6 1 2 12.272055334165152
x =  6 1 2 4.373547347395737
y =  6 1 2 0.0
reward =  6 1 2 2.373547347395736
x =  6 1 2 4.42478020130335
y =  6 1 2 0.0
reward =  6 1 2 4.42478020130335
x =  6 1 2 11.515326804907044
y =  6 1 2 0.0
reward =  6 1 2 11.515326804907044
x =  6 1 2 11.515326804907042
y =  6 1 2 0.0
reward =  6 1 2 11.515326804907042
x =  6 1 2 4.424780201303349
y =  6 1 2 0.0
reward =  6 1 2 4.424780201303349
x =  6

x =  6 2 2 4.359671062810508
y =  6 2 2 0.0
reward =  6 2 2 2.35967106281051
x =  6 2 2 3.034096283445243
y =  6 2 2 0.0
reward =  6 2 2 -0.9659037165547553
x =  6 2 2 5.666772893316768
y =  6 2 2 0.0
reward =  6 2 2 1.6667728933167734
x =  6 2 2 3.034096283445243
y =  6 2 2 0.0
reward =  6 2 2 -0.9659037165547549
x =  6 2 2 1.4027073875093916
y =  6 2 2 0.0
reward =  6 2 2 -4.597292612490608
x =  6 2 2 1.4027073875093914
y =  6 2 2 0.0
reward =  6 2 2 -4.597292612490608
maxReward =  6 2 2 24.957124321834453
x =  6 2 3 23.008134613105263
y =  6 2 3 0.0
reward =  6 2 3 17.008134613105273
x =  6 2 3 29.914078497775044
y =  6 2 3 0.0
reward =  6 2 3 25.914078497775048
x =  6 2 3 29.91407849777504
y =  6 2 3 0.0
reward =  6 2 3 25.91407849777504
x =  6 2 3 26.83482869539044
y =  6 2 3 0.0
reward =  6 2 3 24.834828695390446
x =  6 2 3 33.67376823982527
y =  6 2 3 0.0
reward =  6 2 3 31.67376823982528
x =  6 2 3 26.83482869539044
y =  6 2 3 0.0
reward =  6 2 3 24.834828695390446
x =  6 2 3 1

y =  6 3 3 0.0
reward =  6 3 3 13.336706616083877
x =  6 3 3 4.16317456885133
y =  6 3 3 0.0
reward =  6 3 3 0.16317456885132697
x =  6 3 3 13.496056615265578
y =  6 3 3 0.0
reward =  6 3 3 9.496056615265578
x =  6 3 3 18.728187886559184
y =  6 3 3 0.0
reward =  6 3 3 14.728187886559198
x =  6 3 3 13.49605661526558
y =  6 3 3 0.0
reward =  6 3 3 9.49605661526558
x =  6 3 3 4.16317456885133
y =  6 3 3 0.0
reward =  6 3 3 0.16317456885132653
x =  6 3 3 3.3850285425955406
y =  6 3 3 0.0
reward =  6 3 3 -2.6149714574044634
x =  6 3 3 8.748451593394831
y =  6 3 3 0.0
reward =  6 3 3 2.748451593394832
x =  6 3 3 8.748451593394833
y =  6 3 3 0.0
reward =  6 3 3 2.7484515933948312
x =  6 3 3 3.3850285425955415
y =  6 3 3 0.0
reward =  6 3 3 -2.6149714574044634
maxReward =  6 3 3 35.39792018213991
x =  6 3 4 35.532541408061725
y =  6 3 4 0.0
reward =  6 3 4 33.532541408061725
x =  6 3 4 36.34593705679381
y =  6 3 4 0.0
reward =  6 3 4 36.34593705679381
x =  6 3 4 36.345937056793794
y =  6 3 4 0

y =  6 4 3 0.0
reward =  6 4 3 9.733843119273237
x =  6 4 3 11.349434454927469
y =  6 4 3 0.0
reward =  6 4 3 5.349434454927462
x =  6 4 3 3.50971390497669
y =  6 4 3 0.0
reward =  6 4 3 -2.490286095023313
maxReward =  6 4 3 36.34593705679381
x =  6 4 4 37.17720711646123
y =  6 4 4 0.0
reward =  6 4 4 37.17720711646123
x =  6 4 4 35.306965461952196
y =  6 4 4 0.0
reward =  6 4 4 33.306965461952174
x =  6 4 4 35.30696546195218
y =  6 4 4 0.0
reward =  6 4 4 33.30696546195219
x =  6 4 4 24.87089319155532
y =  6 4 4 0.0
reward =  6 4 4 20.87089319155532
x =  6 4 4 31.19395112894398
y =  6 4 4 0.0
reward =  6 4 4 27.193951128943972
x =  6 4 4 24.870893191555314
y =  6 4 4 0.0
reward =  6 4 4 20.870893191555325
x =  6 4 4 11.675971347580196
y =  6 4 4 0.0
reward =  6 4 4 5.675971347580198
x =  6 4 4 20.280703958096897
y =  6 4 4 0.0
reward =  6 4 4 14.280703958096908
x =  6 4 4 20.280703958096904
y =  6 4 4 0.0
reward =  6 4 4 14.280703958096906
x =  6 4 4 11.675971347580198
y =  6 4 4 0.0


y =  7 1 0 0.0
reward =  7 1 0 -2.293996057920111
x =  7 1 0 12.137703971962942
y =  7 1 0 0.0
reward =  7 1 0 6.137703971962933
x =  7 1 0 16.911191876095405
y =  7 1 0 0.0
reward =  7 1 0 10.911191876095414
x =  7 1 0 12.13770397196294
y =  7 1 0 0.0
reward =  7 1 0 6.137703971962933
x =  7 1 0 3.7060039420798887
y =  7 1 0 0.0
reward =  7 1 0 -2.2939960579201095
x =  7 1 0 4.148214119937312
y =  7 1 0 0.0
reward =  7 1 0 0.1482141199373118
x =  7 1 0 10.823823843219609
y =  7 1 0 0.0
reward =  7 1 0 6.823823843219608
x =  7 1 0 10.823823843219605
y =  7 1 0 0.0
reward =  7 1 0 6.823823843219609
x =  7 1 0 4.148214119937312
y =  7 1 0 0.0
reward =  7 1 0 0.14821411993731154
x =  7 1 0 3.326188739245282
y =  7 1 0 0.0
reward =  7 1 0 1.3261887392452796
x =  7 1 0 6.229929754097247
y =  7 1 0 0.0
reward =  7 1 0 4.229929754097251
x =  7 1 0 3.326188739245282
y =  7 1 0 0.0
reward =  7 1 0 1.3261887392452798
x =  7 1 0 1.758363486590568
y =  7 1 0 0.0
reward =  7 1 0 1.758363486590568
x

reward =  7 2 1 17.070083333962156
x =  7 2 1 28.957124321834456
y =  7 2 1 0.0
reward =  7 2 1 22.95712432183445
x =  7 2 1 23.070083333962163
y =  7 2 1 0.0
reward =  7 2 1 17.07008333396216
x =  7 2 1 14.875396949559134
y =  7 2 1 0.0
reward =  7 2 1 10.875396949559132
x =  7 2 1 25.938938936086675
y =  7 2 1 0.0
reward =  7 2 1 21.938938936086682
x =  7 2 1 25.938938936086686
y =  7 2 1 0.0
reward =  7 2 1 21.93893893608668
x =  7 2 1 14.875396949559136
y =  7 2 1 0.0
reward =  7 2 1 10.875396949559127
x =  7 2 1 4.643906041778605
y =  7 2 1 0.0
reward =  7 2 1 2.643906041778601
x =  7 2 1 15.117112727363121
y =  7 2 1 0.0
reward =  7 2 1 13.117112727363114
x =  7 2 1 21.01210718576151
y =  7 2 1 0.0
reward =  7 2 1 19.0121071857615
x =  7 2 1 15.117112727363123
y =  7 2 1 0.0
reward =  7 2 1 13.117112727363116
x =  7 2 1 4.643906041778605
y =  7 2 1 0.0
reward =  7 2 1 2.6439060417786013
x =  7 2 1 4.359671062810508
y =  7 2 1 0.0
reward =  7 2 1 4.359671062810508
x =  7 2 1 11.32

reward =  7 3 1 8.357103201082044
x =  7 3 1 10.357103201082046
y =  7 3 1 0.0
reward =  7 3 1 8.35710320108204
x =  7 3 1 3.997131347805948
y =  7 3 1 0.0
reward =  7 3 1 1.9971313478059491
x =  7 3 1 2.5841381241176147
y =  7 3 1 0.0
reward =  7 3 1 -1.415861875882385
x =  7 3 1 4.818441040331286
y =  7 3 1 0.0
reward =  7 3 1 0.8184410403312853
x =  7 3 1 2.5841381241176147
y =  7 3 1 0.0
reward =  7 3 1 -1.4158618758823858
maxReward =  7 3 1 29.673768239825282
x =  7 3 2 30.762252844230325
y =  7 3 2 0.0
reward =  7 3 2 24.762252844230336
x =  7 3 2 34.66183976632887
y =  7 3 2 0.0
reward =  7 3 2 30.66183976632888
x =  7 3 2 34.66183976632886
y =  7 3 2 0.0
reward =  7 3 2 30.66183976632889
x =  7 3 2 28.213905179622213
y =  7 3 2 0.0
reward =  7 3 2 26.213905179622206
x =  7 3 2 35.39792018213991
y =  7 3 2 0.0
reward =  7 3 2 33.39792018213993
x =  7 3 2 28.213905179622216
y =  7 3 2 0.0
reward =  7 3 2 26.213905179622202
x =  7 3 2 15.336706616083871
y =  7 3 2 0.0
reward =  7 

x =  7 4 3 20.280703958096897
y =  7 4 3 0.0
reward =  7 4 3 16.280703958096904
x =  7 4 3 20.280703958096904
y =  7 4 3 0.0
reward =  7 4 3 16.280703958096908
x =  7 4 3 11.675971347580198
y =  7 4 3 0.0
reward =  7 4 3 7.675971347580201
maxReward =  7 4 3 35.306965461952196
x =  7 4 4 36.04760166984484
y =  7 4 4 0.0
reward =  7 4 4 36.04760166984484
x =  7 4 4 31.89115585738294
y =  7 4 4 0.0
reward =  7 4 4 29.891155857382948
x =  7 4 4 31.89115585738294
y =  7 4 4 0.0
reward =  7 4 4 29.89115585738294
x =  7 4 4 20.75460179435546
y =  7 4 4 0.0
reward =  7 4 4 16.754601794355462
x =  7 4 4 26.02617103346108
y =  7 4 4 0.0
reward =  7 4 4 22.026171033461072
x =  7 4 4 20.75460179435546
y =  7 4 4 0.0
reward =  7 4 4 16.754601794355462
maxReward =  7 4 4 36.04760166984484
x =  8 0 0 9.326468429496009
y =  8 0 0 0.0
reward =  8 0 0 -0.6735315705039941
x =  8 0 0 16.326296009791566
y =  8 0 0 0.0
reward =  8 0 0 6.3262960097915775
x =  8 0 0 16.326296009791562
y =  8 0 0 0.0
reward = 

y =  8 1 3 0.0
reward =  8 1 3 13.336706616083875
x =  8 1 3 26.69248188660724
y =  8 1 3 0.0
reward =  8 1 3 24.69248188660724
x =  8 1 3 26.692481886607247
y =  8 1 3 0.0
reward =  8 1 3 24.69248188660725
x =  8 1 3 15.33670661608387
y =  8 1 3 0.0
reward =  8 1 3 13.336706616083877
x =  8 1 3 4.16317456885133
y =  8 1 3 0.0
reward =  8 1 3 4.16317456885133
x =  8 1 3 13.496056615265578
y =  8 1 3 0.0
reward =  8 1 3 13.496056615265578
x =  8 1 3 18.728187886559184
y =  8 1 3 0.0
reward =  8 1 3 18.728187886559184
x =  8 1 3 13.49605661526558
y =  8 1 3 0.0
reward =  8 1 3 13.49605661526558
x =  8 1 3 4.16317456885133
y =  8 1 3 0.0
reward =  8 1 3 4.16317456885133
x =  8 1 3 3.3850285425955406
y =  8 1 3 0.0
reward =  8 1 3 1.385028542595539
x =  8 1 3 8.748451593394831
y =  8 1 3 0.0
reward =  8 1 3 6.748451593394835
x =  8 1 3 8.748451593394833
y =  8 1 3 0.0
reward =  8 1 3 6.748451593394836
x =  8 1 3 3.3850285425955415
y =  8 1 3 0.0
reward =  8 1 3 1.3850285425955389
maxReward

y =  8 3 1 0.0
reward =  8 3 1 28.66183976632889
x =  8 3 1 34.66183976632886
y =  8 3 1 0.0
reward =  8 3 1 28.661839766328896
x =  8 3 1 28.213905179622213
y =  8 3 1 0.0
reward =  8 3 1 24.213905179622202
x =  8 3 1 35.39792018213991
y =  8 3 1 0.0
reward =  8 3 1 31.397920182139938
x =  8 3 1 28.213905179622216
y =  8 3 1 0.0
reward =  8 3 1 24.2139051796222
x =  8 3 1 15.336706616083871
y =  8 3 1 0.0
reward =  8 3 1 13.336706616083875
x =  8 3 1 26.69248188660724
y =  8 3 1 0.0
reward =  8 3 1 24.69248188660724
x =  8 3 1 26.692481886607247
y =  8 3 1 0.0
reward =  8 3 1 24.69248188660725
x =  8 3 1 15.33670661608387
y =  8 3 1 0.0
reward =  8 3 1 13.336706616083877
x =  8 3 1 4.16317456885133
y =  8 3 1 0.0
reward =  8 3 1 4.16317456885133
x =  8 3 1 13.496056615265578
y =  8 3 1 0.0
reward =  8 3 1 13.496056615265578
x =  8 3 1 18.728187886559184
y =  8 3 1 0.0
reward =  8 3 1 18.728187886559184
x =  8 3 1 13.49605661526558
y =  8 3 1 0.0
reward =  8 3 1 13.49605661526558
x =  

reward =  9 0 4 11.349434454927469
x =  9 0 4 3.50971390497669
y =  9 0 4 0.0
reward =  9 0 4 3.50971390497669
maxReward =  9 0 4 30.458095000323024
x =  9 1 0 23.070083333962167
y =  9 1 0 0.0
reward =  9 1 0 13.070083333962163
x =  9 1 0 28.957124321834456
y =  9 1 0 0.0
reward =  9 1 0 18.957124321834428
x =  9 1 0 23.070083333962163
y =  9 1 0 0.0
reward =  9 1 0 13.070083333962163
x =  9 1 0 14.875396949559134
y =  9 1 0 0.0
reward =  9 1 0 6.875396949559133
x =  9 1 0 25.938938936086675
y =  9 1 0 0.0
reward =  9 1 0 17.93893893608667
x =  9 1 0 25.938938936086686
y =  9 1 0 0.0
reward =  9 1 0 17.93893893608667
x =  9 1 0 14.875396949559136
y =  9 1 0 0.0
reward =  9 1 0 6.875396949559134
x =  9 1 0 4.643906041778605
y =  9 1 0 0.0
reward =  9 1 0 -1.3560939582214007
x =  9 1 0 15.117112727363121
y =  9 1 0 0.0
reward =  9 1 0 9.117112727363113
x =  9 1 0 21.01210718576151
y =  9 1 0 0.0
reward =  9 1 0 15.012107185761502
x =  9 1 0 15.117112727363123
y =  9 1 0 0.0
reward =  9 

y =  9 4 1 0.0
reward =  9 4 1 31.306965461952174
x =  9 4 1 35.30696546195218
y =  9 4 1 0.0
reward =  9 4 1 31.306965461952174
x =  9 4 1 24.87089319155532
y =  9 4 1 0.0
reward =  9 4 1 22.870893191555325
x =  9 4 1 31.19395112894398
y =  9 4 1 0.0
reward =  9 4 1 29.193951128943972
x =  9 4 1 24.870893191555314
y =  9 4 1 0.0
reward =  9 4 1 22.870893191555318
x =  9 4 1 11.675971347580196
y =  9 4 1 0.0
reward =  9 4 1 11.675971347580196
x =  9 4 1 20.280703958096897
y =  9 4 1 0.0
reward =  9 4 1 20.280703958096897
x =  9 4 1 20.280703958096904
y =  9 4 1 0.0
reward =  9 4 1 20.280703958096904
x =  9 4 1 11.675971347580198
y =  9 4 1 0.0
reward =  9 4 1 11.675971347580198
maxReward =  9 4 1 31.306965461952174
x =  9 4 2 36.04760166984484
y =  9 4 2 0.0
reward =  9 4 2 32.0476016698448
x =  9 4 2 31.89115585738294
y =  9 4 2 0.0
reward =  9 4 2 29.891155857382948
x =  9 4 2 31.89115585738294
y =  9 4 2 0.0
reward =  9 4 2 29.89115585738294
x =  9 4 2 20.75460179435546
y =  9 4 2 0

## Optimized

In [511]:
##########CONSTANTS###########
requestLambda = [3, 2, 2]
returnLambda = [3, 1, 1]

#Number of maximum cars allowed at loc1, loc2, loc3
ml1, ml2, ml3 = 20, 10, 10

gamma = 0.9
numIter = 10
tol = 0.01
Size = (ml1, ml2, ml3)

n1 = np.arange(ml1)
n2 = np.arange(ml2)
n3 = np.arange(ml3)
n4 = np.arange(-5, 6)
#=============================#

#================PreCalculations of Probability and Rewards============#
JointProbLocation1 = np.ones((ml1, ml1))*0
JointProbLocation2 = np.ones((ml2, ml2))*0
JointProbLocation3 = np.ones((ml3, ml3))*0
JointRewardLocation1 = np.ones((ml1, ml1))*0
JointRewardLocation2 = np.ones((ml2, ml2))*0
JointRewardLocation3 = np.ones((ml3, ml3))*0

ProbRequestLocation1 = np.asarray([poisson(requestLambda[0], i) for i in range(ml1)])
ProbRequestLocation2 = np.asarray([poisson(requestLambda[1], i) for i in range(ml2)])
ProbRequestLocation3 = np.asarray([poisson(requestLambda[2], i) for i in range(ml3)])

RewardRequestLocation1 = np.asarray([poisson(requestLambda[0], i)*i*10 for i in range(ml1)])
RewardRequestLocation2 = np.asarray([poisson(requestLambda[1], i)*i*10 for i in range(ml2)])
RewardRequestLocation3 = np.asarray([poisson(requestLambda[2], i)*i*10 for i in range(ml3)])

ProbReturnLocation1 = np.asarray([poisson(returnLambda[0], i) for i in range(ml1)])
ProbReturnLocation2 = np.asarray([poisson(returnLambda[1], i) for i in range(ml2)])
ProbReturnLocation3 = np.asarray([poisson(returnLambda[2], i) for i in range(ml3)])


for (currState, nextState) in itertools.product(n1, n1):  
    for request, ret in itertools.product(n1, n1):
        if(request > currState or currState-request+ret != nextState):
            continue
        JointProbLocation1[currState, nextState] += (ProbRequestLocation1[request]
                            *ProbReturnLocation1[ret])
        JointRewardLocation1[currState, nextState] += (ProbRequestLocation1[request]
                            *ProbReturnLocation1[ret])*request*10
            
for (currState, nextState) in itertools.product(n2, n2):  
    for request, ret in itertools.product(n2, n2):
        if(request > currState or currState-request+ret != nextState):
            continue
        JointProbLocation2[currState, nextState] += (ProbRequestLocation2[request]
                            *ProbReturnLocation2[ret])
        JointRewardLocation2[currState, nextState] += (ProbRequestLocation3[request]
                            *ProbReturnLocation2[ret])*request*10

for (currState, nextState) in itertools.product(n3, n3):  
    for request, ret in itertools.product(n3, n3):
        if(request > currState or currState-request+ret != nextState):
            continue
        JointProbLocation3[currState, nextState] += (ProbRequestLocation3[request]
                            *ProbReturnLocation3[ret])
        JointRewardLocation3[currState, nextState] += (ProbRequestLocation3[request]
                            *ProbReturnLocation3[ret])*request*10
#=========================_________________________========================#
        
def normInfinity(currValue, optimalValue):
    maxDiff = np.max(np.abs(currValue - optimalValue))
    return maxDiff
        

def isValidState(car1, car2, car3):
     return car1 >= 0 and car2 >=0 and car3 >=0 and car1 <ml1 and car2 < ml2 and car3 < ml3

def getCost(mv1, mv2, mv3):
    cost = np.abs(mv1)*2
    return cost

MOVES = []
for mv1, mv2, mv3 in itertools.product(n4, n4, n4):
    if(mv1 + mv2 + mv3 != 0):
        continue
    MOVES.append((mv1, mv2, mv3))


#================================ Value Iteration ===========================#
def ValueIteration(V, Policy):
    for iter1 in (range(numIter)):
        oldV = V.copy()
        print("iter: ", iter1)
        a = time.time()
        #State value
        for i, j, k in itertools.product(n1, n2, n3):
#             print(i, j, k)
            maxReward = -10000.0
            for (mv1, mv2, mv3) in MOVES:
#                 print("move1 = ", mv1, mv2, mv3)
                reward = 0.0
                car1 = i + mv1
                car2 = j + mv2
                car3 = k + mv3
#                 print(i, j, k, car1, car2, car3)
                
                if(not isValidState(car1, car2, car3)):
                    continue
                
                reward-=getCost(mv1, mv2, mv3)
                
                p1_sum = JointProbLocation1[car1, :]   #(10, )
                p2_sum = JointProbLocation2[car2, :]   #(5, )
                p3_sum = JointProbLocation3[car3, :]   #(5, )
                r1_sum = JointRewardLocation1[car1, :].reshape(ml1, 1)  #(10, 1)
                r2_sum = JointRewardLocation2[car2, :].reshape(ml2, 1)   #(5, 1)
                r3_sum = JointRewardLocation3[car3, :].reshape(ml3, 1)    #(5, 1)
                
                p1p2_sum = np.dot(p1_sum.reshape(ml1, 1), p2_sum.reshape(1, ml2))  #(ml1, ml2)
                p1p3_sum = np.dot(p1_sum.reshape(ml1, 1), p3_sum.reshape(1, ml3))  #(ml1, ml3)
                p2p3_sum = np.dot(p2_sum.reshape(ml2, 1), p3_sum.reshape(1, ml3))  #(ml2, ml3)
        
                #========immediate Reward ======================#
                r3p1p2 = np.sum(np.dot(np.tile(r3_sum, (1, ml1)), p1p2_sum))
                r2p1p3 = np.sum(np.dot(np.tile(r2_sum, (1, ml1)), p1p3_sum))
                r1p2p3 = np.sum(np.dot(np.tile(r1_sum, (1, ml2)), p2p3_sum))
                
                immediateReward = r3p1p2 + r2p1p3 + r1p2p3
                #==================================================#
                
                #============Value from next State=================#
                temp = np.tile(p2p3_sum, (ml1, 1, 1))
                futureReward = np.multiply(temp, oldV)
                temp1 = np.tile(p1_sum.reshape(ml1, 1, 1), (1, ml2, ml3))
                futureReward = np.sum(np.multiply(futureReward, temp1))
                #==================================================#
                
                reward += immediateReward+ gamma*futureReward
                if(reward > maxReward):
                    maxReward = reward
                    Policy[i, j, k] = np.array([mv1, mv2, mv3])
            V[i, j, k] = maxReward 

        if(normInfinity(V, oldV) <= 0.1):
            print("Converged in {} iteration".format(iter1 + 2))
            break
        print("Time for 1 iter = ",time.time() - a)
    return V, Policy

In [512]:
import time
Size = (ml1, ml2, ml3)
Value = np.zeros(Size)
Policy = np.zeros(Size + tuple([3]))
a = time.time()
V, P = ValueIteration(Value, Policy)
print("Time Taken = ",time.time() - a)
np.savetxt("Value1.txt", np.ravel(V).reshape(ml1*ml2, ml3), fmt = "%.2f", header = 'values')
np.savetxt("Policy1.txt", np.ravel(P).reshape(ml1*ml2*ml3, 3), fmt = "%i")
print("Value1.txt Saved")
print("Policy1.txt Saved")

iter:  0
Time for 1 iter =  9.400844097137451
iter:  1
Time for 1 iter =  9.797735691070557
iter:  2
Time for 1 iter =  9.346789360046387
iter:  3
Time for 1 iter =  9.314335584640503
iter:  4
Time for 1 iter =  9.297747135162354
iter:  5
Time for 1 iter =  9.40055513381958
iter:  6
Time for 1 iter =  9.28328800201416
iter:  7
Time for 1 iter =  9.395413637161255
iter:  8
Time for 1 iter =  9.387871980667114
iter:  9
Time for 1 iter =  9.374882221221924
Time Taken =  94.003009557724
Value1.txt Saved
Policy1.txt Saved


In [370]:
JointProbLocation1[car1, :].shape

(10,)

In [392]:
r3_sum = JointRewardLocation3[0, :].reshape(5, 1)
tmp = np.tile(r3_sum, (1, 10))
print(tmp.shape)

(5, 10)


---