In [1]:
import numpy as np
import itertools
import math
import heapq
import time
import functools
import gurobi

np.random.seed(1)
np.set_printoptions(precision=4)

In [2]:
ifPrintPlan = False
runGAMS = False
runGetLowerBound = False
runOur_oneStage = False
runOur_twoStage = False
runOur_oneStage_random = False
runOur_twoStage_random = False
runRoyset2 = True
runRoyset3 = False
runDp = False
runMyopic = False
runBrute = False

## Generate Model

In [3]:
def generateModel(locN, stageN, isPrintModel=True):
    # Generate the prior distribution vector
    priorVec = np.random.random(locN)
    priorVec /= priorVec.sum()
    if isPrintModel:
        print('The prior probability vector is:\n' + str(priorVec) + '\n')

    # Generate the Markov transition probability matrix (left stochastic matrix; each column sums up to one)
    tranProbMat = np.random.rand(locN, locN)
    tranProbMat /= tranProbMat.sum(axis=0)[None,:]
    if isPrintModel:
        print('The Markov transition probability matrix (x=Px) is:\n' + str(tranProbMat) + '\n')

    # Generate the probability vector of successful detections (between 0.5 to 1.0)
    detectVec = 1 - 0.5 * np.random.random(locN)
    if isPrintModel:
        print('The probability vector of successful detections is:\n' + str(detectVec) + '\n')

    # Compute the probability vector of missed detections
    missVec = 1 - detectVec
    if isPrintModel:
        print('The probability vector of failed detections is:\n' + str(missVec) + '\n')
    
    # Return model
    return priorVec, tranProbMat, detectVec, missVec

In [4]:
# agentN - number of agents
agentN = 10
# locN - number of locations
locN = 100
# stageN - number of stages
stageN = 15

[priorVec, tranProbMat, detectVec, missVec] = generateModel(locN, stageN, isPrintModel=False)

## Utility Functions

In [5]:
def computeObjVal(stageN, priorVec, tranProbMat, missVec, actionVec):
    vec = np.copy(priorVec)
    for s in range(stageN):
        vec = tranProbMat.dot(np.multiply(np.power(missVec, actionVec[s]), vec))
    return sum(vec)

In [6]:
def getPossibleActions(locN, agentN):
    if locN == 1:
        yield [agentN]
    else:
        for i in range(agentN + 1):
            for j in getPossibleActions(locN - 1, agentN - i):
                yield [i] + j

## Write GAMS Model

In [7]:
def writeGamsModel(agentN, locN, stageN, priorVec, tranProbMat, detectVec, missVec):
    # Open file
    f = open('GAMS_file', 'w')
    
    # Write sets
    f.write('Set k /1*' + str(locN) + '/;\n')
    f.write('Alias (k,i,j);\n\n')
    f.write('Set t /1*' + str(stageN) + '/;\n')
    f.write('Alias (t,s);\n')
    f.write('Sets\n')
    f.write('\ttb(t) base period\n')
    f.write('\ttn(t) non-base period\n')
    f.write('\ttt(t) terminal period;\n')
    f.write('tb(t) = (ord(t) = 1);\n')
    f.write('tn(t) = (ord(t) > 1);\n')
    f.write('tt(t) = (ord(t) = card(t));\n\n')
    
    # Write parameters
    f.write('Parameters\n')
    f.write('\tM /' + str(agentN) + '/\n')
    f.write('\tprior(k)\n')
    for k in range(locN):
        if k == 0:
            f.write('\t/ ' + str(k+1) + '=' + str(priorVec[k]) + ',\n')
        elif k == locN - 1:
            f.write('\t  ' + str(k+1) + '=' + str(priorVec[k]) + ' /\n')
        else:
            f.write('\t  ' + str(k+1) + '=' + str(priorVec[k]) + ',\n')
    f.write('\tmiss(k)\n')
    for k in range(locN):
        if k == 0:
            f.write('\t/ ' + str(k+1) + '=' + str(missVec[k]) + ',\n')
        elif k == locN - 1:
            f.write('\t  ' + str(k+1) + '=' + str(missVec[k]) + ' /\n')
        else:
            f.write('\t  ' + str(k+1) + '=' + str(missVec[k]) + ',\n')
    f.write('\tP(i,j)\n')
    for i in range(locN):
        for j in range(locN):
            if i == 0 and j == 0:
                f.write('\t/ ' + str(i+1) + '.' + str(j+1) + '=' + str(tranProbMat[i][j]) + ',\n')
            elif i == locN-1 and j == locN-1:
                f.write('\t  ' + str(i+1) + '.' + str(j+1) + '=' + str(tranProbMat[i][j]) + ' /;\n')
            else:
                f.write('\t  ' + str(i+1) + '.' + str(j+1) + '=' + str(tranProbMat[i][j]) + ',\n')
    f.write('\n')
    
    # Write variables
    f.write('Variables\n')
    f.write('\tf\n')
    f.write('\ta(t,k)\n')
    f.write('\tinTerm(t,k);\n\n')
    f.write('Integer variables a;\n\n')
    
    # Write equations
    f.write('Equations obj, eq1(t,i), eq1_base(t,i), eq2(t) ;\n')
    f.write('obj(tt).. f =e= Sum(i, exp(inTerm(tt,i)));\n')
    f.write('eq1(tn(t),i).. exp(inTerm(t,i)) =e= Sum(j, P(i,j) * (miss(j) ** a(t,j)) * exp(inTerm(t-1,j)));\n')
    f.write('eq1_base(tb,i).. exp(inTerm(tb,i)) =e= Sum(j, P(i,j) * (miss(j) ** a(tb,j)) * prior(j));\n')
    #f.write('obj(tt).. f =e= Sum(i, inTerm(tt,i));\n')
    #f.write('eq1(tn(t),i).. inTerm(t,i) =e= Sum(j, P(i,j) * (miss(j) ** a(t,j)) * inTerm(t-1,j));\n')
    #f.write('eq1_base(tb,i).. inTerm(tb,i) =e= Sum(j, P(i,j) * (miss(j) ** a(tb,j)) * prior(j));\n')
    f.write('eq2(t).. Sum(k, a(t,k)) =e= M ;\n\n')
    
    # Write model
    f.write('Model search_problem /all/ ;\n\n')
    
    # Write time limit
    f.write('search_problem.reslim = 3600;\n\n')
    
    # Write enable option file
    f.write('search_problem.optfile = 1 ;\n\n')
    
    # Write solve
    f.write('Solve search_problem using minlp minimizing f ;\n')
    
    # Close file
    f.close()

In [8]:
if runGAMS:
    writeGamsModel(agentN, locN, stageN, priorVec, tranProbMat, detectVec, missVec)

## Compute lower bound

In [9]:
def solveRelaxedSubproblem(agentN, locN, stageN, missVec, q):
    action = [ 0.0 ] * locN
    lamb = 0
    for k in range(locN):
        lamb = max(lamb, - q[k] * math.log(missVec[k]))
    left = 0.0
    right = lamb
    itr = 1
    while True:
        # Solve for action with current lamb
        for k in range(locN):
            if (q[k] * math.log(missVec[k]) + lamb >= 0):
                action[k] = 0;
            else:
                tmp = - lamb / (q[k] * math.log(missVec[k]))
                action[k] = math.log(tmp, missVec[k])
        if math.fabs(sum(action) - agentN) <= 1e-13:
            break
        elif sum(action) > agentN: # Increase lamb thus decrease action
            tmp = lamb
            lamb = (lamb + right) / 2.0
            left = tmp
        else: # Decrease lamb thus increase action
            tmp = lamb
            lamb = (left + lamb) / 2.0
            right = tmp
        itr += 1
    return action

In [10]:
def getLowerBound(agentN, locN, stageN, priorVec, tranProbMat, detectVec, missVec):    
    # Initialize the decision variables (indexes of locations chosen at each stage)
    # Alert: if use [[0]*locN]*stageN, then we have stageN lists referencing to the same list
    actionVec = [ [0] * locN for s in range(stageN) ] 
    
    # Initialize qR
    qR = np.empty([stageN,locN])
    qR[0,:] = priorVec

    # Initialize qL
    qL = np.ones((stageN,locN))
    
    # Solve the relaxed problem
    prevObjVal = 1.0
    cycleN = 0
    while True:
        cycleN += 1
        # Forward recursion
        for s in range(stageN-1): # s=0,...,T-2
            # Solve subproblem
            q = [ a*b for a,b in zip(qL[s,:],qL[s,:]) ]
            actionVec[s] = solveRelaxedSubproblem(agentN, locN, stageN, missVec, q)
            # update qR
            tmp_q = np.copy(qR[s,:])  # must use np.copy, otherwise they reference the same array
            tmp_Is = np.power(missVec, actionVec[s])
            qR[s+1,:] = tranProbMat.dot(np.multiply(tmp_Is, tmp_q))
        # Backward recursion
        for s in reversed(range(1,stageN)): # s=T-1,...,1
            # Solve subproblem
            q = [ a*b for a,b in zip(qL[s,:],qL[s,:]) ]
            actionVec[s] = solveRelaxedSubproblem(agentN, locN, stageN, missVec, q)
            # update qL
            tmp_q = np.copy(qL[s,:])  # must use np.copy, otherwise they reference the same array
            tmp_Is = np.power(missVec, actionVec[s])
            qL[s-1,:] = tranProbMat.T.dot(np.multiply(tmp_Is, tmp_q)) # remember to transpose the transition matrix

        # Compute new objective value
        curObjVal = computeObjVal(stageN, priorVec, tranProbMat, missVec, actionVec)
        # Decide if reaches the optimum
        if curObjVal >= prevObjVal:
            break
        prevObjVal = curObjVal
    return prevObjVal

In [11]:
if runGetLowerBound:
    optLowerBound = getLowerBound(agentN, locN, stageN, priorVec, tranProbMat, detectVec, missVec)

## Forward-Backward Iterative Algorithm

In [13]:
def forBackIter(agentN, locN, stageN, priorVec, tranProbMat, detectVec, missVec, relaxedRecur=True): 
    # Initialize the decision variables (indexes of locations chosen at each stage)
    # Alert: if use [[0]*locN]*stageN, then we have stageN lists referencing to the same list
    actionVec = [ [0] * locN for s in range(stageN) ] 
    
    # Initialize qR
    qR = np.empty([stageN,locN])
    qR[0,:] = priorVec

    # Initialize qL
    qL = np.ones((stageN,locN))
    
    # Relaxed Forward-Backward Recursions
    if relaxedRecur == True:
        prevObjVal = 1.0
        cycleN = 0
        while True:
            cycleN += 1
            # Forward recursion
            print('\n==== Relaxed forward recursion starts ====\n')
            for s in range(stageN-1): # s=0,...,T-2
                print('\n---Stage ' + str(s) + '---\n')
                # Solve subproblem
                q = [ a*b for a,b in zip(qL[s,:],qL[s,:]) ]
                actionVec[s] = solveRelaxedSubproblem(agentN, locN, stageN, missVec, q)
                # Update qR
                tmp_q = np.copy(qR[s,:])  # must use np.copy, otherwise they reference the same array
                tmp_Is = np.power(missVec, actionVec[s])
                qR[s+1,:] = tranProbMat.dot(np.multiply(tmp_Is, tmp_q))
            # Backward recursion
            print('\n==== Relaxed backward recursion starts ====\n')
            for s in reversed(range(1,stageN)): # s=T-1,...,1
                print('\n---Stage ' + str(s) + '---\n')
                # Solve subproblem
                q = [ a*b for a,b in zip(qL[s,:],qL[s,:]) ]
                actionVec[s] = solveRelaxedSubproblem(agentN, locN, stageN, missVec, q)
                # Update qL
                tmp_q = np.copy(qL[s,:])  # must use np.copy, otherwise they reference the same array
                tmp_Is = np.power(missVec, actionVec[s])
                qL[s-1,:] = tranProbMat.T.dot(np.multiply(tmp_Is, tmp_q)) # remember to transpose the transition matrix

            # Compute new objective value
            curObjVal = computeObjVal(stageN, priorVec, tranProbMat, missVec, actionVec)
            print('\nThe objective value becomes: %.4f' % (curObjVal) + '\n')
            # Decide if the relaxed recursions terminates
            if curObjVal >= prevObjVal:
                break
            prevObjVal = curObjVal
      
    # Integer Forward-Backward Recursions
    prevObjVal = 1.0
    cycleN = 0
    isPrintAction = False
    while True:
        cycleN += 1
        # Forward recursion
        print('\n==== Integer forward recursion starts ====\n')
        for s in range(stageN-1): # s=0,...,T-2
            print('\n---Stage ' + str(s) + '---\n')
            # Solve subproblem
            action = [0] * locN
            heap = []
            for k in range(locN):
                heapq.heappush(heap, (- qL[s,k] * qR[s,k] * detectVec[k], k))
            for m in range(agentN):
                ele = heapq.heappop(heap)
                action[ele[1]] += 1
                heapq.heappush(heap, (ele[0] * missVec[ele[1]], ele[1]))
            actionVec[s] = action
            if isPrintAction:
                print('\nThe actions become:\n')
                print(actionVec)
            # Update qR
            tmp_q = np.copy(qR[s,:])  # must use np.copy, otherwise they reference the same array
            tmp_Is = np.power(missVec, actionVec[s])
            qR[s+1,:] = tranProbMat.dot(np.multiply(tmp_Is, tmp_q))
        # Backward recursion
        print('\n==== Integer backward recursion starts ====\n')
        for s in reversed(range(1,stageN)): # s=T-1,...,1
            print('\n---Stage ' + str(s) + '---\n')
            # Solve subproblem
            action = [0] * locN
            heap = []
            for k in range(locN):
                heapq.heappush(heap, (- qL[s,k] * qR[s,k] * detectVec[k], k))
            for m in range(agentN):
                ele = heapq.heappop(heap)
                action[ele[1]] += 1
                heapq.heappush(heap, (ele[0] * missVec[ele[1]], ele[1]))
            actionVec[s] = action
            if isPrintAction:
                print('\nThe actions become:\n')
                print(actionVec)
            # Update qL
            tmp_q = np.copy(qL[s,:])  # must use np.copy, otherwise they reference the same array
            tmp_Is = np.power(missVec, actionVec[s])
            qL[s-1,:] = tranProbMat.T.dot(np.multiply(tmp_Is, tmp_q)) # remember to transpose the transition matrix
        
        # Compute new objective value
        curObjVal = computeObjVal(stageN, priorVec, tranProbMat, missVec, actionVec)
        print('\nThe objective value becomes: %.4f' % (curObjVal) + '\n')
        # Decide if the algorithm terminates (must run more than one cycle)
        if curObjVal >= prevObjVal and cycleN > 1:
            return [curObjVal, actionVec, cycleN]
        prevObjVal = curObjVal
    return [curObjVal, actionVec, cycleN]

In [14]:
if runOur_oneStage:
    start_time_our_oneStage = time.time()
    [optObjVal_our_oneStage, optActionVec_our_oneStage, cycleN_our_oneStage] = \
        forBackIter(agentN, locN, stageN, priorVec, tranProbMat, \
                    detectVec, missVec, relaxedRecur=False)
    time_elapsed_our_oneStage = time.time() - start_time_our_oneStage

In [15]:
if runOur_twoStage:
    start_time_our_twoStage = time.time()
    [optObjVal_our_twoStage, optActionVec_our_twoStage, cycleN_our_twoStage] = \
        forBackIter(agentN, locN, stageN, priorVec, tranProbMat, \
                    detectVec, missVec, relaxedRecur=True)
    time_elapsed_our_twoStage = time.time() - start_time_our_twoStage

## Forward-Backward Iterative Algorithm (Random)

In [16]:
def forBackIterRandom(agentN, locN, stageN, priorVec, tranProbMat, detectVec, missVec, relaxedRecur=True, randomRepetitions=10): 
    # Initialize the decision variables (indexes of locations chosen at each stage)
    # Alert: if use [[0]*locN]*stageN, then we have stageN lists referencing to the same list
    actionVec = [ [0] * locN for s in range(stageN) ] 

    # Initialize qR
    qR = np.empty([stageN,locN])
    qR[0,:] = priorVec

    # Initialize qL
    qL = np.ones((stageN,locN))
    
    # Relaxed Forward-Backward Recursions
    if relaxedRecur == True:
        prevObjVal = 1.0
        cycleN = 0
        isPrintQ = False
        while True:
            cycleN += 1
            # Forward recursion
            print('\n==== Relaxed forward recursion starts ====\n')
            for s in range(stageN-1): # s=0,...,T-2
                #print('\n---Stage ' + str(s) + '---\n')
                # Solve subproblem
                q = [ a*b for a,b in zip(qL[s,:],qL[s,:]) ]
                actionVec[s] = solveRelaxedSubproblem(agentN, locN, stageN, missVec, q)
                # Update qR
                tmp_q = np.copy(qR[s,:])  # must use np.copy, otherwise they reference the same array
                tmp_Is = np.power(missVec, actionVec[s])
                qR[s+1,:] = tranProbMat.dot(np.multiply(tmp_Is, tmp_q))
            # Backward recursion
            print('\n==== Relaxed backward recursion starts ====\n')
            for s in reversed(range(1,stageN)): # s=T-1,...,1
                #print('\n---Stage ' + str(s) + '---\n')
                # Solve subproblem
                q = [ a*b for a,b in zip(qL[s,:],qL[s,:]) ]
                actionVec[s] = solveRelaxedSubproblem(agentN, locN, stageN, missVec, q)
                # Update qL
                tmp_q = np.copy(qL[s,:])  # must use np.copy, otherwise they reference the same array
                tmp_Is = np.power(missVec, actionVec[s])
                qL[s-1,:] = tranProbMat.T.dot(np.multiply(tmp_Is, tmp_q)) # remember to transpose the transition matrix

            # Compute new objective value
            curObjVal = computeObjVal(stageN, priorVec, tranProbMat, missVec, actionVec)
            print('\nThe objective value becomes: %.4f' % (curObjVal) + '\n')
            # Decide if the relaxed recursions terminates
            if curObjVal >= prevObjVal:
                break
            prevObjVal = curObjVal
        actionDistVec = np.array(actionVec)
        row_sums = actionDistVec.sum(axis=1)
        actionDistVec = actionDistVec / row_sums[:, np.newaxis]
    else:
        actionDistVec = [ [1.0 / locN] * locN ] * stageN
    
    optObjVal = 1.0
    for rep in range(randomRepetitions):
        # Randomly generate intial actionVec
        actionVec = [ [0] * locN ] * stageN
        for s in range(stageN):
            actionVec[s] = np.random.multinomial(agentN, actionDistVec[s])
        
        # Compute qL (no need to compute qR here)
        for s in reversed(range(1, stageN)):
            tmp_q = np.copy(qL[s,:])
            tmp_Is = np.power(missVec, actionVec[s])
            qL[s-1,:] = tranProbMat.T.dot(np.multiply(tmp_Is, tmp_q))
        
        # Integer Forward-Backward Recursions
        prevObjVal = 1.0
        cycleN = 0
        isPrintAction = False
        while True:
            cycleN += 1
            # Forward recursion
            print('\n==== Rep ' + str(rep) + ': Integer forward recursion starts ====\n')
            for s in range(stageN-1): # s=0,...,T-2
                #print('\n---Stage ' + str(s) + '---\n')
                # Solve subproblem
                action = [0] * locN
                heap = []
                for k in range(locN):
                    heapq.heappush(heap, (- qL[s,k] * qR[s,k] * detectVec[k], k))
                for m in range(agentN):
                    ele = heapq.heappop(heap)
                    action[ele[1]] += 1
                    heapq.heappush(heap, (ele[0] * missVec[ele[1]], ele[1]))
                actionVec[s] = action
                if isPrintAction:
                    print('\nThe actions become:\n')
                    print(actionVec)
                # Update qR
                tmp_q = np.copy(qR[s,:])  # must use np.copy, otherwise they reference the same array
                tmp_Is = np.power(missVec, actionVec[s])
                qR[s+1,:] = tranProbMat.dot(np.multiply(tmp_Is, tmp_q))
            # Backward recursion
            print('\n==== Rep ' + str(rep) + ': Integer backward recursion starts ====\n')
            for s in reversed(range(1,stageN)): # s=T-1,...,1
                #print('\n---Stage ' + str(s) + '---\n')
                # Solve subproblem
                action = [0] * locN
                heap = []
                for k in range(locN):
                    heapq.heappush(heap, (- qL[s,k] * qR[s,k] * detectVec[k], k))
                for m in range(agentN):
                    ele = heapq.heappop(heap)
                    action[ele[1]] += 1
                    heapq.heappush(heap, (ele[0] * missVec[ele[1]], ele[1]))
                actionVec[s] = action
                if isPrintAction:
                    print('\nThe actions become:\n')
                    print(actionVec)
                # Update qL
                tmp_q = np.copy(qL[s,:])  # must use np.copy, otherwise they reference the same array
                tmp_Is = np.power(missVec, actionVec[s])
                qL[s-1,:] = tranProbMat.T.dot(np.multiply(tmp_Is, tmp_q)) # remember to transpose the transition matrix

            # Compute new objective value
            curObjVal = computeObjVal(stageN, priorVec, tranProbMat, missVec, actionVec)
            print('\nThe objective value becomes: %.4f' % (curObjVal) + '\n')
            # Decide if the algorithm terminates (must run more than one cycle)
            if curObjVal >= prevObjVal and cycleN > 1:
                break
            prevObjVal = curObjVal
        if curObjVal < optObjVal:
            optObjVal = curObjVal
            optActionVec = actionVec
    return [optObjVal, optActionVec, randomRepetitions]

In [17]:
if runOur_oneStage_random:
    start_time_our_oneStage_random = time.time()
    [optObjVal_our_oneStage_random, optActionVec_our_oneStage_random, randomRepetitionN_our_oneStage_random] = \
        forBackIterRandom(agentN, locN, stageN, priorVec, tranProbMat, \
                          detectVec, missVec, relaxedRecur=False, randomRepetitions=20)
    time_elapsed_our_oneStage_random = time.time() - start_time_our_oneStage_random

In [18]:
if runOur_twoStage_random:
    start_time_our_twoStage_random = time.time()
    [optObjVal_our_twoStage_random, optActionVec_our_twoStage_random, randomRepetitionN_our_twoStage_random] = \
        forBackIterRandom(agentN, locN, stageN, priorVec, tranProbMat, \
                          detectVec, missVec, relaxedRecur=True, randomRepetitions=20)
    time_elapsed_our_twoStage_random = time.time() - start_time_our_twoStage_random

## Cutting-Plane Algorithm 2

### In paper "Route Optimization for Multiple Searchers" by Royset and Sato

In [19]:
def roysetSatoAlg2(agentN, locN, stageN, priorVec, tranProbMat, detectVec, missVec, delta=1e-10):
    # Build Gurobi model
    model = gurobi.Model("Royset-Sato Alg 2")
    # Add variables
    a = {}
    for s in range(stageN):
        for k in range(locN):
            a[s,k] = model.addVar(obj=0, vtype='I', lb=0, ub=agentN, name='a'+str(s)+','+str(k))
    xi = model.addVar(obj=1.0, name='xi')
    # Add constraints
    for s in range(stageN):
        coef = [ 1 for k in range(locN) ]
        var = [ a[s,k] for k in range(locN) ]
        model.addConstr(gurobi.LinExpr(coef,var), '=', agentN)
    # Do not print Gurobi messages
    model.setParam('OutputFlag', False)
    # Update model
    model.update()

    # Cutting-plane
    total_time = 0.0  # total time used
    start_time = time.time()  # start time
    hi = 1.0  # upper bound of optimal value
    lo = 0.0  # lower bound of optimal value
    prev_a = [ [0] * locN for s in range(stageN) ]  # initialize T-period actions
    delta_i = 0.0  # expected MIP optimality gap for problem P_i
    delta_i_half_flag = False  # if lower bound has not changed after one iteration, halve delta_i
    itr = 1  # iteration counter
    while True:
        # Compute current value and update upper bound of optimal value
        f_a = computeObjVal(stageN, priorVec, tranProbMat, missVec, prev_a)
        if f_a < hi:
            hi = f_a
        
        # Decide if terminates
        if hi - lo <= delta * lo:
            optActionVec = prev_a
            break
        
        # Compute qR for prev_a
        qR = np.empty([stageN,locN])
        qR[0,:] = priorVec
        for s in range(stageN-1): # s=0,...,T-2
            tmp_q = np.copy(qR[s,:])  # must use np.copy, otherwise they reference the same array
            tmp_Is = np.power(missVec, prev_a[s])
            qR[s+1,:] = tranProbMat.dot(np.multiply(tmp_Is, tmp_q))
        
        # Compute qL for prev_a
        qL = np.ones((stageN,locN))
        for s in reversed(range(1,stageN)): # s=T-1,...,1
            tmp_q = np.copy(qL[s,:])  # must use np.copy, otherwise they reference the same array
            tmp_Is = np.power(missVec, prev_a[s])
            qL[s-1,:] = tranProbMat.T.dot(np.multiply(tmp_Is, tmp_q)) # remember to transpose the transition matrix
        
        # Add one more constraint
        constr_lhs = f_a
        for s in range(stageN):
            for k in range(locN):
                constr_lhs += qL[s][k] * qR[s][k] * (missVec[k] ** prev_a[s][k]) \
                              * (-detectVec[k]) * (a[s,k] - prev_a[s][k])
        model.addConstr(constr_lhs, '<=', xi)
        
        '''
        # Set MIP optimality gap = (Upper - Lower) / Upper
        if itr == 2:
            delta_i = min(0.03, (hi - lo) / lo)
        elif itr > 2:
            if delta_i < 1e-10:
                delta_i = 0.0
            else:
                if delta_i_half_flag:
                    delta_i /= 2.0
                delta_i = min(delta_i, 0.03)
                delta_i = min(delta_i, (hi - lo) / lo)
        model.setParam("MIPGap", delta_i / (1 + delta_i));
        '''
        
        # Update model and solve
        model.update()
        model.optimize()
        
        # Retrieve Gurobi outputs
        prev_a = [ [0] * locN for s in range(stageN) ]
        for s in range(stageN):
            for k in range(locN):
                prev_a[s][k] = model.getVarByName('a'+str(s)+','+str(k)).X
        MIP_solution = model.getVarByName('xi').X
        if MIP_solution > lo:
            lo = MIP_solution
            delta_i_half_flag = False
        else:
            delta_i_half_flag = True  # if lower bound does not change, halve delta_i in next iteration
        
        # Update total time
        total_time = time.time() - start_time
        
        # Compute relative optimality gap:
        if lo != 0.0:
            g_i = (hi - lo) / lo
        else:
            g_i = math.inf
        
        # Print debugging info
        print("iter=%s, rela_gap=%.5f, hi=%.10f, lo=%.10f, fun_value=%.10f, MIP_solution=%.10f, \
               delta_i=%.3e, time_taken=%.1f" \
               % (itr, g_i, hi, lo, f_a, MIP_solution, \
                  delta_i, total_time))
        
        # Decide if terminates
        if hi - lo <= delta * lo:
            optActionVec = prev_a
            break
        
        itr += 1
    optObjVal = computeObjVal(stageN, priorVec, tranProbMat, missVec, optActionVec)
    return [optObjVal, optActionVec]

In [20]:
if runRoyset2:
    start_time_royset2 = time.time()
    [optObjVal_royset2, optActionVec_royset2] = \
        roysetSatoAlg2(agentN, locN, stageN, priorVec, tranProbMat, \
                       detectVec, missVec, delta=1e-10)
    time_elapsed_royset2 = time.time() - start_time_royset2

iter=1, rela_gap=inf, hi=1.0000000000, lo=0.0000000000, fun_value=1.0000000000, MIP_solution=0.0000000000,                delta_i=0.000e+00, time_taken=0.1
iter=2, rela_gap=inf, hi=0.8354690882, lo=0.0000000000, fun_value=0.8354690882, MIP_solution=0.0000000000,                delta_i=0.000e+00, time_taken=0.2
iter=3, rela_gap=inf, hi=0.8332939528, lo=0.0000000000, fun_value=0.8332939528, MIP_solution=0.0000000000,                delta_i=0.000e+00, time_taken=0.4
iter=4, rela_gap=inf, hi=0.8167298621, lo=0.0000000000, fun_value=0.8167298621, MIP_solution=0.0000000000,                delta_i=0.000e+00, time_taken=0.5
iter=5, rela_gap=inf, hi=0.8167298621, lo=0.0000000000, fun_value=0.8257891151, MIP_solution=0.0000000000,                delta_i=0.000e+00, time_taken=0.7
iter=6, rela_gap=inf, hi=0.8167298621, lo=0.0000000000, fun_value=0.8258268314, MIP_solution=0.0000000000,                delta_i=0.000e+00, time_taken=0.8
iter=7, rela_gap=inf, hi=0.8167298621, lo=0.0000000000, fun_valu

## Cutting-Plane Algorithm 3

### In paper "Route Optimization for Multiple Searchers" by Royset and Sato

In [21]:
def roysetSatoAlg3(agentN, locN, stageN, priorVec, tranProbMat, detectVec, missVec, delta=1e-10):
    # Build Gurobi model
    model = gurobi.Model("Royset-Sato Alg 3")
    # Add variables
    a = {}
    for s in range(stageN):
        for k in range(locN):
            a[s,k] = model.addVar(obj=0, vtype='C', lb=0, ub=agentN, name='a'+str(s)+','+str(k))  # set a as continuous and 0<=a(s,k)<=M
    xi = model.addVar(obj=1.0, name='xi')
    # Add constraints
    for s in range(stageN):
        coef = [ 1 for k in range(locN) ]
        var = [ a[s,k] for k in range(locN) ]
        model.addConstr(gurobi.LinExpr(coef,var), '=', agentN)
    # Do not print Gurobi messages
    model.setParam('OutputFlag', False)
    # Update model
    model.update()

    # Cutting-plane
    total_time = 0.0  # total time used
    start_time = time.time()  # start time
    hi = 1.0  # upper bound of optimal value
    lo = 0.0  # lower bound of optimal value
    prev_a = [ [0] * locN for s in range(stageN) ]  # initialize T-period actions
    delta_i = 0.0  # expected MIP optimality gap for problem P_i
    delta_i_half_flag = False  # if lower bound has not changed after one iteration, halve delta_i
    hasSwap_relax_to_int_flag = False  # if has just swapped from continuous relaxtion to integer problem
                                       # if not done yet, need to do a.setAttr('vtype', 'I')
    itr = 1  # iteration counter
    while True:
        # Compute current value and update upper bound of optimal value
        f_a = computeObjVal(stageN, priorVec, tranProbMat, missVec, prev_a)
        if f_a < hi:
            hi = f_a
            optActionVec = prev_a
        
        # Decide if terminates
        if hi - lo <= delta * lo:
            break
        
        # Compute relative optimality gap:
        if lo != 0.0:
            g_i = (hi - lo) / lo
        else:
            g_i = math.inf
        
        # Compute qR for prev_a
        qR = np.empty([stageN,locN])
        qR[0,:] = priorVec
        for s in range(stageN-1): # s=0,...,T-2
            tmp_q = np.copy(qR[s,:])  # must use np.copy, otherwise they reference the same array
            tmp_Is = np.power(missVec, prev_a[s])
            qR[s+1,:] = tranProbMat.dot(np.multiply(tmp_Is, tmp_q))

        # Compute qL for prev_a
        qL = np.ones((stageN,locN))
        for s in reversed(range(1,stageN)): # s=T-1,...,1
            tmp_q = np.copy(qL[s,:])  # must use np.copy, otherwise they reference the same array
            tmp_Is = np.power(missVec, prev_a[s])
            qL[s-1,:] = tranProbMat.T.dot(np.multiply(tmp_Is, tmp_q)) # remember to transpose the transition matrix

        # Add one more constraint
        constr_lhs = f_a
        if g_i > 1e-3 and total_time < 600:  # solve continuous relaxation first
            for s in range(stageN):
                for k in range(locN):
                    constr_lhs += qL[s][k] * qR[s][k] * (missVec[k] ** prev_a[s][k]) \
                                  * math.log(missVec[k]) * (a[s,k] - prev_a[s][k])
        else:
            # If this is the first time swapping from relaxed to integer, reset variable attribute
            if not hasSwap_relax_to_int_flag:
                for s in range(stageN):
                    for k in range(locN):
                        a[s,k].setAttr('vtype', 'I')
                print("iter=%s, rela_gap=%.6f, hi=%s, lo=%s, delta_i=%s, time_taken=%.2f" \
                      % (itr, g_i, hi, lo, delta_i, total_time))
                print("========== Swapping from relaxed to integer ==========")
                hasSwap_relax_to_int_flag = True
            for s in range(stageN):
                for k in range(locN):
                    constr_lhs += qL[s][k] * qR[s][k] * (missVec[k] ** prev_a[s][k]) \
                                  * (-detectVec[k]) * (a[s,k] - prev_a[s][k])
        model.addConstr(constr_lhs, '<=', xi)
        
        # Set Gurobi MIP optimality gap
        if itr == 2:
            delta_i = min(0.03, g_i / 3)
        elif itr > 2:
            if delta_i_half_flag:
                delta_i /= 2.0
            delta_i = min(delta_i, 0.03)
            delta_i = min(delta_i, g_i / 3)
        gurobiMipGap = delta_i / (1 + delta_i)  # Gurobi_optimality_gap = (hi-lo)/hi, thus need conversion
        model.setParam("MIPGap", gurobiMipGap);
        
        # Update and solve model
        model.update()
        model.optimize()
        
        # Retrieve Gurobi outputs
        prev_a = [ [0] * locN for s in range(stageN) ]
        for s in range(stageN):
            for k in range(locN):
                prev_a[s][k] = model.getVarByName('a'+str(s)+','+str(k)).X
        MIP_solution = model.getVarByName('xi').X
        if MIP_solution > lo:
            lo = MIP_solution
            delta_i_half_flag = False
        else:
            delta_i_half_flag = True  # if lower bound does not change, halve delta_i in next iteration
        
        # Update total time
        total_time = time.time() - start_time
        
        # Compute relative optimality gap:
        if lo != 0.0:
            g_i = (hi - lo) / lo
        else:
            g_i = math.inf
        
        # Print debugging info
        if not hasSwap_relax_to_int_flag:
            if itr % 50 == 1:
                print("iter=%s, rela_gap=%.6f, hi=%.10f, lo=%.10f, delta_i=%.4e, time_taken=%.1f" \
                      % (itr, g_i, hi, lo, delta_i, total_time))
        else:
            print("iter=%s, rela_gap=%.6f, hi=%.10f, lo=%.10f, delta_i=%.4e, time_taken=%.1f" \
                  % (itr, g_i, hi, lo, delta_i, total_time))
        
        # Decide if terminates
        if hi - lo <= delta * lo:
            break
        
        itr += 1
        if total_time > 900:
            break
    optObjVal = computeObjVal(stageN, priorVec, tranProbMat, missVec, optActionVec)
    return [optObjVal, optActionVec]

In [22]:
if runRoyset3:
    start_time_royset3 = time.time()
    [optObjVal_royset3, optActionVec_royset3] = \
        roysetSatoAlg3(agentN, locN, stageN, priorVec, tranProbMat, \
                       detectVec, missVec, delta=1e-10)
    time_elapsed_royset3 = time.time() - start_time_royset3

## POMDP (Dynamic Programming)

### In paper "The Optimal Search for a Moving Target When the Search Path Is Constrained" by James N. Eagle

In [23]:
def eaglePomdp(agentN, locN, stageN, priorVec, tranProbMat, detectVec, missVec, isPrune=True): 
    actionVecSet= [[]]
    alphaVecSet = []
    alphaVecSet.append(np.zeros(locN))
    isPrintMiddleSteps = False
    for s in reversed(range(stageN)):
        print('\n---Stage ' + str(s) + '---\n')
        newActionVecSet = []
        newAlphaVecSet = []
        for i, alphaVec in enumerate(alphaVecSet):
            for action in getPossibleActions(locN, agentN):
                # update alpha vectors
                tmpAlphaVec = np.copy(alphaVec)
                identityMat = np.identity(locN)
                for k in range(locN):
                    identityMat[k, k] = missVec[k] ** action[k]
                newAlphaVec = identityMat.dot(tranProbMat.T).dot(tmpAlphaVec)
                extraVec = np.zeros(locN)
                for k in range(locN):
                    if action[k] > 0:
                        extraVec[k] = detectVec[k] ** action[k]
                newAlphaVec += extraVec
                newAlphaVecSet.append(newAlphaVec)
                # update associated actions
                newActionVec = list(actionVecSet[i])
                newActionVec.insert(0, action)
                newActionVecSet.append(newActionVec)        
        actionVecSet = newActionVecSet
        alphaVecSet = newAlphaVecSet
        # Prune dominated alpha vectors
        if isPrune:
            # Get indices of alpha vectors to be pruned
            pruneIndices = []
            for i in range(len(alphaVecSet)):
                alpha_i = alphaVecSet[i]
                for j in range(len(alphaVecSet)):
                    if j == i:
                        continue
                    alpha_j = alphaVecSet[j]
                    if functools.reduce(lambda x, y: x*y, np.less(alpha_i, alpha_j)):
                        pruneIndices.append(i)
                        break
            # Prune the dominated alpha vectors
            for i in sorted(pruneIndices, reverse=True):
                del alphaVecSet[i]
                del actionVecSet[i]
        if isPrintMiddleSteps:
            print('\nThe alpha vectors are:\n')
            print(alphaVecSet)
            print('\nThe actions are:\n')
            print(actionVecSet)
            print('\n')

    optObjVal = 0.0
    for i, alphaVec in enumerate(alphaVecSet):
        tmpObjVal = sum(np.multiply(alphaVec, priorVec))
        if tmpObjVal > optObjVal:
            optObjVal = tmpObjVal
            optActions = actionVecSet[i]
    optObjVal = 1 - optObjVal
    return [optObjVal, optActions]

In [24]:
if runDp:
    start_time_dp = time.time()
    [optObjVal_dp, optActions_dp] = eaglePomdp(agentN, locN, stageN, priorVec, tranProbMat, detectVec, missVec, isPrune=True)
    time_elapsed_dp = time.time() - start_time_dp

## Myopic Policy

In [25]:
def myopic(agentN, locN, stageN, priorVec, tranProbMat, detectVec, missVec):
    # Alert: if use [[0]*locN]*stageN, then we have stageN lists referencing to the same list
    actionVec = [ [0] * locN for s in range(stageN) ] 

    # Initialize qR
    qR = np.empty([stageN,locN])
    qR[0,:] = priorVec
    
    # Same as forward recursion
    for s in range(stageN): 
        # Solve subproblem
        action = [0] * locN
        heap = []
        for k in range(locN):
            heapq.heappush(heap, (- qR[s,k] * detectVec[k], k))
        for m in range(agentN):
            ele = heapq.heappop(heap)
            action[ele[1]] += 1
            heapq.heappush(heap, (ele[0] * missVec[ele[1]], ele[1]))
        actionVec[s] = action
        # update qR
        if s < stageN - 1:
            tmp_q = np.copy(qR[s,:])  # must use np.copy, otherwise they reference the same array
            tmp_Is = np.power(missVec, actionVec[s])
            qR[s+1,:] = tranProbMat.dot(np.multiply(tmp_Is, tmp_q))
    optObjVal = computeObjVal(stageN, priorVec, tranProbMat, missVec, actionVec)
    return [optObjVal, actionVec]

In [26]:
if runMyopic:
    start_time_myopic = time.time()
    [optObjVal_myopic, optActions_myopic] = myopic(agentN, locN, stageN, priorVec, tranProbMat, detectVec, missVec)
    time_elapsed_myopic = time.time() - start_time_myopic

## Brute Force

In [27]:
def bruteForce(agentN, locN, stageN, priorVec, tranProbMat, detectVec, missVec):
    optObjVal = 1.0
    for actionVec in itertools.product(getPossibleActions(locN, agentN), repeat=stageN):
        tmp = computeObjVal(stageN, priorVec, tranProbMat, missVec, actionVec)
        if tmp < optObjVal:
            optObjVal = tmp
            optActionVec = actionVec
    return [optObjVal, optActionVec]

In [28]:
if runBrute:
    start_time_brute = time.time()
    [optObjVal_brute, optActionVec_brute] = bruteForce(agentN, locN, stageN, priorVec, tranProbMat, detectVec, missVec)
    time_elapsed_brute = time.time() - start_time_brute

## Final Results

In [29]:
if runGetLowerBound:
    print('\n=====Lower Bound of Minimum=====\n')
    print('The lower bound of the minimum is ' + str(optLowerBound) + ' .\n')
    
if runOur_oneStage:
    print('\n=====For-Back Iter (With One Stage) Final Results=====\n')
    print('The optimal objective value (prob of miss detection) is ' + str(optObjVal_our_oneStage) + ' .\n')
    if ifPrintPlan:
        print('\nThe optimal search plan is:\n\n' + str(optActionVec_our_oneStage) + '\n')
    print('Time taken to run is %.2f' % (time_elapsed_our_oneStage) + ' .\n')
    print('The algorithm converges in ' + str(cycleN_our_oneStage) + ' cycles.\n')
    
if runOur_twoStage:
    print('\n=====For-Back Iter (With Two Stages) Final Results=====\n')
    print('The optimal objective value (prob of miss detection) is ' + str(optObjVal_our_twoStage) + ' .\n')
    if ifPrintPlan:
        print('\nThe optimal search plan is:\n\n' + str(optActionVec_our_twoStage) + '\n')
    print('Time taken to run is %.2f' % (time_elapsed_our_twoStage) + ' .\n')
    print('The algorithm converges in ' + str(cycleN_our_twoStage) + ' cycles.\n')
    
if runOur_oneStage_random:
    print('\n=====For-Back Iter Random (With One Stage) Final Results=====\n')
    print('The optimal objective value (prob of miss detection) is ' + str(optObjVal_our_oneStage_random) + ' .\n')
    if ifPrintPlan:
        print('\nThe optimal search plan is:\n\n' + str(optActionVec_our_oneStage_random) + '\n')
    print('Time taken to run is %.2f' % (time_elapsed_our_oneStage_random) + ' .\n')
    print('Random repetitions = ' + str(randomRepetitionN_our_oneStage_random) + ' .\n')
    
if runOur_twoStage_random:
    print('\n=====For-Back Iter Random (With Two Stages) Final Results=====\n')
    print('The optimal objective value (prob of miss detection) is ' + str(optObjVal_our_twoStage_random) + ' .\n')
    if ifPrintPlan:
        print('\nThe optimal search plan is:\n\n' + str(optActionVec_our_twoStage_random) + '\n')
    print('Time taken to run is %.2f' % (time_elapsed_our_twoStage_random) + ' .\n')
    print('Random repetitions = ' + str(randomRepetitionN_our_twoStage_random) + ' .\n')
    
if runRoyset2:
    print('\n=====Cutting-Plane (Royset-Sato Alg 2) Final Results=====\n')
    print('The optimal objective value (prob of miss detection) is ' + str(optObjVal_royset2) + ' .\n')
    if ifPrintPlan:
        print('\nThe optimal search plan is:\n\n' + str(optActionVec_royset2) + '\n')
    print('Time taken to run is %.2f' % (time_elapsed_royset2) + ' .\n')
    
if runRoyset3:
    print('\n=====Cutting-Plane (Royset-Sato Alg 3) Final Results=====\n')
    print('The optimal objective value (prob of miss detection) is ' + str(optObjVal_royset3) + ' .\n')
    if ifPrintPlan:
        print('\nThe optimal search plan is:\n\n' + str(optActionVec_royset3) + '\n')
    print('Time taken to run is %.2f' % (time_elapsed_royset3) + ' .\n')
    
if runDp:
    print('\n=====POMDP Final Results=====\n')
    print('The optimal objective value (prob of miss detection) is ' + str(optObjVal_dp) + ' .\n')
    if ifPrintPlan:
        print('\nThe optimal search plan is:\n\n' + str(optActions_dp) + '\n')
    print('Time taken to run is %.2f' % (time_elapsed_dp) + ' .\n')
    
if runMyopic:
    print('\n=====Myopic Final Results=====\n')
    print('The optimal objective value (prob of miss detection) is ' + str(optObjVal_myopic) + ' .\n')
    if ifPrintPlan:
        print('\nThe optimal search plan is:\n\n' + str(optActions_myopic) + '\n')
    print('Time taken to run is %.2f' % (time_elapsed_myopic) + ' .\n')
    
if runBrute:
    print('\n=====Brute Force Final Results=====\n')
    print('The optimal objective value (prob of miss detection) is ' + str(optObjVal_brute) + ' .\n')
    if ifPrintPlan:
        print('\nThe optimal search plan is:\n\n' + str(optActionVec_brute) + '\n')
    print('Time taken to run is %.2f' % (time_elapsed_brute) + ' .\n')
    


=====Cutting-Plane (Royset-Sato Alg 2) Final Results=====

The optimal objective value (prob of miss detection) is 0.859619406649 .

Time taken to run is 271.62 .

