# Invader Defender 

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import linprog

In [2]:
actions = [[-1, 0], [0, 1], [1, 0], [0, -1]] #up, right, down, left = (clockwise from up) 
action_count = len(actions) 
gridSize = 6 
state_count = gridSize*gridSize

In [3]:
class Invader_Defender():
    def __init__(self, gridSize):
        self.valueMap = np.zeros((gridSize, gridSize))
        self.states = [[i, j] for i in range(gridSize) for j in range(gridSize)]
        self.size = gridSize
        
        # deterministic transition ?
        self.transition_prob = 1 
        
        # initialize defender and invader states
        self.new_state = [0, 0, 0, 0]
        self.new_defender_state = [0, 0]
        self.new_invader_state = [0, 0]
        
        # set territory state
        self.territory_state = [4, 4]

        # create a list of all possible states in the game
        self.game_state_list = []
        for defender_state in self.states:
            for invader_state in self.states:
                combined_states = defender_state + invader_state
                self.game_state_list.append(combined_states)
        
        # create 2 lists of states representing defender and invader victory
        self.defender_won = []
        self.invader_won = []
        
        # create states representing defender victory
        for defender_state in self.states:
            for invader_state in self.states:
                distance = np.linalg.norm(np.array(defender_state) - np.array(invader_state))
                # if the invader is not at territory and within the capture range of defender = defender won
                if invader_state != self.territory_state and distance <= np.sqrt(2):
                    combined_states = defender_state + invader_state
                    self.defender_won.append(combined_states)
           
        # create states representing invader victory
        for defender_state in self.states:
            distance = np.linalg.norm(np.array(defender_state) - np.array(self.territory_state))
            # if the invader is at territory, and outside of the defender's capture range = invader won
            if distance > np.sqrt(2):
                combined_states = defender_state + self.territory_state
                self.invader_won.append(combined_states)
    
    def possible_states(self):
        """
        A function that returns a list of all possible states in the game
        """
        return self.game_state_list
    
    def terminal_check(self, state):
        """
        A function that checks whether the game is at a terminal state.
        Terminal state happens when either the invader or defender has won.
        """
        if state in self.defender_won:
            status = "Defender Won"
            terminal_check = True
        elif state in self.invader_won:
            status = "Invader Won"
            terminal_check = True
        else:
            terminal_check = False
            status = "Game in Progress"

        return terminal_check, status
    
#     def transition_probability(self, transition):
#         """
#         A function that returns the transition probability...?
#         """
#         return self.transition_prob, reward

    def next_state(self, current_state, defender_action, invader_action):
        """
        A function that returns the next state
        Input: current state [0,0] , defender_action [0, 1], invader_action [0,-1]
        Output: next state array([x1,y1,x2,y2]) and reward (int)
            - If the action takes the agent off grid, the agent remains in original state
            - If defender won, reward is calculated based on manhattan distance between invader captured state
            and territory
            - If defender loss, reward is -100
        """
        defender_state = []
        invader_state = []
        
        # deconstruct current state [0,0,1,1] in to defender [0,0] and invader [1,1] state
        for i in range(4):
            if i < 2:
                defender_state.append(current_state[i])
            else:
                invader_state.append(current_state[i])
                
        # get next state: state: [0, 0], action: [0, 1], new_state = [0, 1]
        self.new_defender_state = list(np.array(defender_state) + np.array(defender_action))
        self.new_invader_state = list(np.array(invader_state) + np.array(invader_action))

        # if new defender states results in off the grid, return to original state
        if -1 in self.new_defender_state or self.size in self.new_defender_state:
            self.new_defender_state = defender_state
        
        # if new invader states results in off the grid, return to original state
        if -1 in self.new_invader_state or self.size in self.new_invader_state:
            self.new_invader_state = invader_state
       
        # combine the defender and invader state
        self.new_state = self.new_defender_state
        self.new_state.extend(self.new_invader_state)
        
        # calculate rewards
        terminal, status = self.terminal_check(self.new_state)
        if terminal == True:
            if status == "Defender Won":
                # defender reward if defender won (manhattan distance between invader captured state and territory)
                distance_to_territory = sum(abs(np.array(self.new_invader_state) - np.array(self.territory_state)))
                self.reward = distance_to_territory
            else:
                # defender reward if invader won
                self.reward = -100
        else:
            self.reward = 0
            
        return self.new_state, self.reward

## Testing 

In [4]:
invader_defender = Invader_Defender(6)

In [5]:
next_state, reward = invader_defender.next_state([2,1,0,0], [-1, 0], [-1, 0])

In [6]:
next_state

[1, 1, 0, 0]

In [7]:
reward

8

In [8]:
invader_defender.terminal_check([1, 1, 0, 0])

(True, 'Defender Won')

## Value Iteration 

In [9]:
invader_defender = Invader_Defender(6)

In [10]:
k = 0
U = {}
gamma = 0.9
state_list = []
listofzeros = [0.0] * len(invader_defender.game_state_list)

# convert game_state_list in to a state list of tuples in order to make a dictionary
for state in invader_defender.game_state_list:
    state_list.append(tuple(state))
    
# initiate params
G = dict(zip(state_list, listofzeros))
U[k] = dict(zip(state_list, listofzeros))

In [11]:
def calculate_payoff(state):
    """
    A function calculates the payoff of a specific state by iterating over every defender/invader action
    Input: state (ie. [0,0,1,1])
    Output: payoff = 4x4 matrix where each element represent the defender's payoff 
    when defender take i, and invader take action j
    """
    payoff = np.zeros([4,4])
    for i in range(action_count):
        defender_action = actions[i]
        for j in range(action_count):
            invader_action = actions[j]
            next_state, reward = invader_defender.next_state(state, defender_action, invader_action)
            payoff[i, j] = reward + gamma*invader_defender.transition_prob*U[k][tuple(next_state)]
    return payoff

In [12]:
def calculate_value(G_state):
    
    # defender lin prog
    c = [0, 0, 0, 0, -1]
    defender_q = -1*G_state     
    v_coeff = np.ones((4,1))
    Aub = np.concatenate((defender_q,v_coeff),1)
    b = [0, 0, 0, 0]
    Aeq = [[1, 1, 1, 1, 0]]
    beq = [[1]]
    bounds = ((0,1),(0,1),(0,1),(0,1),(None, None))
    defender_solution = linprog(c, A_ub=Aub, b_ub=b, A_eq=Aeq, b_eq=beq, bounds=bounds)
    
    # invader lin prog
    c = [0, 0, 0, 0, 1]
#     invader_q = np.transpose(G_state)
    invader_q = G_state
    w_coeff = np.ones((4,1))*-1
    Aub = np.concatenate((invader_q,w_coeff),1)
    invader_solution = linprog(c, A_ub=Aub, b_ub=b, A_eq=Aeq, b_eq=beq, bounds=bounds)
    
    defender_value = defender_solution['fun']
    invader_value = invader_solution['fun']
    
    return defender_value, invader_value

In [13]:
# to remove warnings
import warnings
warnings.filterwarnings('ignore')

for state in invader_defender.game_state_list:
    
    # Build G dictionary {state: payoff (4x4)}
    G[tuple(state)] = calculate_payoff(state)
    defender_value, invader_value = calculate_value(G[tuple(state)])
    # calculate value of game
    print("state: ", state)
    print("defender value: ", defender_value)
    print("invader value: ", invader_value)
    print("---------------------")
    
    # calculate delta
    
    # if delta small enough, break
    
    

state:  [0, 0, 0, 0]
defender value:  -7.999999999970426
invader value:  7.000000000000154
---------------------
state:  [0, 0, 0, 1]
defender value:  -7.999999998400379
invader value:  5.999999999916391
---------------------
state:  [0, 0, 0, 2]
defender value:  -6.999999999994232
invader value:  3.7091668625421903e-09
---------------------
state:  [0, 0, 0, 3]
defender value:  6.252411107099929e-12
invader value:  9.257099311565523e-13
---------------------
state:  [0, 0, 0, 4]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [0, 0, 0, 5]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [0, 0, 1, 0]
defender value:  -7.99999999840037
invader value:  5.999999999916385
---------------------
state:  [0, 0, 1, 1]
defender value:  -6.999999998794923
invader value:  2.500000000008948
---------------------
state:  [0, 0, 1, 2]
defender value:  -5.99999998646847
invader val

state:  [0, 2, 3, 3]
defender value:  2.022440236317792e-12
invader value:  3.092350739962055e-13
---------------------
state:  [0, 2, 3, 4]
defender value:  -3.1323388327564317e-11
invader value:  -99.99999877711943
---------------------
state:  [0, 2, 3, 5]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [0, 2, 4, 0]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [0, 2, 4, 1]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [0, 2, 4, 2]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [0, 2, 4, 3]
defender value:  -3.1300295688652113e-11
invader value:  -99.99999877712138
---------------------
state:  [0, 2, 4, 4]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [0, 2, 4, 5]
defender va

state:  [0, 5, 1, 3]
defender value:  -2.999999974860552
invader value:  2.6966151533969196e-10
---------------------
state:  [0, 5, 1, 4]
defender value:  -3.99999999996231
invader value:  1.333333333449634
---------------------
state:  [0, 5, 1, 5]
defender value:  -4.999999999913335
invader value:  2.9999999985913886
---------------------
state:  [0, 5, 2, 0]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [0, 5, 2, 1]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [0, 5, 2, 2]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [0, 5, 2, 3]
defender value:  1.208585433189472e-12
invader value:  1.6104370710939807e-11
---------------------
state:  [0, 5, 2, 4]
defender value:  -2.99999996605355
invader value:  4.2534986643971706e-10
---------------------
state:  [0, 5, 2, 5]
defender value:  -3.99999990070

state:  [1, 1, 4, 4]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [1, 1, 4, 5]
defender value:  -3.129940751023241e-11
invader value:  -99.99999877712237
---------------------
state:  [1, 1, 5, 0]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [1, 1, 5, 1]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [1, 1, 5, 2]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [1, 1, 5, 3]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [1, 1, 5, 4]
defender value:  -3.1301183867071813e-11
invader value:  -99.99999877712057
---------------------
state:  [1, 1, 5, 5]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [1, 2, 0, 0]
defender v

state:  [1, 4, 1, 3]
defender value:  -2.9999999982789527
invader value:  2.9999999971554496
---------------------
state:  [1, 4, 1, 4]
defender value:  -2.6666666666660945
invader value:  2.0000000000052993
---------------------
state:  [1, 4, 1, 5]
defender value:  -2.9999999959098176
invader value:  2.999999993479897
---------------------
state:  [1, 4, 2, 0]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [1, 4, 2, 1]
defender value:  4.564344376189469e-12
invader value:  5.995894264201906e-13
---------------------
state:  [1, 4, 2, 2]
defender value:  1.328924890500574e-11
invader value:  1.4110609384668265e-11
---------------------
state:  [1, 4, 2, 3]
defender value:  -1.3333333272847225
invader value:  1.3333333333301927
---------------------
state:  [1, 4, 2, 4]
defender value:  -2.999999970561889
invader value:  1.0000000000186238
---------------------
state:  [1, 4, 2, 5]
defender value:  -1.3333333283900513
invade

state:  [2, 0, 5, 5]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [2, 1, 0, 0]
defender value:  2.7257290061723117e-09
invader value:  6.999999999182752
---------------------
state:  [2, 1, 0, 1]
defender value:  5.593425969600365e-09
invader value:  5.999999999995282
---------------------
state:  [2, 1, 0, 2]
defender value:  1.1025540772141276e-10
invader value:  3.1926774837023603e-12
---------------------
state:  [2, 1, 0, 3]
defender value:  2.441932135652217e-12
invader value:  7.89314469376478e-12
---------------------
state:  [2, 1, 0, 4]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [2, 1, 0, 5]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [2, 1, 1, 0]
defender value:  -2.9999999999798024
invader value:  5.999999994280822
---------------------
state:  [2, 1, 1, 1]
defender value:  -4.99999

state:  [2, 3, 4, 0]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [2, 3, 4, 1]
defender value:  1.208585433189472e-12
invader value:  1.6104370710939807e-11
---------------------
state:  [2, 3, 4, 2]
defender value:  4.321730288217664e-12
invader value:  3.722592902152718e-10
---------------------
state:  [2, 3, 4, 3]
defender value:  -9.909558507104066e-09
invader value:  -8.549452257256007e-10
---------------------
state:  [2, 3, 4, 4]
defender value:  2.552546044100817e-12
invader value:  6.818847619998288e-12
---------------------
state:  [2, 3, 4, 5]
defender value:  6.955546267333129e-13
invader value:  3.114334151207418e-10
---------------------
state:  [2, 3, 5, 0]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [2, 3, 5, 1]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [2, 3, 5, 2]
defender v

defender value:  6.7261529188653e-12
invader value:  1.020431559632467e-12
---------------------
state:  [3, 0, 0, 1]
defender value:  6.2524111070998845e-12
invader value:  9.257099311586669e-13
---------------------
state:  [3, 0, 0, 2]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [3, 0, 0, 3]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [3, 0, 0, 4]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [3, 0, 0, 5]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [3, 0, 1, 0]
defender value:  1.096634722005634e-10
invader value:  3.733344991168991e-11
---------------------
state:  [3, 0, 1, 1]
defender value:  2.2085231035090612e-11
invader value:  2.5407990135929503e-10
---------------------
state:  [3, 0, 1, 2]
defender value:  2.441932135652

state:  [3, 2, 2, 1]
defender value:  -1.9999999977694194
invader value:  2.999999999998821
---------------------
state:  [3, 2, 2, 2]
defender value:  -2.9999999982789425
invader value:  2.9999999971554683
---------------------
state:  [3, 2, 2, 3]
defender value:  -1.333333327284722
invader value:  1.3333333333301918
---------------------
state:  [3, 2, 2, 4]
defender value:  4.3217302883001334e-12
invader value:  3.7225929022642633e-10
---------------------
state:  [3, 2, 2, 5]
defender value:  5.797592201802911e-13
invader value:  4.760567208600913e-15
---------------------
state:  [3, 2, 3, 0]
defender value:  1.4575596136750845e-09
invader value:  4.000000002689721
---------------------
state:  [3, 2, 3, 1]
defender value:  -2.9999999982789527
invader value:  2.9999999971554496
---------------------
state:  [3, 2, 3, 2]
defender value:  -1.9999999999560085
invader value:  1.9999999985209147
---------------------
state:  [3, 2, 3, 3]
defender value:  -2.9999999767909324
invader va

state:  [3, 4, 5, 4]
defender value:  5.663098489540972e-09
invader value:  9.596658423747418e-10
---------------------
state:  [3, 4, 5, 5]
defender value:  5.3766585300328086e-12
invader value:  1.0000000000072014
---------------------
state:  [3, 5, 0, 0]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [3, 5, 0, 1]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [3, 5, 0, 2]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [3, 5, 0, 3]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [3, 5, 0, 4]
defender value:  2.022440236317791e-12
invader value:  3.092350738062319e-13
---------------------
state:  [3, 5, 0, 5]
defender value:  4.564344376189469e-12
invader value:  5.995894264201903e-13
---------------------
state:  [3, 5, 1, 0]
defender valu

state:  [4, 1, 3, 2]
defender value:  -1.333333327284722
invader value:  1.3333333333301918
---------------------
state:  [4, 1, 3, 3]
defender value:  4.3217302883001334e-12
invader value:  3.7225929022642633e-10
---------------------
state:  [4, 1, 3, 4]
defender value:  -4.909119333262879e-09
invader value:  -99.999999045578
---------------------
state:  [4, 1, 3, 5]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [4, 1, 4, 0]
defender value:  -2.999999999728349
invader value:  2.999999999997634
---------------------
state:  [4, 1, 4, 1]
defender value:  -2.6666666666660657
invader value:  2.000000000005292
---------------------
state:  [4, 1, 4, 2]
defender value:  -2.999999970561919
invader value:  1.0000000000185545
---------------------
state:  [4, 1, 4, 3]
defender value:  -1.7362213000637894e-08
invader value:  -99.99999777884854
---------------------
state:  [4, 1, 4, 4]
defender value:  2.203310897354339e-12
invade

state:  [4, 4, 1, 0]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [4, 4, 1, 1]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [4, 4, 1, 2]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [4, 4, 1, 3]
defender value:  2.022440236317791e-12
invader value:  3.092350738062319e-13
---------------------
state:  [4, 4, 1, 4]
defender value:  5.797592201783247e-13
invader value:  4.760567208521687e-15
---------------------
state:  [4, 4, 1, 5]
defender value:  2.022440236317791e-12
invader value:  3.092350738062319e-13
---------------------
state:  [4, 4, 2, 0]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [4, 4, 2, 1]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [4, 4, 2, 2]
defender 

state:  [5, 0, 4, 5]
defender value:  -3.129940751023241e-11
invader value:  -99.99999877712237
---------------------
state:  [5, 0, 5, 0]
defender value:  -4.999999959998034
invader value:  4.000000000002957
---------------------
state:  [5, 0, 5, 1]
defender value:  -4.999999999913335
invader value:  2.999999998591388
---------------------
state:  [5, 0, 5, 2]
defender value:  -3.9999999007096436
invader value:  4.218847493575595e-15
---------------------
state:  [5, 0, 5, 3]
defender value:  2.022440236317782e-12
invader value:  3.092350738051995e-13
---------------------
state:  [5, 0, 5, 4]
defender value:  -3.1301183867071813e-11
invader value:  -99.99999877712057
---------------------
state:  [5, 0, 5, 5]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [5, 1, 0, 0]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [5, 1, 0, 1]
defender value:  2.243611533763767

state:  [5, 3, 1, 3]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [5, 3, 1, 4]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [5, 3, 1, 5]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [5, 3, 2, 0]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [5, 3, 2, 1]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [5, 3, 2, 2]
defender value:  2.022440236317791e-12
invader value:  3.092350738062319e-13
---------------------
state:  [5, 3, 2, 3]
defender value:  5.797592201783247e-13
invader value:  4.760567208521687e-15
---------------------
state:  [5, 3, 2, 4]
defender value:  2.2033108973543387e-12
invader value:  1.5336939587662256e-12
---------------------
state:  [5, 3, 2, 5]
defende

state:  [5, 5, 5, 1]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [5, 5, 5, 2]
defender value:  5.797592201783246e-13
invader value:  4.760567208537532e-15
---------------------
state:  [5, 5, 5, 3]
defender value:  -0.999999999398767
invader value:  2.092370721129555e-11
---------------------
state:  [5, 5, 5, 4]
defender value:  -1.9999999966650388
invader value:  4.909949113951484e-10
---------------------
state:  [5, 5, 5, 5]
defender value:  -2.0000000000183755
invader value:  1.000000000056356
---------------------


In [15]:
G[tuple(state)]

array([[1., 2., 2., 1.],
       [1., 2., 2., 1.],
       [1., 2., 2., 1.],
       [1., 2., 2., 1.]])