# Invader Defender 

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import linprog

In [2]:
actions = [[-1, 0], [0, 1], [1, 0], [0, -1]] #up, right, down, left = (clockwise from up) 
action_count = len(actions) 
gridSize = 6 
state_count = gridSize*gridSize

In [3]:
class Invader_Defender():
    def __init__(self, gridSize):
        self.valueMap = np.zeros((gridSize, gridSize))
        self.states = [[i, j] for i in range(gridSize) for j in range(gridSize)]
        self.size = gridSize
        
        # deterministic transition ?
        self.transition_prob = 1 
        
        # initialize defender and invader states
        self.new_state = [0, 0, 0, 0]
        self.new_defender_state = [0, 0]
        self.new_invader_state = [0, 0]
        
        # set territory state
        self.territory_state = [4, 4]

        # create a list of all possible states in the game
        self.game_state_list = []
        for defender_state in self.states:
            for invader_state in self.states:
                combined_states = defender_state + invader_state
                self.game_state_list.append(combined_states)
        
        # create 2 lists of states representing defender and invader victory
        self.defender_won = []
        self.invader_won = []
        
        # create states representing defender victory
        for defender_state in self.states:
            for invader_state in self.states:
                distance = np.linalg.norm(np.array(defender_state) - np.array(invader_state))
                # if the invader is not at territory and within the capture range of defender = defender won
                if invader_state != self.territory_state and distance <= np.sqrt(2):
                    combined_states = defender_state + invader_state
                    self.defender_won.append(combined_states)
           
        # create states representing invader victory
        for defender_state in self.states:
            distance = np.linalg.norm(np.array(defender_state) - np.array(self.territory_state))
            # if the invader is at territory, and outside of the defender's capture range = invader won
            if distance > np.sqrt(2):
                combined_states = defender_state + self.territory_state
                self.invader_won.append(combined_states)
    
    def possible_states(self):
        """
        A function that returns a list of all possible states in the game
        """
        return self.game_state_list
    
    def terminal_check(self, state):
        """
        A function that checks whether the game is at a terminal state.
        Terminal state happens when either the invader or defender has won.
        """
        if state in self.defender_won:
            status = "Defender Won"
            terminal_check = True
        elif state in self.invader_won:
            status = "Invader Won"
            terminal_check = True
        else:
            terminal_check = False
            status = "Game in Progress"

        return terminal_check, status
    
#     def transition_probability(self, transition):
#         """
#         A function that returns the transition probability...?
#         """
#         return self.transition_prob, reward

    def next_state(self, current_state, defender_action, invader_action):
        """
        A function that returns the next state
        Input: current state [0,0] , defender_action [0, 1], invader_action [0,-1]
        Output: next state array([x1,y1,x2,y2]) and reward (int)
            - If the action takes the agent off grid, the agent remains in original state
            - If defender won, reward is calculated based on manhattan distance between invader captured state
            and territory
            - If defender loss, reward is -100
        """
        defender_state = []
        invader_state = []
        
        # deconstruct current state [0,0,1,1] in to defender [0,0] and invader [1,1] state
        for i in range(4):
            if i < 2:
                defender_state.append(current_state[i])
            else:
                invader_state.append(current_state[i])
                
        # get next state: state: [0, 0], action: [0, 1], new_state = [0, 1]
        self.new_defender_state = list(np.array(defender_state) + np.array(defender_action))
        self.new_invader_state = list(np.array(invader_state) + np.array(invader_action))

        # if new defender states results in off the grid, return to original state
        if -1 in self.new_defender_state or self.size in self.new_defender_state:
            self.new_defender_state = defender_state
        
        # if new invader states results in off the grid, return to original state
        if -1 in self.new_invader_state or self.size in self.new_invader_state:
            self.new_invader_state = invader_state
       
        # combine the defender and invader state
        self.new_state = self.new_defender_state
        self.new_state.extend(self.new_invader_state)
        
        # calculate rewards
        terminal, status = self.terminal_check(self.new_state)
        if terminal == True:
            if status == "Defender Won":
                # defender reward if defender won (manhattan distance between invader captured state and territory)
                distance_to_territory = sum(abs(np.array(self.new_invader_state) - np.array(self.territory_state)))
                self.reward = distance_to_territory
            else:
                # defender reward if invader won
                self.reward = -100
        else:
            self.reward = 0
            
        return self.new_state, self.reward

## Testing 

In [4]:
invader_defender = Invader_Defender(6)

In [5]:
next_state, reward = invader_defender.next_state([2,1,0,0], [-1, 0], [-1, 0])

In [6]:
next_state

[1, 1, 0, 0]

In [7]:
reward

8

In [8]:
invader_defender.terminal_check([1, 1, 0, 0])

(True, 'Defender Won')

## Value Iteration 

In [9]:
invader_defender = Invader_Defender(6)

In [10]:
k = 0
U = {}
gamma = 0.9
state_list = []
listofzeros = [0.0] * len(invader_defender.game_state_list)

# convert game_state_list in to a state list of tuples in order to make a dictionary
for state in invader_defender.game_state_list:
    state_list.append(tuple(state))
    
# initiate params
G = dict(zip(state_list, listofzeros))
U[k] = dict(zip(state_list, listofzeros))

In [11]:
def calculate_payoff(state):
    """
    A function calculates the payoff of a specific state by iterating over every defender/invader action
    Input: state (ie. [0,0,1,1])
    Output: payoff = 4x4 matrix where each element represent the defender's payoff 
    when defender take i, and invader take action j
    """
    payoff = np.zeros([4,4])
    for i in range(action_count):
        defender_action = actions[i]
        for j in range(action_count):
            invader_action = actions[j]
            next_state, reward = invader_defender.next_state(state, defender_action, invader_action)
            payoff[i, j] = reward + gamma*invader_defender.transition_prob*U[k][tuple(next_state)]
    return payoff

In [12]:
def calculate_value(G_state):
    
    c = [0, 0, 0, 0, -1]
    defender_q = -1*G_state     
    v_coeff = np.ones((4,1))
    Aub = np.concatenate((defender_q,v_coeff),1)
    b = [0, 0, 0, 0]
    Aeq = [[1, 1, 1, 1, 0]]
    beq = [[1]]
    bounds = ((0,1),(0,1),(0,1),(0,1),(None, None))
    defender_solution = linprog(c, A_ub=Aub, b_ub=b, A_eq=Aeq, b_eq=beq, bounds=bounds)
    
    invader_q = np.transpose(defender_q)
    Aub = np.concatenate((invader_q,v_coeff),1)
    invader_solution = linprog(c, A_ub=Aub, b_ub=b, A_eq=Aeq, b_eq=beq, bounds=bounds)
    
    defender_value = defender_solution['fun']
    invader_value = invader_solution['fun']
    
    return defender_value, invader_value

In [13]:
# to remove warnings
import warnings
warnings.filterwarnings('ignore')

for state in invader_defender.game_state_list:
    
    # Build G dictionary {state: payoff (4x4)}
    G[tuple(state)] = calculate_payoff(state)
    defender_value, invader_value = calculate_value(G[tuple(state)])
    # calculate value of game
    print("state: ", state)
    print("defender value: ", defender_value)
    print("invader value: ", invader_value)
    print("---------------------")
    
    # calculate delta
    
    # if delta small enough, break
    
    

state:  [0, 0, 0, 0]
defender value:  -7.999999999970426
invader value:  -6.999999999991143
---------------------
state:  [0, 0, 0, 1]
defender value:  -7.999999998400379
invader value:  -5.999999949372199
---------------------
state:  [0, 0, 0, 2]
defender value:  -6.999999999994232
invader value:  3.843403593668384e-09
---------------------
state:  [0, 0, 0, 3]
defender value:  6.252411107099929e-12
invader value:  6.2524111070998805e-12
---------------------
state:  [0, 0, 0, 4]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [0, 0, 0, 5]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [0, 0, 1, 0]
defender value:  -7.99999999840037
invader value:  -5.999999949372178
---------------------
state:  [0, 0, 1, 1]
defender value:  -6.999999998794923
invader value:  -2.4999999248691247
---------------------
state:  [0, 0, 1, 2]
defender value:  -5.99999998646847
invade

state:  [0, 2, 3, 4]
defender value:  -3.1323388327564317e-11
invader value:  99.9999999989466
---------------------
state:  [0, 2, 3, 5]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [0, 2, 4, 0]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [0, 2, 4, 1]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [0, 2, 4, 2]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [0, 2, 4, 3]
defender value:  -3.1300295688652113e-11
invader value:  99.9999999989466
---------------------
state:  [0, 2, 4, 4]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [0, 2, 4, 5]
defender value:  -3.129940751023241e-11
invader value:  99.9999999989466
---------------------
state:  [0, 2, 5, 0]
defender value:  2.

state:  [0, 5, 1, 3]
defender value:  -2.999999974860552
invader value:  7.91982315090971e-11
---------------------
state:  [0, 5, 1, 4]
defender value:  -3.99999999996231
invader value:  -1.3333333333321034
---------------------
state:  [0, 5, 1, 5]
defender value:  -4.999999999913335
invader value:  -2.999999999400952
---------------------
state:  [0, 5, 2, 0]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [0, 5, 2, 1]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [0, 5, 2, 2]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [0, 5, 2, 3]
defender value:  1.208585433189472e-12
invader value:  1.2085854331691824e-12
---------------------
state:  [0, 5, 2, 4]
defender value:  -2.99999996605355
invader value:  2.6739395457638846e-11
---------------------
state:  [0, 5, 2, 5]
defender value:  -3.99999990070

state:  [1, 1, 3, 4]
defender value:  -3.1323388327564317e-11
invader value:  99.9999999989466
---------------------
state:  [1, 1, 3, 5]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [1, 1, 4, 0]
defender value:  5.663097509994836e-12
invader value:  5.663097510011314e-12
---------------------
state:  [1, 1, 4, 1]
defender value:  4.56434437618947e-12
invader value:  4.564344376189469e-12
---------------------
state:  [1, 1, 4, 2]
defender value:  2.022440236317792e-12
invader value:  2.022440236317791e-12
---------------------
state:  [1, 1, 4, 3]
defender value:  -3.1300295688652113e-11
invader value:  99.9999999989466
---------------------
state:  [1, 1, 4, 4]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [1, 1, 4, 5]
defender value:  -3.129940751023241e-11
invader value:  99.9999999989466
---------------------
state:  [1, 1, 5, 0]
defender value:  2.2436115

invader value:  99.9999999989466
---------------------
state:  [1, 3, 5, 5]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [1, 4, 0, 0]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [1, 4, 0, 1]
defender value:  6.2524111070998805e-12
invader value:  6.252411107099929e-12
---------------------
state:  [1, 4, 0, 2]
defender value:  8.993316492862114e-11
invader value:  1.408966376965318e-11
---------------------
state:  [1, 4, 0, 3]
defender value:  -1.9999999904845545
invader value:  -3.99999999997275
---------------------
state:  [1, 4, 0, 4]
defender value:  -2.9999999997283524
invader value:  -2.999999999996605
---------------------
state:  [1, 4, 0, 5]
defender value:  -1.9999999922241547
invader value:  -3.999999999956316
---------------------
state:  [1, 4, 1, 0]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
----------------

state:  [2, 0, 2, 5]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [2, 0, 3, 0]
defender value:  -5.9999999999451195
invader value:  -4.000000000000477
---------------------
state:  [2, 0, 3, 1]
defender value:  -4.999999999990249
invader value:  -1.4999999999839022
---------------------
state:  [2, 0, 3, 2]
defender value:  1.0980331285482973e-11
invader value:  2.3526791921170448e-11
---------------------
state:  [2, 0, 3, 3]
defender value:  2.022440236317782e-12
invader value:  2.022440236317792e-12
---------------------
state:  [2, 0, 3, 4]
defender value:  -3.1323388327564317e-11
invader value:  99.9999999989466
---------------------
state:  [2, 0, 3, 5]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [2, 0, 4, 0]
defender value:  3.567410014591632e-11
invader value:  1.8843984594972515e-11
---------------------
state:  [2, 0, 4, 1]
defender value:  1.098033

state:  [2, 2, 5, 3]
defender value:  2.2033108973543387e-12
invader value:  2.2033108973543387e-12
---------------------
state:  [2, 2, 5, 4]
defender value:  -3.1301183867071813e-11
invader value:  99.9999999989466
---------------------
state:  [2, 2, 5, 5]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [2, 3, 0, 0]
defender value:  2.2436115337637674e-12
invader value:  2.2436115337637674e-12
---------------------
state:  [2, 3, 0, 1]
defender value:  2.9748209937790503e-12
invader value:  2.9748209955545155e-12
---------------------
state:  [2, 3, 0, 2]
defender value:  8.993316492527571e-11
invader value:  1.4089663769543922e-11
---------------------
state:  [2, 3, 0, 3]
defender value:  1.4575596136682927e-09
invader value:  -3.9999999999914473
---------------------
state:  [2, 3, 0, 4]
defender value:  8.129796714349903e-12
invader value:  6.507236392620891e-12
---------------------
state:  [2, 3, 0, 5]
defender value

KeyboardInterrupt: 