# In [None]:
import numpy as np
import torch

from cost import Cost

class PlayerCost(object):
    """
    Weighted sum of cost terms for one player of the game.

    Each term is a callable `cost(input, k)` exposing a `_name` attribute,
    paired with the argument it acts on ("x" or a player index into `u`) and a
    multiplicative weight.

    NOTE: `__call__` reads the names `target_margin_function`, `goal_cost` and
    `obstacle` from module scope. They are not defined in this class and must
    be provided by the surrounding code before any cost is evaluated.
    """

    # Scale of the identity Hessians returned by the placeholder quadratic
    # model in `quadraticize`.
    _PLACEHOLDER_SCALE = 0.01

    def __init__(self):
        # Parallel lists: entry i of each describes the i-th cost term.
        self._costs = []
        self._args = []
        self._weights = []

    def __call__(self, x, u, k):
        """
        Evaluate the game cost function at the current state and controls.
        NOTE: `x`, each `u` are all column vectors.

        Only one cost term is active at a time, selected by the module-level
        flag `target_margin_function`:
          * if it equals True, only the term whose `_name` equals the
            module-level `goal_cost` keeps its weight;
          * if it equals False, only the term whose `_name` equals the
            module-level `obstacle` keeps its weight;
          * otherwise every term keeps its own weight.
        Inactive terms are still evaluated, but with weight 0.

        :param x: state of the system
        :type x: np.array or torch.Tensor
        :param u: list of control inputs for each player
        :type u: [np.array] or [torch.Tensor]
        :param k: time step, if cost is time-varying
        :type k: uint
        :return: scalar value of cost
        :rtype: float or torch.Tensor
        :raises ValueError: if no cost term has been added yet
        """
        if not self._costs:
            raise ValueError(
                "PlayerCost has no cost terms; call add_cost() first.")

        # Decide once which single term (if any) is active.
        if target_margin_function == True:  # noqa: E712
            filtering, active_name = True, goal_cost
        elif target_margin_function == False:  # noqa: E712
            filtering, active_name = True, obstacle
        else:
            filtering, active_name = False, None

        total_cost = None
        for cost, arg, weight in zip(self._costs, self._args, self._weights):
            cost_input = x if arg == "x" else u[arg]

            # Mute every term except the active one. The active term must
            # fall through to the accumulation below (a `continue` here would
            # drop it from the sum entirely).
            if filtering and cost._name != active_name:
                weight = 0

            current_term = weight * cost(cost_input, k)
            if current_term > 1e8:
                print("Warning: cost %s is %f" % (cost._name, current_term))
                print("Input is: ", cost_input)

            # Out-of-place accumulation, so that a tensor belonging to the
            # autograd graph is never modified in place.
            if total_cost is None:
                total_cost = current_term
            else:
                total_cost = total_cost + current_term

        return total_cost

    def add_cost(self, cost, arg, weight=1.0):
        """
        Add a new cost to the game, and specify its argument to be either
        "x" or an integer indicating which player's control it is, e.g. 0
        corresponds to u0. Also assign a weight.

        :param cost: cost function to add
        :type cost: Cost
        :param arg: argument of cost, either "x" or a player index
        :type arg: string or uint
        :param weight: multiplicative weight for this cost
        :type weight: float
        """
        self._costs.append(cost)
        self._args.append(arg)
        self._weights.append(weight)

    def quadraticize(self, x, u, k, k_tracker, ii):
        r"""
        Compute a quadratic approximation to the overall cost for a
        particular choice of state `x`, and controls `u` for each player.

        Returns the gradient and Hessian of the overall cost such that:
        ```
           cost(x + dx, [ui + dui], k) \approx
                cost(x, u1, u2, k) +
                grad_x^T dx +
                0.5 * (dx^T hess_x dx + sum_i dui^T hess_ui dui)
        ```

        NOTE that in the notation of `solve_lq_game.py`, for player i:
          * `grad_x = li`
          * `hess_x = Qi`
          * `hess_uj = Rij`

        For player 0 (`ii == 0`) the cost is only differentiated at the time
        step `k == k_tracker`; at every other step a small fixed placeholder
        is returned instead (zero cost, zero gradient, scaled identity
        Hessians). All other players are always differentiated.

        :param x: state
        :type x: np.array
        :param u: list of control inputs for each player
        :type u: np.array
        :param k: time step, if cost is time-varying
        :type k: uint
        :param k_tracker: the iteration for t' for reachability
        :type k_tracker: integer
        :param ii: index of the player this cost belongs to
        :type ii: uint
        :return: cost(x, u), grad_x, hess_x, [hess_ui]
        :rtype: float, np.array, np.array, [np.array]
        """
        if ii == 0 and k != k_tracker:
            return self._placeholder_quadratic(x, u)
        return self._autodiff_quadratic(x, u, k)

    @classmethod
    def _placeholder_quadratic(cls, x, u):
        """Return the fixed placeholder model, sized to match `x` and `u`."""
        scale = cls._PLACEHOLDER_SCALE
        hess_x = np.identity(len(x)) * scale
        grad_x = np.zeros((len(x), 1))
        hess_u = [np.identity(len(ui)) * scale for ui in u]
        return 0.0, grad_x, hess_x, hess_u

    def _autodiff_quadratic(self, x, u, k):
        """Differentiate the cost twice with torch autograd at (x, u, k)."""
        # Convert to torch.Tensor format.
        x_torch = torch.from_numpy(x).requires_grad_(True)
        u_torch = [torch.from_numpy(ui).requires_grad_(True) for ui in u]

        # Evaluate cost here.
        cost_torch = self(x_torch, u_torch, k)
        cost = cost_torch.item()

        # First derivatives, keeping the graph so they can be differentiated
        # again. A `None` result means the cost does not depend on that
        # variable.
        grad_x_torch = torch.autograd.grad(
            cost_torch, x_torch, create_graph=True, allow_unused=True)[0]
        grad_u_torch = [
            torch.autograd.grad(
                cost_torch, ui_torch, create_graph=True, allow_unused=True)[0]
            for ui_torch in u_torch]

        if grad_x_torch is None:
            grad_x = np.zeros((len(x), 1))
        else:
            grad_x = grad_x_torch.detach().numpy().copy()

        hess_x = self._hessian(grad_x_torch, x_torch)
        hess_u = [
            self._hessian(grad_ui_torch, ui_torch)
            for grad_ui_torch, ui_torch in zip(grad_u_torch, u_torch)]

        return cost, grad_x, hess_x, hess_u

    @staticmethod
    def _hessian(grad_torch, var_torch):
        """Build the Hessian w.r.t. `var_torch` row by row from its gradient."""
        dim = len(var_torch)
        hess = np.zeros((dim, dim))

        # No dependence on the variable, or a gradient that is constant
        # (cost affine in the variable): the Hessian is identically zero.
        if grad_torch is None or not grad_torch.requires_grad:
            return hess

        for row in range(dim):
            hess_row = torch.autograd.grad(
                grad_torch[row, 0], var_torch,
                retain_graph=True, allow_unused=True)[0]
            # `None` here means this gradient entry does not depend on the
            # variable (e.g. a pure cross term), so the row stays zero.
            if hess_row is not None:
                hess[row, :] = hess_row.detach().numpy().ravel()

        return hess


# This is for reach-avoid for each player (Try 1)

# In [None]:
# Reach-avoid bookkeeping, attempt 1.
# NOTE(review): `self`, `xs`, `car1_position_indices`, `target_position`,
# `target_radius`, `obstacle_position` and `obstacle_radius` are not defined
# in this cell; they must already exist in the enclosing scope.

# 1a. Target margin: evaluate the target distance at every step of the
# trajectory and remember the first step (k_tracker) where it is smallest.
hold = 0
hold_new = 0
target_margin_func = np.zeros((self._horizon, 1))
k_tracker = 0
eps = 0.01
for k in range(self._horizon):
    hold_new = self._TargetDistance(
        xs[k], car1_position_indices, target_position, target_radius)
    target_margin_func[k] = hold_new

    if k == 0 or hold_new < hold:
        hold = hold_new
        k_tracker = k

# 1b. Obstacle margin: evaluate the obstacle distance on the closed interval
# [0, k_tracker] and remember the first step (k_track_obs) where it is
# largest. The endpoint k_tracker is included so that the comparison in 1c
# never falls back on the placeholder value 0 (previously the case whenever
# k_tracker == 0).
obs_margin_func = np.zeros((self._horizon, 1))
hold_obs = 0
hold_new_obs = 0
k_track_obs = 0
for j in range(min(k_tracker + 1, self._horizon)):
    hold_new_obs = self._ObstacleDistance(
        xs[j], car1_position_indices, obstacle_position, obstacle_radius)
    obs_margin_func[j] = hold_new_obs

    if j == 0 or hold_new_obs > hold_obs:
        hold_obs = hold_new_obs
        k_track_obs = j

# 1c. Take the larger of the two margins (Equation 4 in Jaime's Reach-Avoid
# 2015 paper) and record which one it came from, and at which time step.
if target_margin_func[k_tracker] > obs_margin_func[k_track_obs]:
    target_margin_function = True
    k_track = k_tracker  # t* comes from the target margin function
else:
    target_margin_function = False
    k_track = k_track_obs  # t* comes from the obstacle margin function

# This is for reach-avoid (Try 2)

# In [None]:
# Reach-avoid bookkeeping, attempt 2: find t* for reachability.
# NOTE(review): `self` and `xs` are not defined in this cell; they must
# already exist in the enclosing scope.
car1_position_indices = (0, 1)
x_index, y_index = car1_position_indices
target_position = (6.5, 35.0)
target_radius = 2

# Obstacle definition.
obstacle_position = (6.5, 15.0)
obstacle_radius = 4

# Pre-allocation for the target margin.
hold = 0
hold_new = 0
target_margin_func = np.zeros((self._horizon, 1))
k_tracker = 0
eps = 0.01

# Pre-allocation for the obstacle margin and the payoff.
obs_margin_func = np.zeros((self._horizon, 1))
payoff = np.zeros((self._horizon, 1))
t_max_obs = np.zeros((self._horizon, 1))

# Running maximum of the obstacle distance over [0, k] and the first step at
# which it was attained. Carrying it across iterations replaces the inner
# rescan from step 0 and, unlike `range(k)`, includes step k itself.
hold_obs = 0
hold_new_obs = 0
k_track_obs = 0

for k in range(self._horizon):
    # 1a. Target margin at step k.
    hold_new = self._TargetDistance(
        xs[k], car1_position_indices, target_position, target_radius)
    target_margin_func[k] = hold_new

    # 1b. Obstacle margin: fold step k into the running maximum.
    hold_new_obs = self._ObstacleDistance(
        xs[k], car1_position_indices, obstacle_position, obstacle_radius)
    if k == 0 or hold_new_obs > hold_obs:
        hold_obs = hold_new_obs
        k_track_obs = k

    # Store the max of g over [0, k] and the step where it is attained.
    obs_margin_func[k] = hold_obs
    t_max_obs[k] = k_track_obs

    # 1c. Payoff at step k is the larger of the two margins (Equation 4 in
    # Jaime's Reach-Avoid 2015 paper).
    if hold_new > hold_obs:
        payoff[k] = hold_new
    else:
        payoff[k] = hold_obs

# t* is the (first) step at which the payoff is smallest.
t_star = int(np.argmin(payoff))

# Determine whether the target margin (l) or the obstacle margin (g) attains
# the payoff at t*, and which time step that margin was taken from.
if target_margin_func[t_star, 0] > obs_margin_func[t_star, 0]:
    target_margin_function = True
    time_star = t_star
else:
    target_margin_function = False
    # t_max_obs is a float array; convert back to an integer step index.
    time_star = int(t_max_obs[t_star, 0])