## Continuous action and state models:

### Point mass model
Continuous time:
$$ \begin{aligned}
\dot x &= v_x,\\
\dot y &= v_y,\\
\dot v_x &= a_x,\\
\dot v_y &= a_y
\end{aligned}$$

Discrete time (forward Euler with step size $\gamma > 0$):
$$ \begin{aligned}
x(t+1) &= x(t) + \gamma v_x(t),\\
y(t+1) &= y(t) + \gamma v_y(t),\\
v_x(t+1) &= v_x(t) + \gamma a_x(t),\\
v_y(t+1) &= v_y(t) + \gamma a_y(t)
\end{aligned}$$

### Unicycle model
Continuous time (the heading $\theta$ is measured from the $y$-axis, so $\theta = 0$ points along $+y$):
$$ \begin{aligned}
\dot x &= v \sin(\theta),\\
\dot y &= v \cos(\theta),\\
\dot v &= a,\\
\dot \theta &= \omega
\end{aligned}$$

Discrete time:
$$ \begin{aligned}
x(t+1) &= x(t) + \gamma v(t) \sin(\theta(t)),\\
y(t+1) &= y(t) + \gamma v(t) \cos(\theta(t)),\\
v(t+1) &= v(t) + \gamma a(t),\\
\theta(t+1) &= \theta(t) + \gamma\omega(t)
\end{aligned}$$
for step size $\gamma > 0$.


In [1]:
import numpy as np
import matplotlib.pyplot as plt

def step_pointmass(state, action, step_size=0.1):
    """Advance a point-mass state by one forward-Euler step.

    ``state`` unpacks as ``(x, y, vx, vy)`` and ``action`` as ``(ax, ay)``.
    Positions move with the current velocities and velocities change with the
    commanded accelerations, each scaled by ``step_size``.

    Returns the next state as an ``(x, y, vx, vy)`` tuple.
    """
    pos_x, pos_y, vel_x, vel_y = state
    acc_x, acc_y = action

    return (
        pos_x + step_size * vel_x,
        pos_y + step_size * vel_y,
        vel_x + step_size * acc_x,
        vel_y + step_size * acc_y,
    )

def reset_pointmass(x, y, vx, vy):
    """Pack position and velocity into a point-mass state ``(x, y, vx, vy)``."""
    initial_state = (x, y, vx, vy)
    return initial_state

def saturate_vel_pointmass(state, sat):
    """Limit the speed of a point-mass state to ``sat``, keeping its direction.

    ``state`` unpacks as ``(x, y, vx, vy)``; the components may be scalars or
    equally shaped NumPy arrays (batched states).  Whenever the speed
    ``sqrt(vx**2 + vy**2)`` exceeds ``sat`` the velocity vector is rescaled to
    have length ``sat``; otherwise it is returned unchanged.  ``sat`` is
    expected to be non-negative.

    Returns ``(x, y, vx, vy)`` with the (possibly) rescaled velocity.
    """
    x, y, vx, vy = state
    speed = np.sqrt(vx**2 + vy**2)
    too_fast = speed > sat

    # Only divide where rescaling is actually needed.  Elsewhere the divisor
    # is replaced by 1 so a zero speed never produces 0/0 (and the
    # RuntimeWarning that comes with it).
    divisor = np.where(too_fast, speed, 1.0)
    scale = np.where(too_fast, sat / divisor, 1.0)

    return x, y, vx * scale, vy * scale
    
def step_unicycle(state, action, step_size=0.1):
    """Advance a unicycle state by one forward-Euler step.

    ``state`` unpacks as ``(x, y, v, theta)`` and ``action`` as ``(w, a)``:
    turn rate first, then acceleration.  The position moves by
    ``step_size * v`` along ``(sin(theta), cos(theta))``, the speed changes by
    ``step_size * a`` and the heading by ``step_size * w``.  The new heading
    is wrapped into ``[-pi, pi)``.

    Returns the next state as an ``(x, y, v, theta)`` tuple.
    """
    pos_x, pos_y, speed, heading = state
    turn_rate, accel = action

    travelled = step_size * speed
    raw_heading = heading + step_size * turn_rate

    return (
        pos_x + travelled * np.sin(heading),
        pos_y + travelled * np.cos(heading),
        speed + step_size * accel,
        # Shift by pi, wrap to one full turn, shift back.
        np.mod(raw_heading + np.pi, 2 * np.pi) - np.pi,
    )
    
def reset_unicycle(x, y, v, theta):
    """Pack position, speed and heading into a unicycle state ``(x, y, v, theta)``."""
    return (x, y, v, theta)

def saturate_vel_unicycle(state, sat):
    """Limit the signed speed of a unicycle state to the range ``[-sat, sat]``.

    ``state`` unpacks as ``(x, y, v, theta)``; the components may be scalars
    or equally shaped NumPy arrays (batched states).  ``sat`` is expected to
    be non-negative.

    The limit is applied to the magnitude of ``v`` so that reversing cannot
    exceed the speed limit either, matching the magnitude-based limit used
    for the point-mass model.  Speeds already inside the range are returned
    unchanged.

    Returns ``(x, y, v, theta)`` with the clipped speed.
    """
    x, y, v, t = state
    return x, y, np.clip(v, -sat, sat), t


""" Simulate pointmass """
# Roll the point-mass model forward under a constant acceleration command,
# limiting the speed to max_vel after every step, then plot the trajectory.
T = 100
states = np.zeros((T,4))  # one row per time step: (x, y, vx, vy)
states[0] = reset_pointmass(x=0,y=0,vx=0,vy=0)
max_vel =  0.5
for t in range(T-1):
    action = (0.1,0.1)  # unpacked by step_pointmass as (ax, ay)
    state = step_pointmass(states[t], action)
    state = saturate_vel_pointmass(state, max_vel)
    states[t+1] = state
    
# Each column of `states` is drawn as its own curve over the step index.
plt.plot(states) 
plt.legend(['x','y','vx','vy'])


""" Simulate unicycle """
# Roll the unicycle model forward under a constant command, limiting the
# speed to max_vel after every step, then plot the trajectory in a new figure.
T = 100
states = np.zeros((T,4))  # one row per time step: (x, y, v, theta)
states[0] = reset_unicycle(x=0,y=0,v=0,theta=0)
max_vel = 0.5
for t in range(T-1):
    action = (0.1,0.1)  # unpacked by step_unicycle as (w, a)
    state = step_unicycle(states[t], action)
    state = saturate_vel_unicycle(state, max_vel)
    states[t+1] = state
    
# Each column of `states` is drawn as its own curve over the step index.
plt.figure()
plt.plot(states)
plt.legend(['x','y','v','theta'])

<matplotlib.legend.Legend at 0x7fb1811e41d0>

## Pursuit evasion model

In [2]:
def dist_sq(state1, state2):
    """Return the squared planar distance between two four-component states.

    Only the first two components (the x/y position) of each state are used;
    the remaining two are ignored.
    """
    ax, ay, _, _ = state1
    bx, by, _, _ = state2
    dx = ax - bx
    dy = ay - by
    return dx**2 + dy**2

class PursuitEvasion001:
    """Planar pursuit-evasion game with one evader and two pursuers.

    Agent 0 is the evader and follows the point-mass model; agents 1 and 2
    are the pursuers and follow the unicycle model.  ``reset`` must be called
    before ``step`` because it is what creates ``self.states``.
    """

    def __init__(self):
        self.num_pursuers = 2
        self.num_evaders = 1

        # Speed limits applied by the saturate_vel_* helpers after each step.
        self.vel_pursuer = 0.5
        self.vel_evader = 1.

        # A pursuer captures the evader when their distance is below this.
        self.capture_radius = 0.1

    def step(self, act):
        """Advance all three agents by one step.

        ``act`` unpacks as ``(evader_action, pursuer1_action,
        pursuer2_action)``.  The evader action is forwarded to
        ``step_pointmass`` and the pursuer actions to ``step_unicycle``.

        Returns ``(states, rews, done, info)`` where ``states`` is the tuple
        of the three new agent states, ``rews`` is ``(evader, pursuer1,
        pursuer2)`` with each pursuer rewarded 1 for a capture and the evader
        penalised by the number of capturing pursuers, ``done`` is true as
        soon as at least one pursuer has captured the evader, and ``info`` is
        an empty dict.
        """
        act1, act2, act3 = act

        state1 = step_pointmass(self.states[0],  act1)
        state2 = step_unicycle(self.states[1],  act2)
        state3 = step_unicycle(self.states[2],  act3)

        state1 = saturate_vel_pointmass(state1, self.vel_evader)
        state2 = saturate_vel_unicycle(state2, self.vel_pursuer)
        state3 = saturate_vel_unicycle(state3, self.vel_pursuer)

        # Compare squared distances to avoid a square root; `*1` turns the
        # boolean capture flag into a 0/1 integer usable as a reward.
        radius_sq = self.capture_radius**2
        cap2 = (dist_sq(state1, state2) < radius_sq)*1
        cap3 = (dist_sq(state1, state3) < radius_sq)*1

        rews = (-cap2-cap3, cap2, cap3)
        # The episode ends on the first capture by either pursuer.  The
        # previous `> 1` test only fired when both pursuers captured in the
        # same step.
        done = cap2+cap3 > 0

        self.states = (state1, state2, state3)
        return self.states, rews, done, {}

    def reset(self):
        """Draw random initial states and return them.

        Positions are sampled uniformly from ``[-1, 1)`` on each axis and the
        pursuer headings uniformly from ``[-pi, pi)``; every agent starts at
        rest.  The states are stored in ``self.states`` and also returned as
        ``(evader, pursuer1, pursuer2)``.
        """
        xs = np.random.uniform(-1,1,3)
        ys = np.random.uniform(-1,1,3)
        thetas = np.random.uniform(-np.pi,np.pi,2)

        state1 = reset_pointmass(x=xs[0], y=ys[0], vx=0, vy=0)
        state2 = reset_unicycle(x=xs[1], y=ys[1], v=0, theta=thetas[0])
        state3 = reset_unicycle(x=xs[2], y=ys[2], v=0, theta=thetas[1])

        self.states = (state1, state2, state3)
        return self.states

# Build the game and draw random initial states.
peg = PursuitEvasion001()
peg.reset()

# Evader command.
# NOTE(review): step_pointmass unpacks its action as (ax, ay), i.e. these
# values are consumed as accelerations although the names say "vel" - confirm
# the intended naming.
evader_velx_act = 0
evader_vely_act = 0

# Pursuer commands.
# NOTE(review): step_unicycle unpacks its action as (w, a) - turn rate first,
# acceleration second - whereas the tuples below are assembled as
# (vel_act, theta_act).  Harmless while every entry is 0; confirm the intended
# order before using non-zero commands.
pursuer1_vel_act = 0
pursuer1_theta_act = 0

pursuer2_vel_act = 0
pursuer2_theta_act = 0

# One action per agent, in the order PursuitEvasion001.step unpacks them:
# evader, pursuer 1, pursuer 2.
acts = ((evader_velx_act, evader_vely_act),
        (pursuer1_vel_act, pursuer1_theta_act),
        (pursuer2_vel_act, pursuer2_theta_act))

peg.step(acts)



(((-0.7157664929906005, 0.032818598823320055, 0.0, 0.0),
  (0.18493184415072106, -0.9612833783666945, 0.0, -0.01825644748915156),
  (0.9286474095754902, -0.918548165829215, 0.0, 2.2472599798360813)),
 (0, 0, 0),
 False,
 {})

In [3]:
# BATCH EXPERIMENTS
# Check that the point-mass helpers accept arrays: every state component is a
# length-5 array, i.e. five point masses handled at once.
xs = np.zeros(5)
ys = np.zeros(5)
vxs = np.ones(5)
vys = np.ones(5)
acts = np.zeros((2,5))  # row 0 unpacks as ax, row 1 as ay
state1 = reset_pointmass(x=xs, y=ys, vx=vxs,vy=vys)
# NOTE(review): the stepped state is not assigned to anything, so the
# saturation below is applied to the un-stepped state1 - confirm this is
# intended.
step_pointmass(state1, acts)
saturate_vel_pointmass(state1, 0.1)

(array([0., 0., 0., 0., 0.]),
 array([0., 0., 0., 0., 0.]),
 array([0.07071068, 0.07071068, 0.07071068, 0.07071068, 0.07071068]),
 array([0.07071068, 0.07071068, 0.07071068, 0.07071068, 0.07071068]))