In [1]:
import json
import math
import os
from argparse import Namespace

import numpy as np
import torch
# Project imports and onnxruntime keep their original relative order.
from nnet_util import torch_model_to_nnet, onnx_to_nnet
from flat_reach_safety_gym_RL import *
import onnxruntime

pybullet build time: May 20 2022 19:43:01
  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Training-run directory holding args.json and the checkpts/ folder. Set the
# FLAT_REACH_DEMO_PATH environment variable to point at a local copy instead
# of editing the machine-specific default below.
demo_path = os.environ.get(
    "FLAT_REACH_DEMO_PATH",
    "/home/ruic/Documents/RESEARCH/ICL/Composable_Agent_Toolbox/examples/output/FlatReachSafetyGym_ISSA_None_C400_H256_Nm100.0_lr0.0001",
)
# Episode index used to pick the checkpoint file and to name the exported .nnet.
demo_ep = 1200

# Restore the arguments that were saved alongside the run.
with open(os.path.join(demo_path, "args.json"), "r") as infile:
    config = json.load(infile)
args_saved = Namespace(**config)

# Override the saved arguments for this demo session.
args_saved.demo = True
args_saved.demo_path = demo_path
args_saved.demo_ep = demo_ep

args = args_saved

args.env_name = "FlatReachSafetyGym_ISSA"

# Network input / output sizes (12 state features, 25 discrete actions).
STATE_DIMENSION = 12
ACTION_SPACE_SIZE = 25

# Prefer the GPU when one is present, but keep the notebook runnable on
# CPU-only machines instead of failing at the first `.to(device)`.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [3]:
def state_to_tensor(s, device):
    """Convert a state into a float32 tensor with a leading batch axis.

    Args:
        s: array-like state (anything ``np.asarray`` accepts).
        device: target torch device (e.g. ``'cpu'`` or ``'cuda'``).

    Returns:
        Tensor of shape [1, dS] on ``device``.
    """
    state_array = np.asarray(s)
    batched = torch.unsqueeze(torch.from_numpy(state_array), dim=0)
    return batched.to(torch.float32).to(device)

In [4]:
# Build the Q-network with the saved hyper-parameters and move it to `device`.
Q = QNetwork(
    env_name=args.env_name,
    state_dim=STATE_DIMENSION,
    num_of_actions=ACTION_SPACE_SIZE,
    hidden_size=args.hidden_size,
)
Q = Q.to(device)

# Checkpoint location: <demo_path>/checkpts/<env_name>_<demo_ep>.pt
checkpt_name = f"{args.env_name}_{args.demo_ep}.pt"
checkpt_path = os.path.join(args.demo_path, 'checkpts', checkpt_name)
Q.load_model_weights(checkpt_path)

# Switch to evaluation mode; as the last expression this also displays the model.
Q.eval()

QNetwork(
  (model): Sequential(
    (0): Linear(in_features=12, out_features=256, bias=True)
    (1): ReLU()
    (2): Linear(in_features=256, out_features=256, bias=True)
    (3): ReLU()
    (4): Linear(in_features=256, out_features=25, bias=True)
  )
)

In [20]:
# Discretisation of the two action components. Each component is split into
# evenly spaced levels between its MIN and MAX (see get_action).
# NOTE(review): units of the linear / rotational commands are not shown here.
LIN_DISCRETE_SIZE, ROT_DISCRETE_SIZE = 5, 5
LIN_MIN, LIN_MAX = -0.01, 0.01
ROT_MIN, ROT_MAX = -10.0, 10.0

# One discrete action per (linear level, rotational level) pair.
ACTION_SPACE_SIZE = LIN_DISCRETE_SIZE * ROT_DISCRETE_SIZE

def get_action(Q, state, device):
    """Pick the greedy discrete action for ``state`` and decode it.

    Args:
        Q: Q-network; called on a [1, dS] tensor and expected to return one
            value per discrete action.
        state: array-like state, converted with ``state_to_tensor``.
        device: torch device the state tensor is moved to before the call.

    Returns:
        ``np.ndarray`` of shape (2, 1): the linear-level value followed by
        the rotational-level value of the selected action.
    """
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        q_values = Q(state_to_tensor(state, device))
    # 0-based flat index of the best action.
    action_id = q_values.argmax().item()

    # Flat index layout is lin_id * ROT_DISCRETE_SIZE + rot_id.
    lin_id, rot_id = divmod(action_id, ROT_DISCRETE_SIZE)

    # Map each level index onto its evenly spaced value in [MIN, MAX].
    action = [
        LIN_MIN + (LIN_MAX-LIN_MIN)/(LIN_DISCRETE_SIZE-1) * lin_id,
        ROT_MIN + (ROT_MAX-ROT_MIN)/(ROT_DISCRETE_SIZE-1) * rot_id
    ]

    return np.asarray(action).reshape(-1, 1)

def get_state(vel_R, heading_R, pos_G_rel, pos_O_rel):
    """Assemble the 12-element feature list for the Q-network.

    The same six features are computed first for the goal and then for the
    obstacle, each from its position relative to the robot:
    distance, projection of the relative position onto the heading vector,
    robot speed, speed * cos(bearing error), speed * sin(bearing error), and
    the bearing error (bearing minus heading) wrapped through atan2.

    Args:
        vel_R: robot velocity as a numpy array (flattened internally).
        heading_R: robot heading angle in radians.
        pos_G_rel: goal position minus robot position, numpy array with at
            least two components.
        pos_O_rel: obstacle position minus robot position, same layout.

    Returns:
        list of 12 numbers: six goal features followed by six obstacle features.
    """
    speed = np.linalg.norm(vel_R.reshape(-1))
    facing = np.array([math.cos(heading_R), math.sin(heading_R)])

    def relative_features(rel_pos):
        # Six features describing one target (goal or obstacle).
        offset = rel_pos.reshape(-1)
        bearing_error = math.atan2(offset[1], offset[0]) - heading_R
        return [
            np.linalg.norm(offset),
            np.dot(offset, facing),
            speed,
            speed * math.cos(bearing_error),
            speed * math.sin(bearing_error),
            # atan2(sin, cos) wraps the angle difference into [-pi, pi]
            math.atan2(math.sin(bearing_error), math.cos(bearing_error)),
        ]

    return relative_features(pos_G_rel) + relative_features(pos_O_rel)

# numerical examples

# case 1 (success)
# When obstacle is in the way, changing goal y pos should not change action

print('------------------- case 1 -------------------')

robot_x, robot_y = 0, 0.3
robot_rot = -np.pi/2
goal_x, goal_y = 0, 2          # case 1 +/- 0.2
hazard_x, hazard_y = -0.2, 1   # case 1 -0.2

# Query the policy at the nominal goal and with the goal shifted by +0.2 in y;
# the obstacle and robot stay fixed.
for goal_dy in (0, 0.2):
    state = get_state(
        vel_R=np.array([0, 1]),
        heading_R=robot_rot,
        pos_G_rel=np.array([goal_x - robot_x, goal_y - robot_y + goal_dy]),
        pos_O_rel=np.array([hazard_x - robot_x, hazard_y - robot_y]),
    )
    print(state)
    print(get_action(Q, state, device))

------------------- case 1 -------------------
[1.7, -1.7, 1.0, -1.0, 1.2246467991473532e-16, 3.141592653589793, 0.7280109889280517, -0.7, 1.0, -0.9615239476408232, -0.274721127897378, -2.863292994584682]
[[-0.01]
 [-5.  ]]
[1.9, -1.9, 1.0, -1.0, 1.2246467991473532e-16, 3.141592653589793, 0.7280109889280517, -0.7, 1.0, -0.9615239476408232, -0.274721127897378, -2.863292994584682]
[[-0.01]
 [-5.  ]]


In [19]:
# case 2 (fail)
# When obstacle is not in the way, changing obstacle x pos should not change action

print('------------------- case 2 -------------------')

robot_x, robot_y = 0, 0.3
robot_rot = -np.pi/2
goal_x, goal_y = 0, 2
hazard_x, hazard_y = 0.0, -0.4

# Query the policy at the nominal obstacle position and with the obstacle
# shifted by +0.1 in x; the goal and robot stay fixed.
for hazard_dx in (0, 0.1):
    state = get_state(
        vel_R=np.array([0, 1]),
        heading_R=robot_rot,
        pos_G_rel=np.array([goal_x - robot_x, goal_y - robot_y]),
        pos_O_rel=np.array([hazard_x - robot_x + hazard_dx, hazard_y - robot_y]),
    )
    print(state)
    print(get_action(Q, state, device))

------------------- case 2 -------------------
[1.7, -1.7, 1.0, -1.0, 1.2246467991473532e-16, 3.141592653589793, 0.7, 0.7, 1.0, 1.0, 0.0, 0.0]
[[-0.01]
 [ 0.  ]]
[1.7, -1.7, 1.0, -1.0, 1.2246467991473532e-16, 3.141592653589793, 0.7071067811865475, 0.7, 1.0, 0.9899494936611666, 0.1414213562373094, 0.1418970546041638]
[[0.]
 [5.]]


In [7]:
# Export the loaded Q-network to the .nnet format, saving 8 digits.
# More can be used, but that requires more space.
nnet_path = f"{demo_ep}.nnet"
torch_model_to_nnet(Q, nnet_path, decimal=8)

Converted pytorch model to an NNet model at 1200.nnet


In [1]:
# Load the exported network in Julia to test it.
# Outputs can differ slightly from the PyTorch model because the .nnet file
# stores only a limited number of digits.

using NeuralVerification

nnet_file = "1200.nnet"
net = read_nnet(nnet_file)

Network(NeuralVerification.Layer[NeuralVerification.Layer{NeuralVerification.ReLU, Float64}([0.24342039 0.03698453 … -0.27498862 -0.1547408; 0.09474183 -0.18618044 … -0.11526605 0.30972815; … ; 0.082394 -0.2601743 … -0.34052637 0.12294344; 0.04755301 0.11737449 … -0.16945669 -0.08002926], [-0.22134753, 0.46379456, 0.07607478, 0.54365259, 0.21357113, -0.23324615, -0.0016113, -0.15920782, -0.14313482, -0.498936  …  0.34498838, 0.13475046, 0.11413261, -0.15794192, -0.04125132, 0.08076145, 0.41826671, -0.14050023, 0.29482433, 0.34801096], NeuralVerification.ReLU()), NeuralVerification.Layer{NeuralVerification.ReLU, Float64}([0.02563666 0.23543376 … 0.09984681 -1.96628988; -0.07438762 0.02845028 … -0.18539959 0.23890342; … ; -0.15853742 0.27787173 … 0.16925441 -1.82002664; -0.14807875 0.08876662 … -0.0500313 -1.06449413], [0.00389475, -0.12922841, 0.15374568, 0.03182315, 0.1785596, 0.04491991, 0.28510159, -0.05970472, 0.07842706, 0.23793134  …  0.21913138, 0.09256473, 0.13260208, 0.23709306

In [2]:
"""
    get_result(solver, problem)

Build a mixed-integer model for `problem` with `solver`, constrain the first
layer variables to `problem.input` and the last layer variables to the
complement of `problem.output`, set the `max_disturbance!` objective around
`problem.input.center`, and optimize.

Returns `(1, x)` where `x` is the value of the first-layer variables when the
solver terminates with status `OPTIMAL`, and `(0, 0.0)` otherwise.
"""
function get_result(solver, problem)
    NV = NeuralVerification  # local shorthand for the module

    model = NV.Model(solver)
    z = NV.init_vars(model, problem.network, :z, with_input=true)
    # Binary variables registered under :δ (presumably one per neuron — confirm
    # against NeuralVerification.init_vars); the binding itself is not used below.
    δ = NV.init_vars(model, problem.network, :δ, binary=true)

    # get the pre-activation bounds:
    model[:bounds] = NV.get_bounds(problem, before_act=true)
    model[:before_act] = true

    # Input must lie in the input set; output must leave the output set.
    NV.add_set_constraint!(model, problem.input, first(z))
    NV.add_complementary_set_constraint!(model, problem.output, last(z))
    NV.encode_network!(model, problem.network, NV.BoundedMixedIntegerLP())

    # Called for its effect on the model; the returned value is not used here.
    o = NV.max_disturbance!(model, first(z) - problem.input.center)
    NV.optimize!(model)

    if NV.termination_status(model) != NV.OPTIMAL
        return (0,0.0)
    end
    return (1,NV.value(first(z)))
end

get_result (generic function with 1 method)

In [3]:
# Case 1: changing goal distance should not change action.
# The input is the first state printed by the Python case 1 cell, which
# describes this scenario as "obstacle is in the way".
input = [1.7, -1.7, 1.0, -1.0, 1.2246467991473532e-16, 3.141592653589793, 0.7280109889280517, -0.7, 1.0, -0.9615239476408232, -0.274721127897378, -2.863292994584682]

# Check the desired action
output = NeuralVerification.compute_output(net, input)
desired_action = argmax(output) # Note Julia's index starts from 1
num_actions = length(output)    # one network output per discrete action

# Now check whether the input can tolerate some perturbation

# radius is the per-feature difference between the two states from case 1:
# only the first two features change (|1.9 - 1.7| = 0.2). The input set is the
# box input ± radius.
radius = [0.2, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] # This parameter is tunable
input_set  = NeuralVerification.Hyperrectangle(input, radius)

# Choose solver
solver = MIPVerify()

# Problem 1. Now we check if the desired_action remains the same for all inputs in the input_set
for i in 1:num_actions
    i == desired_action && continue
    # Output set {y : y[i] - y[desired_action] <= 0}: action i never beats the desired one.
    # (named `coeffs` rather than `vec` to avoid shadowing Base.vec)
    coeffs = zeros(1, num_actions)
    coeffs[i] = 1.0
    coeffs[desired_action] = -1.0
    output_set = NeuralVerification.HPolytope(coeffs, [0.0])
    problem = Problem(net, input_set, output_set)
    result = solve(solver, problem)
    println("action pair: ",i, ",", desired_action, ": ",result.status)
end


action pair: 1,2: holds
action pair: 

3,2: holds


action pair: 4,2: holds
action pair: 

5,2: holds


action pair: 6,2: holds
action pair: 

7,2: holds


action pair: 8,2: holds
action pair: 

9,2: holds


action pair: 10,2: holds
action pair: 

11,2: holds


action pair: 12,2: holds
action pair: 

13,2: holds


action pair: 14,2: holds
action pair: 

15,2: holds


action pair: 16,2: holds
action pair: 

17,2: holds


action pair: 18,2: holds
action pair: 

19,2: holds


action pair: 20,2: holds
action pair: 

21,2: holds


action pair: 22,2: holds
action pair: 

23,2: holds


action pair: 24,2: holds
action pair: 

25,2: holds


In [4]:
# Case 2: When obstacle is not in the way, changing obstacle x pos should not change action
# The input is the first state printed by the Python case 2 cell.
input = [1.7, -1.7, 1.0, -1.0, 1.2246467991473532e-16, 3.141592653589793, 0.7, 0.7, 1.0, 1.0, 0.0, 0.0]

# Check the desired action
output = NeuralVerification.compute_output(net, input)
desired_action = argmax(output) # Note Julia's index starts from 1
num_actions = length(output)    # one network output per discrete action

# Now check whether the input can tolerate some perturbation
# radius is the per-feature absolute difference between the two states printed
# by the Python case 2 cell (obstacle shifted by 0.1 in x). The input set is the
# box input ± radius, so it also contains feature combinations other than the
# two sampled states.
radius = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.007106781186547506, 0.0, 0.0, 0.01005050633883342, 0.1414213562373094, 0.1418970546041638] # This parameter is tunable
input_set  = NeuralVerification.Hyperrectangle(input, radius)

# Choose solver
solver = MIPVerify()

# Problem 1. Now we check if the desired_action remains the same for all inputs in the input_set
for i in 1:num_actions
    i == desired_action && continue
    # Output set {y : y[i] - y[desired_action] <= 0}: action i never beats the desired one.
    # (named `coeffs` rather than `vec` to avoid shadowing Base.vec)
    coeffs = zeros(1, num_actions)
    coeffs[i] = 1.0
    coeffs[desired_action] = -1.0
    output_set = NeuralVerification.HPolytope(coeffs, [0.0])
    problem = Problem(net, input_set, output_set)
    result = solve(solver, problem)
    println("action pair: ",i, ",", desired_action, ": ",result.status)
end


action pair: 1,3: holds
action pair: 

2,3: holds


action pair: 4,3: holds
action pair: 

5,3: holds


action pair: 6,3: holds
action pair: 

7,3: holds


action pair: 8,3: holds
action pair: 

9,3: holds


action pair: 10,3: holds
action pair: 

11,3: holds


action pair: 12,3: holds
action pair: 

13,3: holds


action pair: 14,3: violated
action pair: 

15,3: holds


action pair: 16,3: holds
action pair: 

17,3: holds


action pair: 18,3: holds
action pair: 

19,3: holds


action pair: 20,3: holds
action pair: 

21,3: holds


action pair: 22,3: holds
action pair: 

23,3: holds


action pair: 24,3: holds
action pair: 

25,3: holds
