In [3]:
import os, json
from argparse import Namespace

import numpy as np
import torch

from flat_reach_RL import *
# from nnet_util import torch_model_to_nnet, onnx_to_nnet
# import onnxruntime

In [6]:
# Run directory of the trained agent and the checkpoint episode to load.
# NOTE(review): these are machine-specific absolute paths; adjust `demo_path`
# to wherever the run directory lives on the current machine.
# demo_path = "/Users/changliuliu/Documents/GitHub/Composable_Agent_Toolbox/examples/output/FlatReach_near_C200_H32_Nm100.0_lr0.001"
demo_path = "/home/ruic/Documents/RESEARCH/ICL/Composable_Agent_Toolbox/examples/output/FlatReach_near_C200_H32_Nm100.0_lr0.001"
demo_ep = 300

# Load the argument dictionary stored as `args.json` inside the run directory
# and expose its entries as attributes.
with open(os.path.join(demo_path, "args.json"), "r") as infile:
    config = json.load(infile)
args_saved = Namespace(**config)

# Override the demo-related fields so they point at the run chosen above.
args_saved.demo = True
args_saved.demo_path = demo_path
args_saved.demo_ep = demo_ep

args = args_saved

# Input / output widths passed to QNetwork when it is constructed.
STATE_DIMENSION = 8
ACTION_SPACE_SIZE = 17

# Use the GPU when one is usable and fall back to the CPU otherwise, so the
# notebook also runs on machines without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [8]:
def state_to_tensor(s, device):
    """Turn one state into a float tensor with a leading batch axis.

    `s` is anything `np.asarray` accepts. The array is wrapped as a tensor,
    given an extra first dimension (a flat state of length dS becomes
    [1, dS]), cast to float and moved to `device`.
    """
    as_array = np.asarray(s)
    batched = torch.from_numpy(as_array).unsqueeze(0)
    return batched.float().to(device)

In [7]:
# Checkpoint file of the selected episode inside the run directory.
checkpt_path = os.path.join(
    args.demo_path, 'checkpts', f'{args.env_name}_{args.demo_ep}.pt')

# Build the Q-network with the configured sizes, place it on `device`,
# restore the saved weights and put it in evaluation mode.
Q = QNetwork(
    env_name=args.env_name,
    state_dim=STATE_DIMENSION,
    num_of_actions=ACTION_SPACE_SIZE,
    hidden_size=args.hidden_size,
)
Q = Q.to(device)
Q.load_model_weights(checkpt_path)
Q.eval()

QNetwork(
  (model): Sequential(
    (0): Linear(in_features=8, out_features=32, bias=True)
    (1): ReLU()
    (2): Linear(in_features=32, out_features=32, bias=True)
    (3): ReLU()
    (4): Linear(in_features=32, out_features=17, bias=True)
  )
)

In [20]:
# Discretisation of the action space used by `get_action`:
# action 0 is the zero action; every other id is one (magnitude level, angle)
# combination, giving 1 + ANGLE_DISCRETE_SIZE * ACC_DISCRETE_SIZE ids in total.
ANGLE_DISCRETE_SIZE = 8
ACC_MIN = 50
ACC_MAX = 100
ACC_DISCRETE_SIZE = 2
ACTION_SPACE_SIZE = 1 + ANGLE_DISCRETE_SIZE * ACC_DISCRETE_SIZE

def get_action(Q, state, device):
    """Pick the greedy action of `Q` for `state` and decode it to a 2-D vector.

    Args:
        Q: callable that maps the [1, dS] tensor produced by
            `state_to_tensor` to one value per action id.
        state: a single state, in any form `state_to_tensor` accepts.
        device: device the state tensor is moved to before calling `Q`.

    Returns:
        (action_id, action): the index of the largest value returned by `Q`
        and a float numpy array of shape (2,) holding
        [mag*cos(angle), mag*sin(angle)], or [0, 0] for action_id 0.
    """
    # Pure inference: no gradients are needed, so skip autograd bookkeeping.
    with torch.no_grad():
        action_id = Q(state_to_tensor(state, device)).argmax().item()

    if action_id == 0:
        action = [0.0, 0.0]
    else:
        action_id_non_zero = action_id - 1
        # Ids cycle through the angles first, then step to the next magnitude.
        angle_index = action_id_non_zero % ANGLE_DISCRETE_SIZE
        mag_level = action_id_non_zero // ANGLE_DISCRETE_SIZE
        angle = 2*np.pi / ANGLE_DISCRETE_SIZE * angle_index
        if ACC_DISCRETE_SIZE > 1:
            # Levels are evenly spaced from ACC_MIN (level 0) to ACC_MAX.
            mag = mag_level*(ACC_MAX-ACC_MIN)/(ACC_DISCRETE_SIZE-1) + ACC_MIN
        else:
            # A single level means level 0 only; avoid the 0/0 in the spacing.
            mag = float(ACC_MIN)
        action = [mag*np.cos(angle), mag*np.sin(angle)]

    # Always return floats so the zero action has the same dtype as the rest.
    return action_id, np.asarray(action, dtype=float).reshape(-1)

# numerical examples
# state = [goal_rel_pos, goal_rel_vel, obs_rel_pos, obs_rel_vel]
# (presumably each entry is a 2-D vector, which gives the 8 numbers per state)
example_states = [
    # When obstacle is far, changing goal distance should not change action
    [10, 0, -1, 0, 0, 50, 0, 0],
    [5, 0, -1, 0, 0, 50, 0, 0],
    # When obstacle is not on the way to goal, changing obstacle vel should not change action
    [10, 0, -1, 0, 0, 50, 0, 0],
    [10, 0, -1, 0, 0, 50, 0, -1],
    # When obstacle is on the way to goal, changing goal distance should not change action
    [20, 0, -1, 0, 10, 0, -2, 0],
    [25, 0, -1, 0, 10, 0, -2, 0],
]

# The states come in consecutive pairs; the two printed results of each pair
# are expected to match.
for state in example_states:
    print(get_action(Q, state, device))

(9, array([100.,   0.]))
(9, array([100.,   0.]))
(9, array([100.,   0.]))
(9, array([100.,   0.]))
(16, array([ 70.71067812, -70.71067812]))
(16, array([ 70.71067812, -70.71067812]))


In [4]:
# Export Q to "<demo_ep>.onnx", using one random [1, STATE_DIMENSION] tensor
# as the example input and naming the graph's input and output.
input_names, output_names = ["dummy_input"], ["output"]
dummy_input = torch.randn(1, STATE_DIMENSION).to(device)

torch.onnx.export(
    Q,
    dummy_input,
    f"{demo_ep}.onnx",
    export_params=True,
    verbose=False,
    input_names=input_names,
    output_names=output_names,
)

In [5]:
# NOTE(review): the `nnet_util` import that provides `torch_model_to_nnet` is
# commented out in the first cell; unless `flat_reach_RL` re-exports it, it
# must be re-enabled before this cell runs on a fresh kernel.
torch_model_to_nnet(Q, f"{demo_ep}.nnet", decimal=8) # saving 8 digits. More can be used, but that requires more space.

Converted pytorch model to an NNet model at 300.nnet


In [6]:
# Convert the exported ONNX file to NNet format. The target path is the same
# "<demo_ep>.nnet" used by the direct PyTorch conversion, so presumably this
# replaces that file.
# NOTE(review): the `nnet_util` import that provides `onnx_to_nnet` is
# commented out in the first cell — confirm it is in scope on a fresh kernel.
onnx_to_nnet(f"{demo_ep}.onnx", f"{demo_ep}.nnet", decimal=8)

Converted ONNX model at 300.onnx
    to an NNet model at 300.nnet


In [7]:
def onnx_inference(file, input):
    """Run the ONNX model stored at `file` on `input` and print the result.

    A session is opened with every execution provider onnxruntime reports as
    available. `input` must offer `.astype` (e.g. a numpy array); it is cast
    to float32 and fed to the model's first input. Only the first output is
    printed; nothing is returned.

    NOTE(review): relies on `onnxruntime` being in scope; its import is
    commented out in the notebook's first cell.
    """
    session = onnxruntime.InferenceSession(
        file, providers=onnxruntime.get_available_providers())
    feed = {session.get_inputs()[0].name: input.astype(np.float32)}
    outputs = session.run(None, feed)
    print(outputs[0])

# Sanity-check the exported model on one batch holding a single 8-D state.
# The file name is derived from `demo_ep`, matching the export cell, so the
# check follows the checkpoint when `demo_ep` changes.
input = np.array([[1,2,3,4,5,6,7,8]])
onnx_inference(f"{demo_ep}.onnx", input)

[[930.37695 932.53015 937.174   936.2096  940.66034 927.52344 930.07733
  928.99    928.7442  941.5904  945.49884 953.3901  935.059   929.7238
  925.0194  935.30005 934.61084]]


In [None]:
# Test nnet in julia using the following code:
# the output could be slightly different due to digits used for saving nnet

# This cell is Julia, not Python: run it in a Julia session that has the
# NeuralVerification package installed.
using NeuralVerification
# Read the network from the NNet file produced by the conversion cells.
net = read_nnet("300.nnet")

# Define the problem
#TODO: change to realistic values
# `input` is the nominal 8-D state (same values as the ONNX sanity check);
# `radius` is passed together with it to Hyperrectangle in the next cell.
input = [1,2,3,4,5,6,7,8] 
radius = [1,1,1,1,1,1,1,1]

# Check the desired action
# Julia's argmax is 1-based, so `desired_action` equals the Python
# `action_id` (0-based) plus one.
output = NeuralVerification.compute_output(net, input)
desired_action = argmax(output)

In [None]:
# Now check whether the input can tolerate some perturbation
input_set  = NeuralVerification.Hyperrectangle(input, radius)
# TODO: automatically generate the output_set
output_set = NeuralVerification.HPolytope([NeuralVerification.HalfSpace([1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0], 0.0)])

problem = Problem(net, input_set, output_set)
solver = MIPVerify()
result = solve(solver, problem)

In [None]:
# PROBLEM 1: CHECK IF THE POLICY IS ROBUST UNDER INPUT PERTURBATION ** CHECK EVERY ACTION PAIR

# PROBLEM 2: COMPUTE THE MAXIMUM RANGE OF PERTURBATION

# PROBLEM 3: COMPARE THE PERFORMANCE OF DIFFERENT SOLVERS: computation time, conservativeness
