In [94]:
import copy
import sys

sys.path.append("./src")
sys.path.append("./src/rl")

import os
import pickle
import time

import pandas as pd
import gym
import numpy as np
import torch as th
import pybullet as p
from gym_pybullet_drones.control.DSLPIDControl import DSLPIDControl
from gym_pybullet_drones.envs.CtrlAviary import CtrlAviary
from gym_pybullet_drones.utils.enums import DroneModel, Physics
from gym_pybullet_drones.utils.utils import sync

# from gym_pybullet_drones.utils.Logger import Logger
from stable_baselines3 import PPO, A2C
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.noise import NormalActionNoise
from stable_baselines3.a2c import MlpPolicy
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.logger import configure
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.utils import obs_as_tensor, safe_mean

from gym.envs.registration import register

import pandas as pd
import plotly.express as px

from rl_crazyflie.envs.NavigationAviaryErr import NavigationAviaryErr
from rl_crazyflie.utils.Logger import Logger
from rl_crazyflie.utils.constants import Modes

In [150]:
# Run directory that every artifact path below is derived from.
# DIR = "results-nav-v-2"
DIR = "results-nav-b_dist-err-rew-w_i-v-51"
_RUN_ROOT = f"./{DIR}"

# Artifact locations inside the run directory.
MODEL_PATH = f"{_RUN_ROOT}/model"
ENV_PATH = f"{_RUN_ROOT}/env"
LOGS_PATH = f"{_RUN_ROOT}/logs"
TB_LOGS_PATH = f"{_RUN_ROOT}/logs"  # same directory as LOGS_PATH
PLT_LOGS_PATH = f"{_RUN_ROOT}/plt/it"

# Default display / recording switches.
VIEW = False
DEFAULT_GUI = False
DEFAULT_RECORD_VIDEO = False
DEFAULT_OUTPUT_FOLDER = f"{_RUN_ROOT}/rec"

# Default simulation settings.
DEFAULT_DRONES = DroneModel("cf2x")
DEFAULT_NUM_DRONES = 1
DEFAULT_PHYSICS = Physics("pyb")
DEFAULT_USER_DEBUG_GUI = False
DEFAULT_AGGREGATE = True
DEFAULT_OBSTACLES = True
DEFAULT_SIMULATION_FREQ_HZ = 50
DEFAULT_DURATION_SEC = 2
DEFAULT_CONTROL_FREQ_HZ = 48

# One row per drone: start position (1, 0, 0) and zero roll/pitch/yaw.
INIT_XYZS = np.tile(np.array([1.0, 0.0, 0.0]), (DEFAULT_NUM_DRONES, 1))
INIT_RPYS = np.zeros((DEFAULT_NUM_DRONES, 3))
NUM_PHYSICS_STEPS = 1

PERIOD = 10

# Run mode: "train" / "test".
MODE = Modes.TEST

# Evaluation settings.
NUM_EVAL_EPISODES = 1
TEST_EXT_DIST_X_MAX = 0.1
TEST_EXT_DIST_XYZ_MAX = 0.05
TEST_EXT_DIST_STEPS = 3

FLIP_FREQ = 20

# Hyperparameters for training.
NUM_EPISODES = 1e6
ACTOR_NET_ARCH = [50, 100, 500, 100, 50]
CRITIC_NET_ARCH = [50, 100, 500, 100, 50]
# One (x, y, z) row per external-disturbance setting used in training.
TRAIN_EXT_DIST = np.array([
    [0.0, 0.0, 0.0],
    [0.05, 0.0, 0.0],
    [-0.05, 0.0, 0.0],
    [0.0, 0.0, 0.05],
    [0.0, 0.0, -0.05],
    [0.025, 0.025, 0.025],
    [-0.025, -0.025, -0.025],
])

In [151]:
# Build the navigation environment from the notebook-level defaults,
# with the external disturbance magnitude set to zero on all three axes.
nav_env = gym.make(
    "navigation-aviary-err-v0",
    drone_model=DEFAULT_DRONES,
    initial_xyzs=INIT_XYZS,
    initial_rpys=INIT_RPYS,
    freq=DEFAULT_SIMULATION_FREQ_HZ,
    aggregate_phy_steps=NUM_PHYSICS_STEPS,
    gui=DEFAULT_GUI,
    record=DEFAULT_RECORD_VIDEO,
    ext_dist_mag=[0, 0, 0],
    flip_freq=FLIP_FREQ,
    eval_reward=True,
)

[INFO] BaseAviary.__init__() loaded parameters from the drone's .urdf:
[INFO] m 0.027000, L 0.039700,
[INFO] ixx 0.000014, iyy 0.000014, izz 0.000022,
[INFO] kf 0.000000, km 0.000000,
[INFO] t2w 2.250000, max_speed_kmh 30.000000,
[INFO] gnd_eff_coeff 11.368590, prop_radius 0.023135,
[INFO] drag_xy_coeff 0.000001, drag_z_coeff 0.000001,
[INFO] dw_coeff_1 2267.180000, dw_coeff_2 0.160000, dw_coeff_3 -0.110000



[33mWARN: Box bound precision lowered by casting to float32[0m



In [152]:
# Restore the pickled environment saved under ENV_PATH and load the PPO
# model saved under MODEL_PATH, attaching that environment to it.
# SECURITY: pickle.load can execute arbitrary code while unpickling; only
# load ENV_PATH files produced by a trusted training run.
# The context manager guarantees the file handle is closed, which the
# previous bare `open(...)` did not.
with open(ENV_PATH, "rb") as env_file:
    nav_env = pickle.load(env_file)
model = PPO.load(MODEL_PATH, nav_env)
# model = PPO("MlpPolicy", nav_env)

In [153]:
# Length of a single observation vector as seen by the loaded model.
OBS_LEN = model.observation_space.shape[0]

# Hand-made probe observation: same shape as a reset observation, all zeros
# except index 2 (presumably the z slot -- confirm against the env's layout).
obs = th.zeros(size=nav_env.reset().shape)
obs[2] = 1

# Add a leading batch dimension: (1, OBS_LEN).
obs = obs.reshape(-1, OBS_LEN)
print(obs, obs.shape)

tensor([[0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]) torch.Size([1, 15])


In [154]:
# Ask the policy for an action on the probe observation.
# NOTE(review): predict() is called without deterministic=True, so the action
# is presumably sampled and may differ between runs -- confirm this is intended.
sampled_action, _ = model.predict(obs)
action = th.tensor(sampled_action)
print(action)

# Value estimate obtained two ways, plus log-probability and entropy of the action.
values1, log_proba, entropy = model.policy.evaluate_actions(obs, action)
values2 = model.policy.predict_values(obs)

print(values1, values2)
print(log_proba, entropy)

tensor([[-0.0313, -0.2909, -1.0000, -1.0000]])
tensor([[-0.5680]], grad_fn=<AddmmBackward0>) tensor([[-0.5680]], grad_fn=<AddmmBackward0>)
tensor([-16.6216], grad_fn=<SumBackward1>) tensor([-0.3630], grad_fn=<SumBackward1>)


In [155]:
# Build a regular 3-D grid of (x, y, z) sample points and flatten it to an
# (n, 3) tensor, one row per grid point.
MAX = 3  # x and y span [-MAX, MAX]; z spans [0, MAX]
NUM_POINTS = 20  # samples per axis, so n = NUM_POINTS ** 3

x = th.linspace(-MAX, MAX, NUM_POINTS)
y = th.linspace(-MAX, MAX, NUM_POINTS)
z = th.linspace(0, MAX, NUM_POINTS)

print("----- xyz")
print(x)
print(y)
print(z)

# indexing="ij" is what the un-annotated call produced; passing it explicitly
# avoids the torch.meshgrid deprecation warning and pins the layout so that
# X[i, j, k] == x[i], Y[i, j, k] == y[j], Z[i, j, k] == z[k].
X, Y, Z = th.meshgrid(x, y, z, indexing="ij")

print("------ mesh shapes")
print(X.shape, Y.shape, Z.shape)

# Stack the three coordinate grids along a new trailing axis:
# (NUM_POINTS, NUM_POINTS, NUM_POINTS, 3).
observations = th.stack([X, Y, Z], dim=3)

print(observations.shape)
print(observations)

# n x 3 -- z varies fastest, then y, then x.
observations = th.reshape(observations, shape=(-1, 3))
print(observations)

----- xyz
tensor([-3.0000, -2.6842, -2.3684, -2.0526, -1.7368, -1.4211, -1.1053, -0.7895,
        -0.4737, -0.1579,  0.1579,  0.4737,  0.7895,  1.1053,  1.4211,  1.7368,
         2.0526,  2.3684,  2.6842,  3.0000])
tensor([-3.0000, -2.6842, -2.3684, -2.0526, -1.7368, -1.4211, -1.1053, -0.7895,
        -0.4737, -0.1579,  0.1579,  0.4737,  0.7895,  1.1053,  1.4211,  1.7368,
         2.0526,  2.3684,  2.6842,  3.0000])
tensor([0.0000, 0.1579, 0.3158, 0.4737, 0.6316, 0.7895, 0.9474, 1.1053, 1.2632,
        1.4211, 1.5789, 1.7368, 1.8947, 2.0526, 2.2105, 2.3684, 2.5263, 2.6842,
        2.8421, 3.0000])
------ mesh shapes
torch.Size([20, 20, 20]) torch.Size([20, 20, 20]) torch.Size([20, 20, 20])
torch.Size([20, 20, 20, 3])
tensor([[[[-3.0000, -3.0000,  0.0000],
          [-3.0000, -3.0000,  0.1579],
          [-3.0000, -3.0000,  0.3158],
          ...,
          [-3.0000, -3.0000,  2.6842],
          [-3.0000, -3.0000,  2.8421],
          [-3.0000, -3.0000,  3.0000]],

         [[-3.0000, -2

In [156]:
# n x 15: keep the three grid columns and fill the remaining
# OBS_LEN - 3 columns with zeros on the right.
zero_tail = observations.new_zeros((observations.shape[0], OBS_LEN - 3))
observations_pad = th.cat([observations, zero_tail], dim=1)

print(observations_pad.shape)

torch.Size([8000, 15])


In [157]:
# Value prediction for every padded grid observation, detached from the
# autograd graph and flattened to one dimension.
values = model.policy.predict_values(observations_pad).detach().reshape(-1)

print(values.shape)

torch.Size([8000])


In [158]:
# First three columns of the padded observations (the grid coordinates).
observations_xyz = observations_pad[:, :3]

# Squared Euclidean distance of every row from the point (0, 0, 1).
target_xyz = th.tensor([0, 0, 1])
true_values = th.norm(observations_xyz - target_xyz, dim=1).pow(2)

In [162]:
# One row per grid point: its coordinates plus the predicted value.
# NOTE(review): the recorded describe() output shows std == 0 for "value",
# i.e. the same prediction at every grid point -- worth confirming that the
# padded observations are a meaningful input for the value network.
columns = {axis: observations_xyz[:, i] for i, axis in enumerate("xyz")}
columns["value"] = values.numpy().tolist()
# columns["value"] = true_values.numpy().tolist()
df = pd.DataFrame(columns)

df.describe()

Unnamed: 0,x,y,z,value
count,8000.0,8000.0,8000.0,8000.0
mean,-6.103516e-08,-1.220703e-07,1.5,-0.567958
std,1.821045,1.821045,0.910522,0.0
min,-3.0,-3.0,0.0,-0.567958
25%,-1.5,-1.5,0.75,-0.567958
50%,-1.192093e-07,-1.192093e-07,1.5,-0.567958
75%,1.5,1.5,2.25,-0.567958
max,3.0,3.0,3.0,-0.567958


In [160]:
# 3-D scatter of the grid points, coloured by the "value" column.
fig = px.scatter_3d(data_frame=df, x="x", y="y", z="z", color="value")
fig.show()