In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Load the autoreload extension; mode 2 re-imports modules before each cell
# executes, so edits to imported source files take effect without a kernel restart.
%load_ext autoreload
%autoreload 2

In [None]:
import os
import inspect
# Make the project's `src` directory importable.
# The notebook is assumed to be run from its own directory (the Jupyter
# default), with `src` living one level up. The working directory is used
# instead of inspecting the current frame because, inside a kernel, the frame's
# "file" is a pseudo-name or a temporary file rather than the notebook path.
import sys

currentdir = os.getcwd()
parentdir = os.path.dirname(currentdir)
# Index 1 keeps the interpreter's own first entry in front while still
# taking precedence over installed packages.
sys.path.insert(1, os.path.join(parentdir, 'src'))
import torch
import torch.nn.functional as F
from torchvision import transforms
import numpy as np
import time
from tqdm import trange
import matplotlib.pyplot as plt
import matplotlib

In [None]:
import gym
import pybullet as p
# NOTE(review): `stage.envs` is not referenced by name anywhere in this notebook;
# presumably imported for its side effects (e.g. environment registration) — confirm.
import stage.envs
from stage.tasks.kuka.reaching import KukaReaching
from stage.utils.nn import use_gpu
# Called before any tensor is created below.
# NOTE(review): presumably switches torch's default device to the GPU — confirm
# against stage.utils.nn.
use_gpu()

In [None]:
# Global display settings: large serif font for figures, compact fixed-point
# printing (3 decimals, wide lines, no scientific notation) for numpy arrays.
font = dict(family='serif', size=22)
matplotlib.rc('font', **font)

np.set_printoptions(
    suppress=True,
    linewidth=200,
    precision=3,
)

In [None]:
# Directory prefix (trailing slash included) that the training-data file names
# further down are appended to.
savepath = parentdir + '/data/kuka/'

In [None]:
# data_train = np.load(savepath+'data_pd_reg.npy')
# task = KukaReaching(render=True)
# task.visualize_training_data(data_train, 0)

In [None]:
# Build the Kuka reaching task with rendering switched off.
task = KukaReaching(render=False)

# Read task parameters, should we use a separate class to wrap this?

# Dimensions exposed by the task.
nq = task.nq
nv = task.nv
nu = task.nu
nx = task.nx

# Control and environment time steps.
dt_control = task.dt_control
dt_env = task.dt_env

# Joint position limits come from the task; velocity limits are a fixed
# +/-100 box with the same shape as the position limits.
q_lb = task.q_lb
q_ub = task.q_ub
v_lb = -100 * torch.ones_like(q_lb)
v_ub = 100 * torch.ones_like(q_ub)

In [None]:
# Setup action parameterization

from stage.controllers.actor import Actor
from stage.controllers.pd import PD

# Action dimension passed to the dynamics model.
# NOTE(review): presumably equals the length of the stacked bounds below
# (per-joint gains followed by joint positions) — confirm.
na = 14

# Lower/upper limits for the gain part of the action.
gain_lb = torch.zeros(nq)
gain_ub = torch.Tensor([30, 15, 30, 30, 5, 3, 0.1])

# Full action bounds: gain limits stacked on top of the joint position limits.
action_lb = torch.cat((gain_lb, q_lb))
action_ub = torch.cat((gain_ub, q_ub))

# Wrap a PD controller in an Actor that carries the action bounds.
pd_policy = PD(nx, nq, nv, nu)
actor = Actor(pd_policy, action_lb, action_ub)

# The task's cost object gets a handle to the same actor.
task.cost.actor = actor

In [None]:
# Setup model learning

from stage.dynamics.probabilistic_ensemble import ProbabilisticEnsemble, DefaultDx

# Hyper-parameters: ensemble size for the model; batch size and epochs are
# handed to the learner later on.
ensemble_size, batch_size, epochs = 5, 64, 10

dynamics = ProbabilisticEnsemble(
    nx, nq, nv, na, dt_control,
    DefaultDx,
    ensemble_size,
    learning_rate=0.001,
)

# State bounds for the model: joint position limits followed by velocity limits.
dynamics.state_lb = torch.cat([q_lb, v_lb])
dynamics.state_ub = torch.cat([q_ub, v_ub])

In [None]:
# Setup controller

from stage.controllers.pets import PETS

# Planner settings handed to PETS.
plan_horizon, n_particles, pop_size = 60, 20, 500

# The particle count must be a whole multiple of the ensemble size.
# NOTE(review): presumably so particles split evenly across ensemble members — confirm.
assert n_particles % ensemble_size == 0

controller = PETS(
    dynamics, task.cost, actor,
    plan_horizon, n_particles, pop_size,
)

In [None]:
# Setup learner
from stage.learners.learn_and_control_model import LearnAndControlModel
# Ties together the task, the dynamics model and the controller built above;
# `epochs` and `batch_size` are the training settings defined with the model.
learner = LearnAndControlModel(task, dynamics, controller, epochs, batch_size)

In [None]:
# Start and desired joint configurations, each with a leading batch axis of 1.
q_start = torch.Tensor(task.q_start)[None]
q_desired = task.cost.desired[:nq][None]

# Print the [:3, 3] entries of the transform returned by the cost's `fwk`
# for index 6, first for the start and then for the desired configuration.
# NOTE(review): presumably the end-effector position from forward kinematics — confirm.
for q_config in (q_start, q_desired):
    print(task.cost.fwk(q_config, 6)[:, :3, 3])

In [None]:
# Toggle for the controller's regularization; it also selects the file the
# collected training data is written to.
lip_reg = False

controller.regularize(1 if lip_reg else 0)
file_name = savepath + ('data_pd_reg' if lip_reg else 'data_pd_noreg')

# Run 50 learning iterations with verbose logging, then persist the data.
_ = learner.learn(50, verbose=True)
learner.save_training_data(file_name)

In [None]:
# Execute the task once with the trained controller.
traj, log = task.perform(task.goal, controller)

# Each trajectory row starts with the nx-dimensional observation, followed by
# the na-dimensional action.
initial_obs, final_obs = traj[0, :nx], traj[-1, :nx]
act_seq = traj[:, nx:nx+na]

In [None]:
# Roll the learned model forward from the first observation using the executed
# action sequence, drawing n_sample predicted trajectories.
n_sample = 50
traj_pred = dynamics.unroll(initial_obs, act_seq, n_sample)

# L2 distance between every sample's last predicted state and the state
# actually reached; report its mean and standard deviation across samples.
final_pred = traj_pred[-1, :, :]
final_target = final_obs.expand(n_sample, -1)
predicted_err = (final_pred - final_target).norm(p=2, dim=1)
print(predicted_err.mean())
print(predicted_err.std())

In [None]:
# Convert joint configurations to 3-vectors via the cost's `fwk` at index 6,
# for the predicted samples and for the executed trajectory, step by step.
# NOTE(review): presumably end-effector positions — confirm against `fwk`.
task_horizon = task.task_horizon
ee = torch.zeros(task_horizon, 3)
ee_pred = torch.zeros(task_horizon, n_sample, 3)

for step in range(task_horizon):
    sampled_q = traj_pred[step, :, :nq]
    ee_pred[step] = task.cost.fwk(sampled_q, 6)[:, :3, 3]
    # Slice with step:step+1 so the single executed row keeps its batch axis.
    actual_q = traj[step:step+1, :nq]
    ee[step] = task.cost.fwk(actual_q, 6)[:, :3, 3]

In [None]:
# Mean and standard deviation across the sample axis of the predictions.
ee_pred_mean = ee_pred.mean(dim=1)
ee_pred_std = ee_pred.std(dim=1)

# Detached CPU numpy copies for plotting.
ee_pred_mean_np, ee_pred_std_np, ee_np = (
    tensor.detach().cpu().numpy()
    for tensor in (ee_pred_mean, ee_pred_std, ee)
)

# The cost's goal tiled once per time step, also as a numpy array.
goal = task.cost.goal.repeat(task_horizon, 1).detach().cpu().numpy()

In [None]:
# Scatter the first two columns of the executed `ee_np` samples against each other.
plt.scatter(ee_np[:,0],ee_np[:,1])

In [None]:
# Plot every column of the executed `ee_np` against the model's predicted mean,
# with a one-standard-deviation band and the goal as a dash-dotted reference.
d = 3
dt = dt_control
fig, ax = plt.subplots(d, figsize=(10, d * 6))
# Build the time axis from integer step indices: a float-step arange can
# return one sample too many or too few, which would not match the data length.
t = np.arange(task_horizon) * dt
# The band does not depend on the column being plotted, so compute it once.
lb = ee_pred_mean_np - ee_pred_std_np
ub = ee_pred_mean_np + ee_pred_std_np

for i in range(d):
    ax[i].plot(t, ee_np[:, i], lw=4, color='orange', label='actual')
    ax[i].plot(t, ee_pred_mean_np[:, i], lw=4, color='b', label='predicted mean')
    ax[i].plot(t, goal[:, i], lw=2, color='k', ls='-.', label='goal')
    ax[i].fill_between(t, lb[:, i], ub[:, i], facecolor='blue',
                alpha=0.2)
    ax[i].grid()
#     _ = ax[i].set_ylim([-3.2, 3.2])
#     ax[i].legend(loc='upper center', bbox_to_anchor=(0.5, 1.3),
#              ncol=3, fancybox=True, shadow=True)
# fig.savefig('prediction_with_reg_150steps' + '.png', bbox_inches='tight')

In [None]:
# Per-step mean and standard deviation of the predicted states across samples.
traj_pred_mean = torch.mean(traj_pred, dim=1)
traj_pred_std = torch.std(traj_pred, dim=1)

# Detached CPU numpy copies for plotting.
traj_pred_mean_np = traj_pred_mean.detach().cpu().numpy()
traj_pred_std_np = traj_pred_std.detach().cpu().numpy()
traj_np = traj.detach().cpu().numpy()

# The cost's desired state tiled once per time step.
desired = task.cost.desired.repeat((task_horizon, 1))
desired = desired.detach().cpu().numpy()

# One subplot for each of the first nq state columns: executed value, predicted
# mean with a one-standard-deviation band, and the desired value.
d = nq
dt = dt_control
fig, ax = plt.subplots(d, figsize=(10, d * 6))
# Build the time axis from integer step indices: a float-step arange can
# return one sample too many or too few, which would not match the data length.
t = np.arange(task_horizon) * dt
# The band does not depend on the column being plotted, so compute it once.
lb = traj_pred_mean_np - traj_pred_std_np
ub = traj_pred_mean_np + traj_pred_std_np

for i in range(d):
    ax[i].plot(t, traj_np[:, i], lw=4, color='orange', label='actual')
    ax[i].plot(t, traj_pred_mean_np[:, i], lw=4, color='b', label='predicted mean')
    ax[i].plot(t, desired[:, i], lw=2, color='k', ls='-.', label='desired')
    ax[i].fill_between(t, lb[:, i], ub[:, i], facecolor='blue',
                alpha=0.2)
    ax[i].grid()
#     _ = ax[i].set_ylim([-3.2, 3.2])
#     ax[i].legend(loc='upper center', bbox_to_anchor=(0.5, 1.3),
#              ncol=3, fancybox=True, shadow=True)
# fig.savefig('prediction_with_reg_150steps' + '.png', bbox_inches='tight')