In [1]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Load IPython's autoreload extension and switch it to mode 2, so that edits to
# imported project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import inspect
import sys

# Locate the project relative to the notebook's working directory.
# The previous approach, inspect.getfile(inspect.currentframe()), is not reliable
# inside a kernel: recent ipykernel versions compile each cell under a
# temporary-directory filename, so the "current file" is no longer in the
# notebook folder. Jupyter starts the kernel in the notebook's directory, so
# the working directory is the stable anchor.
# NOTE(review): assumes the kernel's cwd has not been changed before this cell runs.
currentdir = os.getcwd()
parentdir = os.path.dirname(currentdir)
# Make the project's src/ package importable (position 1 keeps sys.path[0] intact).
sys.path.insert(1, os.path.join(parentdir, 'src'))
import torch
import torch.nn.functional as F
from torchvision import transforms
import numpy as np
import time
from tqdm import trange
import matplotlib.pyplot as plt
import matplotlib

In [3]:
import gym
import pybullet as p
import stage.envs
from stage.tasks.kuka import KukaPETS
from stage.utils.nn import use_gpu
# Project helper from stage.utils.nn, called once before any task is built.
# NOTE(review): presumably switches torch defaults to CUDA — confirm in stage.utils.nn
# and check how it behaves on a CPU-only machine.
use_gpu()

In [4]:
# Directory prefix under which the training data of this notebook is stored.
savepath = parentdir + '/data/kuka/'
# Create the directory up front so a long training run cannot fail at save time
# just because the output folder is missing; a no-op when it already exists.
os.makedirs(savepath, exist_ok=True)

In [5]:
# Build the Kuka PETS task without rendering, using the 'pd' action parameterization.
kuka = KukaPETS(action_parameterization='pd', render=False)

# Cache the task's size attributes as notebook-level shorthands.
# NOTE(review): presumably position / velocity / control / state / action
# dimensions — confirm against KukaPETS.
nq, nv, nu = kuka.nq, kuka.nv, kuka.nu
nx, na = kuka.nx, kuka.na

In [None]:
# Run 30 learning iterations. With verbose=True each iteration prints the
# average decision time plus the observed and action rewards. The return
# value is not needed here, so it is bound to `_`.
_ = kuka.learn(30, verbose=True)

# Store the collected training data under the 'data_pd_reg' prefix in savepath.
kuka.save_training_data(f'{savepath}data_pd_reg')

Network training: 100%|██████████| 10/10 [00:00<00:00, 25.21epoch(s)/s, Training loss MSE=25.523548]


Iteration:  0
avg. decision time:  0.0047092644373575845
obs. reward:  -770.3727
act. reward:  -3.2394142


Network training: 100%|██████████| 10/10 [00:01<00:00,  7.85epoch(s)/s, Training loss MSE=29.205145]


Iteration:  1
avg. decision time:  0.08153150558471679
obs. reward:  -572.96204
act. reward:  -3.607087


Network training: 100%|██████████| 10/10 [00:00<00:00, 10.80epoch(s)/s, Training loss MSE=20.453798]


Iteration:  2
avg. decision time:  0.08151360352834065
obs. reward:  -634.61816
act. reward:  -2.3210082


Network training: 100%|██████████| 10/10 [00:02<00:00,  4.29epoch(s)/s, Training loss MSE=17.63022]


Iteration:  3
avg. decision time:  0.08158031622568766
obs. reward:  -975.5865
act. reward:  -2.3097506


Network training: 100%|██████████| 10/10 [00:02<00:00,  3.33epoch(s)/s, Training loss MSE=15.047351]


Iteration:  4
avg. decision time:  0.08166618982950846
obs. reward:  -585.1438
act. reward:  -3.5828965


Network training: 100%|██████████| 10/10 [00:02<00:00,  4.02epoch(s)/s, Training loss MSE=12.890215]


Iteration:  5
avg. decision time:  0.08167794704437256
obs. reward:  -614.1107
act. reward:  -2.8040962


Network training: 100%|██████████| 10/10 [00:04<00:00,  2.46epoch(s)/s, Training loss MSE=8.73883] 


Iteration:  6
avg. decision time:  0.08106733798980713
obs. reward:  -540.0432
act. reward:  -1.3638719


Network training: 100%|██████████| 10/10 [00:04<00:00,  2.13epoch(s)/s, Training loss MSE=5.713074]


Iteration:  7
avg. decision time:  0.08141330560048421
obs. reward:  -624.7467
act. reward:  -0.10202512


Network training: 100%|██████████| 10/10 [00:05<00:00,  1.87epoch(s)/s, Training loss MSE=4.963088]


Iteration:  8
avg. decision time:  0.08136193752288819
obs. reward:  -424.9721
act. reward:  -3.151263


Network training: 100%|██████████| 10/10 [00:05<00:00,  1.71epoch(s)/s, Training loss MSE=4.003742]


Iteration:  9
avg. decision time:  0.08196020444234212
obs. reward:  -424.80273
act. reward:  -0.10315835


Network training: 100%|██████████| 10/10 [00:06<00:00,  1.55epoch(s)/s, Training loss MSE=3.519633]


Iteration:  10
avg. decision time:  0.08145439942677815
obs. reward:  -315.98312
act. reward:  -0.048187617


Network training: 100%|██████████| 10/10 [00:06<00:00,  1.44epoch(s)/s, Training loss MSE=2.4214466]


Iteration:  11
avg. decision time:  0.08192819754282633
obs. reward:  -334.97647
act. reward:  -0.14074129


Network training: 100%|██████████| 10/10 [00:07<00:00,  1.32epoch(s)/s, Training loss MSE=2.0738056]


Iteration:  12
avg. decision time:  0.08143266995747885
obs. reward:  -302.67618
act. reward:  -0.32418382


Network training:  10%|█         | 1/10 [00:00<00:07,  1.28epoch(s)/s, Training loss MSE=2.0375028]

In [None]:
# Execute the task toward kuka.goal; yields the recorded trajectory and a log.
traj, log = kuka.perform(kuka.goal)

# Split the trajectory columns: 0-13 are used as the observation, 14-27 as the
# action sequence that is replayed through the learned dynamics later on.
# NOTE(review): the literals 14/28 presumably equal nx and nx + na — confirm
# and replace them with those variables.
initial_obs = traj[0][:14]
final_obs = traj[-1][:14]
act_seq = traj[:, 14:28]

In [None]:
# Number of sampled rollouts drawn from the learned dynamics model.
n_sample = 50
traj_pred = kuka.dynamics.unroll(initial_obs, act_seq, n_sample)

# L2 distance between each sample's last predicted step and the observation
# actually reached at the end of the real rollout (one value per sample).
predicted_err = (traj_pred[-1] - final_obs.expand(n_sample, -1)).norm(p=2, dim=1)
print(predicted_err.mean())
print(predicted_err.std())

In [None]:
# Reduce the sampled rollouts over the sample axis (dim=1) to a mean and a
# standard deviation, then move them to host memory as numpy arrays for plotting.
traj_pred_mean = traj_pred.mean(dim=1).detach().cpu().numpy()
traj_pred_std = traj_pred.std(dim=1).detach().cpu().numpy()

# `traj` is converted in place, so guard the conversion: without the check a
# second run of this cell would call .detach() on an ndarray and raise
# AttributeError. With it the cell can be re-run safely.
if torch.is_tensor(traj):
    traj = traj.detach().cpu().numpy()

In [None]:
# Global matplotlib font: serif family at size 22 for all figures below.
font = dict(family='serif', size=22)
matplotlib.rc('font', **font)

# Compact numpy printing: 3 decimals, wide lines, no scientific notation.
np.set_printoptions(suppress=True, linewidth=200, precision=3)

In [None]:
# Optional shortcut (disabled): load the three plotting arrays from .npy files
# in the working directory instead of recomputing them in the cells above.
# NOTE(review): nothing in this notebook writes these files — confirm where
# they come from before enabling.
# traj_pred_mean = np.load('traj_pred_mean.npy')
# traj_pred_std = np.load('traj_pred_std.npy')
# traj = np.load('traj.npy')

In [None]:
# Plot one column of the trajectory: predicted mean, actual rollout, desired
# value, and a one-standard-deviation band around the prediction.
d = 1  # number of subplot rows; with d == 1 plt.subplots returns a single Axes
dt = 0.01  # spacing of the time axis — NOTE(review): presumably the control period in seconds; confirm
task_horizon = 150  # number of plotted steps; must match the length of traj / traj_pred_mean
fig, ax = plt.subplots(d, figsize=(10, d * 6))
# Build the time axis from an integer range scaled by dt. np.arange with a
# float step does not guarantee the number of elements (rounding can add or
# drop one), which would break the x/y length match in the plot calls below.
t = np.arange(task_horizon) * dt

i = 1  # column of the trajectory arrays that is plotted
# Constant reference line at the desired value for column i.
desired = kuka.step_cost.desired[i].detach().cpu().numpy()*np.ones(len(t))

ax.plot(t, traj_pred_mean[:, i], lw=4, color='b', label='predicted mean')
ax.plot(t, traj[:, i], lw=4, ls='-.', color='orange', label='actual')

ax.plot(t, desired, lw=2, color='k', ls='-.', label='desired')

# Band of +/- one standard deviation around the predicted mean.
lb = traj_pred_mean - traj_pred_std
ub = traj_pred_mean + traj_pred_std
ax.fill_between(t, lb[:, i], ub[:, i], facecolor='blue',
            alpha=0.2, label='one-std region')
_ = ax.grid()
_ = ax.set_ylabel('Joint angle [rad]')
_ = ax.set_xlabel('Time [s]')
# _ = ax.set_xlim([0,0.5])
_ = ax.set_ylim([-3.2, 3.2])

# Legend above the axes; assigned to `_` like the calls above so the cell
# does not print the Legend repr.
_ = ax.legend(loc='upper center', bbox_to_anchor=(0.5, 1.3),
              ncol=2, fancybox=True, shadow=True)
# for i in range(d):
#     ax[i].plot(t, traj[:, i], lw=4, color='orange', label='actual')
#     ax[i].plot(t, traj_pred_mean[:, i], lw=4, color='b', label='predicted mean')
#     lb = traj_pred_mean - traj_pred_std
#     ub = traj_pred_mean + traj_pred_std
#     ax[i].fill_between(t, lb[:, i], ub[:, i], facecolor='blue',
#                 alpha=0.2)
# fig.savefig('prediction_with_reg_150steps' + '.png', bbox_inches='tight')