In [1]:
import os
import sys
 

current = os.getcwd()
 
parent = os.path.dirname(current)
 
sys.path.append(parent)

import torch

from mpc import mpc
from mpc.mpc import QuadCost, LinDx, GradMethods
from mpc.env_dx import pendulum

import numpy as np
import numpy.random as npr

import matplotlib.pyplot as plt

import os
import io
import base64
import tempfile
from IPython.display import HTML

from tqdm import tqdm

%matplotlib inline

In [5]:
params = torch.tensor((10., 1., 1.))
dx = pendulum.PendulumDx(params, simple=True)

n_batch, T, mpc_T = 1, 5, 4

def uniform(shape, low, high):
    r = high-low
    return torch.rand(shape)*r+low

torch.manual_seed(0)
th = uniform(n_batch, -(1/2)*np.pi, (1/2)*np.pi)
thdot = uniform(n_batch, -1., 1.)
xinit = torch.stack((torch.cos(th), torch.sin(th), thdot), dim=1)

x = xinit
u_init = None

# The cost terms for the swingup task can be alternatively obtained
# for this pendulum environment with:
# q, p = dx.get_true_obj()

mode = 'swingup'
# mode = 'spin'

if mode == 'swingup':
    goal_weights = torch.Tensor((1., 1., 0.1))
    goal_state = torch.Tensor((1., 0. ,0.))
    ctrl_penalty = 0.001
    q = torch.cat((
        goal_weights,
        ctrl_penalty*torch.ones(dx.n_ctrl)
    ))
    px = -torch.sqrt(goal_weights)*goal_state
    p = torch.cat((px, torch.zeros(dx.n_ctrl)))
    Q = torch.diag(q).unsqueeze(0).unsqueeze(0).repeat(
        mpc_T, n_batch, 1, 1
    )
    p = p.unsqueeze(0).repeat(mpc_T, n_batch, 1)
elif mode == 'spin':
    Q = 0.001*torch.eye(dx.n_state+dx.n_ctrl).unsqueeze(0).unsqueeze(0).repeat(
        mpc_T, n_batch, 1, 1
    )
    p = torch.tensor((0., 0., -1., 0.))
    p = p.unsqueeze(0).repeat(mpc_T, n_batch, 1)

t_dir = tempfile.mkdtemp()
print('Tmp dir: {}'.format(t_dir))
print("dx. ", dx.n_ctrl, dx.n_state)
print("Q.shape", Q.shape)

for i in range(mpc_T):
    for j in range(n_batch):
        for k in range(dx.n_ctrl+dx.n_state):
            Q[i,j,k, dx.n_ctrl+dx.n_state-1] = Q[i,j,dx.n_ctrl+dx.n_state-1,k] = 0


lower = -1.
lower = torch.tensor([-0.1, -2. , -0.1, -0.1, -1.]) #mpc_T = 5
lower = lower.unsqueeze(1).unsqueeze(1)
lower = lower.repeat(1,n_batch,1)

#print("lower bound", lower, lower.shape)
upper = 1.
for t in tqdm(range(T)):
    nominal_states, nominal_actions, nominal_objs = mpc.MPC(
        dx.n_state, dx.n_ctrl, mpc_T,
        u_init=u_init,
        u_lower=lower, u_upper=upper,
        lqr_iter=50,
        verbose=1,
        exit_unconverged=False,
        detach_unconverged=False,
        linesearch_decay=dx.linesearch_decay,
        max_linesearch_iter=dx.max_linesearch_iter,
        grad_method=GradMethods.AUTO_DIFF,
        eps=1e-2,
    )(x, QuadCost(Q, p), dx)
    
    next_action = nominal_actions[0]
    u_init = torch.cat((nominal_actions[1:], torch.zeros(1, n_batch, dx.n_ctrl)), dim=0)
    u_init[-2] = u_init[-3]
    x = dx(x, next_action)
    if(t == 1):
        A =1
        #print("nominal_states: ", nominal_states)
        #print("nominal_actions: ", nominal_actions)

    n_row, n_col = 4, 4
    fig, axs = plt.subplots(n_row, n_col, figsize=(3*n_col,3*n_row))
    axs = axs.reshape(-1)
    for i in range(n_batch):
        dx.get_frame(x[i], ax=axs[i])
        axs[i].get_xaxis().set_visible(False)
        axs[i].get_yaxis().set_visible(False)
    fig.tight_layout()
    fig.savefig(os.path.join(t_dir, '{:03d}.png'.format(t)))
    plt.close(fig)

Tmp dir: /tmp/tmpj5ntez9r
dx.  1 3
Q.shape torch.Size([4, 1, 4, 4])


  0%|          | 0/5 [00:00<?, ?it/s]

carlos 0 tensor([[[-0.1000]],

        [[-2.0000]],

        [[-0.1000]],

        [[-0.1000]],

        [[-1.0000]]])
carlos 1
tensor([[[-0.1000]],

        [[-2.0000]],

        [[-0.1000]],

        [[-0.1000]],

        [[-1.0000]]])
tensor([[[-0.1000]],

        [[-2.0000]],

        [[-0.1000]],

        [[-0.1000]],

        [[-1.0000]]])
Initial mean(cost): -1.9375e+00
Carlos lqr bounds tensor([[-0.1000]]) upper:  tensor([[1.]])
hey
hey 2
Carlos lqr bounds tensor([[-0.1000]]) upper:  tensor([[1.]])
hey
Carlos lqr bounds tensor([[-2.]]) upper:  tensor([[1.]])
hey
Carlos lqr bounds tensor([[-0.1000]]) upper:  tensor([[1.]])
hey
| 0 | -1.9643e+00 | 2.00e+00 | 1.00e+00 | tensor([6.]) |
Carlos lqr bounds tensor([[-0.1000]]) upper:  tensor([[1.]])
hey
hey 2
Carlos lqr bounds tensor([[0.]]) upper:  tensor([[1.1000]])
hey
Carlos lqr bounds tensor([[0.]]) upper:  tensor([[3.]])
hey
Carlos lqr bounds tensor([[0.]]) upper:  tensor([[1.1000]])
hey
| 1 | -1.9643e+00 | 0.00e+00 | 1.00e+00 | 

 20%|██        | 1/5 [00:01<00:07,  1.91s/it]

carlos 0 tensor([[[-0.1000]],

        [[-2.0000]],

        [[-0.1000]],

        [[-0.1000]],

        [[-1.0000]]])
carlos 1
tensor([[[-0.1000]],

        [[-2.0000]],

        [[-0.1000]],

        [[-0.1000]],

        [[-1.0000]]])
tensor([[[-0.1000]],

        [[-2.0000]],

        [[-0.1000]],

        [[-0.1000]],

        [[-1.0000]]])
Initial mean(cost): -1.9747e+00
Carlos lqr bounds tensor([[-0.1000]]) upper:  tensor([[1.]])
hey
hey 2
Carlos lqr bounds tensor([[0.]]) upper:  tensor([[1.1000]])
hey
Carlos lqr bounds tensor([[-1.9000]]) upper:  tensor([[1.1000]])
hey
Carlos lqr bounds tensor([[1.9000]]) upper:  tensor([[3.]])
hey
| 0 | -1.9602e+00 | 2.69e+00 | 1.60e-03 | tensor([5.], grad_fn=<LQRStepFnBackward>) |
Carlos lqr bounds tensor([[-0.1000]]) upper:  tensor([[1.]])
hey
hey 2
Carlos lqr bounds tensor([[0.]]) upper:  tensor([[1.1000]])
hey
Carlos lqr bounds tensor([[-0.1736]]) upper:  tensor([[2.8264]])
hey
Carlos lqr bounds tensor([[0.]]) upper:  tensor([[1.1000]])
he

 40%|████      | 2/5 [00:03<00:04,  1.57s/it]

carlos 0 tensor([[[-0.1000]],

        [[-2.0000]],

        [[-0.1000]],

        [[-0.1000]],

        [[-1.0000]]])
carlos 1
tensor([[[-0.1000]],

        [[-2.0000]],

        [[-0.1000]],

        [[-0.1000]],

        [[-1.0000]]])
tensor([[[-0.1000]],

        [[-2.0000]],

        [[-0.1000]],

        [[-0.1000]],

        [[-1.0000]]])
Initial mean(cost): -1.9673e+00
Carlos lqr bounds tensor([[-0.1000]]) upper:  tensor([[1.]])
hey
hey 2
Carlos lqr bounds tensor([[0.]]) upper:  tensor([[1.1000]])
hey
Carlos lqr bounds tensor([[-1.9000]]) upper:  tensor([[1.1000]])
hey
Carlos lqr bounds tensor([[1.9000]]) upper:  tensor([[3.]])
hey
| 0 | -1.9506e+00 | 2.69e+00 | 1.60e-03 | tensor([5.], grad_fn=<LQRStepFnBackward>) |
Carlos lqr bounds tensor([[-0.1000]]) upper:  tensor([[1.]])
hey
hey 2
Carlos lqr bounds tensor([[0.]]) upper:  tensor([[1.1000]])
hey
Carlos lqr bounds tensor([[-0.1732]]) upper:  tensor([[2.8268]])
hey
Carlos lqr bounds tensor([[0.]]) upper:  tensor([[1.1000]])
he

 60%|██████    | 3/5 [00:04<00:02,  1.49s/it]

carlos 0 tensor([[[-0.1000]],

        [[-2.0000]],

        [[-0.1000]],

        [[-0.1000]],

        [[-1.0000]]])
carlos 1
tensor([[[-0.1000]],

        [[-2.0000]],

        [[-0.1000]],

        [[-0.1000]],

        [[-1.0000]]])
tensor([[[-0.1000]],

        [[-2.0000]],

        [[-0.1000]],

        [[-0.1000]],

        [[-1.0000]]])
Initial mean(cost): -1.9526e+00
Carlos lqr bounds tensor([[-0.1000]]) upper:  tensor([[1.]])
hey
hey 2
Carlos lqr bounds tensor([[0.]]) upper:  tensor([[1.1000]])
hey
Carlos lqr bounds tensor([[-1.9000]]) upper:  tensor([[1.1000]])
hey
Carlos lqr bounds tensor([[1.9000]]) upper:  tensor([[3.]])
hey
| 0 | -1.9019e+00 | 2.69e+00 | 1.60e-03 | tensor([5.], grad_fn=<LQRStepFnBackward>) |
Carlos lqr bounds tensor([[-0.1000]]) upper:  tensor([[1.]])
hey
hey 2
Carlos lqr bounds tensor([[0.]]) upper:  tensor([[1.1000]])
hey
Carlos lqr bounds tensor([[-1.8970]]) upper:  tensor([[1.1030]])
hey
Carlos lqr bounds tensor([[0.]]) upper:  tensor([[1.1000]])
he

 80%|████████  | 4/5 [00:05<00:01,  1.41s/it]

carlos 0 tensor([[[-0.1000]],

        [[-2.0000]],

        [[-0.1000]],

        [[-0.1000]],

        [[-1.0000]]])
carlos 1
tensor([[[-0.1000]],

        [[-2.0000]],

        [[-0.1000]],

        [[-0.1000]],

        [[-1.0000]]])
tensor([[[-0.1000]],

        [[-2.0000]],

        [[-0.1000]],

        [[-0.1000]],

        [[-1.0000]]])
Initial mean(cost): -1.9278e+00
Carlos lqr bounds tensor([[-0.1000]]) upper:  tensor([[1.]])
hey
hey 2
Carlos lqr bounds tensor([[0.]]) upper:  tensor([[1.1000]])
hey
Carlos lqr bounds tensor([[-1.9000]]) upper:  tensor([[1.1000]])
hey
Carlos lqr bounds tensor([[1.9000]]) upper:  tensor([[3.]])
hey
| 0 | -1.8668e+00 | 2.69e+00 | 1.60e-03 | tensor([5.], grad_fn=<LQRStepFnBackward>) |
Carlos lqr bounds tensor([[-0.1000]]) upper:  tensor([[1.]])
hey
hey 2
Carlos lqr bounds tensor([[0.]]) upper:  tensor([[1.1000]])
hey
Carlos lqr bounds tensor([[-1.8970]]) upper:  tensor([[1.1030]])
hey
Carlos lqr bounds tensor([[0.]]) upper:  tensor([[1.1000]])
he

100%|██████████| 5/5 [00:07<00:00,  1.50s/it]


In [30]:
vid_fname = 'pendulum-{}.mp4'.format(mode)

if os.path.exists(vid_fname):
    os.remove(vid_fname)
    
cmd = 'ffmpeg -r 16 -f image2 -i {}/%03d.png -vcodec libx264 -crf 25  -pix_fmt yuv420p {}'.format(
    t_dir, vid_fname
)
os.system(cmd)
print('Saving video to: {}'.format(vid_fname))

ffmpeg version 4.2.7-0ubuntu0.1 Copyright (c) 2000-2022 the FFmpeg developers
  built with gcc 9 (Ubuntu 9.4.0-1ubuntu1~20.04.1)
  configuration: --prefix=/usr --extra-version=0ubuntu0.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-avresample --disable-filter=resample --enable-avisynth --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librsvg --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --e

Saving video to: pendulum-swingup.mp4


frame=   20 fps=0.0 q=-1.0 Lsize=      69kB time=00:00:01.06 bitrate= 535.6kbits/s speed=2.28x    
video:68kB audio:0kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 1.477868%
[libx264 @ 0x560c1b52ec40] frame I:1     Avg QP:20.17  size: 47347
[libx264 @ 0x560c1b52ec40] frame P:12    Avg QP:28.98  size:  1194
[libx264 @ 0x560c1b52ec40] frame B:7     Avg QP:30.67  size:  1105
[libx264 @ 0x560c1b52ec40] consecutive B-frames: 40.0% 30.0% 30.0%  0.0%
[libx264 @ 0x560c1b52ec40] mb I  I16..4: 33.1% 46.4% 20.6%
[libx264 @ 0x560c1b52ec40] mb P  I16..4:  0.1%  0.8%  0.6%  P16..4:  0.5%  0.1%  0.1%  0.0%  0.0%    skip:97.8%
[libx264 @ 0x560c1b52ec40] mb B  I16..4:  0.0%  1.1%  0.2%  B16..8:  2.5%  0.3%  0.1%  direct: 0.0%  skip:95.6%  L0:51.7% L1:42.1% BI: 6.2%
[libx264 @ 0x560c1b52ec40] 8x8 transform intra:50.1% inter:13.2%
[libx264 @ 0x560c1b52ec40] coded y,uvDC,uvAC intra: 19.9% 0.0% 0.0% inter: 0.2% 0.0% 0.0%
[libx264 @ 0x560c1b52ec40] i16 v,h,dc,p: 55% 45%  0%  0%
[libx2

In [31]:
video = io.open(vid_fname, 'r+b').read()
encoded = base64.b64encode(video)
HTML(data='''<video alt="test" controls>
                <source src="data:video/mp4;base64,{0}" type="video/mp4" />
             </video>'''.format(encoded.decode('ascii')))