## Import required libraries
### Author: Sameer
### Date: May 2019

In [2]:
import numpy as np
import matplotlib.pyplot as plt

from CartPole import CartPole
# from CartPole_GPS import CartPole_GPS

from ilqr.dynamics import constrain
from copy import deepcopy

from EstimateDynamics import local_estimate
from GMM import Estimated_Dynamics_Prior

from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel

from mujoco_py import load_model_from_path, MjSim, MjViewer
import mujoco_py

import time



### Formulate the iLQR problem

In [3]:
'''
Problem definition for the cart-pole iLQR run.

dt         - time step
N          - number of control points (time steps) in the trajectory
x0         - initial state
x_goal     - final (goal) state
Q          - instantaneous state cost
R          - instantaneous control cost
Q_terminal - state cost applied at the final step
x_dynamics - array holding the physical parameters of the system:
    x_dynamics[0] = m = mass of the pendulum bob
    x_dynamics[1] = M = mass of the cart
    x_dynamics[2] = L = length of the massless rod
    x_dynamics[3] = g = gravity
    x_dynamics[4] = d = damping in the system
'''
# Discretisation of the trajectory.
dt = 0.05
N = 600

# Physical parameters, in the order documented above.
x_dynamics = np.array([0.1, 1, 1, 9.80665, 0])  # m=0.1, M=1, L=1, g=9.80665, d=0

# Boundary conditions of the trajectory.
x0 = np.array([0.0, 0.0, 3.14, 0.0])
x_goal = np.zeros(4)

# Quadratic cost weights. The state costs are 5x5 although x0/x_goal have four
# entries — presumably the cost acts on an augmented state; confirm in CartPole.
Q = np.diag([1.0, 10.0, 100.0, 100.0, 10.0])
Q_terminal = 100 * np.eye(5)
R = np.array([[1.0]])

### iLQR on Cart Pole

In [4]:
# Build the cart-pole problem from the time step, horizon, physical parameters,
# boundary states and cost matrices, then run iLQR on it.
cartpole_prob = CartPole(dt, N, x_dynamics, x0, x_goal, Q, R, Q_terminal)
# The call returns the state trajectory `xs`, the control sequence `us` and two
# further arrays `K` and `k` — presumably the controller gain terms; confirm their
# roles and shapes against CartPole.run_IterLinQuadReg.
xs, us, K, k = cartpole_prob.run_IterLinQuadReg()

iteration 0 accepted 228324.5360899397 [ 0.0172082  -0.50365064 -1.78372388  4.1111202 ]
iteration 1 accepted 227441.8332296307 [ 0.17728896 -1.08941463 -1.90825797  3.58139625]
iteration 2 accepted 226678.2462862772 [ 0.18031956 -1.13749336 -2.29711152  3.94671564]
iteration 3 accepted 225775.55116085047 [ 0.19972014 -1.15387707 -2.6444573   4.1021442 ]
iteration 4 accepted 224533.93460132615 [ 0.18665524 -1.03538121  3.11943153  4.52595941]
iteration 5 accepted 222000.87206574492 [ 0.13531602 -0.65175171  2.25012601  4.78665395]
iteration 6 accepted 215737.5308995955 [0.1396074  0.51431978 0.32167239 1.17493167]
iteration 7 accepted 215031.5438136034 [ 0.01775409  0.17092985 -0.02643257  0.17111543]
iteration 8 accepted 214755.16764304662 [-0.03337405  0.11517411 -0.04120385  0.13292809]
iteration 9 accepted 214498.82682399222 [-0.02989681  0.11816241 -0.04365594  0.13885056]
iteration 10 accepted 214256.07827964504 [-0.02980065  0.11844378 -0.04721935  0.14118838]
iteration 11 accep

iteration 91 accepted 188494.04115440507 [-0.03830312  0.11797698 -0.34757043  0.42173883]
iteration 92 accepted 188354.5431353954 [-0.03823454  0.11719702 -0.3521075   0.42684042]
iteration 93 accepted 188195.59097341678 [-0.03816112  0.11639165 -0.35666842  0.43200384]
iteration 94 accepted 188016.08021629314 [-0.03808301  0.11556055 -0.3612532   0.43723003]
iteration 95 accepted 187817.1782934458 [-0.03800058  0.11470339 -0.36586186  0.44251994]
iteration 96 accepted 187603.5154229822 [-0.03791453  0.11381996 -0.3704944   0.44787451]
iteration 97 accepted 187382.77139233053 [-0.03782583  0.11291008 -0.37515084  0.45329473]
iteration 98 accepted 187162.29374126543 [-0.03773529  0.11197351 -0.37983116  0.45878152]
iteration 99 accepted 186945.32993447283 [-0.03764287  0.11100983 -0.38453534  0.46433579]
iteration 100 accepted 186731.2319778944 [-0.03754764  0.11001843 -0.38926333  0.46995834]
iteration 101 accepted 186517.94354803956 [-0.03744846  0.10899863 -0.39401505  0.47565   ]
i

iteration 181 accepted 177035.99388370715 [-0.00761845 -0.09501824 -0.82284788  1.16315123]
iteration 182 accepted 176938.63542434276 [-0.00694418 -0.09924098 -0.82847224  1.1739857 ]
iteration 183 accepted 176833.48498742934 [-0.00626123 -0.10350937 -0.83410881  1.18486616]
iteration 184 accepted 176719.51580112617 [-0.00556946 -0.10782429 -0.83975899  1.19579422]
iteration 185 accepted 176595.53510039474 [-0.00486872 -0.11218672 -0.8454243   1.20677171]
iteration 186 accepted 176460.15480401172 [-0.00415887 -0.11659776 -0.85110633  1.21780063]
iteration 187 accepted 176311.75662496343 [-0.00343981 -0.12105856 -0.85680683  1.22888322]
iteration 188 accepted 176148.45011727637 [-0.0027115  -0.12557041 -0.86252763  1.24002196]
iteration 189 accepted 175968.0213421417 [-0.00197397 -0.13013467 -0.8682707   1.25121954]
iteration 190 accepted 175767.86839909916 [-1.22738049e-03 -1.34752823e-01 -8.74038130e-01  1.26247891e+00]
iteration 191 accepted 175544.91757386664 [-4.72082585e-04 -1.394

iteration 270 accepted 150025.36296124177 [-0.00587515  0.01651974 -0.00282101  0.01429916]
iteration 271 accepted 150022.05051351376 [-0.00588668  0.01656512 -0.00283697  0.01434096]
iteration 272 accepted 150018.74236206294 [-0.00589809  0.0166103  -0.00285288  0.0143826 ]
iteration 273 accepted 150015.43887355147 [-0.00590936  0.01665527 -0.00286873  0.01442406]
iteration 274 accepted 150012.14043855565 [-0.00592051  0.01670005 -0.00288453  0.01446537]
iteration 275 accepted 150008.847467297 [-0.00593153  0.01674463 -0.00290027  0.01450651]
iteration 276 accepted 150005.56038542825 [-0.00594243  0.01678901 -0.00291596  0.0145475 ]
iteration 277 accepted 150002.2796298945 [-0.00595322  0.01683321 -0.00293161  0.01458832]
iteration 278 accepted 149999.00564490556 [-0.00596389  0.01687722 -0.0029472   0.014629  ]
iteration 279 accepted 149995.73887804046 [-0.00597446  0.01692104 -0.00296275  0.01466952]
iteration 280 accepted 149992.4797765141 [-0.00598491  0.01696468 -0.00297825  0.01

iteration 360 accepted 149769.8421998174 [-0.00664976  0.02003865 -0.00413075  0.01759025]
iteration 361 accepted 149767.51691730611 [-0.0066571   0.02007324 -0.00414449  0.01762299]
iteration 362 accepted 149765.20058208655 [-0.00666445  0.02010775 -0.00415823  0.01765567]
iteration 363 accepted 149762.8930112884 [-0.00667178  0.02014219 -0.00417195  0.0176883 ]
iteration 364 accepted 149760.59401430484 [-0.0066791   0.02017657 -0.00418566  0.01772086]
iteration 365 accepted 149758.30339269445 [-0.00668642  0.02021087 -0.00419936  0.01775336]
iteration 366 accepted 149756.0209400845 [-0.00669374  0.0202451  -0.00421305  0.0177858 ]
iteration 367 accepted 149753.74644207675 [-0.00670104  0.02027927 -0.00422674  0.01781818]
iteration 368 accepted 149751.47967614964 [-0.00670835  0.02031336 -0.00424041  0.0178505 ]
iteration 369 accepted 149749.22041156446 [-0.00671564  0.02034739 -0.00425407  0.01788276]
iteration 370 accepted 149746.9684092654 [-0.00672293  0.02038135 -0.00426772  0.01

iteration 450 accepted 149539.6922560692 [-0.00729771  0.0229134  -0.00533625  0.02033243]
iteration 451 accepted 149535.11901843082 [-0.00730468  0.02294305 -0.00534937  0.02036095]
iteration 452 accepted 149530.3805756284 [-0.00731164  0.02297265 -0.00536249  0.02038944]
iteration 453 accepted 149525.46423222337 [-0.00731858  0.0230022  -0.0053756   0.02041789]
iteration 454 accepted 149520.35606881898 [-0.0073255   0.02303171 -0.00538871  0.0204463 ]
iteration 455 accepted 149515.04079888517 [-0.0073324   0.02306118 -0.00540181  0.02047467]
iteration 456 accepted 149509.50160631773 [-0.00733928  0.0230906  -0.00541491  0.02050301]
iteration 457 accepted 149503.71996082438 [-0.00734613  0.02311997 -0.005428    0.02053132]
iteration 458 accepted 149497.67540774515 [-0.00735296  0.0231493  -0.00544109  0.02055958]
iteration 459 accepted 149491.3453283492 [-0.00735976  0.02317858 -0.00545417  0.02058781]
iteration 460 accepted 149484.70466596872 [-0.00736653  0.02320781 -0.00546725  0.0

iteration 540 accepted 129927.42291125294 [-0.00797868  0.025449   -0.00650393  0.02278015]
iteration 541 accepted 129927.27661967855 [-0.00798555  0.02547556 -0.00651676  0.02280608]
iteration 542 accepted 129927.13476234213 [-0.00799241  0.02550209 -0.00652959  0.02283198]
iteration 543 accepted 129926.99706564909 [-0.00799927  0.02552859 -0.00654242  0.02285786]
iteration 544 accepted 129926.86327602035 [-0.00800612  0.02555507 -0.00655525  0.02288372]
iteration 545 accepted 129926.73315821585 [-0.00801297  0.02558151 -0.00656807  0.02290955]
iteration 546 converged 129926.60649382493 [-0.00801982  0.02560793 -0.00658089  0.02293537]


In [4]:
# Split the state trajectory into individual signals for plotting and analysis.
#
# `xs` is stored in the augmented layout
#     [x, x_dot, sin(theta), cos(theta), theta_dot]
# (five columns, consistent with the 5x5 cost matrices and with the 5-element
# state vector that the MuJoCo replay loop compares against `xs[i]`).
# Column 3 is therefore cos(theta); the angular velocity lives in column 4.
t = np.arange(N + 1) * dt  # Time stamp of every state sample
x = xs[:, 0]  # Cart position
x_dot = xs[:, 1]  # Cart velocity
# The angle is recovered from the de-augmented state; unwrapping removes the
# 2*pi jumps and makes for smoother plots.
theta = np.unwrap(cartpole_prob.deaugment_state(xs)[:, 2])
theta_dot = xs[:, 4]  # Angular velocity (last column of the augmented state)
# Controls passed through `constrain` with bounds -1 and 1.
us_scaled = constrain(us, -1, 1)

### Simulate the real system and generate the data
The cost matrices, initial state and goal state remain the same as in the problem above, since together they describe a single policy. The initial and goal states must nevertheless be passed to the function explicitly. The cost matrices do not need to be passed (the penalty on the system is assumed to be the same); they are only used to calculate the cost of each trajectory. The correct control sequence must be passed. The parameter gamma indicates how much of the original data you want to keep.

The variance of the Gaussian noise is drawn from a Unif(0, var_range) uniform distribution. Inputs: x_initial, x_goal, u, n_rollouts, pattern='Normal', pattern_rand=False, var_range=10, gamma=0.2, percent=20

Pattern controls how the control sequence is modified after applying zero-mean white Gaussian noise.
- Normal: based on the correction/mixing parameter gamma generate control (gamma controls how much noise we want).
- MissingValue: based on the given percentage, sets that many values to zero (it implicitly uses the "Normal" generated control).
- Shuffle: shuffles the entire "Normal" generated control sequence.
- TimeDelay: takes the "Normal" generated control and shifts it by 1 index i.e. one unit time delay.
- Extreme: sets gamma to zero and generates the control from noise only.

If 'pattern_rand' is 'True', the pattern does not need to be passed explicitly; one is chosen at random for every rollout (the default is 'False'). If you want a specific pattern, pass it explicitly.

In [5]:
# Generate ten noisy rollouts around the iLQR control sequence `us`.
# With pattern_rand=True the perturbation pattern is chosen at random for
# every rollout instead of being passed explicitly.
x_rollout, u_rollout, local_policy, cost = cartpole_prob.gen_rollouts(
    x0,
    x_goal,
    us,
    n_rollouts=10,
    pattern_rand=True,
    var_range=10,
    gamma=0.2,
    percent=20,
)

### Local system dynamics/model estimate
local_estimate: this function takes the states (arranged in a special format, [x(t), u(t), x(t+1)]), the number of Gaussian mixtures and the number of states.

In [6]:
# Configure the dynamics prior used for the local model estimate.
model = Estimated_Dynamics_Prior(
    init_sequential=False,
    eigreg=False,
    warmstart=True,
    min_samples_per_cluster=20,
    max_clusters=50,
    max_samples=20,
    strength=1.0,
)

# Feed the rollouts to the prior first, then fit; `fit` returns three arrays
# (presumably the terms of the locally estimated dynamics — confirm in GMM.py).
model.update_prior(x_rollout, u_rollout)
A, B, C = model.fit(x_rollout, u_rollout)

In [None]:
# Report the dimensions of each array returned by the fit, in the order A, B, C.
for fitted in (A, B, C):
    print(fitted.shape)

In [5]:
# Load the cart-pole MuJoCo model from its XML description (path is relative to
# the notebook's working directory) and create a simulation instance for it.
Model = "mujoco/cartpole.xml"
model_loaded = load_model_from_path(Model)
sim = MjSim(model_loaded)

In [1]:
# Replay the iLQR solution in MuJoCo and render every step.
# Needs the imports, iLQR and model-loading cells to have run first.
GOAL_TOL = 1e-2       # Absolute tolerance (position and angle) for the goal test
USE_FEEDBACK = False  # False: apply us[i] open loop; True: apply the corrected control

viewer = mujoco_py.MjViewer(sim)

# Start from the same initial state the trajectory was optimised for:
# x0 = [cart position, cart velocity, pole angle, pole angular velocity].
sim.data.qpos[0] = x0[0]
sim.data.qpos[1] = x0[2]
sim.data.qvel[0] = x0[1]
sim.data.qvel[1] = x0[3]

for i in range(len(us)):
    if USE_FEEDBACK:
        # Current simulator state in the augmented layout used by `xs`:
        # [x, x_dot, sin(theta), cos(theta), theta_dot] as a (5, 1) column.
        state = np.c_[sim.data.qpos[0], sim.data.qvel[0], np.sin(sim.data.qpos[1]),
                      np.cos(sim.data.qpos[1]), sim.data.qvel[1]].T
        # NOTE(review): `k` is used as the state-error gain and `K` as an offset
        # here, and the error is taken as (reference - state). Confirm both the
        # K/k roles and the sign against CartPole.run_IterLinQuadReg.
        control = np.dot(k[i, :], (xs[i].reshape(5, 1) - state)) + K[i].T + us[i]
    else:
        control = us[i]

    # ctrl[0] is a scalar slot; extract the single value explicitly instead of
    # relying on implicit 1-element-array -> scalar conversion.
    sim.data.ctrl[0] = np.asarray(control).item()
    sim.step()
    viewer.render()

    # Goal test on the two positions only. Exact float equality would
    # practically never hold, so compare within GOAL_TOL; the angle error is
    # wrapped to [-pi, pi] so that full turns of the pole still count as upright.
    angle_err = sim.data.qpos[1] - x_goal[2]
    angle_err = np.arctan2(np.sin(angle_err), np.cos(angle_err))
    if abs(sim.data.qpos[0] - x_goal[0]) <= GOAL_TOL and abs(angle_err) <= GOAL_TOL:
        print('states reached')
        break
print(sim.get_state())

NameError: name 'mujoco_py' is not defined

In [None]:
# `time` is already imported in the first cell; this re-import only makes the
# cell runnable on its own.
import time
# Block the kernel for five seconds.
time.sleep(5)

In [None]:
# Run the iLQR solution through the project's Mujoco_sim wrapper.
from Simulator import Mujoco_sim
Model = "mujoco/cartpole.xml"
# NOTE(review): the meaning of the second positional argument (True) is not
# visible from this notebook — check Mujoco_sim's constructor.
cart_pole_simulator = Mujoco_sim(Model,True)
# NOTE(review): the gains are passed in the order (k, K) while they were
# unpacked from run_IterLinQuadReg as (K, k) — confirm the order load() expects.
cart_pole_simulator.load(xs,us,k,K,x0,initial=False)
cart_pole_simulator.runSimulation()

In [None]:
# Run the simulation again on the existing simulator object, without calling
# load() a second time.
cart_pole_simulator.runSimulation()

In [None]:
# Largest single entry anywhere in the state trajectory.
np.asarray(xs).max()

In [None]:
# Largest single entry anywhere in the control sequence.
np.asarray(us).max()

In [None]:
# Plot the iLQR control sequence against the time-step index.
# pyplot is already imported as `plt` in the notebook's first cell.
fig, ax = plt.subplots()
ax.plot(us)
ax.set(title="iLQR control sequence", xlabel="Time step", ylabel="Control input u")
plt.show()

In [None]:
import sys 
# Raise numpy's summarisation threshold to the largest int so that arrays are
# printed in full instead of being abbreviated. This changes global print state
# for every later cell.
np.set_printoptions(threshold=sys.maxsize)

In [14]:
# Flat, zero-initialised buffer with 3 * sim.model.nv entries, passed below as
# the output argument of get_site_jacp.
target_jacp = np.zeros(3 * sim.model.nv)

In [16]:
# Query the positional Jacobian of the site named 'tip', using target_jacp as
# the output buffer. A rotational counterpart exists as get_site_jacr.
sim.data.get_site_jacp('tip', jacp=target_jacp) 

array([0., 0., 0., 0., 0., 0.])

In [None]:
# Render a 200x200 frame from the camera named 'fixed'.
# `a` is a tuple when depth=True and a single numpy array when depth=False.
a = sim.render(width=200, height=200, camera_name='fixed', depth=True)


In [6]:
# Positional Jacobian query for the name 'body'. Requires the cell that
# allocates target_jacp to have been run first in the current kernel.
# NOTE(review): get_site_jacp looks up a *site*; confirm that 'body' is a site
# name in cartpole.xml rather than a body name.
sim.data.get_site_jacp('body', jacp=target_jacp) 

NameError: name 'target_jacp' is not defined