### Import required libraries

In [2]:
import numpy as np
import matplotlib.pyplot as plt

from CartPole import CartPole
from CartPole_GPS import CartPole_GPS

from ilqr.dynamics import constrain
from copy import deepcopy

from EstimateDynamics import local_estimate
from GMM import Estimated_Dynamics_Prior

from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel


### Formulate the iLQR problem

In [3]:
'''
Problem definition shared by the iLQR runs below.

dt          - time step
N           - number of control points (time steps) in the trajectory
x0          - initial state
x_goal      - final (goal) state
Q           - instantaneous state cost
R           - instantaneous control cost
Q_terminal  - state cost at the final step
x_dynamics  - information about the system:
              x_dynamics[0] = m = mass of the pendulum bob
              x_dynamics[1] = M = mass of the cart
              x_dynamics[2] = L = length of the massless rod
              x_dynamics[3] = g = gravity
              x_dynamics[4] = d = damping in the system
'''
dt = 0.005
N = 500
# Ordered as (m, M, L, g, d).
x_dynamics = np.array([1, 5, 2, 9.80665, 1])
x0 = np.array([-3.0, 0.0, 0.1, 0.0])
x_goal = np.array([2.0, 0.0, 0.0, 0.0])

# Instantaneous state cost: identity with diagonal entries 2 and 3 raised to 10.
# NOTE(review): Q is 5x5 while x0/x_goal have 4 entries - presumably the
# solver works on an augmented state; confirm against CartPole.
Q = np.diag([1.0, 1.0, 10.0, 10.0, 1.0])
# Terminal state cost: 100 on every diagonal entry.
Q_terminal = np.eye(5) * 100.0
# Instantaneous control cost (single control input).
R = np.ones((1, 1))

### iLQR on Cart Pole

In [4]:
# Build the cart-pole problem from the settings defined above and solve it
# with iLQR. The solver logs one line per iteration (see the output below);
# `xs` and `us` are the resulting state trajectory and control sequence used
# by the rest of the notebook.
cartpole_prob = CartPole(dt, N, x_dynamics, x0, x_goal, Q, R, Q_terminal)
xs, us = cartpole_prob.run_IterLinQuadReg()

iteration 0 accepted 21431.404163502222 [-1.95030539  1.77219512  2.72617026  3.95998076]
iteration 1 failed 21431.404163502222 [-1.95030539  1.77219512  2.72617026  3.95998076]
iteration 2 failed 21431.404163502222 [-1.95030539  1.77219512  2.72617026  3.95998076]
iteration 3 failed 21431.404163502222 [-1.95030539  1.77219512  2.72617026  3.95998076]
iteration 4 accepted 21410.85889509205 [-1.94480191  1.75982922  2.70133696  3.9439228 ]
iteration 5 failed 21410.858895092053 [-1.94480191  1.75982922  2.70133696  3.9439228 ]
iteration 6 failed 21410.858895092053 [-1.94480191  1.75982922  2.70133696  3.9439228 ]
iteration 7 failed 21410.858895092053 [-1.94480191  1.75982922  2.70133696  3.9439228 ]
iteration 8 failed 21410.858895092053 [-1.94480191  1.75982922  2.70133696  3.9439228 ]
iteration 9 failed 21410.858895092053 [-1.94480191  1.75982922  2.70133696  3.9439228 ]
iteration 10 accepted 21357.33027328689 [-1.9436296   1.75975628  2.69987567  3.94302394]
iteration 11 accepted 21355

In [5]:
# Split the iLQR solution into individual signals for plotting and analysis.
#
# `xs` has to be passed through deaugment_state() before theta can be read,
# and the fitted model below reports 5 state dimensions, so `xs` is presumably
# the augmented state [x, x_dot, sin(theta), cos(theta), theta_dot]. Reading
# column 3 of the raw array would then give cos(theta) rather than the angular
# velocity, so every signal is taken from the de-augmented array instead.
t = np.arange(N + 1) * dt
xs_reduced = cartpole_prob.deaugment_state(xs)
x = xs_reduced[:, 0]  # Position
x_dot = xs_reduced[:, 1]  # Velocity
theta = np.unwrap(xs_reduced[:, 2])  # Theta, unwrapped for smoother plots
theta_dot = xs_reduced[:, 3]  # Angular velocity

### Simulate the real system and generate the data
The cost matrices, initial position and goal position remain the same as in the problem above, since together they define one policy. The initial and goal positions must still be passed explicitly to the function. The cost matrices do not need to be passed (the penalty on the system is assumed to be the same); they are only used to calculate the cost of the trajectory. The correct control sequence must be passed. The parameter gamma indicates how much of the original data you want to keep.

The variance of the Gaussian noise is drawn from a uniform distribution Unif(0, var_range). Inputs: x_initial, x_goal, u, n_rollouts, pattern='Normal', pattern_rand=False, var_range=10, gamma=0.2, percent=20

Pattern controls how the control sequence is modified after applying zero-mean white Gaussian noise.
- Normal: generates the control based on the correction/mixing parameter gamma (gamma controls how much noise we want).
- MissingValue: sets the given percentage of values to zero (it implicitly operates on the "Normal"-generated control).
- Shuffle: shuffles the entire "Normal"-generated control sequence.
- TimeDelay: takes the "Normal"-generated control and shifts it by one index, i.e. a one-unit time delay.
- Extreme: sets gamma to zero and generates the control from noise only.

If 'pattern_rand' is 'True', the pattern does not need to be passed explicitly; one is chosen at random for every rollout (the default is 'False'). If you want a specific pattern, pass it explicitly.

In [6]:
# Roll out the iLQR controls `us` on the simulated system with corrupted
# inputs: 10 rollouts, a corruption pattern picked at random per rollout
# (pattern_rand=True), noise variance bounded by var_range=10, mixing
# parameter gamma=0.2, and percent=20 for the MissingValue pattern.
# NOTE(review): no random seed is set anywhere in this notebook, so these
# rollouts are presumably not reproducible between runs - confirm how
# gen_rollouts draws its noise.
# NOTE(review): this call unpacks four return values, while the later GPS
# cells unpack five (with an extra `x_gmm`) - confirm which one matches the
# current gen_rollouts signature.
x_rollout, u_rollout, local_policy, cost = cartpole_prob.gen_rollouts(x0, x_goal, us, n_rollouts=10, pattern_rand=True, var_range=10, gamma=0.2, percent=20)

### Local system dynamics/model estimate
local_estimate: the function takes the states (arranged in a special format, [x(t), u(t), x(t+1)]), the number of Gaussian mixture components and the number of states.

In [7]:
# Fit the GMM-based dynamics prior on the corrupted rollouts, then extract the
# fitted terms A, B, C (their shapes are printed in the next cell).
# The constructor arguments are passed through unchanged; their exact meaning
# is defined by GMM.Estimated_Dynamics_Prior.
# NOTE(review): this cell emits a very long stream of shape diagnostics (see
# the output below) - consider silencing those prints in the GMM module or
# running the cell under %%capture.
model = Estimated_Dynamics_Prior(init_sequential=False, eigreg=False, warmstart=True, 
                 min_samples_per_cluster=20, max_clusters=50, max_samples=20, strength=1.0)
model.update_prior(x_rollout, u_rollout)
A, B, C = model.fit(x_rollout, u_rollout)

50 number of cluster in the mixture
(10, 50) this is the shape of the logobs
(10, 50) this is the shape of the logwts before the logsum
(1, 50) this is the shape of the logwts after the logsum
(50, 11) shape of the mu in the function moments
(11,) mu after sum
[-3.00209603 -0.01834321  0.10971131  0.99391764  0.0804852   0.11520358
 -3.00218774 -0.01899396  0.110111    0.993871    0.08274495] (11,) mu0 in the inference
(11,) this is the shape of the mu0 in fit
(11, 11) this is the shape of the mu0 in fit
(10, 50) this is the shape of the logobs
(10, 50) this is the shape of the logwts before the logsum
(1, 50) this is the shape of the logwts after the logsum
(50, 11) shape of the mu in the function moments
(11,) mu after sum
[-3.00209603 -0.01834321  0.10971131  0.99391764  0.0804852   0.11520358
 -3.00218774 -0.01899396  0.110111    0.993871    0.08274495] (11,) mu0 in the inference
(11,) this is the shape of the mu0 in fit
(11, 11) this is the shape of the mu0 in fit
(10, 50) this is

(10, 50) this is the shape of the logobs
(10, 50) this is the shape of the logwts before the logsum
(1, 50) this is the shape of the logwts after the logsum
(50, 11) shape of the mu in the function moments
(11,) mu after sum
[-3.00209603 -0.01834321  0.10971131  0.99391764  0.0804852   0.11520358
 -3.00218774 -0.01899396  0.110111    0.993871    0.08274495] (11,) mu0 in the inference
(11,) this is the shape of the mu0 in fit
(11, 11) this is the shape of the mu0 in fit
(10, 50) this is the shape of the logobs
(10, 50) this is the shape of the logwts before the logsum
(1, 50) this is the shape of the logwts after the logsum
(50, 11) shape of the mu in the function moments
(11,) mu after sum
[-3.00209603 -0.01834321  0.10971131  0.99391764  0.0804852   0.11520358
 -3.00218774 -0.01899396  0.110111    0.993871    0.08274495] (11,) mu0 in the inference
(11,) this is the shape of the mu0 in fit
(11, 11) this is the shape of the mu0 in fit
(10, 50) this is the shape of the logobs
(10, 50) th

(10, 50) this is the shape of the logobs
(10, 50) this is the shape of the logwts before the logsum
(1, 50) this is the shape of the logwts after the logsum
(50, 11) shape of the mu in the function moments
(11,) mu after sum
[-3.00209603 -0.01834321  0.10971131  0.99391764  0.0804852   0.11520358
 -3.00218774 -0.01899396  0.110111    0.993871    0.08274495] (11,) mu0 in the inference
(11,) this is the shape of the mu0 in fit
(11, 11) this is the shape of the mu0 in fit
(10, 50) this is the shape of the logobs
(10, 50) this is the shape of the logwts before the logsum
(1, 50) this is the shape of the logwts after the logsum
(50, 11) shape of the mu in the function moments
(11,) mu after sum
[-3.00209603 -0.01834322  0.10971131  0.99391763  0.08048523  0.11520358
 -3.00218775 -0.01899397  0.11011101  0.993871    0.08274498] (11,) mu0 in the inference
(11,) this is the shape of the mu0 in fit
(11, 11) this is the shape of the mu0 in fit
(10, 50) this is the shape of the logobs
(10, 50) th

(10, 50) this is the shape of the logobs
(10, 50) this is the shape of the logwts before the logsum
(1, 50) this is the shape of the logwts after the logsum
(50, 11) shape of the mu in the function moments
(11,) mu after sum
[-3.00440932 -0.02755959  0.11947213  0.99238483  0.11137915  0.11281089
 -3.00454712 -0.02827257  0.12002253  0.99230398  0.11384089] (11,) mu0 in the inference
(11,) this is the shape of the mu0 in fit
(11, 11) this is the shape of the mu0 in fit
(10, 50) this is the shape of the logobs
(10, 50) this is the shape of the logwts before the logsum
(1, 50) this is the shape of the logwts after the logsum
(50, 11) shape of the mu in the function moments
(11,) mu after sum
[-3.00500365 -0.02992745  0.12197987  0.99199103  0.11931638  0.11219617
 -3.00515329 -0.03065642  0.12256899  0.99190138  0.12183002] (11,) mu0 in the inference
(11,) this is the shape of the mu0 in fit
(11, 11) this is the shape of the mu0 in fit
(10, 50) this is the shape of the logobs
(10, 50) th

 -3.01942357 -0.08677652  0.18251719  0.98242349  0.30990844] (11,) mu0 in the inference
(11,) this is the shape of the mu0 in fit
(11, 11) this is the shape of the mu0 in fit
(10, 50) this is the shape of the logobs
(10, 50) this is the shape of the logwts before the logsum
(1, 50) this is the shape of the logwts after the logsum
(50, 11) shape of the mu in the function moments
(11,) mu after sum
[-3.019098   -0.08608067  0.18145021  0.98265203  0.30754576  0.0976181
 -3.01952841 -0.08718879  0.18295758  0.98235387  0.3112901 ] (11,) mu0 in the inference
(11,) this is the shape of the mu0 in fit
(11, 11) this is the shape of the mu0 in fit
(10, 50) this is the shape of the logobs
(10, 50) this is the shape of the logwts before the logsum
(1, 50) this is the shape of the logwts after the logsum
(50, 11) shape of the mu in the function moments
(11,) mu after sum
[-3.01915077 -0.08629089  0.18167285  0.98261707  0.30825042  0.09756352
 -3.01958222 -0.08740043  0.18318365  0.98231812  0.3

(11,) this is the shape of the mu0 in fit
(11, 11) this is the shape of the mu0 in fit
(10, 50) this is the shape of the logobs
(10, 50) this is the shape of the logwts before the logsum
(1, 50) this is the shape of the logwts after the logsum
(50, 11) shape of the mu in the function moments
(11,) mu after sum
[-3.01918435 -0.08642467  0.18181454  0.98259482  0.30869888  0.09752879
 -3.01961647 -0.08753512  0.18332753  0.98229538  0.31245077] (11,) mu0 in the inference
(11,) this is the shape of the mu0 in fit
(11, 11) this is the shape of the mu0 in fit
(10, 50) this is the shape of the logobs
(10, 50) this is the shape of the logwts before the logsum
(1, 50) this is the shape of the logwts after the logsum
(50, 11) shape of the mu in the function moments
(11,) mu after sum
[-3.01918435 -0.08642467  0.18181454  0.98259482  0.30869888  0.09752879
 -3.01961647 -0.08753512  0.18332753  0.98229538  0.31245077] (11,) mu0 in the inference
(11,) this is the shape of the mu0 in fit
(11, 11) t

(10, 50) this is the shape of the logobs
(10, 50) this is the shape of the logwts before the logsum
(1, 50) this is the shape of the logwts after the logsum
(50, 11) shape of the mu in the function moments
(11,) mu after sum
[-3.02958798 -0.10255856  0.20056515  0.97675423  0.35090226  0.13837929
 -3.03010077 -0.10369827  0.20226069  0.97634003  0.35500644] (11,) mu0 in the inference
(11,) this is the shape of the mu0 in fit
(11, 11) this is the shape of the mu0 in fit
(10, 50) this is the shape of the logobs
(10, 50) this is the shape of the logwts before the logsum
(1, 50) this is the shape of the logwts after the logsum
(50, 11) shape of the mu in the function moments
(11,) mu after sum
[-3.02979565 -0.1028784   0.2009333   0.97663905  0.35173043  0.13920353
 -3.03031004 -0.10401866  0.20263241  0.97622259  0.35584152] (11,) mu0 in the inference
(11,) this is the shape of the mu0 in fit
(11, 11) this is the shape of the mu0 in fit
(10, 50) this is the shape of the logobs
(10, 50) th

(10, 50) this is the shape of the logobs
(10, 50) this is the shape of the logwts before the logsum
(1, 50) this is the shape of the logwts after the logsum
(50, 11) shape of the mu in the function moments
(11,) mu after sum
[-3.06456303 -0.1797177   0.32307893  0.94285812  0.63116914  0.09488904
 -3.06546162 -0.18142181  0.32602955  0.94176713  0.63770844] (11,) mu0 in the inference
(11,) this is the shape of the mu0 in fit
(11, 11) this is the shape of the mu0 in fit
(10, 50) this is the shape of the logobs
(10, 50) this is the shape of the logwts before the logsum
(1, 50) this is the shape of the logwts after the logsum
(50, 11) shape of the mu in the function moments
(11,) mu after sum
[-3.06556522 -0.18302917  0.32631656  0.9414592   0.63878841  0.01773748
 -3.06648037 -0.18480728  0.32929617  0.94034024  0.64541111] (11,) mu0 in the inference
(11,) this is the shape of the mu0 in fit
(11, 11) this is the shape of the mu0 in fit
(10, 50) this is the shape of the logobs
(10, 50) th

(10, 50) this is the shape of the logobs
(10, 50) this is the shape of the logwts before the logsum
(1, 50) this is the shape of the logwts after the logsum
(50, 11) shape of the mu in the function moments
(11,) mu after sum
[-3.07646282 -0.19995316  0.34598122  0.93532949  0.68305365  0.0595058
 -3.07746258 -0.20176286  0.34915225  0.93409009  0.69004612] (11,) mu0 in the inference
(11,) this is the shape of the mu0 in fit
(11, 11) this is the shape of the mu0 in fit
(10, 50) this is the shape of the logobs
(10, 50) this is the shape of the logwts before the logsum
(1, 50) this is the shape of the logwts after the logsum
(50, 11) shape of the mu in the function moments
(11,) mu after sum
[-3.07498283 -0.19724446  0.34374997  0.93629364  0.67795698  0.08938542
 -3.07596905 -0.19902448  0.34690168  0.93507321  0.68489916] (11,) mu0 in the inference
(11,) this is the shape of the mu0 in fit
(11, 11) this is the shape of the mu0 in fit
(10, 50) this is the shape of the logobs
(10, 50) thi

(50, 11) shape of the mu in the function moments
(11,) mu after sum
[-3.08429298 -0.21210808  0.37400528  0.92051157  0.74533302  0.05579543
 -3.08535352 -0.21388375  0.37737576  0.91899768  0.752822  ] (11,) mu0 in the inference
(11,) this is the shape of the mu0 in fit
(11, 11) this is the shape of the mu0 in fit
(10, 50) this is the shape of the logobs
(10, 50) this is the shape of the logwts before the logsum
(1, 50) this is the shape of the logwts after the logsum
(50, 11) shape of the mu in the function moments
(11,) mu after sum
[-3.09284426 -0.22532341  0.40166589  0.90577783  0.80701696  0.04606599
 -3.09397088 -0.22706801  0.40523264  0.90399094  0.81499695] (11,) mu0 in the inference
(11,) this is the shape of the mu0 in fit
(11, 11) this is the shape of the mu0 in fit
(10, 50) this is the shape of the logobs
(10, 50) this is the shape of the logwts before the logsum
(1, 50) this is the shape of the logwts after the logsum
(50, 11) shape of the mu in the function moments
(11

(10, 50) this is the shape of the logobs
(10, 50) this is the shape of the logwts before the logsum
(1, 50) this is the shape of the logwts after the logsum
(50, 11) shape of the mu in the function moments
(11,) mu after sum
[-3.14060501 -0.28560728  0.56595609  0.81250042  1.17418779  0.06045393
 -3.14203305 -0.28696617  0.57060944  0.80901778  1.18503412] (11,) mu0 in the inference
(11,) this is the shape of the mu0 in fit
(11, 11) this is the shape of the mu0 in fit
(10, 50) this is the shape of the logobs
(10, 50) this is the shape of the logwts before the logsum
(1, 50) this is the shape of the logwts after the logsum
(50, 11) shape of the mu in the function moments
(11,) mu after sum
[-3.14001056 -0.28453226  0.56622484  0.81235529  1.17463628  0.06604253
 -3.14143322 -0.28588598  0.5708794   0.80887049  1.18548593] (11,) mu0 in the inference
(11,) this is the shape of the mu0 in fit
(11, 11) this is the shape of the mu0 in fit
(10, 50) this is the shape of the logobs
(10, 50) th

(10, 50) this is the shape of the logobs
(10, 50) this is the shape of the logwts before the logsum
(1, 50) this is the shape of the logwts after the logsum
(50, 11) shape of the mu in the function moments
(11,) mu after sum
[-3.15110529 -0.29959989  0.59944816  0.78708624  1.25394772  0.02786923
 -3.15260329 -0.30073224  0.60424496  0.78316366  1.2653276 ] (11,) mu0 in the inference
(11,) this is the shape of the mu0 in fit
(11, 11) this is the shape of the mu0 in fit
(10, 50) this is the shape of the logobs
(10, 50) this is the shape of the logwts before the logsum
(1, 50) this is the shape of the logwts after the logsum
(50, 11) shape of the mu in the function moments
(11,) mu after sum
[-3.15134753 -0.29996112  0.59959398  0.78698278  1.25432355  0.026376
 -3.15284734 -0.30109391  0.60439167  0.78305828  1.26570625] (11,) mu0 in the inference
(11,) this is the shape of the mu0 in fit
(11, 11) this is the shape of the mu0 in fit
(10, 50) this is the shape of the logobs
(10, 50) this

(10, 50) this is the shape of the logobs
(10, 50) this is the shape of the logwts before the logsum
(1, 50) this is the shape of the logwts after the logsum
(50, 11) shape of the mu in the function moments
(11,) mu after sum
[-3.15110194 -0.2995949   0.59944624  0.78708757  1.25394276  0.02788864
 -3.15259991 -0.30072725  0.60424302  0.78316502  1.26532261] (11,) mu0 in the inference
(11,) this is the shape of the mu0 in fit
(11, 11) this is the shape of the mu0 in fit
(10, 50) this is the shape of the logobs
(10, 50) this is the shape of the logwts before the logsum
(1, 50) this is the shape of the logwts after the logsum
(50, 11) shape of the mu in the function moments
(11,) mu after sum
[-3.15110194 -0.29959491  0.59944624  0.78708757  1.25394276  0.02788862
 -3.15259991 -0.30072725  0.60424302  0.78316502  1.26532261] (11,) mu0 in the inference
(11,) this is the shape of the mu0 in fit
(11, 11) this is the shape of the mu0 in fit
(10, 50) this is the shape of the logobs
(10, 50) th

 -3.20846653 -0.29461633  0.81646411  0.55262641  1.81485897] (11,) mu0 in the inference
(11,) this is the shape of the mu0 in fit
(11, 11) this is the shape of the mu0 in fit
(10, 50) this is the shape of the logobs
(10, 50) this is the shape of the logwts before the logsum
(1, 50) this is the shape of the logwts after the logsum
(50, 11) shape of the mu in the function moments
(11,) mu after sum
[-3.2071326  -0.2967481   0.81162961  0.56007961  1.80023345 -0.00985237
 -3.20861634 -0.29478848  0.81645162  0.55262119  1.81486722] (11,) mu0 in the inference
(11,) this is the shape of the mu0 in fit
(11, 11) this is the shape of the mu0 in fit
(10, 50) this is the shape of the logobs
(10, 50) this is the shape of the logwts before the logsum
(1, 50) this is the shape of the logwts after the logsum
(50, 11) shape of the mu in the function moments
(11,) mu after sum
[-3.20710555 -0.29671694  0.81163214  0.56008022  1.8002328  -0.00953912
 -3.20858914 -0.29475709  0.8164542   0.55262179  1.

(10, 50) this is the shape of the logobs
(10, 50) this is the shape of the logwts before the logsum
(1, 50) this is the shape of the logwts after the logsum
(50, 11) shape of the mu in the function moments
(11,) mu after sum
[-3.21693614 -0.27843976  0.84133386  0.50932812  1.89679575 -0.15955131
 -3.21832834 -0.27575384  0.84589977  0.50120289  1.91191699] (11,) mu0 in the inference
(11,) this is the shape of the mu0 in fit
(11, 11) this is the shape of the mu0 in fit
(10, 50) this is the shape of the logobs
(10, 50) this is the shape of the logwts before the logsum
(1, 50) this is the shape of the logwts after the logsum
(50, 11) shape of the mu in the function moments
(11,) mu after sum
[-3.21418334 -0.27521521  0.8416813   0.50926145  1.89698511 -0.12619215
 -3.21555941 -0.27250234  0.84625101  0.50113385  1.91210197] (11,) mu0 in the inference
(11,) this is the shape of the mu0 in fit
(11, 11) this is the shape of the mu0 in fit
(10, 50) this is the shape of the logobs
(10, 50) th

(10, 50) this is the shape of the logobs
(10, 50) this is the shape of the logwts before the logsum
(1, 50) this is the shape of the logwts after the logsum
(50, 11) shape of the mu in the function moments
(11,) mu after sum
[-3.25893537 -0.21370498  0.90151164  0.34855088  2.16860816  0.18088504
 -3.26000389 -0.20786238  0.90476594  0.33864346  2.1848173 ] (11,) mu0 in the inference
(11,) this is the shape of the mu0 in fit
(11, 11) this is the shape of the mu0 in fit
(10, 50) this is the shape of the logobs
(10, 50) this is the shape of the logwts before the logsum
(1, 50) this is the shape of the logwts after the logsum
(50, 11) shape of the mu in the function moments
(11,) mu after sum
[-3.25893817 -0.21363361  0.90155145  0.34843463  2.16879438  0.18075342
 -3.26000634 -0.20778899  0.90480475  0.33852593  2.18500433] (11,) mu0 in the inference
(11,) this is the shape of the mu0 in fit
(11, 11) this is the shape of the mu0 in fit
(10, 50) this is the shape of the logobs
(10, 50) th

(50, 11) shape of the mu in the function moments
(11,) mu after sum
[-3.26787185 -0.02855578  0.9824193   0.06744254  2.60780188  0.86482764
 -3.26801463 -0.01696386  0.98300791  0.05462689  2.62581755] (11,) mu0 in the inference
(11,) this is the shape of the mu0 in fit
(11, 11) this is the shape of the mu0 in fit
(10, 50) this is the shape of the logobs
(10, 50) this is the shape of the logwts before the logsum
(1, 50) this is the shape of the logwts after the logsum
(50, 11) shape of the mu in the function moments
(11,) mu after sum
[-3.26584594 -0.01475081  0.97517945  0.06181955  2.61354175 -0.03297198
 -3.26591969 -0.00390055  0.97560262  0.04909257  2.63148471] (11,) mu0 in the inference
(11,) this is the shape of the mu0 in fit
(11, 11) this is the shape of the mu0 in fit
(10, 50) this is the shape of the logobs
(10, 50) this is the shape of the logwts before the logsum
(1, 50) this is the shape of the logwts after the logsum
(50, 11) shape of the mu in the function moments
(11

(10, 50) this is the shape of the logobs
(10, 50) this is the shape of the logwts before the logsum
(1, 50) this is the shape of the logwts after the logsum
(50, 11) shape of the mu in the function moments
(11,) mu after sum
[-3.2633444   0.07449375  0.96414549 -0.03418825  2.74562919  0.26846587
 -3.26297193  0.08718729  0.96313472 -0.04735759  2.76388838] (11,) mu0 in the inference
(11,) this is the shape of the mu0 in fit
(11, 11) this is the shape of the mu0 in fit
(10, 50) this is the shape of the logobs
(10, 50) this is the shape of the logwts before the logsum
(1, 50) this is the shape of the logwts after the logsum
(50, 11) shape of the mu in the function moments
(11,) mu after sum
[-3.25285785  0.21867714  0.94955381 -0.16729811  2.92624972 -0.25622567
 -3.25176446  0.23313595  0.94655611 -0.18108159  2.94497096] (11,) mu0 in the inference
(11,) this is the shape of the mu0 in fit
(11, 11) this is the shape of the mu0 in fit
(10, 50) this is the shape of the logobs
(10, 50) th

(10, 50) this is the shape of the logobs
(10, 50) this is the shape of the logwts before the logsum
(1, 50) this is the shape of the logwts after the logsum
(50, 11) shape of the mu in the function moments
(11,) mu after sum
[-3.2400549   0.384146    0.91676023 -0.31771027  3.1273602  -0.1289707
 -3.23813417  0.4009944   0.91131869 -0.33190726  3.14647768] (11,) mu0 in the inference
(11,) this is the shape of the mu0 in fit
(11, 11) this is the shape of the mu0 in fit
(10, 50) this is the shape of the logobs
(10, 50) this is the shape of the logwts before the logsum
(1, 50) this is the shape of the logwts after the logsum
(50, 11) shape of the mu in the function moments
(11,) mu after sum
[-3.24005576  0.38414571  0.91676009 -0.31771085  3.12736101 -0.12898197
 -3.23813503  0.4009941   0.91131855 -0.33190783  3.14647849] (11,) mu0 in the inference
(11,) this is the shape of the mu0 in fit
(11, 11) this is the shape of the mu0 in fit
(10, 50) this is the shape of the logobs
(10, 50) thi

(50, 11) shape of the mu in the function moments
(11,) mu after sum
[-3.20884092  0.68474196  0.79569191 -0.56479814  3.46379311 -0.36263843
 -3.20541721  0.70364132  0.78554609 -0.57835383  3.48254088] (11,) mu0 in the inference
(11,) this is the shape of the mu0 in fit
(11, 11) this is the shape of the mu0 in fit
(10, 50) this is the shape of the logobs
(10, 50) this is the shape of the logwts before the logsum
(1, 50) this is the shape of the logwts after the logsum
(50, 11) shape of the mu in the function moments
(11,) mu after sum
[-3.20542596  0.70359429  0.78359218 -0.57629197  3.48052613 -0.30834324
 -3.20190799  0.72243261  0.77317736 -0.5896788   3.49911571] (11,) mu0 in the inference
(11,) this is the shape of the mu0 in fit
(11, 11) this is the shape of the mu0 in fit
(10, 50) this is the shape of the logobs
(10, 50) this is the shape of the logwts before the logsum
(1, 50) this is the shape of the logwts after the logsum
(50, 11) shape of the mu in the function moments
(11

(10, 50) this is the shape of the logobs
(10, 50) this is the shape of the logwts before the logsum
(1, 50) this is the shape of the logwts after the logsum
(50, 11) shape of the mu in the function moments
(11,) mu after sum
[-3.00733436  1.25705903  0.22780233 -0.92881505  3.99015699 -0.24746431
 -3.00104907  1.26445124  0.20918363 -0.93304447  3.99673509] (11,) mu0 in the inference
(11,) this is the shape of the mu0 in fit
(11, 11) this is the shape of the mu0 in fit
(10, 50) this is the shape of the logobs
(10, 50) this is the shape of the logwts before the logsum
(1, 50) this is the shape of the logwts after the logsum
(50, 11) shape of the mu in the function moments
(11,) mu after sum
[-3.00712643  1.25715059  0.22724354 -0.92885701  3.99023002 -0.24720371
 -3.00084068  1.26452377  0.20862377 -0.93307546  3.9967915 ] (11,) mu0 in the inference
(11,) this is the shape of the mu0 in fit
(11, 11) this is the shape of the mu0 in fit
(10, 50) this is the shape of the logobs
(10, 50) th

(10, 50) this is the shape of the logobs
(10, 50) this is the shape of the logwts before the logsum
(1, 50) this is the shape of the logwts after the logsum
(50, 11) shape of the mu in the function moments
(11,) mu after sum
[-2.92039838  1.32480277 -0.0414634  -0.96248447  4.04771431 -0.19741123
 -2.91377437  1.32294586 -0.06096251 -0.96142007  4.04625345] (11,) mu0 in the inference
(11,) this is the shape of the mu0 in fit
(11, 11) this is the shape of the mu0 in fit
(10, 50) this is the shape of the logobs
(10, 50) this is the shape of the logwts before the logsum
(1, 50) this is the shape of the logwts after the logsum
(50, 11) shape of the mu in the function moments
(11,) mu after sum
[-2.91035165  1.31062711 -0.07102413 -0.95450917  4.03571386 -0.1800279
 -2.90379851  1.30779418 -0.09030314 -0.95287238  4.03339633] (11,) mu0 in the inference
(11,) this is the shape of the mu0 in fit
(11, 11) this is the shape of the mu0 in fit
(10, 50) this is the shape of the logobs
(10, 50) thi

(10, 50) this is the shape of the logobs
(10, 50) this is the shape of the logwts before the logsum
(1, 50) this is the shape of the logwts after the logsum
(50, 11) shape of the mu in the function moments
(11,) mu after sum
[-2.67464653  0.83340791 -0.73377273 -0.66108866  3.61183738  0.33037834
 -2.67047949  0.81392003 -0.74567646 -0.6478114   3.59353646] (11,) mu0 in the inference
(11,) this is the shape of the mu0 in fit
(11, 11) this is the shape of the mu0 in fit
(10, 50) this is the shape of the logobs
(10, 50) this is the shape of the logwts before the logsum
(1, 50) this is the shape of the logwts after the logsum
(50, 11) shape of the mu in the function moments
(11,) mu after sum
[-2.67662027  0.82103955 -0.7401389  -0.65504091  3.60342678  0.32756903
 -2.67251507  0.80148509 -0.75190259 -0.64167632  3.58503155] (11,) mu0 in the inference
(11,) this is the shape of the mu0 in fit
(11, 11) this is the shape of the mu0 in fit
(10, 50) this is the shape of the logobs
(10, 50) th

In [8]:
# Shapes of the fitted dynamics terms A, B and C, one per line.
print(A.shape, B.shape, C.shape, sep='\n')

(501, 5, 6)
(501, 5)
(501, 5, 5)


### iLQR on estimated model
Here the system dynamics are specified in a special way: the A, B, C matrices are given as input. These matrices come from GMM and GPS theory. They describe the mean/expected trajectory followed by the states, represented by the mean and covariance (A, B, C) matrices of a Gaussian. All remaining properties of the iLQR problem stay the same (cost, initial and goal states, time steps).

In [None]:
# Solve the same iLQR problem again, this time with the fitted A, B, C terms
# supplied as the system dynamics. `x_traj` / `u_traj` are the resulting
# state and control trajectories.
x_traj,u_traj = cartpole_prob.run_IterLinQuadReg_matrix(A, B, C)

### Plot

In [None]:
# Control sequences over time: the iLQR solution ('Original'), the noisy
# rollout controls ('Corrupted') and iLQR on the estimated model ('Estimated').
steps = np.arange(us.shape[0])
control_series = (
    (us, 'r.', 'Original'),
    (u_rollout[0:N], 'b.', 'Corrupted'),
    (u_traj, 'g.', 'Estimated'),
)
for values, fmt, name in control_series:
    plt.plot(steps, values, fmt, label=name)
plt.xlabel('Time steps')
plt.ylabel('U')
plt.legend()
plt.show()

In [None]:
# Pendulum angle over time for the three trajectories.
# All three series go through deaugment_state() so that they are compared in
# the same units. The raw `xs` is presumably the augmented state (it has to be
# de-augmented elsewhere in this notebook to read theta), in which case its
# column 2 is sin(theta) rather than theta itself.
steps = np.arange(xs.shape[0])
plt.plot(steps, cartpole_prob.deaugment_state(xs)[:, 2], 'r.', label='Original')
plt.plot(steps, cartpole_prob.deaugment_state(x_rollout)[0:N+1, 2], 'b.', label='Corrupted')
plt.plot(steps, cartpole_prob.deaugment_state(x_traj)[:, 2], 'g.', label='Estimated')
plt.xlabel('Time steps')
plt.ylabel('Theta')
plt.legend()
plt.show()

In [None]:
# Cart position over time for the three trajectories.
steps = np.arange(xs.shape[0])
position_series = (
    (xs[:, 0], 'r.', 'Original'),
    (cartpole_prob.deaugment_state(x_rollout)[0:N+1, 0], 'b.', 'Corrupted'),
    (cartpole_prob.deaugment_state(x_traj)[:, 0], 'g.', 'Estimated'),
)
for values, fmt, name in position_series:
    plt.plot(steps, values, fmt, label=name)
plt.xlabel('Time steps')
plt.ylabel('Pos')
plt.legend()
plt.show()

### GPS 

In [None]:
# Settings for the GPS experiment. Time step, horizon and physical parameters
# repeat the values used earlier; only the state-cost weights differ.
dt = 0.005
N = 500
# Ordered as (m, M, L, g, d).
x_dynamics = np.array([1, 5, 2, 9.80665, 1])

# Instantaneous state cost: diagonal entries 1 and 3 weighted 10, the rest 1.
Q = np.diag([1.0, 10.0, 1.0, 10.0, 1.0])
# Terminal state cost: 100 on every diagonal entry.
Q_terminal = np.eye(5) * 100.0
# Instantaneous control cost (single control input).
R = np.ones((1, 1))

In [None]:
# Collect local trajectories for the global policy. For each of 10 slightly
# different initial pole angles (0.001 * i):
#   1. solve the nominal iLQR problem,
#   2. generate 20 noisy rollouts around that solution,
#   3. fit a local dynamics model to the rollouts,
#   4. re-solve iLQR on the fitted model and store the result.
# After the loop, `cartpole_prob`, `x0`, `x_goal`, `xs`, `us`, `x_traj` and
# `u_traj` hold the values from the last iteration; the cells below rely on
# them.
# NOTE(review): gen_rollouts is unpacked into five values here (with `x_gmm`)
# but into four in the earlier rollout cell, and this loop uses local_estimate
# where the earlier cell uses Estimated_Dynamics_Prior - confirm which API is
# current.
x_train = []
u_train = []
for i in range(10):
    print('iteration is ',i)
    x0 = np.array([2, 0, 0.001*i , 0])  # Initial state
    x_goal = np.array([2, 0.0, 0.0, 0.0])
    cartpole_prob = CartPole(dt, N, x_dynamics, x0, x_goal, Q, R, Q_terminal)
    xs, us = cartpole_prob.run_IterLinQuadReg()
    x_rollout, u_rollout, local_policy, x_gmm, cost = cartpole_prob.gen_rollouts(
        x0, x_goal, us,
        n_rollouts=20, pattern_rand=False, var_range=10, gamma=0.8, percent=20)
    model = local_estimate(x_gmm, components=5, NoOfstates=5)
    A, B, C = model.estimate(N=N)
    x_traj,u_traj = cartpole_prob.run_IterLinQuadReg_matrix(A, B, C)
    x_train.append(x_traj)
    u_train.append(u_traj)

In [None]:
# Stack the local trajectories with indices 0-8 into flat training arrays.
# The last state of each trajectory is dropped ([:-1]), presumably so that
# the states line up one-to-one with the controls.
# NOTE(review): ten trajectories are collected above but index 9 is left out
# here. It may be an intentional hold-out (the later cells evaluate on the
# last iteration's `xs`) - confirm.
x_train1 = np.vstack([x_train[k][:-1] for k in range(9)])
u_train1 = np.vstack([u_train[k] for k in range(9)])

In [None]:
# Pass the stacked controls through ilqr's `constrain` helper with bounds
# (-0.9, 0.9); the result is used as the regression target for the global
# policy.
# NOTE(review): presumably these are the same bounds the CartPole dynamics
# apply to the control internally - confirm.
u_gr = constrain(u_train1,-0.9,0.9)

In [None]:
# Global policy: Gaussian-process regression from the stacked states to the
# constrained controls, using a DotProduct kernel plus a WhiteKernel term.
kernel = DotProduct() + WhiteKernel()
gpr = GaussianProcessRegressor(kernel=kernel, random_state=0)
gpr.fit(x_train1, u_gr);  # trailing ';' keeps the cell output empty

In [None]:
# Score of the fitted GP on its own training data (R^2 for scikit-learn
# regressors). This is an in-sample check, not a generalization estimate.
gpr.score(x_train1,u_gr)

In [None]:
# Query the global GP policy along `xs`, the iLQR state trajectory left over
# from the last iteration of the training loop.
u_pre = gpr.predict(xs)

In [None]:
# Constrained control sequences: nominal iLQR ('Original'), the GP policy
# ('Global') and iLQR on the locally estimated model ('Local').
# The figure is also written to control.pdf.
steps = np.arange(us.shape[0])
control_series = (
    (us, 'r.', 'Original'),
    (u_pre[0:N], 'b.', 'Global'),
    (u_traj, 'g.', 'Local'),
)
for raw_u, fmt, name in control_series:
    plt.plot(steps, constrain(raw_u, -0.9, 0.9), fmt, label=name)
plt.xlabel('Time steps')
plt.ylabel('U')
plt.legend()
plt.savefig('control.pdf')
plt.show()

In [None]:
# Roll out the controls predicted by the global policy on the simulated
# system. `u_pre[:-1]` drops the last prediction, presumably because `xs` has
# one more row than there are control steps.
# NOTE(review): var_range=0, gamma=1 and percent=0 presumably disable the
# corruption so the rollouts follow `u_pre` as given - confirm in gen_rollouts.
x_rollout00, u_rollout00, local_policy00, x_gmm00, cost00 = cartpole_prob.gen_rollouts(x0, x_goal, u_pre[:-1], n_rollouts=10, var_range=0, gamma=1, percent=0)

In [None]:
# Cart position over time: nominal iLQR ('Original'), rollout of the GP
# policy ('Global') and iLQR on the locally estimated model ('Local').
# The figure is also written to position.pdf.
steps = np.arange(xs.shape[0])
position_series = (
    (xs[:, 0], 'r.', 'Original'),
    (cartpole_prob.deaugment_state(x_rollout00)[0:N+1, 0], 'b.', 'Global'),
    (cartpole_prob.deaugment_state(x_traj)[:, 0], 'g.', 'Local'),
)
for values, fmt, name in position_series:
    plt.plot(steps, values, fmt, label=name)
plt.xlabel('Time steps')
plt.ylabel('Pos')
plt.legend()
plt.savefig('position.pdf')
plt.show()

In [None]:
# Pendulum angle over time: nominal iLQR ('Original'), rollout of the GP
# policy ('Global') and iLQR on the locally estimated model ('Local').
# All three series go through deaugment_state() so that they are compared in
# the same units. The raw `xs` is presumably the augmented state (it has to be
# de-augmented elsewhere in this notebook to read theta), in which case its
# column 2 is sin(theta) rather than theta itself.
# The figure is also written to theta.pdf.
steps = np.arange(xs.shape[0])
plt.plot(steps, cartpole_prob.deaugment_state(xs)[:, 2], 'r.', label='Original')
plt.plot(steps, cartpole_prob.deaugment_state(x_rollout00)[0:N+1, 2], 'b.', label='Global')
plt.plot(steps, cartpole_prob.deaugment_state(x_traj)[:, 2], 'g.', label='Local')
plt.xlabel('Time steps')
plt.ylabel('Theta')
plt.legend()
plt.savefig('theta.pdf')
plt.show()

In [None]:
# Summary figure: cart position (top) and pendulum angle (bottom) for the
# nominal iLQR solution ('Original'), the rollout of the GP policy ('Global')
# and iLQR on the locally estimated model ('Local'). Written to total.pdf.
#
# Every trajectory is de-augmented once up front so that both panels read
# from the same representation. The raw `xs` is presumably the augmented
# state, in which case its column 2 is sin(theta) rather than theta.
steps = np.arange(xs.shape[0])
xs_reduced = cartpole_prob.deaugment_state(xs)
rollout_reduced = cartpole_prob.deaugment_state(x_rollout00)[0:N+1]
traj_reduced = cartpole_prob.deaugment_state(x_traj)

# (subplot row, state column, title, y-axis label)
panels = (
    (1, 0, 'position vs time', 'Pos'),
    (2, 2, 'theta vs time', 'Theta'),
)
for row, col, title, ylabel in panels:
    plt.subplot(2, 1, row)
    plt.plot(steps, xs_reduced[:, col], 'r.', label='Original')
    plt.plot(steps, rollout_reduced[:, col], 'b.', label='Global')
    plt.plot(steps, traj_reduced[:, col], 'g.', label='Local')
    plt.xlabel('Time steps')
    plt.title(title)
    plt.ylabel(ylabel)
    # The series are labelled, so draw the legend that makes them readable.
    plt.legend()

plt.subplots_adjust(hspace=1.5)
plt.savefig('total.pdf')
plt.show()