In [12]:
%matplotlib notebook
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Solve a single mto once initially

In [18]:
from __future__ import print_function, absolute_import
import math
import matplotlib.pyplot as plt
import numpy as np

from multiple_traj_opt import (
    make_mto,
    MultipleTrajOpt,
    initial_conditions_Russ,
    initial_conditions_grid,
    initial_conditions_random,
)
from nn_system.networks import *

In [None]:
# Build one problem with make_mto()'s default arguments and solve it once.
# The method cells further down read `new_mto` as the starting point for their warm starts.
new_mto = make_mto()
new_mto.Solve()

## Repeatedly run any of the cells below to do an iteration of each method!
Each run rebinds `old_mto` to the problem that was passed in and `new_mto` to the freshly solved one.

In [16]:
# 0) RESTART WITH A DIFFERENT MINIBATCH OF INITIAL CONDITIONS
# Simplest possible version, (using the EXACT previous answer as a warm start)
def method0(mto, ic_list, **kwargs):
    """Solve a fresh problem on `ic_list`, seeded with the solution of `mto`.

    Args:
        mto: problem object exposing `.prog` whose solution is reused as the
            initial guess, or None to solve without a warm start.
        ic_list: initial conditions, forwarded to `make_mto(ic_list=...)`.
        **kwargs: any further keyword arguments, forwarded to `make_mto`.

    Returns:
        Tuple `(previous, fresh)`: the object passed in as `mto` and the newly
        built and solved problem.
    """
    previous = mto
    fresh = make_mto(ic_list=ic_list, **kwargs)  # the new initial conditions enter here

    if previous is not None:
        # Copy the previous solution of every decision variable over as the initial guess.
        previous_solution = previous.prog.GetSolution(previous.prog.decision_variables())
        fresh.prog.SetInitialGuessForAllVariables(previous_solution)

    fresh.Solve()
    return previous, fresh
# Usage: old_mto, new_mto = method0(new_mto, ic_list)

In [None]:
# 1) SIMPLE RESTART WITH POTENTIALLY DIFFERENT SETTINGS, full huxT
# Then resolve again with (potentially different settings, but using the previous answer as a warm start)
def method1(mto):
    """Rebuild the problem with `make_mto()` defaults and warm-start it from `mto`.

    Args:
        mto: problem object exposing `.prog`; the solution of all of its
            decision variables becomes the initial guess of the new problem.

    Returns:
        Tuple `(previous, fresh)`: the object passed in and the newly solved problem.
    """
    previous, fresh = mto, make_mto()

    # Full warm start: every decision variable starts from its previous solution.
    previous_prog = previous.prog
    previous_solution = previous_prog.GetSolution(previous_prog.decision_variables())
    fresh.prog.SetInitialGuessForAllVariables(previous_solution)

    fresh.Solve()
    return previous, fresh


old_mto, new_mto = method1(new_mto)

In [None]:
# 2) RESTART WITH A DIFFERENT MINIBATCH OF INITIAL CONDITIONS, use limited wallclock time policy rollouts?
# Then resolve again with (potentially different settings, but using the previous answer as a warm start)
# NOTE(review): unfinished sketch. The assignments whose right-hand side is only a `#TODO`
# comment are syntax errors, so this cell cannot run until they are filled in.
# Intended flow, as far as the code shows: read the old T solution, build a new problem,
# roll out the old policy from each new initial condition, use the rollouts as initial
# guesses for h/u/x, carry T over, then solve.
def method2(mto):
    old_mto = mto
    # Solved values of the old problem's T variables, carried over unchanged further down.
    old_mto_T_vals = old_mto.prog.GetSolution(old_mto.T)
    mto = make_mto()

    ic_list = #TODO
    # NOTE(review): `num_trajectories` is not defined in this cell; other cells read it
    # as an attribute (`new_mto.num_trajectories`) -- confirm which object should supply it.
    assert len(ic_list) == num_trajectories
    for ic in ic_list:
        # NOTE(review): a double-underscore attribute accessed from outside a class body is
        # not name-mangled; if the method is defined inside the class under this name it is
        # stored under a mangled name and this lookup raises AttributeError -- confirm.
        t_samples, x_samples, u_samples, logger = old_mto.__rollout_policy_at_solution(ti_or_ic=ic) # Be careful about this taking forever!!!!
        # Add a return for u_samples!!
        warm_start = #TODO: assemble a new batch of h, u, x?
        # NOTE(review): `ti` is never assigned in this function (the loop variable is `ic`),
        # and the value arguments of the three calls below are still missing.
        mto.prog.SetInitialGuess(mto.h[ti], ) #TODO
        mto.prog.SetInitialGuess(mto.u[ti], ) #TODO
        mto.prog.SetInitialGuess(mto.x[ti], ) #TODO
    # T starts from the old solution regardless of the per-trajectory guesses above.
    mto.prog.SetInitialGuess(mto.T, old_mto_T_vals)
    mto.Solve()
    return old_mto, mto
old_mto, new_mto = method2(new_mto)

In [None]:
# 3) RESTART WITH A DIFFERENT MINIBATCH OF INITIAL CONDITIONS, nearby traj. interpolations?
# Begs the question, will I want a history of trajectories??
# Then resolve again with (potentially different settings, but using the previous answer as a warm start)
# Keep a "BANK" of trajectories, that we can optionally use for warm starting?
# No consistency here!
# NOTE(review): unfinished and partly corrupted cell -- it does not parse as written.
# NOTE(review): on a fresh kernel `trajectories` is undefined, so the truthiness check
# below raises NameError before the bank can be created.
if not trajectories:
    trajectories = []
def method3(mto, ic_list):
    # `trajectories` is the module-level bank of past solutions; this function appends to it.
    global trajectories
    old_mto = mto

    # NOTE(review): `num_trajectories` is not defined in this cell, and `old_mto.GetSolution`
    # differs from the `old_mto.prog.GetSolution` spelling used in the other methods -- confirm both.
    for ti in range(num_trajectories):
        trajectory = np.hstack([old_mto.GetSolution(var) for var in (old_mto.h[ti], old_mto.u[ti], old_mto.x[ti])]) # (h, u_t's, x_t's)
        trajectories.append(trajectory)
    # NOTE(review): the statement on the next line is cut in half. From `mto = mto` down to
    # the `mto.Solve()` that precedes ` CUTOFF:` the text matches the body of method5, which
    # appears to have been pasted into the middle of `if len(trajectories) > CUTOFF:`.
    # `CUTOFF` itself is not defined anywhere in this cell.
    if len(trajectories) >mto = mto
    old_mto_T_vals = old_mto.prog.GetSolution(old_mto.T)
    mto = make_mto()

    warm_mto = MultipleTrajOpt("pendulum", 16, 16, 0.2, 0.5, ic_list=ic_list, warm_start=True, seed=old_mto.seed)
    warm_mto.add_nn_params(old_mto.kNetConstructor,
                      use_constraint    = False,
                      cost_factor       = 1.0,
                      initialize_params = True, 
                      reg_type          = old_mto.reg_type)
    warm_mto.add_cost_and_constraint_printing_callback(1)
    warm_mto.prog.SetInitialGuess(warm_mto.T, old_mto_T_vals) # "Warm start the warm start with the old network?"
    warm_mto.Solve()

    # Warm start paths with fresh solves, but carry over NN
    warm_mto_hux_vals = np.hstack([warm_mto.prog.GetSolution(var) for var in (warm_mto.h, warm_mto.u, warm_mto.x)])
    mto.prog.SetInitialGuessForAllVariables(np.hstack([warm_mto_hux_vals, old_mto_T_vals]))

    # NOTE(review): end of the pasted fragment; ` CUTOFF:` is the tail of the broken `if`.
    mto.Solve() CUTOFF:
        trajectories = trajectories[-CUTOFF:] # Keep bank trim and hopefully filter out the old crappy trajectories...
    mto = make_mto()

    # Warm start... but use a different warm start scheme
    # NOTE(review): this assignment would overwrite the `ic_list` parameter once filled in.
    ic_list = #TODO
    interpolants = []
    for ic in ic_list:
        # TODO, should have format of (h, u_t's, x_t's). Should be graded by ic and fc (and path?) proximity??
        # Can come from the previous solution, or the bank
        nearest = #TODO
        # NOTE(review): list.append() requires one argument; as written this raises TypeError.
        interpolants.append()
    assert len(interpolants) == len(ic_list)
    # NOTE(review): `interpolants` is never used; the initial guess below is still the
    # complete previous solution, exactly as in method1.
    old_mto_dec_vals = old_mto.prog.GetSolution(old_mto.prog.decision_variables())
    mto.prog.SetInitialGuessForAllVariables(old_mto_dec_vals)

    mto.Solve()
    return old_mto, mto
# NOTE(review): method3 declares two required parameters but only one argument is passed here.
old_mto, new_mto = method3(new_mto)

In [None]:
# 4) RESTART WITH A DIFFERENT MINIBATCH OF INITIAL CONDITIONS, fresh traj. solves? - should i split the traj solves?
# Then resolve again with (potentially different settings, but using the previous answer as a warm start)
def method4(mto, ic_list):
    """Warm-start a new problem from freshly solved trajectories plus the old T values.

    An auxiliary `MultipleTrajOpt` is solved on `ic_list` (no `add_nn_params`
    call is made on it). Its (h, u, x) solution is stacked with the T solution
    of `mto` and used as the initial guess for a new `make_mto()` problem.

    Args:
        mto: previously solved problem; its `T` solution and `seed` are reused.
        ic_list: initial conditions for the auxiliary trajectory solve.

    Returns:
        Tuple `(old_mto, mto)`: the object passed in and the newly solved problem.
    """
    old_mto = mto
    # Only T carries over from the old problem. Reading *all* decision variables here
    # (as the code used to) made the stacked guess longer than the variable vector.
    old_mto_T_vals = old_mto.prog.GetSolution(old_mto.T)
    # NOTE(review): the new problem is built with make_mto() defaults, not with `ic_list`;
    # confirm the warm-start trajectories are meant for that problem's initial conditions.
    mto = make_mto()

    warm_mto = MultipleTrajOpt("pendulum", 16, 16, 0.2, 0.5, ic_list=ic_list, warm_start=True, seed=old_mto.seed)
    warm_mto.add_cost_and_constraint_printing_callback(1)
    warm_mto.Solve()

    # Warm start paths with fresh solves, but carry over NN
    warm_mto_hux_vals = np.hstack([warm_mto.prog.GetSolution(var) for var in (warm_mto.h, warm_mto.u, warm_mto.x)])
    # SetInitialGuessForAllVariables takes the value vector only, as in method5.
    # NOTE(review): assumes the program orders its decision variables (h, u, x, T) -- confirm.
    mto.prog.SetInitialGuessForAllVariables(np.hstack([warm_mto_hux_vals, old_mto_T_vals]))

    mto.Solve()
    return old_mto, mto


# method4 requires the new minibatch; sample it the same way the minibatch loop does.
old_mto, new_mto = method4(new_mto, initial_conditions_random(16, (-math.pi, math.pi), (-5, 5)))

In [None]:
# 5) RESTART WITH A DIFFERENT MINIBATCH OF INITIAL CONDITIONS, fresh traj. solves with policy violation cost?
# Is this even cheaper?
# Then resolve again with (potentially different settings, but using the previous answer as a warm start)
def method5(mto, ic_list):
    """Warm-start a new problem from an auxiliary solve that also carries NN parameters.

    Same scheme as method4, except that the auxiliary problem gets NN parameters
    via `add_nn_params(..., use_constraint=False, cost_factor=1.0)` and its T
    variables start from the T solution of `mto`.

    Args:
        mto: previously solved problem; its `T` solution, `seed`,
            `kNetConstructor` and `reg_type` are reused.
        ic_list: initial conditions for the auxiliary solve.

    Returns:
        Tuple `(old_mto, mto)`: the object passed in and the newly solved problem.
    """
    old_mto = mto
    old_mto_T_vals = old_mto.prog.GetSolution(old_mto.T)
    mto = make_mto()

    warm_mto = MultipleTrajOpt("pendulum", 16, 16, 0.2, 0.5, ic_list=ic_list, warm_start=True, seed=old_mto.seed)
    warm_mto.add_nn_params(old_mto.kNetConstructor,
                           use_constraint=False,
                           cost_factor=1.0,
                           initialize_params=True,
                           reg_type=old_mto.reg_type)
    warm_mto.add_cost_and_constraint_printing_callback(1)
    # "Warm start the warm start with the old network?"
    warm_mto.prog.SetInitialGuess(warm_mto.T, old_mto_T_vals)
    warm_mto.Solve()

    # Warm start paths with fresh solves, but carry over NN
    warm_mto_hux_vals = np.hstack([warm_mto.prog.GetSolution(var) for var in (warm_mto.h, warm_mto.u, warm_mto.x)])
    # NOTE(review): assumes the program orders its decision variables (h, u, x, T) -- confirm.
    mto.prog.SetInitialGuessForAllVariables(np.hstack([warm_mto_hux_vals, old_mto_T_vals]))

    mto.Solve()
    return old_mto, mto


# method5 requires the new minibatch; sample it the same way the minibatch loop does.
old_mto, new_mto = method5(new_mto, initial_conditions_random(16, (-math.pi, math.pi), (-5, 5)))

In [None]:
# 6) EXPERIMENT WITH CARRYING OVER MORE THAN DECISION VARIABLE INITIAL GUESSES!!!!
# THIS WILL BE THE HARDEST EXPERIMENT... IT WILL REQUIRE RECOMPILING A NEW VERSION OF DRAKE????
# <TODO>
# https://github.com/RobotLocomotion/drake/blob/7c513516620b1e6001fd487e076c39f716027a79/solvers/snopt_solver.cc#L682

# Then resolve again with (potentially different settings, but using the previous answer as a warm start)
def method6(mto, ic_list=None):
    """Placeholder for carrying over more than decision-variable initial guesses.

    For now this does the same full warm start as method1: the solution of every
    decision variable of `mto` seeds a new `make_mto()` problem.

    Args:
        mto: previously solved problem exposing `.prog`.
        ic_list: accepted for signature parity with the other methods but not
            used yet; it defaults to None so the one-argument call below works.

    Returns:
        Tuple `(old_mto, mto)`: the object passed in and the newly solved problem.
    """
    old_mto = mto
    mto = make_mto()

    # Warm start... but use a different warm start scheme
    old_mto_dec_vals = old_mto.prog.GetSolution(old_mto.prog.decision_variables())
    mto.prog.SetInitialGuessForAllVariables(old_mto_dec_vals)

    mto.Solve()
    return old_mto, mto


old_mto, new_mto = method6(new_mto)

## Applying the warm starting methods to a full iterative optimization process below

In [10]:
def test_inner(string, secret=None):
    if secret is not None:
        print("got the secret")
    else:
        print(string)
def test_outer(string, **kwargs):
    test_inner(string, **kwargs)
test_outer("nope", secret="open sesame")

got the secret


In [None]:
##### RUSS'S MINIBATCH METHOD
# Initial solve here...
old_mto = None

# TODO: expose switches for
#   - minibatching     (here in the outer loop)
#   - dropouts         (NN-specific)
#   - NN init?         (NN-specific)
#   - NN noise adding  (NN-specific)

# Minibatch optimization: every pass draws a new random batch of initial conditions
# and re-solves through method0, warm-started from the current `new_mto`.
total_iterations = 3
while total_iterations > 0:
    total_iterations -= 1
    ic_list = initial_conditions_random(16, (-math.pi, math.pi), (-5, 5))
    # Keyword arguments that method0 forwards to make_mto.
    solve_options = dict(
        vis_cb_every_nth=10,
        cost_cb_every_nth=10,
        snopt_overrides=[('Time limit (secs)',  10.0)],
    )
    old_mto, new_mto = method0(new_mto, ic_list, **solve_options)

Overrode Time limit (secs) = 10.0
 0

<IPython.core.display.Javascript object>

total cost:  322.71 | 	constraint  1228.80 	bad 198,  969.29
 1 2 3 4 5 6 7 8 9 10total cost:  603.48 | 	constraint  1226.54 	bad 192,  918.40
 11 12 13 14 15 16 17 18 19 20total cost:  337.69 | 	constraint  1228.57 	bad 198,  966.07
 21 22TOTAL ELAPSED TIME: 11.5217449665
Overrode Time limit (secs) = 10.0
 0total cost:  337.69 | 	constraint  1228.57 	bad 198,  966.07
 1 2 3 4 5 6 7 8 9 10total cost:  355.89 | 	constraint  1228.36 	bad 198,  962.32
 11 12 13 14 15 16 17 18 19 20total cost:  1718.04 | 	constraint  1248.25 	bad 177,  816.39
 21TOTAL ELAPSED TIME: 11.0675330162
Overrode Time limit (secs) = 10.0
 0total cost:  1718.04 | 	constraint  1248.25 	bad 177,  816.39
 1 2 3 4 5 6 7 8 9 10total cost:  5634.46 | 	constraint  1381.12 	bad 239,  714.35
 11 12 13 14 15 16 17 18 19 20

In [None]:
##### IGOR'S BLOCK-ALTERNATING METHOD - THIS IS BEING WORKED ON IN ANOTHER NOTEBOOK!!!!!
# Init work here...
# <TODO>

# Minibatch optimization... Let's not do any warm starting for Igor's...
# NOTE(review): stub only -- `ic_list = #TODO` is a syntax error, and none of the three
# helpers called in the loop are defined in this notebook as shown.
total_iterations = 10
# NOTE(review): with `>= 0` and a start value of 10 the body runs 11 times (10 down to 0);
# the minibatch loop above uses `> 0` -- confirm which bound is intended.
while total_iterations >= 0:
    total_iterations -= 1
    ic_list = #TODO
    igor_traj_opt()
    igor_supervised_learning()
    visualize_intermediate_results()

## ----------------------------------------- Visualizations below this line --------------------------------------

In [None]:
#### Get a sense of the consistency of x and u!
# Let's plot all the u's on the z axis and x (theta, theta_dot) on the y and x axes...
# Importing Axes3D registers the '3d' projection on older matplotlib releases.
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

# Pool the solved knot points of every trajectory: x-axis theta, y-axis theta_dot, z-axis u.
xs, ys, zs = [], [], []
for ti in range(new_mto.num_trajectories):
    x_s = new_mto.prog.GetSolution(new_mto.x[ti])
    u_s = new_mto.prog.GetSolution(new_mto.u[ti])
    # Each row of x_s is unpacked as one (theta, theta_dot) pair.
    thetas, theta_dots = zip(*x_s)
    xs.extend(thetas)
    ys.extend(theta_dots)
    zs.extend(u_s)

# scatter() needs exactly one z value per (x, y) point.
assert len(xs) == len(ys) == len(zs), (len(xs), len(ys), len(zs))
ax.scatter(xs, ys, zs, c='b', marker='o')

ax.set_xlabel('theta')
ax.set_ylabel('theta_dot')
ax.set_zlabel('u')

plt.show()

In [None]:
import math

# Grid of 36 initial conditions; the two tuples are presumably the (theta, theta_dot)
# ranges -- confirm against initial_conditions_grid.
theta_bounds = (0, math.pi)
theta_dot_bounds = (-5, 5)
ic_list = initial_conditions_grid(36, theta_bounds, theta_dot_bounds)
new_mto.plot_all_policies("state_quiver", ic_list)

In [None]:
from nn_system.NNSystemHelper import create_nn

# Rebuild the network from the solved T values so its parameters can be inspected.
kNetConstructor = lambda: FCBIG(2)
solved_T_values = list(new_mto.prog.GetSolution(new_mto.T))
nn = create_nn(kNetConstructor, solved_T_values)
list(nn.parameters())

In [None]:
# Render the policy for trajectory 0 of the latest solve. `new_mto` is the global the
# cells above maintain; a bare `mto` exists only as a local inside the method functions.
ti = 0
new_mto.render_policy(ti)

In [None]:
# Plot every trajectory of the latest solve. `new_mto` is the global the cells above
# maintain; a bare `mto` exists only as a local inside the method functions.
new_mto.plot_all_trajectories("state_scatter")
# Other plot kinds:
# new_mto.plot_all_trajectories("state_quiver")
# new_mto.plot_all_trajectories("tip_scatter")
# new_mto.plot_all_trajectories("tip_quiver")

### Visualize the found trajectories

In [None]:
# Show trajectory 0 of the latest solve in every available plot style, then render it.
# `new_mto` is the global the cells above maintain; a bare `mto` exists only as a local
# inside the method functions.
ti = 0
for plot_kind in ("state_scatter", "state_quiver", "tip_scatter", "tip_quiver"):
    new_mto.plot_single_trajectory(ti, plot_kind)

new_mto.render_single_trajectory(ti)