In [43]:
%matplotlib inline
from __future__ import print_function, absolute_import

import math
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import HTML
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm

from pydrake.all import (
    DiagramBuilder,
    FloatingBaseType,
    RigidBodyTree,
    RigidBodyPlant,
    SignalLogger, 
    Simulator, 
    VectorSystem
)
# from pydrake.examples.pendulum import PendulumPlant
from pydrake.systems.controllers import (
    DynamicProgrammingOptions, FittedValueIteration, PeriodicBoundaryCondition)
# from traj.visualizer import PendulumVisualizer
# from pydrake.examples.acrobot import AcrobotPlant
from underactuated import (
    PlanarRigidBodyVisualizer
)

In [53]:
from pydrake.all import (BarycentricMesh, BarycentricMeshSystem)
def save_policy(name): # binds to policy, state_grid, and cost_to_go
    """Write the current policy, its mesh grid and the cost-to-go to ``numpy_saves/``.

    Reads the module-level ``policy``, ``state_grid`` and ``cost_to_go``; it
    takes no data arguments. Three files are written, all ending in
    ``__cartpole_<name>`` (``np.save`` appends ``.npy``).

    Args:
        name: Suffix identifying this policy; concatenated onto each file stem.
    """
    controls = policy.get_output_values()

    mesh_stem = 'numpy_saves/pi_b_mesh_init__cartpole_'+name
    np.save(mesh_stem, state_grid)

    controls_stem = 'numpy_saves/pi_output_values__cartpole_'+name
    np.save(controls_stem, controls)

    ctg_stem = 'numpy_saves/ctg__cartpole_'+name
    np.save(ctg_stem, cost_to_go)
def load_policy(name):
    """Rebuild a saved policy and load its cost-to-go from ``numpy_saves/``.

    Reads the three ``.npy`` files whose names end in ``__cartpole_<name>``.

    Args:
        name: Suffix identifying the saved policy.

    Returns:
        Tuple ``(policy_system, ctg)``: a ``BarycentricMeshSystem`` built from
        the stored mesh grid and output values, and the stored cost-to-go array.
    """
    # NOTE(review): if the mesh grid was saved as an object array, newer NumPy
    # needs allow_pickle=True here -- confirm against how state_grid is built.
    mesh_array = np.load('numpy_saves/pi_b_mesh_init__cartpole_'+name+'.npy')
    mesh_grid = mesh_array.tolist()

    controls = np.load('numpy_saves/pi_output_values__cartpole_'+name+'.npy')
    mesh = BarycentricMesh(mesh_grid)

    ctg = np.load('numpy_saves/ctg__cartpole_'+name+'.npy')
    policy_system = BarycentricMeshSystem(mesh, controls)
    return policy_system, ctg

# Choose which saved policy to load. Both strings are spliced verbatim into the
# file names read by load_policy.
#
# u_costs = [32400, 20e3, 10e3, 5e3, 2.5e3, 1e3, 500, 250, 100]
# Alternative u_cost spellings:
#     "100.", "250", "500", "1000.0", "2500.0", "5000.0", "10000.0", "20000.0"
u_cost = "32400"
torque_limit = "1000"

# Alternative naming without the torque limit:
# policy_name = "u_cost="+u_cost
policy_name = (
    "u_cost=" + u_cost
    + "_torque_limit=" + torque_limit
)

# save_policy(policy_name)
policy, ctg = load_policy(policy_name)

In [54]:
def visme(pi, duration=10., initial_state=(-1., math.pi-2, 1., -1.),
          urdf_path="/opt/underactuated/src/cartpole/cartpole.urdf"):
    """Simulate the cart-pole under controller ``pi`` and return the rollout as a video.

    The plant's 4-element output is passed through a wrapper that maps
    element 1 (theta) into [0, 2*pi) before it reaches the controller; the
    controller's output port 0 drives the plant's input port 0.

    Args:
        pi: Controller system to add to the diagram. Input port 0 receives the
            wrapped plant output; output port 0 is connected to the plant input.
        duration: Time passed to ``Simulator.StepTo``.
        initial_state: Four initial continuous-state values. The default is the
            initial condition that was previously hard-coded.
        urdf_path: Path of the cart-pole URDF used to build the rigid body tree.
            The default is the path that was previously hard-coded.

    Returns:
        ``IPython.display.HTML`` wrapping the HTML5 video of the animation.
    """
    # Animate the resulting policy.
    builder = DiagramBuilder()
    tree = RigidBodyTree(urdf_path, FloatingBaseType.kFixed)
    plant = RigidBodyPlant(tree)
    plant_system = builder.AddSystem(plant)


    # TODO(russt): add wrap-around logic to barycentric mesh
    # (so the policy has it, too)
    class WrapTheta(VectorSystem):
        """Pass the 4-vector through, wrapping element 1 into [0, 2*pi)."""

        def __init__(self):
            VectorSystem.__init__(self, 4, 4)

        def _DoCalcVectorOutput(self, context, input, state, output):
            output[:] = input
            twoPI = 2.*math.pi
            # x - 2*pi*floor(x / 2*pi) is x modulo 2*pi, always non-negative.
            output[1] = output[1] - twoPI * math.floor(output[1] / twoPI)


    wrap = builder.AddSystem(WrapTheta())
    builder.Connect(plant_system.get_output_port(0), wrap.get_input_port(0))
    vi_policy = builder.AddSystem(pi)
    builder.Connect(wrap.get_output_port(0), vi_policy.get_input_port(0))
    builder.Connect(vi_policy.get_output_port(0), plant_system.get_input_port(0))

    # Log the unwrapped plant output with a 0.033333 publish period (about 30 Hz).
    logger = builder.AddSystem(SignalLogger(4))
    logger._DeclarePeriodicPublish(0.033333, 0.0)
    builder.Connect(plant_system.get_output_port(0), logger.get_input_port(0))

    diagram = builder.Build()
    simulator = Simulator(diagram)
    simulator.set_publish_every_time_step(False)

    state = simulator.get_mutable_context().get_mutable_continuous_state_vector()
    state.SetFromVector(list(initial_state))

    simulator.StepTo(duration)

    # Visualize the result as a video.
    vis = PlanarRigidBodyVisualizer(tree, xlim=[-12.5, 12.5], ylim=[-1, 2.5])
    ani = vis.animate(logger, repeat=True)

    # Close the figure so the notebook shows only the video, not a static plot.
    plt.close(vis.fig)
    return HTML(ani.to_html5_video())

In [55]:
# Animate the loaded mesh policy in closed loop, using visme's default duration.
visme(policy)

In [56]:
# Count mesh points whose |u| exceeds 995, i.e. sits just under the
# torque limit of 1000 encoded in the policy name.
output_row = policy.get_output_values()[0]
count = sum(1 for val in np.abs(output_row) if val > 995)
print("{} / {}".format(count, len(output_row)))

940 / 204687


In [69]:

# SUPER OVERFITTING + L2 + parameter distance metric!
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

from nn_system.networks import FC, FCBIG, MLPSMALL, MLP, MLPBIG

# Candidate architectures that were tried:
# net = FCBIG(n_inputs=4, h_sz=8)
# net = FCBIG(n_inputs=4, h_sz=32) # <--- the one!
# net = MLP(n_inputs=4, h_sz=8)
# net = MLP(n_inputs=4, h_sz=256)#, dropout=True)
# net = MLPBIG(n_inputs=4, h_sz=128)#, dropout=True)

# Every constructor above was commented out, so this cell only ran when `net`
# was left over from an earlier execution. Build the flagged architecture when
# no network exists yet, so a fresh kernel works; an existing `net` is kept so
# re-running the cell still continues training it.
if 'net' not in globals():
    net = FCBIG(n_inputs=4, h_sz=32)

criterion = nn.MSELoss(reduction="mean")
optimizer = optim.Adam(net.parameters(), lr=1e-5)


EPOCHS = 5

# My data
# np_cost_to_go = cost_to_go.reshape(19, 29, 21, 19)
# Policy outputs viewed as a 4-D table over 19 x 27 x 21 x 19 grid points,
# indexed (x, theta, xdot, thetadot) by my_gen.
# NOTE(review): this is a C-order reshape (last axis fastest). If the mesh
# enumerates points with the FIRST dimension fastest, this should be
# reshape((19, 27, 21, 19), order='F') -- confirm against the mesh indexing.
np_policy = policy.get_output_values().reshape(19, 27, 21, 19)


def my_gen(n_batches=5000, batch_size=1000, table=None):
    """Yield random (inputs, labels) mini-batches sampled on the policy grid.

    Each sample picks one grid index per state dimension uniformly at random.
    The input row holds the grid coordinates at those indices and the label is
    the table entry at the same indices.

    Args:
        n_batches: Number of batches to yield (default 5000).
        batch_size: Samples per batch (default 1000).
        table: 4-D array indexed ``[x, theta, xdot, thetadot]``. Defaults to the
            module-level ``np_policy``.

    Yields:
        ``(inputs, labels)``: ``inputs`` is a float ``(batch_size, 4)`` array of
        ``(x, theta, xdot, thetadot)`` rows and ``labels`` is a
        ``(batch_size, 1)`` ``np.double`` array.

    Raises:
        ValueError: If ``table`` does not have one entry per grid point.
    """
    if table is None:
        table = np_policy
    table = np.asarray(table)

    # Grid axes (copied from the notebook that built the policy). They do not
    # change between batches, so build them once.
    state_steps = 19
    xbins = np.linspace(-10., 10., state_steps)
    # The three theta segments share their end points (pi-0.2 and pi+0.2).
    # Drop the repeated knots so the axis has 27 distinct values, matching the
    # 27 theta entries in the table; with the duplicates kept (29 values),
    # theta indices >= 10 were paired with the wrong angle.
    thetabins = np.hstack((
        np.linspace(0., math.pi-0.2, 10),
        np.linspace(math.pi-0.2, math.pi+0.2, 9)[1:],
        np.linspace(math.pi+0.2, 2*math.pi, 10)[1:]))
    xdotbins = np.linspace(-10., 10., state_steps+2)
    thetadotbins = np.linspace(-10., 10., state_steps)
    axes = (xbins, thetabins, xdotbins, thetadotbins)

    grid_shape = tuple(len(axis) for axis in axes)
    if table.shape != grid_shape:
        raise ValueError("table shape {} does not match grid shape {}".format(
            table.shape, grid_shape))

    for _ in range(n_batches):
        # np.random.choice already returns integer indices; the removed
        # np.int alias is not needed.
        idcs = tuple(np.random.choice(len(axis), batch_size) for axis in axes)

        # Build a real (batch_size, 4) array; a bare zip() is a lazy iterator
        # on Python 3 and cannot be converted to a tensor.
        inputs = np.column_stack([axis[i] for axis, i in zip(axes, idcs)])
        labels = table[idcs].reshape(-1, 1).astype(np.double)

        yield inputs, labels

# Supervised regression of the network onto the tabulated policy.
for epoch in range(EPOCHS):
    window_loss = 0.0  # loss accumulated since the last progress line
    for step, (batch_inputs, batch_labels) in enumerate(my_gen(), 1):
        inputs = torch.tensor(batch_inputs)
        labels = torch.tensor(batch_labels)

        # Clear stale gradients, then forward, backward and parameter update.
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Report the mean loss over each window of 100 batches.
        window_loss += loss.item()
        if step % 100 != 0:
            continue
        print('[%d, %5d] loss: %.3f' %
              (epoch + 1, step, window_loss / 100))
        window_loss = 0.0

print('Finished Training')

[1,   100] loss: 11870.567
[1,   200] loss: 11651.909
[1,   300] loss: 11972.403
[1,   400] loss: 11640.284
[1,   500] loss: 11787.006
[1,   600] loss: 11656.479
[1,   700] loss: 11818.835
[1,   800] loss: 11513.952
[1,   900] loss: 11870.312
[1,  1000] loss: 11613.183
[1,  1100] loss: 11721.718
[1,  1200] loss: 11679.020
[1,  1300] loss: 11819.714
[1,  1400] loss: 11728.623
[1,  1500] loss: 11864.275
[1,  1600] loss: 11364.189
[1,  1700] loss: 11732.679
[1,  1800] loss: 11753.188
[1,  1900] loss: 11740.373
[1,  2000] loss: 11795.855
[1,  2100] loss: 11958.060
[1,  2200] loss: 11678.248
[1,  2300] loss: 11812.900
[1,  2400] loss: 11691.146
[1,  2500] loss: 11797.719
[1,  2600] loss: 11722.551
[1,  2700] loss: 11645.054
[1,  2800] loss: 11738.671
[1,  2900] loss: 11607.100
[1,  3000] loss: 11576.055
[1,  3100] loss: 11643.737
[1,  3200] loss: 11747.558
[1,  3300] loss: 11627.999
[1,  3400] loss: 11862.260
[1,  3500] loss: 12023.488
[1,  3600] loss: 11690.085
[1,  3700] loss: 11684.138
[

In [67]:
from nn_system.NNSystem import NNSystem

# Animate the network as the controller with duration 3.
# NOTE(review): NNSystem presumably wraps `net` as a Drake system -- confirm in nn_system.
visme(NNSystem(net), 3.)