In [43]:
%matplotlib inline
from __future__ import print_function, absolute_import

import math
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import HTML
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm

from pydrake.all import (
    DiagramBuilder,
    FloatingBaseType,
    RigidBodyTree,
    RigidBodyPlant,
    SignalLogger, 
    Simulator, 
    VectorSystem
)
# from pydrake.examples.pendulum import PendulumPlant
from pydrake.systems.controllers import (
    DynamicProgrammingOptions, FittedValueIteration, PeriodicBoundaryCondition)
# from traj.visualizer import PendulumVisualizer
# from pydrake.examples.acrobot import AcrobotPlant
from underactuated import (
    PlanarRigidBodyVisualizer
)

In [44]:
from pydrake.all import (BarycentricMesh, BarycentricMeshSystem)
def save_policy(name): # binds to policy, state_grid, and cost_to_go
    output_values = policy.get_output_values()
    np.save('numpy_saves/pi_b_mesh_init__cartpole_'+name, state_grid)
    np.save('numpy_saves/pi_output_values__cartpole_'+name, output_values)
    np.save('numpy_saves/ctg__cartpole_'+name, cost_to_go)
def load_policy(name):
    b_mesh_init = np.load('numpy_saves/pi_b_mesh_init__cartpole_'+name+'.npy').tolist()
    output_values = np.load('numpy_saves/pi_output_values__cartpole_'+name+'.npy')
    b_mesh = BarycentricMesh(b_mesh_init)
    ctg = np.load('numpy_saves/ctg__cartpole_'+name+'.npy')
    return BarycentricMeshSystem(b_mesh, output_values), ctg

# u_costs = [32400, 20e3, 10e3, 5e3, 2.5e3, 1e3, 500, 250, 100]
u_cost = (
#     "100."
#     "250"
#     "500"
#     "1000.0"
    "2500.0"
#     "5000.0"
#     "10000.0"
#     "20000.0"
#     "32400"
)
policy_name = "u_cost="+u_cost

# save_policy(policy_name)
policy, ctg = load_policy(policy_name)

In [None]:
def visme(pi, duration=10.):
    # Animate the resulting policy.
    builder = DiagramBuilder()
    tree = RigidBodyTree("/opt/underactuated/src/cartpole/cartpole.urdf",
                         FloatingBaseType.kFixed)
    plant = RigidBodyPlant(tree)
    plant_system = builder.AddSystem(plant)


    # TODO(russt): add wrap-around logic to barycentric mesh
    # (so the policy has it, too)
    class WrapTheta(VectorSystem):
        def __init__(self):
            VectorSystem.__init__(self, 4, 4)

        def _DoCalcVectorOutput(self, context, input, state, output):
            output[:] = input
            twoPI = 2.*math.pi
            output[1] = output[1] - twoPI * math.floor(output[1] / twoPI)


    wrap = builder.AddSystem(WrapTheta())
    builder.Connect(plant_system.get_output_port(0), wrap.get_input_port(0))
    # vi_policy = builder.AddSystem(test)
    vi_policy = builder.AddSystem(pi)
    builder.Connect(wrap.get_output_port(0), vi_policy.get_input_port(0))
    builder.Connect(vi_policy.get_output_port(0), plant_system.get_input_port(0))

    logger = builder.AddSystem(SignalLogger(4))
    logger._DeclarePeriodicPublish(0.033333, 0.0)
    builder.Connect(plant_system.get_output_port(0), logger.get_input_port(0))

    diagram = builder.Build()
    simulator = Simulator(diagram)
    simulator.set_publish_every_time_step(False)

    state = simulator.get_mutable_context().get_mutable_continuous_state_vector()
    state.SetFromVector([-1., math.pi-2, 1., -1.])
    
    simulator.StepTo(duration)
    
    # Visualize the result as a video.
    # vis = PendulumVisualizer()
    vis = PlanarRigidBodyVisualizer(tree, xlim=[-12.5, 12.5], ylim=[-1, 2.5])
    ani = vis.animate(logger, repeat=True)

    # plt.show()λ 
    # Things added to get visualizations in an ipynb
    plt.close(vis.fig)
    return HTML(ani.to_html5_video())

In [46]:
visme(policy)

In [47]:
count = 0
for val in np.abs(policy.get_output_values()[0]):
    if val > 248:
        count += 1
print("{} / {}".format(count, len(policy.get_output_values()[0])))

1116 / 204687


In [48]:

# SUPER OVERFITTING + L2 + parameter distance metric!
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

from nn_system.networks import FC, FCBIG, MLPSMALL, MLP, MLPBIG

# net = FCBIG(n_inputs=4, h_sz=8)
# net = FCBIG(n_inputs=4, h_sz=32) # <--- the one!
# net = MLP(n_inputs=4, h_sz=8)
# net = MLP(n_inputs=4, h_sz=256)#, dropout=True)
net = MLPBIG(n_inputs=4, h_sz=128)#, dropout=True)

criterion = nn.MSELoss(reduction="mean")
optimizer = optim.Adam(net.parameters(), 1e-4)


EPOCHS = 50

# My data
# np_cost_to_go = cost_to_go.reshape(19, 29, 21, 19)
np_policy = policy.get_output_values().reshape(19, 27, 21, 19)

def my_gen():
    for _ in range(1000):
        N = 1000
        # Copy pasted from the other notebook!
        state_steps = 19
        xbins = np.linspace(-10., 10., state_steps)
        thetabins = np.hstack((np.linspace(0., math.pi-0.2, 10), np.linspace(math.pi-0.2, math.pi+0.2, 9), np.linspace(math.pi+0.2, 2*math.pi, 10)))
        xdotbins = np.linspace(-10., 10., state_steps+2)
        thetadotbins = np.linspace(-10., 10., state_steps)

        xs_idcs        = np.random.choice(19, N).astype(np.int)
        thetas_idcs    = np.random.choice(27, N).astype(np.int)
        xdots_idcs     = np.random.choice(21, N).astype(np.int)
        thetadots_idcs = np.random.choice(19, N).astype(np.int)
        
        xs        = xbins[xs_idcs]
        thetas    = thetabins[thetas_idcs]
        xdots     = xdotbins[xdots_idcs]
        thetadots = thetadotbins[thetadots_idcs]

        inputs = zip(xs, thetas, xdots, thetadots)
        idcs = tuple([xs_idcs, thetas_idcs, xdots_idcs, thetadots_idcs])

#         labels = np_cost_to_go[idcs].reshape(-1, 1).astype(np.double)
        labels = np_policy[idcs].reshape(-1, 1).astype(np.double)

        yield inputs, labels

for epoch in range(EPOCHS):
    running_loss = 0.0
    for i, data in enumerate(my_gen(), 0):
        # Unpack data
        inputs, labels = data
        inputs = torch.tensor(inputs)
        labels = torch.tensor(labels)
#         print(inputs.size(), labels.size())

        # Forward pass
        outputs = net(inputs)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if (i+1) % 100 == 0:
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 100))
        running_loss = 0.0

print('Finished Training')

[1,   100] loss: 116.016
[1,   200] loss: 94.553
[1,   300] loss: 74.787
[1,   400] loss: 58.294
[1,   500] loss: 60.585
[1,   600] loss: 60.821
[1,   700] loss: 58.318
[1,   800] loss: 48.830
[1,   900] loss: 53.232
[1,  1000] loss: 48.623
[2,   100] loss: 48.926
[2,   200] loss: 54.423
[2,   300] loss: 53.882
[2,   400] loss: 51.303
[2,   500] loss: 48.187
[2,   600] loss: 43.344
[2,   700] loss: 43.027
[2,   800] loss: 40.734
[2,   900] loss: 36.977
[2,  1000] loss: 37.746
[3,   100] loss: 41.172
[3,   200] loss: 49.042
[3,   300] loss: 30.549
[3,   400] loss: 45.560
[3,   500] loss: 41.914
[3,   600] loss: 37.717
[3,   700] loss: 34.502
[3,   800] loss: 33.825
[3,   900] loss: 35.307
[3,  1000] loss: 42.899
[4,   100] loss: 35.707
[4,   200] loss: 39.451
[4,   300] loss: 41.801
[4,   400] loss: 34.808
[4,   500] loss: 43.062
[4,   600] loss: 40.284
[4,   700] loss: 37.581
[4,   800] loss: 40.942
[4,   900] loss: 29.696
[4,  1000] loss: 33.590
[5,   100] loss: 26.443
[5,   200] loss

[34,   300] loss: 20.317
[34,   400] loss: 24.448
[34,   500] loss: 32.387
[34,   600] loss: 23.134
[34,   700] loss: 23.497
[34,   800] loss: 20.214
[34,   900] loss: 23.092
[34,  1000] loss: 18.737
[35,   100] loss: 23.823
[35,   200] loss: 20.831
[35,   300] loss: 26.997
[35,   400] loss: 22.168
[35,   500] loss: 22.180
[35,   600] loss: 18.577
[35,   700] loss: 19.312
[35,   800] loss: 25.318
[35,   900] loss: 19.581
[35,  1000] loss: 35.178
[36,   100] loss: 20.172
[36,   200] loss: 30.751
[36,   300] loss: 24.225
[36,   400] loss: 28.051
[36,   500] loss: 17.123
[36,   600] loss: 24.500
[36,   700] loss: 22.948
[36,   800] loss: 36.099
[36,   900] loss: 27.192
[36,  1000] loss: 22.263
[37,   100] loss: 20.729
[37,   200] loss: 29.400
[37,   300] loss: 21.222
[37,   400] loss: 24.164
[37,   500] loss: 22.227
[37,   600] loss: 24.556
[37,   700] loss: 22.905
[37,   800] loss: 18.685
[37,   900] loss: 23.350
[37,  1000] loss: 18.877
[38,   100] loss: 24.656
[38,   200] loss: 18.974


In [None]:
from nn_system.NNSystem import NNSystem

visme(NNSystem(net))