# Continuous Time Value Iteration for Pendulum Swing Up

In [None]:
import numpy as np
from pydrake.all import (
    AddMultibodyPlantSceneGraph,
    DiagramBuilder,
    LeafSystem,
    MeshcatVisualizer,
    MultilayerPerceptron,
    Parser,
    PerceptronActivationType,
    RandomGenerator,
    RigidTransform,
    RotationMatrix,
    SceneGraph,
    Simulator,
    StartMeshcat,
    ZeroOrderHold,
)
from pydrake.examples import PendulumGeometry, PendulumPlant

from underactuated import ConfigureParser, running_as_notebook
from underactuated.optimizers import Adam
from underactuated.utils import running_as_test

In [None]:
# Start the Meshcat visualizer. Run this cell only once: every call creates a
# new instance, and each instance consumes a port.
meshcat = StartMeshcat()

## Problem Description

In this problem you will implement the continuous time, continuous action, and continuous state value iteration algorithm. This will be achieved by using a neural network to approximate the value function. We will be able to consider continuous actions due to our ability to find the optimal control input given the value function analytically.

In this problem, we will work with the simple pendulum. You will be asked to implement the algorithm described in [the textbook](https://underactuated.csail.mit.edu/dp.html#continuous)

We will implement the following steps:
1) Batch evaluation of a Quadratic Function
2) Computing the optimal input given the state dynamics and the gradient of the value function
3) Implementing the continuous time value iteration algorithm.

# Let's load up our Pendulum from Drake

In [None]:
# Instantiate Drake's pendulum plant and a default context used to evaluate it.
plant = PendulumPlant()
plant_context = plant.CreateDefaultContext()
# NOTE(review): `simulator` is rebound to the closed-loop simulator further
# down in this notebook; this instance is not used in between.
simulator = Simulator(plant)
# Index of the plant input port through which the control input is applied.
actuation_input_port_index = 0
num_states = plant.num_continuous_states()
# The pendulum is treated as having a single scalar input.
num_inputs = 1

First we'll set up our training data for the problem

In [None]:
# Build the training set. Every column of state_data is one state
# (theta, theta_dot) taken from a regular grid.
time_step = 0.01
num_samples = 50

theta_states = np.linspace(0, 2 * np.pi, num_samples)
theta_dot_states = np.linspace(-10, 10, num_samples)

# "ij" indexing: theta varies along the first grid axis, theta_dot along the
# second. Each grid array is unrolled into one row of state_data.
state_grid = np.meshgrid(theta_states, theta_dot_states, indexing="ij")
state_data = np.vstack([grid_axis.ravel() for grid_axis in state_grid])

# The zero-cost state is stored as a column and appended as the last sample.
target_state = np.array([[np.pi], [0.0]])
state_data = np.concatenate((state_data, target_state), axis=1)

num_state_data = state_data.shape[1]
cur_state = plant_context.get_mutable_continuous_state_vector()

# Uninitialized buffers. Note the layouts differ: samples index the first axis
# of state_dynamics_x but the last axis of dstate_dynamics_du.
state_dynamics_x = np.empty((num_state_data, num_states))

dstate_dynamics_du = np.empty((num_states, num_inputs, num_state_data))

Deep learning typically requires iterating through the same computation many times. As such, a lot of effort has been put into making several of the most common operations extremely efficient. Batch matrix multiplication, i.e. applying a matrix-vector multiplication to an entire array of vectors at once, is one of these operations.

The cost function we choose in this problem will be quadratic and so it is important to efficiently implement the operation $x^TQx$ for a batch of vectors $x$.

Implement the function `compute_quadratic_cost`, which should take in a data matrix of size (num_states x num_samples) and output $x^TQx$ for each column $x$ of the data matrix. We will also want to compute this same function relative to some fixed target state. Implement `compute_state_cost`, which computes $(x-\text{target\_state})^TQ(x-\text{target\_state})$ for each column $x$ of the data.

In [None]:
def compute_quadratic_cost(Q, data):
    """Evaluate the quadratic form x^T Q x for every sample (column) x of data.

    Args:
        Q: cost matrix of size (num_states x num_states).
        data: samples of size (num_states x num_samples). Input that is not
            two-dimensional is reshaped into a single column.

    Returns:
        Array of size (num_samples,) holding one cost per sample. The exercise
        placeholder below returns all zeros until it is implemented.
    """
    assert Q.shape[0] == data.shape[0]
    if data.ndim != 2:
        data = data.reshape(-1, 1)
    num_columns = data.shape[1]
    cost = np.zeros(num_columns)  # MODIFY HERE
    return cost


def compute_state_cost(Q, target_state, data):
    """Evaluate (x - target_state)^T Q (x - target_state) for every sample x.

    Args:
        Q: cost matrix of size (num_states x num_states).
        target_state: reference state of size (num_states x 1).
        data: samples of size (num_states x num_samples). Input that is not
            two-dimensional is reshaped into a single column.

    Returns:
        Array of size (num_samples,). The exercise placeholder below returns
        all zeros until it is implemented.
    """
    data = data if data.ndim == 2 else data.reshape(-1, 1)
    return np.zeros(data.shape[1])  # MODIFY HERE


# Do not modify
# Q is the quadratic state-cost matrix over (theta, theta_dot); R_diag holds
# the diagonal entries of the input-cost matrix R.
Q = np.diag([20, 2])
R_diag = np.array([2])

# Computing the optimal input

Recall that given a control affine system, and positive definite quadratic penalty on the inputs, we can compute the optimal input with respect to our value function. Again, the optimal control input is one which we must compute for all of our state data samples and so it is important that we be able to handle a full batch of state data samples.

We wish to implement the optimal control law for a control affine system given $f_2(x) = \frac{\partial \dot{x}}{\partial u}$, which for a single sample has dimensions num_states x num_inputs, and $\frac{\partial J}{\partial x}$, which for a single sample has dimensions 1 x num_states. If we prepend the batch dimension, the shapes would be N x num_states x num_inputs and N x 1 x num_states, respectively. In the code, we transpose these shapes such that the batch dimension is at the end. You can find the relevant update in [Chapter 7 of the textbook](https://underactuated.csail.mit.edu/dp.html).

We will compute the optimal control law in batches. Notice that $R$ is fixed for every sample, and for simplicity we will assume that $R$ is diagonal. To compute a batch of num_samples controls, we need num_samples $\frac{\partial J}{\partial x}$ vectors and num_samples $\frac{\partial \dot{x}}{\partial u}$ matrices. This leads to inputs `dJdX` of size (num_states x num_samples) and `dstate_dynamics_du` of size (num_states x num_inputs x num_samples).

*Hint: The numpy function [einsum](https://numpy.org/doc/stable/reference/generated/numpy.einsum.html) can come in handy here.*

In [None]:
def compute_u_star(R_diag, dJdX, dstate_dynamics_du):
    """Compute the optimal input for every sample in a batch.

    Args:
        R_diag: array of size num_inputs holding the diagonal entries of R.
        dJdX: value-function gradients, shape (num_states x num_samples).
        dstate_dynamics_du: derivatives of the state dynamics with respect to
            the input, shape (num_states x num_inputs x num_samples).

    Returns:
        u_star of shape (num_inputs x num_samples). The exercise placeholder
        below returns all zeros until it is implemented.
    """
    # Both arguments must agree on the sample count and the state dimension.
    assert dJdX.shape[1] == dstate_dynamics_du.shape[2]
    assert dJdX.shape[0] == dstate_dynamics_du.shape[0]

    num_inputs, num_samples = dstate_dynamics_du.shape[1:3]
    u_star = np.zeros((num_inputs, num_samples))  # MODIFY HERE
    return u_star


# Smoke test: this only checks that compute_u_star accepts conformable inputs.
# Once the function is implemented, running this cell should not raise an
# error. dJdX is stored in Fortran (column-major) order here, like the
# gradient buffers handed to the MLP elsewhere in this notebook.
dJdX = np.asfortranarray(np.random.randn(num_states, num_state_data))
dstate_dynamics_du = np.random.randn(num_states, num_inputs, num_state_data)
u_star = compute_u_star(R_diag, dJdX, dstate_dynamics_du)

# Set up a Multilayer perceptron for the value estimate
Drake has an implementation of the multilayer perceptron (MLP, a.k.a. fully connected neural network). Here we set up an MLP with two inputs, 2 hidden layers with ReLU activation, and one output. We also set up an optimizer for changing the weights of our neural network.

In [None]:
value_mlp = MultilayerPerceptron(
    # One flag per input: whether to send input i to cos(x_i), sin(x_i).
    # Only the first input (theta) is expanded this way.
    [True, False],
    # Sizes of the layers after the input: two layers of 128 units followed by
    # a single output unit.
    [128, 128, 1],
    # One activation per layer listed above; the output layer is linear.
    [
        PerceptronActivationType.kReLU,
        PerceptronActivationType.kReLU,
        PerceptronActivationType.kIdentity,
    ],
)
# MLP is a drake system and therefore has state (the current weights). We initialize this state randomly.
# The generator is seeded with a fixed value (152).
value_mlp_context = value_mlp.CreateDefaultContext()
generator = RandomGenerator(152)
value_mlp.SetRandomContext(value_mlp_context, generator)

We'll print out the shapes of our layers. Make sure you understand why these layers have these shapes. 

In [None]:
# Print the weight-matrix shape of each of the three layers, in order.
for layer_index in range(3):
    print(value_mlp.GetWeights(value_mlp_context, layer_index).shape)

# Evaluating a Drake MLP
The Drake MLP class evaluates the outputs of its MLP neural networks in place. We therefore have to pre-initialize the arrays in which we want the evaluation to take place. If we want to obtain the derivatives of the outputs with respect to the inputs (assuming the network has a single output), we also need to pre-initialize these.

Again, the neural network is capable of efficiently evaluating entire batches of data. This is much faster than repeatedly calling the evaluate function of the network for each input in a loop.

The outputs of the network will be (num_output_units x batch_size). When there is only one output unit as is the case for the value-function approximator, we obtain a $\frac{\partial J}{\partial x}$ matrix of size (num_input_units x batch size). 

Similarly, input data is passed as (num_input_units x batch_size).

For those of you familiar with PyTorch, we remark that this convention is flipped with respect to the convention there.

In [None]:
# Batch size used for this demonstration.
M = 30
# Pre-allocated buffers that BatchOutput fills in place: J receives the network
# outputs (1 x M) and dJdX the gradients with respect to the inputs
# (num_states x M), stored in Fortran (column-major) order.
J = np.zeros((1, M))
dJdX = np.asfortranarray(np.zeros((num_states, M)))
# Evaluate the network on M columns of state_data whose indices are drawn at
# random (with replacement) from 0..29.
value_mlp.BatchOutput(
    value_mlp_context, state_data[:, np.random.randint(0, 30, M)], J, dJdX
)

# Continuous Fitted Value Iteration (CFVI)
Here we will implement the fitted value iteration algorithm. We provide skeleton code with the steps for implementing Value Iteration using a learned function approximator. The function proceeds in several steps:
1) First, we sample a large, random dataset of states.
2) Due to the control affine nature of the dynamics, we can precompute the state dynamics and $f_2(x) = \frac{\partial \dot{x}}{\partial u}$ for each state in our dataset. These quantities will enable us to very rapidly compute the state transitions as our control inputs change over the course of CFVI.
3) We will then iterate over our data in small batches to train our value function approximator. At every step we will:
    1. Compute our optimal input given the current value function approximator
    2. Compute the next state based on this optimal input 
    3. Compute our target network
    4. Update our value function approximator using the target network


Before you begin, answer the following questions to make sure you understand how the algorithm works. The majority of the CFVI code will be calculating the target network values in terms of other quantities. The target network values for the undiscounted value iteration formulation are given in [Equation 9 of Chapter 7](https://underactuated.mit.edu/dp.html) of the textbook. Note that in this problem we are doing discounted value iteration. The subscript $d$ denotes the discretized version of the continuous-time (subscript $c$) dynamics. The answers will not be graded; use them to help you code:
1. We consider dynamical systems with continuous-time control affine dynamics: $\dot{x}
   = f_c(x(t), u(t)) = f_1(x(t)) + f_2(x(t)) u(t)$. In the below code,
   `state_dynamics_x` is $f_1$ and  `dstate_dynamics_du` is $f_2$. Although we are using
   a continuous time dynamics $f_c$, value iteration requires a dynamics function of the
   form $x[n+1] = f_d(x[n], u[n])$. Using the `time_step` variable (which we denote $h$
   in equations), write down the expression for $f_d$ in terms of $f_c$, based on a
   simple "forward Euler" integration $x[n+1] = x[n] + h f_c(x[n], u[n])$.
2. Similarly, write the discretized cost $l_d[x[n], u^{\ast}[n]]$ in terms of $l_c$ and $u^{\ast}(t)$ using `time_step`. 
3. Write down your new expression for the target network values `Jd` based on Equation 9. Write down the expression using $l_d$, $f_d$, $u^{\ast}[n]$.

In [None]:
def ContinuousFittedValueIteration(
    plant,
    plant_context,
    value_mlp,
    state_cost_function,
    compute_u_star,
    R_diag,
    state_samples,
    time_step=0.01,
    discount_factor=1.0,
    input_port_index=0,
    lr=0.001,
    minibatch=None,
    epochs=1000,
    optim_steps_per_epoch=25,
    input_limits=None,
    target_state=None,
):
    """Fit a value-function MLP with continuous fitted value iteration.

    This is exercise skeleton code: the lines marked ``MODIFY HERE`` are
    placeholders (currently zeros) for the optimal input, the next state, the
    running cost and the regression target.

    Args:
        plant: Drake system whose continuous-time dynamics are sampled.
        plant_context: context of ``plant``; must have only continuous state.
            Its state and the selected input port are overwritten while the
            dynamics are precomputed.
        value_mlp: MLP whose input size equals the number of plant states and
            whose last layer has a single unit.
        state_cost_function: callable mapping ``state_samples`` to the
            per-sample state cost.
        compute_u_star: callable used to compute the optimal input (intended
            for the ``MODIFY HERE`` step; not called by the skeleton as given).
        R_diag: array of shape (num_inputs,), the diagonal of R.
        state_samples: array of shape (num_states x N).
        time_step: positive integration step.
        discount_factor: value in (0, 1].
        input_port_index: index of the plant input port carrying the control.
        lr: learning rate passed to Adam.
        minibatch: number of randomly drawn samples per epoch; when falsy the
            whole dataset is used every epoch.
        epochs: number of outer iterations (2 when running as a test).
        optim_steps_per_epoch: optimizer steps per epoch (2 when running as a
            test).
        input_limits: optional (low, high) pair used to clip the input; only
            supported for a scalar input.
        target_state: optional state; when given together with ``minibatch``,
            index -1 of ``state_samples`` is added to every batch.

    Returns:
        The MLP context created inside this function, holding the trained
        parameters.

    Side effects:
        Seeds ``np.random`` with 123 and prints the loss after every epoch.
    """
    input_port = plant.get_input_port(input_port_index)
    num_states = plant.num_continuous_states()
    num_inputs = input_port.size()
    if target_state is not None:
        # NOTE(review): np.append returns a new array and the result is
        # discarded here, so state_samples is left unchanged. The "-1" batch
        # index used below therefore refers to the last column the caller
        # passed in, not to target_state. Confirm the intended behavior.
        np.append(state_samples, target_state)

    N = state_samples.shape[1]

    # perform some checks to make sure the inputs to the function make sense
    assert plant_context.has_only_continuous_state()
    assert value_mlp.get_input_port().size() == num_states
    assert value_mlp.layers()[-1] == 1
    assert R_diag.shape == (num_inputs,)
    assert state_samples.shape[0] == num_states
    assert time_step > 0.0
    assert discount_factor > 0.0 and discount_factor <= 1.0
    if input_limits is not None:
        assert (
            num_inputs == 1
        ), "Input limits are only supported for scalar inputs (for now)"
        assert len(input_limits) == 2

    # Seed the c++ and python random number generators.
    generator = RandomGenerator(123)
    np.random.seed(123)

    # random initialization of our Neural Network weights
    mlp_context = value_mlp.CreateDefaultContext()
    value_mlp.SetRandomContext(mlp_context, generator)

    state_cost = state_cost_function(state_samples)
    # Layouts differ: samples index the FIRST axis of state_dynamics_x (N x
    # num_states) but the LAST axis of dstate_dynamics_du (num_states x
    # num_inputs x N).
    state_dynamics_x = np.empty((N, num_states))
    dstate_dynamics_du = np.empty((num_states, num_inputs, N))
    Rinv = 1 / R_diag
    state = plant_context.get_mutable_continuous_state_vector()

    # Precompute dynamics of zero-order hold and cost.
    # For each sample: the time derivative with zero input is stored as row i
    # of state_dynamics_x; then each input is set to 1 in turn and the change
    # in the time derivative is stored as column j of dstate_dynamics_du.
    for i in range(N):
        u = np.zeros(num_inputs)
        input_port.FixValue(plant_context, u)
        state.SetFromVector(state_samples[:, i])
        state_dynamics_x[i] = plant.EvalTimeDerivatives(plant_context).CopyToVector()
        for j in range(num_inputs):
            u[j] = 1
            input_port.FixValue(plant_context, u)
            dstate_dynamics_du[:, j, i] = (
                plant.EvalTimeDerivatives(plant_context).CopyToVector()
                - state_dynamics_x[i]
            )
            u[j] = 0

    optimizer = Adam(value_mlp.GetMutableParameters(mlp_context), lr=lr)

    # M is the number of samples processed per epoch: the minibatch size (plus
    # one extra slot when a target state is given), or the full dataset.
    if minibatch and target_state is not None:
        M = minibatch + 1
    elif minibatch:
        M = minibatch
    else:
        M = N

    # Buffers that the MLP evaluation and backpropagation calls fill in place.
    J = np.zeros((1, M))
    Jnext = np.zeros((1, M))
    Jd = np.zeros((1, M))
    dJdX = np.asfortranarray(np.zeros((num_states, M)))
    dloss_dparams = np.zeros(value_mlp.num_parameters())

    last_loss = np.inf
    for epoch in range(2 if running_as_test else epochs):
        if minibatch:
            # Sample indices are drawn with replacement.
            batch = np.random.randint(0, N, minibatch)
            # always include the target state in the batch
            # (index -1 selects the last column of state_samples)
            if target_state is not None:
                batch = np.append(batch, -1)
        else:
            batch = range(N)

        # Compute dJdX
        value_mlp.BatchOutput(mlp_context, state_samples[:, batch], J, dJdX)

        # compute the next input
        u_star = np.zeros(
            (
                dstate_dynamics_du[:, :, batch].shape[1],
                dstate_dynamics_du[:, :, batch].shape[2],
            )
        )  # MODIFY HERE

        # clamp to input limits
        if input_limits is not None:
            u_star = np.clip(u_star, input_limits[0], input_limits[1])

        # compute Xnext
        Xnext = np.zeros_like(state_samples[:, batch])  # MODIFY HERE

        # compute cost
        Cost = np.zeros(len(batch))  # MODIFY HERE

        # Evaluate the current value estimate at the next states (no gradient
        # buffer is passed here).
        value_mlp.BatchOutput(mlp_context, Xnext, Jnext)

        # Create the target network
        Jd[:] = np.zeros(len(batch))  # MODIFY HERE

        for i in range(2 if running_as_test else optim_steps_per_epoch):
            # low pass filter target network
            # NOTE(review): this condition is truthy whenever (i + 1) is NOT a
            # multiple of 50, so the filter runs on most steps; confirm that
            # this (rather than "== 0") is intended.
            if (i + 1) % 50:
                alpha = 5e-4
                Jd[:] = (1 - alpha) * Jd[:] + alpha * Jnext[:]

            # This does back prop
            loss = value_mlp.BackpropagationMeanSquaredError(
                mlp_context, state_samples[:, batch], Jd, dloss_dparams
            )
            optimizer.step(loss, dloss_dparams)
        # Early stopping is only applied in full-batch mode, once the loss
        # changes by less than 1e-8 between consecutive epochs.
        if not minibatch and np.linalg.norm(last_loss - loss) < 1e-8:
            break
        last_loss = loss
        print(f"epoch {epoch}: loss = {loss}")

    return mlp_context

# Let's train our network!

In [None]:
from functools import partial

# Bind Q and the target state so the trainer can call the cost with the
# samples as its only argument.
state_cost_function = partial(compute_state_cost, Q, target_state)
# (low, high) bounds used to clip the control input.
input_limits = [-2, 2]

# Train the pendulum value function with continuous fitted value iteration.
# target_state=None: no extra "-1" index is added to the minibatches.
value_mlp_context = ContinuousFittedValueIteration(
    plant,
    plant_context,
    value_mlp,
    state_cost_function,
    compute_u_star,
    R_diag,
    state_data,
    time_step=time_step,
    discount_factor=0.999,
    input_port_index=0,
    lr=1e-4,
    minibatch=64,
    epochs=300,
    optim_steps_per_epoch=100,
    input_limits=input_limits,
    target_state=None,
)

## Let's now build our controller

In [None]:
# We build a Drake system to wire our controller to the Drake simulator


class ContinuousFittedValueIterationPolicyComputeUStar(LeafSystem):
    """Drake system that turns a learned value function into a feedback policy.

    The system has one vector input port ("plant_state") and one vector output
    port ("output") sized like the selected plant input port. On every output
    evaluation it computes the value-function gradient at the current state,
    the input derivative of the plant dynamics, and passes both to
    ``compute_u_star``; the result is optionally clipped to ``input_limits``.
    """

    def __init__(
        self,
        plant,
        value_mlp,
        value_mlp_context,
        R_diag,
        compute_u_star,
        input_port_index=0,
        input_limits=None,
    ):
        """Store the plant, the value network and the cost data.

        Args:
            plant: Drake system whose dynamics are queried; a private default
                context is created for it.
            value_mlp: MLP approximating the value function; its input size
                defines the size of the "plant_state" input port.
            value_mlp_context: context holding the MLP parameters to use.
            R_diag: diagonal entries of the input-cost matrix R.
            compute_u_star: callable ``(R_diag, dJdX, dstate_dynamics_du)``
                returning an array of shape (num_inputs x num_samples).
            input_port_index: index of the plant input port that is driven.
            input_limits: optional (low, high) pair used to clip the output.
        """
        LeafSystem.__init__(self)

        self.num_plant_states = value_mlp.get_input_port().size()
        self._plant = plant
        self._plant_context = plant.CreateDefaultContext()

        self.value_mlp = value_mlp
        self.value_mlp_context = value_mlp_context
        # Single-sample buffers that BatchOutput fills in place.
        self.J = np.zeros((1, 1))
        self.dJdX = np.asfortranarray(np.zeros((self.num_plant_states, 1)))

        self.compute_u_star = compute_u_star

        self.Rinv = 1 / R_diag
        self.R_diag = R_diag
        self.input_limits = input_limits
        self.DeclareVectorInputPort("plant_state", self.num_plant_states)
        self._plant_input_port = self._plant.get_input_port(input_port_index)
        self.DeclareVectorOutputPort(
            "output", self._plant_input_port.size(), self.CalcOutput
        )

    def CalcOutput(self, context, output):
        """Write the control input for the current plant state into ``output``."""
        num_inputs = self._plant_input_port.size()
        u = np.zeros(num_inputs)
        plant_state = self.get_input_port().Eval(context)

        # Evaluate the value network and its gradient on a one-column batch.
        self.value_mlp.BatchOutput(
            self.value_mlp_context,
            np.atleast_2d(plant_state).T,
            self.J,
            self.dJdX,
        )

        # Time derivative of the plant state with zero input.
        self._plant_context.SetContinuousState(plant_state)
        self._plant_input_port.FixValue(self._plant_context, u)
        state_dynamics_x = self._plant.EvalTimeDerivatives(
            self._plant_context
        ).CopyToVector()

        # Layout is (num_states x num_inputs x num_samples) with one sample.
        # Column i is the change in the time derivative when input i goes from
        # 0 to 1. The input index belongs on the middle axis; indexing the
        # size-1 sample axis with it would go out of bounds for more than one
        # input.
        dstate_dynamics_du = np.empty((self.num_plant_states, num_inputs, 1))
        for i in range(num_inputs):
            u[i] = 1
            self._plant_input_port.FixValue(self._plant_context, u)
            dstate_dynamics_du[:, i, 0] = (
                self._plant.EvalTimeDerivatives(self._plant_context).CopyToVector()
                - state_dynamics_x
            )
            u[i] = 0

        # Keep the only sample column of the (num_inputs x 1) result.
        u_star = self.compute_u_star(self.R_diag, self.dJdX, dstate_dynamics_du)[:, 0]
        if self.input_limits is not None:
            u_star = np.clip(u_star, self.input_limits[0], self.input_limits[1])
        for i in range(num_inputs):
            output.SetAtIndex(i, u_star[i])

In [None]:
# We build up a system diagram to run the closed loop simulation of our
# pendulum swingup controller initialize controller and plant
closed_loop_builder = DiagramBuilder()
# Add a fresh pendulum plant and a scene graph to the diagram.
plant_cl, scene_graph_cl = closed_loop_builder.AddSystem(
    PendulumPlant()
), closed_loop_builder.AddSystem(SceneGraph())

# The policy reuses the trained value network, its context and the same input
# limits that were used during training.
controller_sys = ContinuousFittedValueIterationPolicyComputeUStar(
    plant_cl,
    value_mlp,
    value_mlp_context,
    R_diag,
    compute_u_star,
    input_limits=input_limits,
)

# Attach the pendulum geometry, driven by the plant state, to the scene graph.
PendulumGeometry.AddToBuilder(
    closed_loop_builder, plant_cl.get_state_output_port(), scene_graph_cl
)

controller = closed_loop_builder.AddSystem(controller_sys)
# we assume a zero-order hold between time steps
zoh = closed_loop_builder.AddSystem(ZeroOrderHold(time_step, 1))

# wire all the systems together
# plant state -> controller -> zero-order hold -> plant input
closed_loop_builder.Connect(plant_cl.get_output_port(), controller.get_input_port())
closed_loop_builder.Connect(controller.get_output_port(), zoh.get_input_port())
closed_loop_builder.Connect(zoh.get_output_port(), plant_cl.get_input_port())

# Clear anything previously drawn in Meshcat and switch to a 2D view.
meshcat.Delete()
meshcat.Set2dRenderMode(
    X_WC=RigidTransform(RotationMatrix.MakeZRotation(np.pi), [0, 1, 0])
)
vis = MeshcatVisualizer.AddToBuilder(closed_loop_builder, scene_graph_cl, meshcat)

diagram_closed_loop = closed_loop_builder.Build()

# NOTE: this rebinds the `simulator` name defined earlier in the notebook.
simulator = Simulator(diagram_closed_loop)
simulator_context = simulator.get_mutable_context()

## Pendulum swingup simulation
Now watch to see whether your fitted value iteration achieved the task of swinging up the pendulum

In [None]:
# Replay the closed-loop system from random initial angles with zero initial
# velocity. When running as a test, a single shortened rollout is used.
simulator.set_target_realtime_rate(1.0 if running_as_notebook else 0)
if running_as_test:
    num_sim, duration = 1, 0.1
else:
    num_sim, duration = 5, 5.0
for i in range(num_sim):
    random_theta = 2 * np.pi * np.random.rand()
    simulator_context.SetTime(0.0)
    simulator_context.SetContinuousState(np.array([random_theta, 0]))
    simulator.Initialize()
    simulator.AdvanceTo(duration)

In [None]:
# Grade the exercise: the test case receives this notebook's variables via
# locals(), results are written to results.json and then printed.
from underactuated.exercises.dp.test_pendulum_cvi import TestPendulumCVI
from underactuated.exercises.grader import Grader

Grader.grade_output([TestPendulumCVI], [locals()], "results.json")
Grader.print_test_results("results.json")

## Here you can try to tune parameters to get the Cartpole to swing up.

Note that the notebook is not graded after this point. This is here so you can have guidance to play with another system

## Let's build up the Cartpole system

In [None]:
# Load the cartpole: build a MultibodyPlant (time_step=0.0) from the URDF
# model, finalize it and create a default context for evaluating its dynamics.
builder = DiagramBuilder()
cart_plant, cart_scene_graph = AddMultibodyPlantSceneGraph(builder, time_step=0.0)
parser = Parser(cart_plant)
ConfigureParser(parser)
parser.AddModelsFromUrl("package://underactuated/models/cartpole.urdf")
cart_plant.Finalize()
cart_plant_context = cart_plant.CreateDefaultContext()

cart_diagram = builder.Build()

# NOTE: from here on num_states / num_inputs describe the cartpole, replacing
# the pendulum values defined earlier in the notebook.
num_states = cart_plant.num_continuous_states()

cart_actuation_port_index = cart_plant.get_actuation_input_port().get_index()
# The number of inputs is the size of the actuation port, not the port object.
num_inputs = cart_plant.get_input_port(cart_actuation_port_index).size()

## Cartpole data set up
Notice that the cartpole has 4 states and so it is much trickier to sample a representative data set. Here we have given you some starter code for making a grid of each dimension.

In [None]:
# Build the cartpole training set. Every column of state_data_cart is one
# state (x, theta, x_dot, theta_dot) taken from a regular grid that is coarse
# (num_samples points) in every dimension except theta (50 points).
num_samples = 3
x_states_cart = np.linspace(-2, 2, num_samples)
theta_states_cart = np.linspace(0, 2 * np.pi, 50)
x_dot_states_cart = np.linspace(-10, 10, num_samples)
theta_dot_states_cart = np.linspace(-10, 10, num_samples)
state_grid_cart = np.meshgrid(
    x_states_cart,
    theta_states_cart,
    x_dot_states_cart,
    theta_dot_states_cart,
    indexing="ij",
)
# Unroll each grid array into one row of the (4 x num_points) data matrix.
state_data_cart = np.vstack([grid_axis.ravel() for grid_axis in state_grid_cart])

# The zero-cost state, stored as a column vector.
cart_target_state = np.array([[0], [np.pi], [0], [0]])

# Quadratic state-cost matrix and the diagonal of the input-cost matrix.
Q_cart = np.diag([0.1, 20, 1, 1])
R_cart = np.array([2])

In [None]:
# A neural network for the cartpole
cart_value_mlp = MultilayerPerceptron(
    # One flag per input (x, theta, x_dot, theta_dot): only theta is sent
    # through cos/sin.
    [False, True, False, False],
    # Two layers of 128 units followed by a single output unit.
    [128, 128, 1],
    # One activation per layer listed above; the output layer is linear.
    [
        PerceptronActivationType.kReLU,
        PerceptronActivationType.kReLU,
        #  PerceptronActivationType.kReLU,
        PerceptronActivationType.kIdentity,
    ],
)

In [None]:
# Bind the cartpole cost matrix and target so the trainer can call the cost
# with the samples as its only argument.
state_cost_function_cart = partial(compute_state_cost, Q_cart, cart_target_state)
# time to train the neural network
# The target passed here must be the 4-state cartpole target, not the 2-state
# pendulum `target_state` defined earlier in the notebook.
cart_value_mlp_context = ContinuousFittedValueIteration(
    cart_plant,
    cart_plant_context,
    cart_value_mlp,
    state_cost_function_cart,
    compute_u_star,
    R_cart,
    state_data_cart,
    time_step=0.01,
    discount_factor=0.9999,
    input_port_index=cart_actuation_port_index,
    lr=1e-4,
    minibatch=64,
    epochs=2,
    optim_steps_per_epoch=100,
    input_limits=None,
    target_state=cart_target_state,
)

## Cartpole Swingup
Now look and see whether your fitted value iteration actually manages to implement the cartpole swingup

In [None]:
# initialize controller and plant
closed_loop_builder_cart = DiagramBuilder()

cart_plant_cl, cart_scene_graph_cl = AddMultibodyPlantSceneGraph(
    closed_loop_builder_cart, time_step=0.0
)

parser = Parser(cart_plant_cl)
ConfigureParser(parser)
parser.AddModelsFromUrl("package://underactuated/models/cartpole.urdf")
cart_plant_cl.Finalize()
cart_plant_context_cl = cart_plant_cl.CreateDefaultContext()
# The policy must use the same input penalty the cartpole value function was
# trained with (R_cart), not the pendulum's R_diag.
cart_controller_sys = ContinuousFittedValueIterationPolicyComputeUStar(
    cart_plant_cl,
    cart_value_mlp,
    cart_value_mlp_context,
    R_cart,
    compute_u_star,
    input_port_index=cart_actuation_port_index,
)


cart_controller = closed_loop_builder_cart.AddSystem(cart_controller_sys)
# we assume a zero-order hold between time steps
zoh_cart = closed_loop_builder_cart.AddSystem(ZeroOrderHold(time_step, 1))

# wire all the systems together
# plant state -> controller -> zero-order hold -> plant actuation input
closed_loop_builder_cart.Connect(
    cart_plant_cl.get_state_output_port(), cart_controller.get_input_port()
)
closed_loop_builder_cart.Connect(
    cart_controller.get_output_port(), zoh_cart.get_input_port()
)
closed_loop_builder_cart.Connect(
    zoh_cart.get_output_port(),
    cart_plant_cl.get_input_port(cart_actuation_port_index),
)

# Clear anything previously drawn in Meshcat and set the 2D view bounds.
meshcat.Delete()
meshcat.Set2dRenderMode(xmin=-2.5, xmax=2.5, ymin=-1.0, ymax=2.5)
vis = MeshcatVisualizer.AddToBuilder(
    closed_loop_builder_cart, cart_scene_graph_cl, meshcat
)

cart_diagram_closed_loop = closed_loop_builder_cart.Build()

cart_simulator = Simulator(cart_diagram_closed_loop)
cart_simulator_context = cart_simulator.get_mutable_context()

In [None]:
# Run one closed-loop rollout of the cartpole from the all-zero state. The
# rollout is shortened when running as a test.
duration = 0.1 if running_as_test else 10.0
cart_simulator.set_target_realtime_rate(1.0 if running_as_notebook else 0)
for i in range(1):
    cart_simulator_context.SetTime(0.0)
    cart_simulator_context.SetContinuousState([0] * 4)
    cart_simulator.Initialize()
    cart_simulator.AdvanceTo(duration)