# Testing LQR, H_inf, GPC, BPC Controllers with a Lifter
The entire reason we were interested in neural nets with this property is to ensure that minimizing the norm of the lifted state corresponds to minimizing the norm of the inputs! Lifters with this property technically form an LDS with quadratic costs, lending themselves to provable control via LQR (optimal control), $H_{\infty}$ (robust control), and perhaps even GPC (best of both worlds?)!

In [1]:
# imports
import logging
logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.INFO)  # set level to INFO for wordy
import matplotlib.pyplot as plt
import tqdm

import numpy as np
import jax.numpy as jnp

from extravaganza.dynamical_systems import LDS, Gym

from extravaganza.observables import TimeDelayedObservation, FullObservation, Trajectory
from extravaganza.sysid import Lifter, LiftedController
from extravaganza.controllers import LQR, HINF, BPC, GPC, RBPC, EvanBPC, ConstantController
from extravaganza.rescalers import ADAM, D_ADAM, DoWG, FIXED_RESCALE
from extravaganza.stats import Stats
from extravaganza.utils import ylim, render, append, opnorm, dare_gain, least_squares
from extravaganza.experiments import Experiment

# Random seeds. Leaving a seed as `None` means a random seed is used.
SYSTEM_SEED = None
CONTROLLER_SEED = None
SYSID_SEED = None

# --------------------------------------------------------------------------------------
# --------------------------    SYSTEM HYPERPARAMETERS    ------------------------------
# --------------------------------------------------------------------------------------


def reset_condition(t):
    """Return True on the timesteps at which the system should be reset."""
    return t % 200000 == 0


ds = 4  # state dimension handed to the observable below
du = 1  # control dimension
initial_control = jnp.zeros(du)


def make_system():
    """Construct a new `Gym` system for the 'CartPoleContinuous-v1' environment."""
    return Gym(
        'CartPoleContinuous-v1',
        repeat=3,
        use_reward_costs=False,
        send_done=False,
        max_episode_len=600,
        seed=SYSTEM_SEED,
        render=False,
    )


# Alternative observable, kept for reference:
# observable = TimeDelayedObservation(hh=2, control_dim=du, state_dim=ds, time_embedding_dim=8,
#                                     use_states=True, use_costs=True, use_controls=True, use_time=False)
observable = FullObservation(ds)

# From here on `ds` is the dimension of the observation, not of the raw state.
ds = observable.obs_dim
ds

INFO: Created a temporary directory at /var/folders/5m/0xr906c130vdqvkm3g21n6wr0000gn/T/tmpw2nkspqq
INFO: Writing /var/folders/5m/0xr906c130vdqvkm3g21n6wr0000gn/T/tmpw2nkspqq/_remote_module_non_scriptable.py
INFO: Unable to initialize backend 'cuda': module 'jaxlib.xla_extension' has no attribute 'GpuAllocatorConfig'
INFO: Unable to initialize backend 'rocm': module 'jaxlib.xla_extension' has no attribute 'GpuAllocatorConfig'
INFO: Unable to initialize backend 'tpu': module 'jaxlib.xla_extension' has no attribute 'get_tpu_client'
INFO: Unable to initialize backend 'plugin': xla_extension has no attributes named get_plugin_device_client. Compile TensorFlow with //tensorflow/compiler/xla/python:enable_plugin_device set to true (defaults to false) to enable this.


4

In [2]:
T0 = 20000  # number of timesteps iterated over by the exploration loop

# Keyword arguments forwarded to every `Lifter` constructed below.
sysid_args = dict(
    obs_dim=ds,
    control_dim=du,

    max_traj_len=1_000_000,
    exploration_scales=0.75,
    exploration_bounds=(-1., 1.),

    depth=4,
    sigma=0,
    # NOTE(review): keyword is spelled 'determinstic' -- confirm this matches what `Lifter` expects
    determinstic_encoder=True,
    num_epochs=500,
    lifter_lr=0.001,

    seed=SYSID_SEED,
)

# Named system-identification methods to run; disabled ones are kept for reference.
sysids = dict(
#     Linear=Lifter(method='identity', state_dim=ds, **sysid_args),
#     Koopman=Lifter(method='fourier', state_dim=121, **sysid_args),
    Lifted=Lifter(method='nn', state_dim=24, **sysid_args),
)

# For each sysid method, interact with a fresh system for T0 steps using the
# sysid's own exploration controls, then finish the exploration phase.
for k, sysid in sysids.items():  # interact in order to perform sysid
    # make system and get initial control
    system = make_system()
    control = initial_control
    print(k)
    traj = Trajectory()  # replaced by the reset on the first iteration (t == 0)
    for t in tqdm.trange(T0):
        if t == 0 or reset_condition(t):
            logging.info('(EXPERIMENT): reset!')
            system.reset(None)
            sysid.end_trajectory()
            traj = Trajectory()  # start recording a new trajectory after every reset

        cost, state = system.interact(control)  # state will be `None` for unobservable systems
        traj.add_state(cost, state)
        obs = observable(traj)
        control = sysid.explore(cost, obs) + initial_control
        traj.add_control(control)

        # Stop on a NaN state or an exploding cost. An explicit exception is
        # raised (rather than `assert False`) so the check survives `python -O`
        # and the error carries the offending values.
        if (isinstance(state, jnp.ndarray) and jnp.any(jnp.isnan(state))) or (cost > 1e20):
            message = '(EXPERIMENT): state {} or cost {} diverged'.format(state, cost)
            logging.error(message)
            raise RuntimeError(message)

    sysid.end_exploration(wordy=True)

INFO: (PC3): decoder not provided, so reconstruction error will NOT be computed
INFO: (PC3): contrastive predictive coding will NOT be computed
  gym.logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


Lifted


  0%|                                                                                                                      | 0/20000 [00:00<?, ?it/s]INFO: (EXPERIMENT): reset!
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████| 20000/20000 [00:24<00:00, 802.11it/s]
INFO: (LIFTER): ending sysid phase at step 20000
INFO: training!
INFO: mean loss for epochs -25 - 0:
INFO: 		l2 linearization: 0.00013526459224522114
INFO: 		simplification: 0.00639486238360405


1
20000


INFO: mean loss for epochs 0 - 25:
INFO: 		l2 linearization: 0.0005205188132822513
INFO: 		simplification: 0.0011425607576966287

KeyboardInterrupt



In [None]:
# --------------------------------------------------------------------------------------
# ------------------------    EXPERIMENT HYPERPARAMETERS    ----------------------------
# --------------------------------------------------------------------------------------

num_trials = 1
T = 2000  # total timesteps
use_multiprocessing = False
render_every = None

# --------------------------------------------------------------------------------------
# ------------------------    CONTROLLER HYPERPARAMETERS    ----------------------------
# --------------------------------------------------------------------------------------

# Controller classes to test; disabled ones are kept for reference.
controllers_to_test = dict(
#     LQR=LQR,
#     HINF=HINF,
#     GPC=GPC,
# #     BPC=BPC,
#     RBPC=RBPC,
    EvanBPC=EvanBPC,
)

h = 10  # controller memory length (# of w's to use on inference)


# Zero-argument factories, each returning a new rescaler.
def m_update_rescaler():
    """Return a new ADAM rescaler for the M updates."""
    return ADAM(0.001, betas=(0.9, 0.999), use_bias_correction=True)


def m0_update_rescaler():
    """Return a new ADAM rescaler for the M0 updates (first argument is 0)."""
    return ADAM(0.0, betas=(0.9, 0.999), use_bias_correction=True)


def k_update_rescaler():
    """Return a new ADAM rescaler for the K updates (first argument is 0)."""
    return ADAM(0.0, betas=(0.9, 0.999), use_bias_correction=True)


# Fixed-rescale alternatives, kept for reference:
# m_update_rescaler = lambda : FIXED_RESCALE(alpha=0.001)
# k_update_rescaler = lambda : FIXED_RESCALE(alpha=0.001)

# Extra keyword arguments passed only when constructing an `EvanBPC` controller.
evan_bpc_args = dict(
    h=h,
    method='REINFORCE',
    initial_scales=(0, 0.01, 0),  # M, M0, K   (uses M0's scale for REINFORCE)
    rescalers=(m_update_rescaler, m0_update_rescaler, k_update_rescaler),
    bounds=None,
    initial_u=jnp.zeros(du),
    decay_scales=False,
    use_tanh=False,
    use_stabilizing_K=False,
)

def _lifted_controller_factory(controller_name, controller_cls, lifter):
    """Return a `sys -> LiftedController` factory bound to one controller class and one lifter.

    Going through this function gives every factory its own `controller_name`,
    `controller_cls` and `lifter`. Lambdas created directly inside the loop or
    comprehension would look these names up only when called, so with several
    controllers or sysids every entry would build the *last* combination.

    Args:
        controller_name: key of the controller in `controllers_to_test`;
            'EvanBPC' additionally receives `evan_bpc_args`.
        controller_cls: controller class, constructed with `A`, `B` and `seed`.
        lifter: sysid object providing `A` and `B`; also passed to
            `LiftedController` as `lifter`.
    """
    def make_controller(sys):
        # `sys` is accepted to match the signature of the other entries but is not used.
        extra_args = evan_bpc_args if controller_name == 'EvanBPC' else {}
        controller = controller_cls(A=lifter.A, B=lifter.B, seed=CONTROLLER_SEED, **extra_args)
        return LiftedController(controller, lifter=lifter)
    return make_controller


# Maps a display name to a factory taking the system and returning a controller.
# '0' is the baseline that always applies a zero control.
make_controllers = {'0': lambda sys: ConstantController(jnp.zeros(du), ds)}
for k, sysid in sysids.items():
    make_controllers.update({
        k + ' ' + controller_k: _lifted_controller_factory(controller_k, v, sysid)
        for controller_k, v in controllers_to_test.items()
    })
    
# Keyword arguments for the experiment call below, collected from the settings
# defined in the earlier cells.
experiment_args = dict(
    make_system=make_system,
    make_controllers=make_controllers,
    num_trials=num_trials,
    observable=observable,
    T=T,
    reset_condition=reset_condition,
    reset_seed=SYSTEM_SEED,
    use_multiprocessing=use_multiprocessing,
    render_every=render_every,
)

# Create the experiment object and call it with the arguments above.
dut_experiment = Experiment('lifting_research_dut')
dut_experiment(**experiment_args)

In [None]:
def plot(experiment: Experiment):
    """Plot the recorded stats of an experiment on a grid of subplots.

    The first three rows are shared by all methods: true states and
    disturbances, instantaneous and average costs, then state norms and nn
    losses. Below them each method gets its own axis showing '-K @ state',
    'M \\cdot w' and 'M0' (titled "u decomp").

    Args:
        experiment: object whose `stats` attribute maps method names to stats
            objects; entries that are `None` are skipped with a warning.

    Raises:
        AssertionError: if `experiment.stats` is `None`.
    """
    assert experiment.stats is not None, 'cannot plot the results of an experiment that hasnt been run'
    # imported once here instead of once per method inside the loop
    from extravaganza.sysid import LOSS_WEIGHTS

    all_stats = experiment.stats

    # clear plot and calc nrows
    plt.clf()
    n = 3  # number of rows shared by all methods
    nrows = n + (len(all_stats) + 1) // 2  # per-method axes are laid out two per row
    _, ax = plt.subplots(nrows, 2, figsize=(16, 6 * nrows))

    # plot stats
    for i, (method, stats) in enumerate(all_stats.items()):
        if stats is None:
            logging.warning('{} had no stats'.format(method))
            continue

        stats.plot(ax[0, 0], 'true states', label=method, plot_idx=2)
        stats.plot(ax[1, 0], 'costs', label=method)
        stats.plot(ax[1, 1], 'costs', label=method, plot_cummean=True)

        stats.plot(ax[2, 0], 'states', label=method, plot_norm=True)  # norm of the "state"
        for loss_name in LOSS_WEIGHTS.keys():
            stats.plot(ax[2, 1], loss_name, label=loss_name)  # various nn losses

        # axis reserved for this method; skipped methods leave theirs empty
        i_ax = ax[n + i // 2, i % 2]
        stats.plot(ax[0, 1], 'disturbances', label=method, plot_norm=True)
        stats.plot(i_ax, '-K @ state', label='-K @ state', plot_idx=0)
        # raw strings: '\c' is not a valid escape sequence in a normal string literal
        stats.plot(i_ax, r'M \cdot w', label=r'M \cdot w', plot_idx=0)
        stats.plot(i_ax, 'M0', label='M0', plot_idx=0)
        i_ax.set_title('u decomp for {}'.format(method))
        i_ax.legend()

    # set titles and legends and limits and such
    # (note: `ylim()` is so useful! because sometimes one thing blows up and then autoscale messes up all plots)
    _ax = ax[0, 0]; _ax.set_title('true states'); _ax.legend()
    _ax = ax[0, 1]; _ax.set_title('disturbances'); _ax.legend()
    _ax = ax[1, 0]; _ax.set_title('instantaneous costs'); _ax.legend()
    _ax = ax[1, 1]; _ax.set_title('avg costs'); _ax.legend(); ylim(_ax, 0, 10)
    _ax = ax[2, 0]; _ax.set_title('reconstructed states'); _ax.legend()
    _ax = ax[2, 1]; _ax.set_title('nn losses'); _ax.legend()
# plot the stats of the experiment object created and called in the previous cell
plot(dut_experiment)