# PPO

In [1]:
import logging
logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.INFO)  # INFO level shows the verbose "(EXPERIMENT)" progress messages
import matplotlib.pyplot as plt
from IPython.display import HTML

import numpy as np
import jax.numpy as jnp

from extravaganza.dynamical_systems import PPOGym

from extravaganza.lifters import NoLift, RandomLift, LearnedLift
from extravaganza.sysid import SysID
from extravaganza.controllers import LiftedBPC, ConstantController
from extravaganza.rescalers import ADAM, D_ADAM, DoWG
from extravaganza.utils import ylim, render
from extravaganza.experiments import Experiment

# Seeds for randomness. Setting a seed to `None` uses a random seed;
# set an integer instead to make that component repeatable.
# SYSTEM_SEED is passed to the system as 'seed' and to the experiment as 'reset_seed'.
SYSTEM_SEED = None
# CONTROLLER_SEED is passed as 'seed' in the LiftedBPC controller arguments.
CONTROLLER_SEED = None
# LIFTER_AND_SYSID_SEED is passed as 'seed' in the learned-lift arguments
# (and to the lifter/sysid constructors in the commented-out controllers).
LIFTER_AND_SYSID_SEED = None

## System
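Here, we tune the hyperparameters of a PPO agent learning on an OpenAI Gym environment. PPO maximizes the clipped surrogate objective
$$L^{CLIP}(\theta) = \mathbb{E}_t \left[ \min\left(r(\theta, \theta_{\text{old}}, s_t, a_t) \cdot \hat{A}_t, \text{clip}\left(r(\theta, \theta_{\text{old}}, s_t, a_t), 1 - \epsilon, 1 + \epsilon\right) \cdot \hat{A}_t\right)\right],
$$
where $r(\theta, \theta_{\text{old}}, s_t, a_t) = \pi_\theta(a_t \mid s_t) / \pi_{\theta_{\text{old}}}(a_t \mid s_t)$ is the probability ratio between the new and old policies and $\hat{A}_t$ is the advantage estimate. We can therefore tune the clip parameter $\epsilon$ in addition to the learning rate $\eta$ and other hyperparameters.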

We train on Gym environments such as `CartPole-v1`.

## Hyperparameters

In [2]:
# Experiment identifier, and the pickle path under ../logs derived from it.
name = 'ppo_????'
filename = f'../logs/{name}.pkl'

def get_experiment_args():
    """Assemble the configuration dictionary for one PPO-tuning experiment.

    All tunable values (experiment length, PPO system settings, lift/sysid
    settings, controller settings) are defined locally so that the whole
    configuration can be rebuilt by calling this function again. In this
    notebook the function object itself is handed to the `Experiment` instance.

    Returns:
        dict: with keys 'make_system', 'make_controllers', 'num_trials', 'T',
        'reset_condition', 'reset_seed', 'use_multiprocessing' and
        'render_every'. 'make_system' is a zero-argument factory and
        'make_controllers' maps a display label to a one-argument factory
        (the argument is the system).
    """
    # --------------------------------------------------------------------------------------
    # ------------------------    EXPERIMENT HYPERPARAMETERS    ----------------------------
    # --------------------------------------------------------------------------------------

    num_trials = 1
    T = 3000  # total timesteps
    # number of timesteps to just sysid for our methods.
    # NOTE(review): T0 exceeds T, so a LiftedBPC controller (if re-enabled below)
    # would presumably never leave its sysid phase -- confirm this is intended.
    T0 = 500000
    reset_condition = lambda t: False  # never reset the system
    use_multiprocessing = False
    render_every = None

    # Alternative (longer) configuration, kept for reference:
    # num_trials = 5
    # T = 12000  # total timesteps
    # T0 = 8000  # number of timesteps to just sysid for our methods
    # reset_condition = lambda t: t <= 10000 and t % 1000 == 0  # when to reset the system
    # use_multiprocessing = False
    # render_every = None

    # --------------------------------------------------------------------------------------
    # --------------------------    SYSTEM HYPERPARAMETERS    ------------------------------
    # --------------------------------------------------------------------------------------

    du = 1  # control dim
    initial_eps_clip = 0.1  # initial value for eps
    base = 1.5  # base of the exponential map from control to eps_clip

    def apply_control(control, system):
        """Write the controller's scalar output into PPO's clip parameter.

        The control is interpreted on a log scale:
            eps_clip = initial_eps_clip * base ** clip(control, -7, 7)
        so a control of 0 leaves eps_clip at its initial value.
        """
        exponent = np.clip(control.item(), -7, 7)  # bound the exponent to keep eps_clip in a sane range
        system.ppo.eps_clip = initial_eps_clip * base ** exponent

    env_name = 'CartPole-v1'
    # env_name = 'MountainCarContinuous-v0'
    # NOTE(review): this flag is not forwarded to PPOGym below -- confirm whether
    # the system infers the action-space type from the environment itself.
    continuous_action_space = False

    system_args = {
        'apply_control': apply_control,
        'control_dim': du,
        'env_name': env_name,
        'lr_actor': 0.0003,
        'lr_critic': 0.001,
        'eps_clip': initial_eps_clip,
        'gamma': 0.98,
        'max_episode_len': 400,
        'repeat': 10,
        'gym_repeat': 1,
        'seed': SYSTEM_SEED,
    }
    make_system = lambda : PPOGym(**system_args)

    # --------------------------------------------------------------------------------------
    # ------------------------    LIFT/SYSID HYPERPARAMETERS    ----------------------------
    # --------------------------------------------------------------------------------------

    # These are only consumed by the (currently commented-out) LiftedBPC controllers below.
    sysid_method = 'regression'
    sysid_scale = 0.05

    learned_lift_args = {
        'lift_lr': 0.004,
        'sysid_lr': 0.004,
        'cost_lr': 0.001,
        'depth': 4,
        'buffer_maxlen': int(1e6),
        'num_epochs': 100,
        'batch_size': 64,
        'seed': LIFTER_AND_SYSID_SEED
    }

    # --------------------------------------------------------------------------------------
    # ------------------------    CONTROLLER HYPERPARAMETERS    ----------------------------
    # --------------------------------------------------------------------------------------

    h = 10  # controller memory length (# of w's to use on inference)
    hh = 10  # history length of the cost/control histories
    lift_dim = 20  # dimension to lift to

    # Factories (not instances) so that each controller gets fresh rescaler state.
    m_update_rescaler = lambda : ADAM(0.002, betas=(0.9, 0.999))
    m0_update_rescaler = lambda : ADAM(0.00, betas=(0.9, 0.999))
    k_update_rescaler = lambda : ADAM(0.002, betas=(0.9, 0.999))

    lifted_bpc_args = {
        'h': h,
        'method': 'REINFORCE',
        'initial_scales': (0.01, 0.01, 0.01),  # M, M0, K   (uses M0's scale for REINFORCE)
        'rescalers': (m_update_rescaler, m0_update_rescaler, k_update_rescaler),
        'T0': T0,
    #     'bounds': (-5, 5),
        'initial_u': jnp.zeros(du),
        'decay_scales': False,
        'use_tanh': False,
        'use_K_from_sysid': False,
        'seed': CONTROLLER_SEED
    }

    make_controllers = {
        # Labels are the resulting eps_clip values. The constants are w.r.t. an
        # initial value of 0.1 with base of 1.5, i.e.
        #     control = log(eps / initial_eps_clip) / log(base)
        # and must be recomputed if `initial_eps_clip` or `base` changes.
#         '0.01': lambda sys: ConstantController(-5.67887358, du),
        '0.1': lambda sys: ConstantController(0, du),
        '0.2': lambda sys: ConstantController(1.71, du),
        '0.9': lambda sys: ConstantController(5.419, du),

    #     'No Lift': lambda sys: LiftedBPC(lifter=NoLift(hh, du, LIFTER_AND_SYSID_SEED), sysid=SysID(sysid_method, du, hh, sysid_scale, LIFTER_AND_SYSID_SEED), **lifted_bpc_args),
    #     'Random Lift': lambda sys: LiftedBPC(lifter=RandomLift(hh, du, lift_dim, learned_lift_args['depth'], LIFTER_AND_SYSID_SEED), sysid=SysID(sysid_method, du, lift_dim, sysid_scale, LIFTER_AND_SYSID_SEED), **lifted_bpc_args),
    #     'Learned Lift': lambda sys: LiftedBPC(lifter=LearnedLift(hh, du, lift_dim, scale=sysid_scale, **learned_lift_args), **lifted_bpc_args)
    }
    experiment_args = {
        'make_system': make_system,
        'make_controllers': make_controllers,
        'num_trials': num_trials,
        'T': T,
        'reset_condition': reset_condition,
        'reset_seed': SYSTEM_SEED,
        'use_multiprocessing': use_multiprocessing,
        'render_every': render_every,
    }
    return experiment_args

## actually run the thing :)

In [3]:
# Run the experiment.
# `Experiment` is constructed with the experiment `name` defined in the hyperparameter cell.
experiment = Experiment(name)
# The factory function itself (not its return value) is passed in, so the
# experiment decides when to build the argument dictionary.
# NOTE(review): `stats` presumably holds the per-controller results -- confirm against Experiment.
stats = experiment(get_experiment_args)

INFO: Unable to initialize backend 'cuda': module 'jaxlib.xla_extension' has no attribute 'GpuAllocatorConfig'
INFO: Unable to initialize backend 'rocm': module 'jaxlib.xla_extension' has no attribute 'GpuAllocatorConfig'
INFO: Unable to initialize backend 'tpu': module 'jaxlib.xla_extension' has no attribute 'get_tpu_client'
INFO: Unable to initialize backend 'plugin': xla_extension has no attributes named get_plugin_device_client. Compile TensorFlow with //tensorflow/compiler/xla/python:enable_plugin_device set to true (defaults to false) to enable this.
INFO: (EXPERIMENT) --------------------------------------------------
INFO: (EXPERIMENT) ----------------- TRIAL 0 -----------------------
INFO: (EXPERIMENT) --------------------------------------------------

INFO: (EXPERIMENT): testing 0.1
  0%|▎                                                                                 | 13/3000 [00:00<00:46, 63.61it/s, control=0, cost=-23]

tensor([ 0.0412,  0.3955, -0.0358, -0.6127]) tensor([ 0.0491,  0.5911, -0.0480, -0.9164]) False 1.0
tensor([ 0.0491,  0.5911, -0.0480, -0.9164]) tensor([ 0.0609,  0.7869, -0.0664, -1.2238]) False 1.0
tensor([ 0.0609,  0.7869, -0.0664, -1.2238]) tensor([ 0.0767,  0.5926, -0.0908, -0.9527]) False 1.0
tensor([ 0.0767,  0.5926, -0.0908, -0.9527]) tensor([ 0.0885,  0.7889, -0.1099, -1.2724]) False 1.0
tensor([ 0.0885,  0.7889, -0.1099, -1.2724]) tensor([ 0.1043,  0.5953, -0.1353, -1.0161]) False 1.0
tensor([ 0.1043,  0.5953, -0.1353, -1.0161]) tensor([ 0.1162,  0.7919, -0.1557, -1.3480]) False 1.0
tensor([ 0.1162,  0.7919, -0.1557, -1.3480]) tensor([ 0.1320,  0.5991, -0.1826, -1.1078]) False 1.0
tensor([ 0.1320,  0.5991, -0.1826, -1.1078]) tensor([ 0.1440,  0.7961, -0.2048, -1.4518]) False 1.0
tensor([ 0.1440,  0.7961, -0.2048, -1.4518]) tensor([0., 0., 0., 0.]) True 1.0
tensor([ 0.0233,  0.4150,  0.0098, -0.5586]) tensor([ 0.0316,  0.6100, -0.0014, -0.8482]) False 1.0
tensor([ 0.0316,  0.6

  1%|▊                                                                                 | 30/3000 [00:00<00:39, 75.74it/s, control=0, cost=-10]

tensor([ 0.0198,  0.3520, -0.0179, -0.5511]) tensor([ 0.0269,  0.1571, -0.0289, -0.2641]) False 1.0
tensor([ 0.0269,  0.1571, -0.0289, -0.2641]) tensor([ 0.0300,  0.3527, -0.0342, -0.5658]) False 1.0
tensor([ 0.0300,  0.3527, -0.0342, -0.5658]) tensor([ 0.0371,  0.1580, -0.0455, -0.2841]) False 1.0
tensor([ 0.0371,  0.1580, -0.0455, -0.2841]) tensor([ 0.0402, -0.0364, -0.0512, -0.0061]) False 1.0
tensor([ 0.0402, -0.0364, -0.0512, -0.0061]) tensor([ 0.0395,  0.1594, -0.0513, -0.3145]) False 1.0
tensor([ 0.0395,  0.1594, -0.0513, -0.3145]) tensor([ 0.0427,  0.3552, -0.0576, -0.6229]) False 1.0
tensor([ 0.0427,  0.3552, -0.0576, -0.6229]) tensor([ 0.0498,  0.1610, -0.0701, -0.3489]) False 1.0
tensor([ 0.0498,  0.1610, -0.0701, -0.3489]) tensor([ 0.0530, -0.0331, -0.0771, -0.0791]) False 1.0
tensor([ 0.0530, -0.0331, -0.0771, -0.0791]) tensor([ 0.0524,  0.1630, -0.0786, -0.3951]) False 1.0
tensor([ 0.0524,  0.1630, -0.0786, -0.3951]) tensor([ 0.0556, -0.0309, -0.0865, -0.1282]) False 1.0


  2%|█▎                                                                                | 50/3000 [00:00<00:33, 88.35it/s, control=0, cost=-10]

tensor([ 0.0753,  0.6323, -0.1695, -1.1040]) tensor([ 0.0879,  0.8292, -0.1916, -1.4447]) False 1.0
tensor([ 0.0879,  0.8292, -0.1916, -1.4447]) tensor([0., 0., 0., 0.]) True 1.0
tensor([ 0.0106,  0.0439, -0.0271,  0.0117]) tensor([ 0.0115,  0.2394, -0.0268, -0.2894]) False 1.0
tensor([ 0.0115,  0.2394, -0.0268, -0.2894]) tensor([ 0.0162,  0.4348, -0.0326, -0.5904]) False 1.0
tensor([ 0.0162,  0.4348, -0.0326, -0.5904]) tensor([ 0.0249,  0.6304, -0.0444, -0.8932]) False 1.0
tensor([ 0.0249,  0.6304, -0.0444, -0.8932]) tensor([ 0.0376,  0.8261, -0.0623, -1.1995]) False 1.0
tensor([ 0.0376,  0.8261, -0.0623, -1.1995]) tensor([ 0.0541,  1.0220, -0.0863, -1.5111]) False 1.0
tensor([ 0.0541,  1.0220, -0.0863, -1.5111]) tensor([ 0.0745,  0.8280, -0.1165, -1.2465]) False 1.0
tensor([ 0.0745,  0.8280, -0.1165, -1.2465]) tensor([ 0.0911,  1.0244, -0.1414, -1.5733]) False 1.0
tensor([ 0.0911,  1.0244, -0.1414, -1.5733]) tensor([ 0.1116,  1.2209, -0.1729, -1.9066]) False 1.0
tensor([ 0.1116,  1.2

  2%|█▋                                                                                | 60/3000 [00:00<00:32, 89.92it/s, control=0, cost=-10]

tensor([-0.0950, -0.9493,  0.1824,  1.5142]) tensor([0., 0., 0., 0.]) True 1.0
tensor([-0.0352,  0.3635,  0.0243, -0.6136]) tensor([-0.0280,  0.5583,  0.0120, -0.8985]) False 1.0
tensor([-0.0280,  0.5583,  0.0120, -0.8985]) tensor([-0.0168,  0.7533, -0.0060, -1.1874]) False 1.0
tensor([-0.0168,  0.7533, -0.0060, -1.1874]) tensor([-0.0017,  0.9485, -0.0297, -1.4820]) False 1.0
tensor([-0.0017,  0.9485, -0.0297, -1.4820]) tensor([ 0.0172,  0.7537, -0.0594, -1.1987]) False 1.0
tensor([ 0.0172,  0.7537, -0.0594, -1.1987]) tensor([ 0.0323,  0.5594, -0.0833, -0.9252]) False 1.0
tensor([ 0.0323,  0.5594, -0.0833, -0.9252]) tensor([ 0.0435,  0.3655, -0.1019, -0.6598]) False 1.0
tensor([ 0.0435,  0.3655, -0.1019, -0.6598]) tensor([ 0.0508,  0.5619, -0.1150, -0.9828]) False 1.0
tensor([ 0.0508,  0.5619, -0.1150, -0.9828]) tensor([ 0.0620,  0.3685, -0.1347, -0.7283]) False 1.0
tensor([ 0.0620,  0.3685, -0.1347, -0.7283]) tensor([ 0.0694,  0.5652, -0.1493, -1.0602]) False 1.0
tensor([ 0.0694,  0.5

  3%|██▏                                                                               | 79/3000 [00:01<00:32, 88.62it/s, control=0, cost=-10]

tensor([-0.0238, -0.1393, -0.0430,  0.1439]) tensor([-0.0265, -0.3338, -0.0401,  0.4227]) False 1.0
tensor([-0.0265, -0.3338, -0.0401,  0.4227]) tensor([-0.0332, -0.5283, -0.0317,  0.7024]) False 1.0
tensor([-0.0332, -0.5283, -0.0317,  0.7024]) tensor([-0.0438, -0.7230, -0.0176,  0.9850]) False 1.0
tensor([-0.0438, -0.7230, -0.0176,  0.9850]) tensor([-0.0582, -0.9178,  0.0021,  1.2721]) False 1.0
tensor([-0.0582, -0.9178,  0.0021,  1.2721]) tensor([-0.0766, -1.1130,  0.0275,  1.5654]) False 1.0
tensor([-0.0766, -1.1130,  0.0275,  1.5654]) tensor([-0.0989, -1.3084,  0.0588,  1.8666]) False 1.0
tensor([-0.0989, -1.3084,  0.0588,  1.8666]) tensor([-0.1250, -1.5042,  0.0962,  2.1769]) False 1.0
tensor([-0.1250, -1.5042,  0.0962,  2.1769]) tensor([-0.1551, -1.7001,  0.1397,  2.4977]) False 1.0
tensor([-0.1551, -1.7001,  0.1397,  2.4977]) tensor([-0.1891, -1.8960,  0.1897,  2.8297]) False 1.0
tensor([-0.1891, -1.8960,  0.1897,  2.8297]) tensor([0., 0., 0., 0.]) True 1.0
tensor([ 0.0110, -0.4

  3%|██▋                                                                               | 99/3000 [00:01<00:33, 86.09it/s, control=0, cost=-22]

tensor([-0.1062, -0.7630,  0.1752,  1.3458]) tensor([-0.1215, -0.5704,  0.2021,  1.1126]) False 1.0
tensor([-0.1215, -0.5704,  0.2021,  1.1126]) tensor([0., 0., 0., 0.]) True 1.0
tensor([-0.0119,  0.0329, -0.0431, -0.0429]) tensor([-0.0113, -0.1616, -0.0440,  0.2359]) False 1.0
tensor([-0.0113, -0.1616, -0.0440,  0.2359]) tensor([-0.0145, -0.3560, -0.0392,  0.5144]) False 1.0
tensor([-0.0145, -0.3560, -0.0392,  0.5144]) tensor([-0.0216, -0.1604, -0.0290,  0.2096]) False 1.0
tensor([-0.0216, -0.1604, -0.0290,  0.2096]) tensor([-0.0248,  0.0351, -0.0248, -0.0921]) False 1.0
tensor([-0.0248,  0.0351, -0.0248, -0.0921]) tensor([-0.0241, -0.1596, -0.0266,  0.1927]) False 1.0
tensor([-0.0241, -0.1596, -0.0266,  0.1927]) tensor([-0.0273, -0.3544, -0.0228,  0.4769]) False 1.0
tensor([-0.0273, -0.3544, -0.0228,  0.4769]) tensor([-0.0344, -0.5491, -0.0132,  0.7623]) False 1.0
tensor([-0.0344, -0.5491, -0.0132,  0.7623]) tensor([-0.0454, -0.3538,  0.0020,  0.4655]) False 1.0
tensor([-0.0454, -0.3

  4%|███▏                                                                             | 117/3000 [00:01<00:34, 82.95it/s, control=0, cost=-34]

tensor([-0.0523,  0.4055,  0.0386, -0.4855]) tensor([-0.0442,  0.2098,  0.0289, -0.1809]) False 1.0
tensor([-0.0442,  0.2098,  0.0289, -0.1809]) tensor([-0.0400,  0.0143,  0.0253,  0.1208]) False 1.0
tensor([-0.0400,  0.0143,  0.0253,  0.1208]) tensor([-0.0397, -0.1812,  0.0277,  0.4213]) False 1.0
tensor([-0.0397, -0.1812,  0.0277,  0.4213]) tensor([-0.0433,  0.0136,  0.0361,  0.1375]) False 1.0
tensor([-0.0433,  0.0136,  0.0361,  0.1375]) tensor([-0.0431,  0.2081,  0.0389, -0.1436]) False 1.0
tensor([-0.0431,  0.2081,  0.0389, -0.1436]) tensor([-0.0389,  0.4027,  0.0360, -0.4237]) False 1.0
tensor([-0.0389,  0.4027,  0.0360, -0.4237]) tensor([-0.0308,  0.2071,  0.0275, -0.1199]) False 1.0
tensor([-0.0308,  0.2071,  0.0275, -0.1199]) tensor([-0.0267,  0.4018,  0.0251, -0.4038]) False 1.0
tensor([-0.0267,  0.4018,  0.0251, -0.4038]) tensor([-0.0187,  0.2063,  0.0171, -0.1033]) False 1.0
tensor([-0.0187,  0.2063,  0.0171, -0.1033]) tensor([-0.0145,  0.0110,  0.0150,  0.1947]) False 1.0


  4%|███▋                                                                             | 135/3000 [00:01<00:35, 81.50it/s, control=0, cost=-26]

tensor([-0.0147, -0.4090,  0.0214,  0.6232]) tensor([-0.0229, -0.6044,  0.0339,  0.9225]) False 1.0
tensor([-0.0229, -0.6044,  0.0339,  0.9225]) tensor([-0.0350, -0.8000,  0.0523,  1.2257]) False 1.0
tensor([-0.0350, -0.8000,  0.0523,  1.2257]) tensor([-0.0510, -0.9958,  0.0768,  1.5343]) False 1.0
tensor([-0.0510, -0.9958,  0.0768,  1.5343]) tensor([-0.0709, -0.8016,  0.1075,  1.2665]) False 1.0
tensor([-0.0709, -0.8016,  0.1075,  1.2665]) tensor([-0.0869, -0.9980,  0.1328,  1.5908]) False 1.0
tensor([-0.0869, -0.9980,  0.1328,  1.5908]) tensor([-0.1069, -1.1944,  0.1647,  1.9218]) False 1.0
tensor([-0.1069, -1.1944,  0.1647,  1.9218]) tensor([-0.1308, -1.3908,  0.2031,  2.2607]) False 1.0
tensor([-0.1308, -1.3908,  0.2031,  2.2607]) tensor([0., 0., 0., 0.]) True 1.0
tensor([-0.0296, -0.3551, -0.0384,  0.5917]) tensor([-0.0367, -0.5497, -0.0266,  0.8720]) False 1.0
tensor([-0.0367, -0.5497, -0.0266,  0.8720]) tensor([-0.0476, -0.3542, -0.0091,  0.5711]) False 1.0
tensor([-0.0476, -0.3

  5%|███▉                                                                             | 144/3000 [00:01<00:35, 80.63it/s, control=0, cost=-10]

tensor([-0.0143,  0.5371, -0.0178, -0.9226]) tensor([-0.0036,  0.3423, -0.0363, -0.6356]) False 1.0
tensor([-0.0036,  0.3423, -0.0363, -0.6356]) tensor([ 0.0033,  0.1477, -0.0490, -0.3546]) False 1.0
tensor([ 0.0033,  0.1477, -0.0490, -0.3546]) tensor([ 0.0062,  0.3434, -0.0561, -0.6623]) False 1.0
tensor([ 0.0062,  0.3434, -0.0561, -0.6623]) tensor([ 0.0131,  0.5393, -0.0693, -0.9721]) False 1.0
tensor([ 0.0131,  0.5393, -0.0693, -0.9721]) tensor([ 0.0239,  0.7353, -0.0888, -1.2857]) False 1.0
tensor([ 0.0239,  0.7353, -0.0888, -1.2857]) tensor([ 0.0386,  0.9314, -0.1145, -1.6048]) False 1.0
tensor([ 0.0386,  0.9314, -0.1145, -1.6048]) tensor([ 0.0572,  1.1277, -0.1466, -1.9309]) False 1.0
tensor([ 0.0572,  1.1277, -0.1466, -1.9309]) tensor([ 0.0798,  0.9344, -0.1852, -1.6870]) False 1.0
tensor([ 0.0798,  0.9344, -0.1852, -1.6870]) tensor([0., 0., 0., 0.]) True 1.0
tensor([-0.0222,  0.0007, -0.0289,  0.0249]) tensor([-0.0222,  0.1962, -0.0284, -0.2767]) False 1.0
tensor([-0.0222,  0.1

  5%|████▎                                                                            | 162/3000 [00:02<00:35, 80.03it/s, control=0, cost=-37]

tensor([ 0.0542,  0.2117, -0.0686, -0.3449]) tensor([ 0.0585,  0.4077, -0.0755, -0.6584]) False 1.0
tensor([ 0.0585,  0.4077, -0.0755, -0.6584]) tensor([ 0.0666,  0.6038, -0.0887, -0.9739]) False 1.0
tensor([ 0.0666,  0.6038, -0.0887, -0.9739]) tensor([ 0.0787,  0.8000, -0.1082, -1.2931]) False 1.0
tensor([ 0.0787,  0.8000, -0.1082, -1.2931]) tensor([ 0.0947,  0.9963, -0.1340, -1.6176]) False 1.0
tensor([ 0.0947,  0.9963, -0.1340, -1.6176]) tensor([ 0.1146,  0.8030, -0.1664, -1.3695]) False 1.0
tensor([ 0.1146,  0.8030, -0.1664, -1.3695]) tensor([ 0.1307,  0.9998, -0.1938, -1.7092]) False 1.0
tensor([ 0.1307,  0.9998, -0.1938, -1.7092]) tensor([0., 0., 0., 0.]) True 1.0
tensor([ 0.0443, -0.4083, -0.0235,  0.6095]) tensor([ 0.0362, -0.6030, -0.0113,  0.8947]) False 1.0
tensor([ 0.0362, -0.6030, -0.0113,  0.8947]) tensor([ 0.0241, -0.4078,  0.0066,  0.5985]) False 1.0
tensor([ 0.0241, -0.4078,  0.0066,  0.5985]) tensor([ 0.0160, -0.2127,  0.0185,  0.3078]) False 1.0
tensor([ 0.0160, -0.2

  6%|████▊                                                                            | 179/3000 [00:02<00:37, 74.73it/s, control=0, cost=-17]

tensor([-0.0432,  0.1564, -0.0523, -0.3838]) tensor([-0.0401, -0.0379, -0.0599, -0.1080]) False 1.0
tensor([-0.0401, -0.0379, -0.0599, -0.1080]) tensor([-0.0408, -0.2322, -0.0621,  0.1651]) False 1.0
tensor([-0.0408, -0.2322, -0.0621,  0.1651]) tensor([-0.0455, -0.4263, -0.0588,  0.4376]) False 1.0
tensor([-0.0455, -0.4263, -0.0588,  0.4376]) tensor([-0.0540, -0.6206, -0.0501,  0.7112]) False 1.0
tensor([-0.0540, -0.6206, -0.0501,  0.7112]) tensor([-0.0664, -0.8150, -0.0358,  0.9877]) False 1.0
tensor([-0.0664, -0.8150, -0.0358,  0.9877]) tensor([-0.0827, -1.0096, -0.0161,  1.2689]) False 1.0
tensor([-0.0827, -1.0096, -0.0161,  1.2689]) tensor([-0.1029, -0.8143,  0.0093,  0.9712]) False 1.0
tensor([-0.1029, -0.8143,  0.0093,  0.9712]) tensor([-0.1192, -1.0095,  0.0287,  1.2668]) False 1.0
tensor([-0.1192, -1.0095,  0.0287,  1.2668]) tensor([-0.1394, -1.2050,  0.0541,  1.5684]) False 1.0
tensor([-0.1394, -1.2050,  0.0541,  1.5684]) tensor([-0.1635, -1.0106,  0.0854,  1.2930]) False 1.0


  6%|█████▎                                                                           | 195/3000 [00:02<00:37, 75.49it/s, control=0, cost=-24]

tensor([ 0.0083,  0.4242, -0.0475, -0.5903]) tensor([ 0.0168,  0.2298, -0.0593, -0.3130]) False 1.0
tensor([ 0.0168,  0.2298, -0.0593, -0.3130]) tensor([ 0.0214,  0.0355, -0.0656, -0.0396]) False 1.0
tensor([ 0.0214,  0.0355, -0.0656, -0.0396]) tensor([ 0.0221,  0.2315, -0.0664, -0.3522]) False 1.0
tensor([ 0.0221,  0.2315, -0.0664, -0.3522]) tensor([ 0.0267,  0.0374, -0.0734, -0.0812]) False 1.0
tensor([ 0.0267,  0.0374, -0.0734, -0.0812]) tensor([ 0.0275, -0.1566, -0.0750,  0.1875]) False 1.0
tensor([ 0.0275, -0.1566, -0.0750,  0.1875]) tensor([ 0.0243,  0.0395, -0.0713, -0.1279]) False 1.0
tensor([ 0.0243,  0.0395, -0.0713, -0.1279]) tensor([ 0.0251, -0.1545, -0.0738,  0.1415]) False 1.0
tensor([ 0.0251, -0.1545, -0.0738,  0.1415]) tensor([ 0.0220, -0.3485, -0.0710,  0.4100]) False 1.0
tensor([ 0.0220, -0.3485, -0.0710,  0.4100]) tensor([ 0.0151, -0.1524, -0.0628,  0.0958]) False 1.0
tensor([ 0.0151, -0.1524, -0.0628,  0.0958]) tensor([ 0.0120,  0.0435, -0.0609, -0.2160]) False 1.0


  7%|█████▌                                                                           | 204/3000 [00:02<00:36, 76.97it/s, control=0, cost=-10]

tensor([ 0.0346, -0.2132, -0.0646,  0.1636]) tensor([ 0.0303, -0.4074, -0.0614,  0.4352]) False 1.0
tensor([ 0.0303, -0.4074, -0.0614,  0.4352]) tensor([ 0.0222, -0.6016, -0.0527,  0.7080]) False 1.0
tensor([ 0.0222, -0.6016, -0.0527,  0.7080]) tensor([ 0.0101, -0.4058, -0.0385,  0.3992]) False 1.0
tensor([ 0.0101, -0.4058, -0.0385,  0.3992]) tensor([ 0.0020, -0.2101, -0.0305,  0.0946]) False 1.0
tensor([ 0.0020, -0.2101, -0.0305,  0.0946]) tensor([-0.0022, -0.0146, -0.0286, -0.2075]) False 1.0
tensor([-0.0022, -0.0146, -0.0286, -0.2075]) tensor([-0.0025, -0.2093, -0.0328,  0.0760]) False 1.0
tensor([-0.0025, -0.2093, -0.0328,  0.0760]) tensor([-0.0067, -0.4039, -0.0313,  0.3581]) False 1.0
tensor([-0.0067, -0.4039, -0.0313,  0.3581]) tensor([-0.0148, -0.2084, -0.0241,  0.0558]) False 1.0
tensor([-0.0148, -0.2084, -0.0241,  0.0558]) tensor([-0.0189, -0.0129, -0.0230, -0.2444]) False 1.0
tensor([-0.0189, -0.0129, -0.0230, -0.2444]) tensor([-0.0192, -0.2077, -0.0279,  0.0409]) False 1.0


  7%|█████▉                                                                           | 220/3000 [00:02<00:40, 69.34it/s, control=0, cost=-10]

tensor([-0.1243, -1.1540,  0.1871,  2.0055]) tensor([0., 0., 0., 0.]) True 1.0
tensor([-0.0368, -0.3582,  0.0466,  0.5881]) tensor([-0.0439, -0.5540,  0.0583,  0.8951]) False 1.0
tensor([-0.0439, -0.5540,  0.0583,  0.8951]) tensor([-0.0550, -0.7498,  0.0762,  1.2055]) False 1.0
tensor([-0.0550, -0.7498,  0.0762,  1.2055]) tensor([-0.0700, -0.5558,  0.1004,  0.9377]) False 1.0
tensor([-0.0700, -0.5558,  0.1004,  0.9377]) tensor([-0.0811, -0.3622,  0.1191,  0.6781]) False 1.0
tensor([-0.0811, -0.3622,  0.1191,  0.6781]) tensor([-0.0884, -0.5587,  0.1327,  1.0058]) False 1.0
tensor([-0.0884, -0.5587,  0.1327,  1.0058]) tensor([-0.0996, -0.3656,  0.1528,  0.7575]) False 1.0
tensor([-0.0996, -0.3656,  0.1528,  0.7575]) tensor([-0.1069, -0.5624,  0.1679,  1.0941]) False 1.0
tensor([-0.1069, -0.5624,  0.1679,  1.0941]) tensor([-0.1181, -0.7593,  0.1898,  1.4345]) False 1.0
tensor([-0.1181, -0.7593,  0.1898,  1.4345]) tensor([0., 0., 0., 0.]) True 1.0
tensor([-0.0372, -0.0324, -0.0285,  0.0255

  8%|██████▏                                                                          | 228/3000 [00:03<00:44, 61.72it/s, control=0, cost=-10]

tensor([-0.1353, -0.6200,  0.2015,  1.2447]) tensor([0., 0., 0., 0.]) True 1.0
tensor([-0.0174,  0.0306,  0.0297,  0.0438]) tensor([-0.0168, -0.1650,  0.0306,  0.3457]) False 1.0
tensor([-0.0168, -0.1650,  0.0306,  0.3457]) tensor([-0.0201,  0.0297,  0.0375,  0.0628]) False 1.0
tensor([-0.0201,  0.0297,  0.0375,  0.0628]) tensor([-0.0195, -0.1659,  0.0388,  0.3671]) False 1.0
tensor([-0.0195, -0.1659,  0.0388,  0.3671]) tensor([-0.0228,  0.0286,  0.0461,  0.0869]) False 1.0
tensor([-0.0228,  0.0286,  0.0461,  0.0869]) tensor([-0.0222,  0.2230,  0.0479, -0.1909]) False 1.0
tensor([-0.0222,  0.2230,  0.0479, -0.1909]) tensor([-0.0178,  0.4175,  0.0440, -0.4681]) False 1.0
tensor([-0.0178,  0.4175,  0.0440, -0.4681]) tensor([-0.0094,  0.2217,  0.0347, -0.1619]) False 1.0
tensor([-0.0094,  0.2217,  0.0347, -0.1619]) tensor([-0.0050,  0.4163,  0.0314, -0.4434]) False 1.0
tensor([-0.0050,  0.4163,  0.0314, -0.4434]) tensor([ 0.0033,  0.6110,  0.0226, -0.7260]) False 1.0
tensor([ 0.0033,  0.6

  8%|██████▌                                                                          | 242/3000 [00:03<00:45, 60.54it/s, control=0, cost=-20]

tensor([-0.0145,  0.0187,  0.1709,  0.5036]) tensor([-0.0141,  0.2111,  0.1810,  0.2693]) False 1.0
tensor([-0.0141,  0.2111,  0.1810,  0.2693]) tensor([-0.0099,  0.4032,  0.1863,  0.0387]) False 1.0
tensor([-0.0099,  0.4032,  0.1863,  0.0387]) tensor([-0.0018,  0.5952,  0.1871, -0.1899]) False 1.0
tensor([-0.0018,  0.5952,  0.1871, -0.1899]) tensor([0.0101, 0.3980, 0.1833, 0.1555]) False 1.0
tensor([0.0101, 0.3980, 0.1833, 0.1555]) tensor([0.0180, 0.2008, 0.1864, 0.4999]) False 1.0
tensor([0.0180, 0.2008, 0.1864, 0.4999]) tensor([0.0220, 0.0036, 0.1964, 0.8451]) False 1.0
tensor([0.0220, 0.0036, 0.1964, 0.8451]) tensor([0., 0., 0., 0.]) True 1.0
tensor([-0.0129,  0.4325,  0.0114, -0.5863]) tensor([-0.0042,  0.2372, -0.0003, -0.2900]) False 1.0
tensor([-0.0042,  0.2372, -0.0003, -0.2900]) tensor([ 5.3944e-04,  4.3236e-01, -6.1418e-03, -5.8284e-01]) False 1.0
tensor([ 5.3944e-04,  4.3236e-01, -6.1418e-03, -5.8284e-01]) tensor([ 0.0092,  0.2373, -0.0178, -0.2921]) False 1.0
tensor([ 0.00

  9%|██████▉                                                                          | 257/3000 [00:03<00:43, 62.91it/s, control=0, cost=-10]

tensor([-0.0107,  0.0483,  0.0314,  0.0492]) tensor([-0.0098, -0.1472,  0.0324,  0.3516]) False 1.0
tensor([-0.0098, -0.1472,  0.0324,  0.3516]) tensor([-0.0127,  0.0474,  0.0394,  0.0693]) False 1.0
tensor([-0.0127,  0.0474,  0.0394,  0.0693]) tensor([-0.0118,  0.2420,  0.0408, -0.2106]) False 1.0
tensor([-0.0118,  0.2420,  0.0408, -0.2106]) tensor([-0.0069,  0.0463,  0.0366,  0.0946]) False 1.0
tensor([-0.0069,  0.0463,  0.0366,  0.0946]) tensor([-0.0060,  0.2409,  0.0385, -0.1863]) False 1.0
tensor([-0.0060,  0.2409,  0.0385, -0.1863]) tensor([-0.0012,  0.0452,  0.0348,  0.1183]) False 1.0
tensor([-0.0012,  0.0452,  0.0348,  0.1183]) tensor([-0.0003,  0.2398,  0.0371, -0.1632]) False 1.0
tensor([-0.0003,  0.2398,  0.0371, -0.1632]) tensor([0.0045, 0.0442, 0.0339, 0.1409]) False 1.0
tensor([0.0045, 0.0442, 0.0339, 0.1409]) tensor([ 0.0054,  0.2388,  0.0367, -0.1409]) False 1.0
tensor([ 0.0054,  0.2388,  0.0367, -0.1409]) tensor([0.0102, 0.0432, 0.0339, 0.1632]) False 1.0
tensor([0.01

  9%|███████▎                                                                         | 271/3000 [00:03<00:43, 63.43it/s, control=0, cost=-46]

tensor([-0.0125,  0.0337, -0.0636, -0.2814]) tensor([-0.0119,  0.2296, -0.0692, -0.5934]) False 1.0
tensor([-0.0119,  0.2296, -0.0692, -0.5934]) tensor([-0.0073,  0.0356, -0.0811, -0.3234]) False 1.0
tensor([-0.0073,  0.0356, -0.0811, -0.3234]) tensor([-0.0066,  0.2317, -0.0876, -0.6405]) False 1.0
tensor([-0.0066,  0.2317, -0.0876, -0.6405]) tensor([-0.0019,  0.0379, -0.1004, -0.3766]) False 1.0
tensor([-0.0019,  0.0379, -0.1004, -0.3766]) tensor([-0.0012, -0.1556, -0.1079, -0.1172]) False 1.0
tensor([-0.0012, -0.1556, -0.1079, -0.1172]) tensor([-0.0043, -0.3491, -0.1103,  0.1396]) False 1.0
tensor([-0.0043, -0.3491, -0.1103,  0.1396]) tensor([-0.0113, -0.1525, -0.1075, -0.1857]) False 1.0
tensor([-0.0113, -0.1525, -0.1075, -0.1857]) tensor([-0.0143,  0.0439, -0.1112, -0.5103]) False 1.0
tensor([-0.0143,  0.0439, -0.1112, -0.5103]) tensor([-0.0134,  0.2404, -0.1214, -0.8358]) False 1.0
tensor([-0.0134,  0.2404, -0.1214, -0.8358]) tensor([-0.0086,  0.4370, -0.1381, -1.1641]) False 1.0


  9%|███████▌                                                                         | 278/3000 [00:03<00:44, 61.30it/s, control=0, cost=-13]

tensor([-0.0100,  0.0324,  0.0296,  0.0521]) tensor([-0.0094,  0.2270,  0.0307, -0.2310]) False 1.0
tensor([-0.0094,  0.2270,  0.0307, -0.2310]) tensor([-0.0048,  0.0315,  0.0261,  0.0712]) False 1.0
tensor([-0.0048,  0.0315,  0.0261,  0.0712]) tensor([-0.0042,  0.2262,  0.0275, -0.2132]) False 1.0
tensor([-0.0042,  0.2262,  0.0275, -0.2132]) tensor([ 3.1354e-04,  4.2095e-01,  2.3221e-02, -4.9708e-01]) False 1.0
tensor([ 3.1354e-04,  4.2095e-01,  2.3221e-02, -4.9708e-01]) tensor([ 0.0087,  0.6157,  0.0133, -0.7824]) False 1.0
tensor([ 0.0087,  0.6157,  0.0133, -0.7824]) tensor([ 0.0210,  0.8107, -0.0024, -1.0708]) False 1.0
tensor([ 0.0210,  0.8107, -0.0024, -1.0708]) tensor([ 0.0373,  0.6156, -0.0238, -0.7789]) False 1.0
tensor([ 0.0373,  0.6156, -0.0238, -0.7789]) tensor([ 0.0496,  0.8110, -0.0394, -1.0790]) False 1.0
tensor([ 0.0496,  0.8110, -0.0394, -1.0790]) tensor([ 0.0658,  0.6164, -0.0609, -0.7989]) False 1.0
tensor([ 0.0658,  0.6164, -0.0609, -0.7989]) tensor([ 0.0781,  0.812

 10%|███████▉                                                                         | 292/3000 [00:04<00:43, 61.57it/s, control=0, cost=-23]

tensor([ 0.0695,  0.1545, -0.0299, -0.2716]) False 1.0
tensor([ 0.0695,  0.1545, -0.0299, -0.2716]) tensor([ 0.0725, -0.0402, -0.0353,  0.0115]) False 1.0
tensor([ 0.0725, -0.0402, -0.0353,  0.0115]) tensor([ 0.0717, -0.2348, -0.0351,  0.2928]) False 1.0
tensor([ 0.0717, -0.2348, -0.0351,  0.2928]) tensor([ 0.0670, -0.4294, -0.0292,  0.5743]) False 1.0
tensor([ 0.0670, -0.4294, -0.0292,  0.5743]) tensor([ 0.0585, -0.6241, -0.0177,  0.8576]) False 1.0
tensor([ 0.0585, -0.6241, -0.0177,  0.8576]) tensor([ 0.0460, -0.4287, -0.0006,  0.5594]) False 1.0
tensor([ 0.0460, -0.4287, -0.0006,  0.5594]) tensor([ 0.0374, -0.2336,  0.0106,  0.2666]) False 1.0
tensor([ 0.0374, -0.2336,  0.0106,  0.2666]) tensor([ 0.0327, -0.0386,  0.0160, -0.0228]) False 1.0
tensor([ 0.0327, -0.0386,  0.0160, -0.0228]) tensor([ 0.0320, -0.2340,  0.0155,  0.2749]) False 1.0
tensor([ 0.0320, -0.2340,  0.0155,  0.2749]) tensor([ 0.0273, -0.4293,  0.0210,  0.5724]) False 1.0
tensor([ 0.0273, -0.4293,  0.0210,  0.5724]) 

 10%|████████▎                                                                        | 307/3000 [00:04<00:41, 65.26it/s, control=0, cost=-10]

tensor([0.0244, 0.0063, 0.0592, 0.1174]) tensor([ 0.0245,  0.2006,  0.0616, -0.1560]) False 1.0
tensor([ 0.0245,  0.2006,  0.0616, -0.1560]) tensor([ 0.0285,  0.3947,  0.0585, -0.4287]) False 1.0
tensor([ 0.0285,  0.3947,  0.0585, -0.4287]) tensor([ 0.0364,  0.5890,  0.0499, -0.7024]) False 1.0
tensor([ 0.0364,  0.5890,  0.0499, -0.7024]) tensor([ 0.0482,  0.7834,  0.0358, -0.9789]) False 1.0
tensor([ 0.0482,  0.7834,  0.0358, -0.9789]) tensor([ 0.0639,  0.5878,  0.0163, -0.6752]) False 1.0
tensor([ 0.0639,  0.5878,  0.0163, -0.6752]) tensor([ 0.0756,  0.3925,  0.0028, -0.3775]) False 1.0
tensor([ 0.0756,  0.3925,  0.0028, -0.3775]) tensor([ 0.0835,  0.1973, -0.0048, -0.0839]) False 1.0
tensor([ 0.0835,  0.1973, -0.0048, -0.0839]) tensor([ 0.0874,  0.3925, -0.0065, -0.3781]) False 1.0
tensor([ 0.0874,  0.3925, -0.0065, -0.3781]) tensor([ 0.0953,  0.1975, -0.0140, -0.0874]) False 1.0
tensor([ 0.0953,  0.1975, -0.0140, -0.0874]) tensor([ 0.0992,  0.3928, -0.0158, -0.3845]) False 1.0
tens

 11%|████████▋                                                                        | 321/3000 [00:04<00:40, 65.75it/s, control=0, cost=-11]

tensor([-0.0590, -0.0209,  0.0255,  0.0268]) tensor([-0.0594, -0.2163,  0.0261,  0.3274]) False 1.0
tensor([-0.0594, -0.2163,  0.0261,  0.3274]) tensor([-0.0637, -0.0216,  0.0326,  0.0430]) False 1.0
tensor([-0.0637, -0.0216,  0.0326,  0.0430]) tensor([-0.0641, -0.2172,  0.0335,  0.3458]) False 1.0
tensor([-0.0641, -0.2172,  0.0335,  0.3458]) tensor([-0.0685, -0.4128,  0.0404,  0.6489]) False 1.0
tensor([-0.0685, -0.4128,  0.0404,  0.6489]) tensor([-0.0767, -0.6084,  0.0534,  0.9540]) False 1.0
tensor([-0.0767, -0.6084,  0.0534,  0.9540]) tensor([-0.0889, -0.8042,  0.0725,  1.2630]) False 1.0
tensor([-0.0889, -0.8042,  0.0725,  1.2630]) tensor([-0.1050, -1.0002,  0.0977,  1.5774]) False 1.0
tensor([-0.1050, -1.0002,  0.0977,  1.5774]) tensor([-0.1250, -0.8064,  0.1293,  1.3168]) False 1.0
tensor([-0.1250, -0.8064,  0.1293,  1.3168]) tensor([-0.1411, -1.0028,  0.1556,  1.6469]) False 1.0
tensor([-0.1411, -1.0028,  0.1556,  1.6469]) tensor([-0.1612, -1.1994,  0.1885,  1.9838]) False 1.0


 11%|████████▉                                                                        | 330/3000 [00:04<00:37, 71.06it/s, control=0, cost=-10]

tensor([ 0.0275, -0.0360, -0.0198, -0.0170]) tensor([ 0.0268, -0.2308, -0.0201,  0.2694]) False 1.0
tensor([ 0.0268, -0.2308, -0.0201,  0.2694]) tensor([ 0.0222, -0.4257, -0.0147,  0.5557]) False 1.0
tensor([ 0.0222, -0.4257, -0.0147,  0.5557]) tensor([ 0.0137, -0.6206, -0.0036,  0.8437]) False 1.0
tensor([ 0.0137, -0.6206, -0.0036,  0.8437]) tensor([ 0.0013, -0.8157,  0.0133,  1.1352]) False 1.0
tensor([ 0.0013, -0.8157,  0.0133,  1.1352]) tensor([-0.0150, -1.0109,  0.0360,  1.4320]) False 1.0
tensor([-0.0150, -1.0109,  0.0360,  1.4320]) tensor([-0.0353, -1.2065,  0.0646,  1.7357]) False 1.0
tensor([-0.0353, -1.2065,  0.0646,  1.7357]) tensor([-0.0594, -1.0122,  0.0993,  1.4638]) False 1.0
tensor([-0.0594, -1.0122,  0.0993,  1.4638]) tensor([-0.0796, -1.2084,  0.1286,  1.7858]) False 1.0
tensor([-0.0796, -1.2084,  0.1286,  1.7858]) tensor([-0.1038, -1.4047,  0.1643,  2.1155]) False 1.0
tensor([-0.1038, -1.4047,  0.1643,  2.1155]) tensor([-0.1319, -1.6010,  0.2066,  2.4542]) False 1.0


 12%|█████████▎                                                                       | 345/3000 [00:04<00:47, 55.88it/s, control=0, cost=-20]

tensor([ 0.0964,  0.5877, -0.0942, -1.0028]) tensor([ 0.1081,  0.7840, -0.1143, -1.3235]) False 1.0
tensor([ 0.1081,  0.7840, -0.1143, -1.3235]) tensor([ 0.1238,  0.5905, -0.1407, -1.0687]) False 1.0
tensor([ 0.1238,  0.5905, -0.1407, -1.0687]) tensor([ 0.1356,  0.7872, -0.1621, -1.4020]) False 1.0
tensor([ 0.1356,  0.7872, -0.1621, -1.4020]) tensor([ 0.1514,  0.5944, -0.1902, -1.1641]) False 1.0
tensor([ 0.1514,  0.5944, -0.1902, -1.1641]) tensor([0., 0., 0., 0.]) True 1.0
tensor([-0.0156, -0.3687,  0.0132,  0.6127]) tensor([-0.0230, -0.5640,  0.0255,  0.9096]) False 1.0
tensor([-0.0230, -0.5640,  0.0255,  0.9096]) tensor([-0.0342, -0.7595,  0.0437,  1.2101]) False 1.0
tensor([-0.0342, -0.7595,  0.0437,  1.2101]) tensor([-0.0494, -0.5650,  0.0679,  0.9315]) False 1.0
tensor([-0.0494, -0.5650,  0.0679,  0.9315]) tensor([-0.0607, -0.7609,  0.0865,  1.2447]) False 1.0
tensor([-0.0607, -0.7609,  0.0865,  1.2447]) tensor([-0.0759, -0.9570,  0.1114,  1.5632]) False 1.0
tensor([-0.0759, -0.9

 12%|█████████▍                                                                       | 351/3000 [00:05<00:48, 54.06it/s, control=0, cost=-10]

tensor([ 0.0365,  1.1462, -0.0689, -1.6004]) tensor([ 0.0594,  0.9520, -0.1010, -1.3300]) False 1.0
tensor([ 0.0594,  0.9520, -0.1010, -1.3300]) tensor([ 0.0785,  0.7582, -0.1276, -1.0706]) False 1.0
tensor([ 0.0785,  0.7582, -0.1276, -1.0706]) tensor([ 0.0937,  0.9548, -0.1490, -1.4004]) False 1.0
tensor([ 0.0937,  0.9548, -0.1490, -1.4004]) tensor([ 0.1127,  0.7618, -0.1770, -1.1578]) False 1.0
tensor([ 0.1127,  0.7618, -0.1770, -1.1578]) tensor([ 0.1280,  0.9587, -0.2001, -1.5003]) False 1.0
tensor([ 0.1280,  0.9587, -0.2001, -1.5003]) tensor([0., 0., 0., 0.]) True 1.0
tensor([ 0.0389, -0.0255,  0.0269,  0.0525]) tensor([ 0.0383, -0.2210,  0.0280,  0.3536]) False 1.0
tensor([ 0.0383, -0.2210,  0.0280,  0.3536]) tensor([ 0.0339, -0.0263,  0.0350,  0.0698]) False 1.0
tensor([ 0.0339, -0.0263,  0.0350,  0.0698]) tensor([ 0.0334, -0.2219,  0.0364,  0.3734]) False 1.0
tensor([ 0.0334, -0.2219,  0.0364,  0.3734]) tensor([ 0.0290, -0.4176,  0.0439,  0.6773]) False 1.0
tensor([ 0.0290, -0.4

 12%|█████████▊                                                                       | 363/3000 [00:05<00:49, 53.09it/s, control=0, cost=-10]

tensor([ 0.1337,  0.7691, -0.1691, -1.2673]) tensor([ 0.1491,  0.9659, -0.1944, -1.6079]) False 1.0
tensor([ 0.1491,  0.9659, -0.1944, -1.6079]) tensor([0., 0., 0., 0.]) True 1.0
tensor([-0.0015, -0.4116,  0.0468,  0.6568]) tensor([-0.0097, -0.2171,  0.0599,  0.3792]) False 1.0
tensor([-0.0097, -0.2171,  0.0599,  0.3792]) tensor([-0.0141, -0.4130,  0.0675,  0.6901]) False 1.0
tensor([-0.0141, -0.4130,  0.0675,  0.6901]) tensor([-0.0223, -0.6090,  0.0813,  1.0033]) False 1.0
tensor([-0.0223, -0.6090,  0.0813,  1.0033]) tensor([-0.0345, -0.4151,  0.1014,  0.7372]) False 1.0
tensor([-0.0345, -0.4151,  0.1014,  0.7372]) tensor([-0.0428, -0.2215,  0.1161,  0.4781]) False 1.0
tensor([-0.0428, -0.2215,  0.1161,  0.4781]) tensor([-0.0472, -0.0282,  0.1257,  0.2241]) False 1.0
tensor([-0.0472, -0.0282,  0.1257,  0.2241]) tensor([-0.0478,  0.1649,  0.1302, -0.0264]) False 1.0
tensor([-0.0478,  0.1649,  0.1302, -0.0264]) tensor([-0.0445,  0.3580,  0.1296, -0.2754]) False 1.0
tensor([-0.0445,  0.3

 13%|██████████▏                                                                      | 376/3000 [00:05<00:48, 53.98it/s, control=0, cost=-50]

tensor([ 0.0167,  0.1751,  0.0306, -0.1440]) tensor([ 0.0202,  0.3698,  0.0277, -0.4268]) False 1.0
tensor([ 0.0202,  0.3698,  0.0277, -0.4268]) tensor([ 0.0276,  0.5645,  0.0192, -0.7107]) False 1.0
tensor([ 0.0276,  0.5645,  0.0192, -0.7107]) tensor([ 0.0389,  0.7594,  0.0050, -0.9972]) False 1.0
tensor([ 0.0389,  0.7594,  0.0050, -0.9972]) tensor([ 0.0540,  0.9544, -0.0150, -1.2884]) False 1.0
tensor([ 0.0540,  0.9544, -0.0150, -1.2884]) tensor([ 0.0731,  1.1498, -0.0408, -1.5857]) False 1.0
tensor([ 0.0731,  1.1498, -0.0408, -1.5857]) tensor([ 0.0961,  1.3453, -0.0725, -1.8908]) False 1.0
tensor([ 0.0961,  1.3453, -0.0725, -1.8908]) tensor([ 0.1230,  1.5412, -0.1103, -2.2051]) False 1.0
tensor([ 0.1230,  1.5412, -0.1103, -2.2051]) tensor([ 0.1539,  1.7372, -0.1544, -2.5296]) False 1.0
tensor([ 0.1539,  1.7372, -0.1544, -2.5296]) tensor([ 0.1886,  1.5436, -0.2050, -2.2879]) False 1.0
tensor([ 0.1886,  1.5436, -0.2050, -2.2879]) tensor([0., 0., 0., 0.]) True 1.0
tensor([-0.0148, -0.0

 13%|██████████▌                                                                      | 390/3000 [00:05<00:44, 59.17it/s, control=0, cost=-10]

tensor([-0.4986, -0.6660, -0.2089, -0.6647]) tensor([0., 0., 0., 0.]) True 1.0
tensor([ 0.0185, -0.0306,  0.0283,  0.0335]) tensor([ 0.0179,  0.1641,  0.0290, -0.2502]) False 1.0
tensor([ 0.0179,  0.1641,  0.0290, -0.2502]) tensor([ 0.0212, -0.0315,  0.0240,  0.0515]) False 1.0
tensor([ 0.0212, -0.0315,  0.0240,  0.0515]) tensor([ 0.0206, -0.2269,  0.0250,  0.3517]) False 1.0
tensor([ 0.0206, -0.2269,  0.0250,  0.3517]) tensor([ 0.0160, -0.4224,  0.0321,  0.6521]) False 1.0
tensor([ 0.0160, -0.4224,  0.0321,  0.6521]) tensor([ 0.0076, -0.6179,  0.0451,  0.9547]) False 1.0
tensor([ 0.0076, -0.6179,  0.0451,  0.9547]) tensor([-0.0048, -0.8136,  0.0642,  1.2612]) False 1.0
tensor([-0.0048, -0.8136,  0.0642,  1.2612]) tensor([-0.0211, -0.6194,  0.0894,  0.9893]) False 1.0
tensor([-0.0211, -0.6194,  0.0894,  0.9893]) tensor([-0.0334, -0.4256,  0.1092,  0.7260]) False 1.0
tensor([-0.0334, -0.4256,  0.1092,  0.7260]) tensor([-0.0420, -0.6220,  0.1237,  1.0510]) False 1.0
tensor([-0.0420, -0.6

 13%|██████████▉                                                                      | 404/3000 [00:05<00:40, 63.38it/s, control=0, cost=-10]

tensor([-0.0517, -0.6292,  0.0686,  0.8143]) tensor([-0.0643, -0.4351,  0.0849,  0.5439]) False 1.0
tensor([-0.0643, -0.4351,  0.0849,  0.5439]) tensor([-0.0730, -0.6313,  0.0958,  0.8621]) False 1.0
tensor([-0.0730, -0.6313,  0.0958,  0.8621]) tensor([-0.0856, -0.8275,  0.1131,  1.1833]) False 1.0
tensor([-0.0856, -0.8275,  0.1131,  1.1833]) tensor([-0.1022, -0.6341,  0.1367,  0.9281]) False 1.0
tensor([-0.1022, -0.6341,  0.1367,  0.9281]) tensor([-0.1149, -0.8307,  0.1553,  1.2604]) False 1.0
tensor([-0.1149, -0.8307,  0.1553,  1.2604]) tensor([-0.1315, -1.0275,  0.1805,  1.5975]) False 1.0
tensor([-0.1315, -1.0275,  0.1805,  1.5975]) tensor([0., 0., 0., 0.]) True 1.0
tensor([ 0.0203,  0.3830,  0.0358, -0.5113]) tensor([ 0.0280,  0.5776,  0.0256, -0.7924]) False 1.0
tensor([ 0.0280,  0.5776,  0.0256, -0.7924]) tensor([ 0.0396,  0.3821,  0.0097, -0.4918]) False 1.0
tensor([ 0.0396,  0.3821,  0.0097, -0.4918]) tensor([ 4.7197e-02,  1.8688e-01, -1.0912e-04, -1.9610e-01]) False 1.0
tenso

 14%|███████████                                                                      | 411/3000 [00:06<00:41, 61.64it/s, control=0, cost=-20]

tensor([ 0.1036,  0.5787, -0.1793, -1.0156]) tensor([ 0.1152,  0.7757, -0.1996, -1.3588]) False 1.0
tensor([ 0.1152,  0.7757, -0.1996, -1.3588]) tensor([0., 0., 0., 0.]) True 1.0
tensor([ 0.0407, -0.0231, -0.0062, -0.0453]) tensor([ 0.0402,  0.1721, -0.0071, -0.3399]) False 1.0
tensor([ 0.0402,  0.1721, -0.0071, -0.3399]) tensor([ 0.0437,  0.3673, -0.0139, -0.6348]) False 1.0
tensor([ 0.0437,  0.3673, -0.0139, -0.6348]) tensor([ 0.0510,  0.5626, -0.0266, -0.9319]) False 1.0
tensor([ 0.0510,  0.5626, -0.0266, -0.9319]) tensor([ 0.0623,  0.3679, -0.0453, -0.6477]) False 1.0
tensor([ 0.0623,  0.3679, -0.0453, -0.6477]) tensor([ 0.0696,  0.1734, -0.0582, -0.3696]) False 1.0
tensor([ 0.0696,  0.1734, -0.0582, -0.3696]) tensor([ 0.0731,  0.3693, -0.0656, -0.6800]) False 1.0
tensor([ 0.0731,  0.3693, -0.0656, -0.6800]) tensor([ 0.0805,  0.5653, -0.0792, -0.9926]) False 1.0
tensor([ 0.0805,  0.5653, -0.0792, -0.9926]) tensor([ 0.0918,  0.7614, -0.0991, -1.3091]) False 1.0
tensor([ 0.0918,  0.7

 14%|███████████▌                                                                     | 426/3000 [00:06<00:38, 66.98it/s, control=0, cost=-12]

tensor([-0.0134, -0.6363,  0.0786,  0.9641]) tensor([-0.0261, -0.8323,  0.0978,  1.2804]) False 1.0
tensor([-0.0261, -0.8323,  0.0978,  1.2804]) tensor([-0.0427, -1.0286,  0.1234,  1.6020]) False 1.0
tensor([-0.0427, -1.0286,  0.1234,  1.6020]) tensor([-0.0633, -0.8351,  0.1555,  1.3502]) False 1.0
tensor([-0.0633, -0.8351,  0.1555,  1.3502]) tensor([-0.0800, -1.0318,  0.1825,  1.6873]) False 1.0
tensor([-0.0800, -1.0318,  0.1825,  1.6873]) tensor([0., 0., 0., 0.]) True 1.0
tensor([-0.0348,  0.0320,  0.0453,  0.0636]) tensor([-0.0342,  0.2264,  0.0466, -0.2144]) False 1.0
tensor([-0.0342,  0.2264,  0.0466, -0.2144]) tensor([-0.0296,  0.0307,  0.0423,  0.0926]) False 1.0
tensor([-0.0296,  0.0307,  0.0423,  0.0926]) tensor([-0.0290, -0.1650,  0.0441,  0.3983]) False 1.0
tensor([-0.0290, -0.1650,  0.0441,  0.3983]) tensor([-0.0323,  0.0294,  0.0521,  0.1198]) False 1.0
tensor([-0.0323,  0.0294,  0.0521,  0.1198]) tensor([-0.0317,  0.2238,  0.0545, -0.1560]) False 1.0
tensor([-0.0317,  0.2

 15%|███████████▉                                                                     | 443/3000 [00:06<00:35, 72.91it/s, control=0, cost=-10]

tensor([-0.0152,  0.3803,  0.0240, -0.5382]) tensor([-0.0076,  0.1849,  0.0132, -0.2380]) False 1.0
tensor([-0.0076,  0.1849,  0.0132, -0.2380]) tensor([-0.0039, -0.0104,  0.0085,  0.0588]) False 1.0
tensor([-0.0039, -0.0104,  0.0085,  0.0588]) tensor([-0.0041,  0.1846,  0.0096, -0.2312]) False 1.0
tensor([-0.0041,  0.1846,  0.0096, -0.2312]) tensor([-3.7444e-04,  3.7954e-01,  5.0064e-03, -5.2085e-01]) False 1.0
tensor([-3.7444e-04,  3.7954e-01,  5.0064e-03, -5.2085e-01]) tensor([ 0.0072,  0.1843, -0.0054, -0.2266]) False 1.0
tensor([ 0.0072,  0.1843, -0.0054, -0.2266]) tensor([ 0.0109, -0.0107, -0.0099,  0.0644]) False 1.0
tensor([ 0.0109, -0.0107, -0.0099,  0.0644]) tensor([ 0.0107,  0.1846, -0.0087, -0.2314]) False 1.0
tensor([ 0.0107,  0.1846, -0.0087, -0.2314]) tensor([ 0.0144,  0.3798, -0.0133, -0.5268]) False 1.0
tensor([ 0.0144,  0.3798, -0.0133, -0.5268]) tensor([ 0.0220,  0.5751, -0.0238, -0.8237]) False 1.0
tensor([ 0.0220,  0.5751, -0.0238, -0.8237]) tensor([ 0.0335,  0.770

 15%|████████████▍                                                                    | 459/3000 [00:06<00:36, 70.21it/s, control=0, cost=-10]

tensor([ 0.1095,  0.0069, -0.0467,  0.0845]) tensor([ 0.1097, -0.1875, -0.0451,  0.3621]) False 1.0
tensor([ 0.1097, -0.1875, -0.0451,  0.3621]) tensor([ 0.1059, -0.3820, -0.0378,  0.6402]) False 1.0
tensor([ 0.1059, -0.3820, -0.0378,  0.6402]) tensor([ 0.0983, -0.1864, -0.0250,  0.3359]) False 1.0
tensor([ 0.0983, -0.1864, -0.0250,  0.3359]) tensor([ 0.0945,  0.0091, -0.0183,  0.0354]) False 1.0
tensor([ 0.0945,  0.0091, -0.0183,  0.0354]) tensor([ 0.0947, -0.1857, -0.0176,  0.3222]) False 1.0
tensor([ 0.0947, -0.1857, -0.0176,  0.3222]) tensor([ 0.0910,  0.0096, -0.0111,  0.0241]) False 1.0
tensor([ 0.0910,  0.0096, -0.0111,  0.0241]) tensor([ 0.0912,  0.2049, -0.0107, -0.2721]) False 1.0
tensor([ 0.0912,  0.2049, -0.0107, -0.2721]) tensor([ 0.0953,  0.0099, -0.0161,  0.0172]) False 1.0
tensor([ 0.0953,  0.0099, -0.0161,  0.0172]) tensor([ 0.0955,  0.2053, -0.0158, -0.2805]) False 1.0
tensor([ 0.0955,  0.2053, -0.0158, -0.2805]) tensor([ 0.0996,  0.0104, -0.0214,  0.0071]) False 1.0


 16%|████████████▌                                                                    | 467/3000 [00:06<00:38, 65.40it/s, control=0, cost=-10]

tensor([ 0.0438, -0.0064,  0.0047,  0.0361]) tensor([ 0.0436,  0.1887,  0.0054, -0.2551]) False 1.0
tensor([ 0.0436,  0.1887,  0.0054, -0.2551]) tensor([ 0.0474, -0.0065,  0.0003,  0.0393]) False 1.0
tensor([ 0.0474, -0.0065,  0.0003,  0.0393]) tensor([ 0.0473,  0.1886,  0.0011, -0.2533]) False 1.0
tensor([ 0.0473,  0.1886,  0.0011, -0.2533]) tensor([ 0.0511, -0.0065, -0.0040,  0.0398]) False 1.0
tensor([ 0.0511, -0.0065, -0.0040,  0.0398]) tensor([ 0.0509, -0.2016, -0.0032,  0.3312]) False 1.0
tensor([ 0.0509, -0.2016, -0.0032,  0.3312]) tensor([ 0.0469, -0.0064,  0.0035,  0.0375]) False 1.0
tensor([ 0.0469, -0.0064,  0.0035,  0.0375]) tensor([ 0.0468,  0.1887,  0.0042, -0.2541]) False 1.0
tensor([ 0.0468,  0.1887,  0.0042, -0.2541]) tensor([ 0.0505, -0.0065, -0.0009,  0.0399]) False 1.0
tensor([ 0.0505, -0.0065, -0.0009,  0.0399]) tensor([ 5.0408e-02,  1.8861e-01, -7.8759e-05, -2.5303e-01]) False 1.0
tensor([ 5.0408e-02,  1.8861e-01, -7.8759e-05, -2.5303e-01]) tensor([ 0.0542, -0.006

 16%|█████████████                                                                    | 482/3000 [00:07<00:38, 65.93it/s, control=0, cost=-20]

tensor([-0.0285,  0.3426, -0.0219, -0.5722]) tensor([-0.0216,  0.1478, -0.0334, -0.2865]) False 1.0
tensor([-0.0216,  0.1478, -0.0334, -0.2865]) tensor([-0.0187, -0.0468, -0.0391, -0.0045]) False 1.0
tensor([-0.0187, -0.0468, -0.0391, -0.0045]) tensor([-0.0196, -0.2413, -0.0392,  0.2756]) False 1.0
tensor([-0.0196, -0.2413, -0.0392,  0.2756]) tensor([-0.0244, -0.4359, -0.0337,  0.5556]) False 1.0
tensor([-0.0244, -0.4359, -0.0337,  0.5556]) tensor([-0.0331, -0.6305, -0.0226,  0.8375]) False 1.0
tensor([-0.0331, -0.6305, -0.0226,  0.8375]) tensor([-0.0458, -0.8253, -0.0058,  1.1230]) False 1.0
tensor([-0.0458, -0.8253, -0.0058,  1.1230]) tensor([-0.0623, -1.0204,  0.0167,  1.4139]) False 1.0
tensor([-0.0623, -1.0204,  0.0167,  1.4139]) tensor([-0.0827, -1.2157,  0.0449,  1.7117]) False 1.0
tensor([-0.0827, -1.2157,  0.0449,  1.7117]) tensor([-0.1070, -1.4113,  0.0792,  2.0181]) False 1.0
tensor([-0.1070, -1.4113,  0.0792,  2.0181]) tensor([-0.1352, -1.6072,  0.1195,  2.3342]) False 1.0


 17%|█████████████▍                                                                   | 497/3000 [00:07<00:37, 66.26it/s, control=0, cost=-10]

tensor([-0.0394, -0.0327,  0.0389,  0.0534]) tensor([-0.0400, -0.2283,  0.0399,  0.3581]) False 1.0
tensor([-0.0400, -0.2283,  0.0399,  0.3581]) tensor([-0.0446, -0.4240,  0.0471,  0.6631]) False 1.0
tensor([-0.0446, -0.4240,  0.0471,  0.6631]) tensor([-0.0531, -0.2296,  0.0603,  0.3856]) False 1.0
tensor([-0.0531, -0.2296,  0.0603,  0.3856]) tensor([-0.0577, -0.0353,  0.0681,  0.1125]) False 1.0
tensor([-0.0577, -0.0353,  0.0681,  0.1125]) tensor([-0.0584,  0.1587,  0.0703, -0.1580]) False 1.0
tensor([-0.0584,  0.1587,  0.0703, -0.1580]) tensor([-0.0552,  0.3528,  0.0671, -0.4277]) False 1.0
tensor([-0.0552,  0.3528,  0.0671, -0.4277]) tensor([-0.0482,  0.5469,  0.0586, -0.6984]) False 1.0
tensor([-0.0482,  0.5469,  0.0586, -0.6984]) tensor([-0.0372,  0.7412,  0.0446, -0.9721]) False 1.0
tensor([-0.0372,  0.7412,  0.0446, -0.9721]) tensor([-0.0224,  0.9357,  0.0252, -1.2505]) False 1.0
tensor([-0.0224,  0.9357,  0.0252, -1.2505]) tensor([-3.6855e-03,  1.1304e+00,  1.6923e-04, -1.5352e

 17%|█████████████▉                                                                   | 514/3000 [00:07<00:35, 70.92it/s, control=0, cost=-10]

tensor([ 0.0715,  0.4327, -0.1341, -0.8636]) tensor([ 0.0801,  0.6293, -0.1514, -1.1953]) False 1.0
tensor([ 0.0801,  0.6293, -0.1514, -1.1953]) tensor([ 0.0927,  0.4365, -0.1753, -0.9536]) False 1.0
tensor([ 0.0927,  0.4365, -0.1753, -0.9536]) tensor([ 0.1014,  0.2441, -0.1944, -0.7207]) False 1.0
tensor([ 0.1014,  0.2441, -0.1944, -0.7207]) tensor([ 0.1063,  0.4413, -0.2088, -1.0677]) False 1.0
tensor([ 0.1063,  0.4413, -0.2088, -1.0677]) tensor([0., 0., 0., 0.]) True 1.0
tensor([-0.0295, -0.4205, -0.0333,  0.5206]) tensor([-0.0379, -0.6152, -0.0229,  0.8026]) False 1.0
tensor([-0.0379, -0.6152, -0.0229,  0.8026]) tensor([-0.0502, -0.8100, -0.0069,  1.0880]) False 1.0
tensor([-0.0502, -0.8100, -0.0069,  1.0880]) tensor([-0.0664, -1.0050,  0.0149,  1.3785]) False 1.0
tensor([-0.0664, -1.0050,  0.0149,  1.3785]) tensor([-0.0865, -1.2003,  0.0425,  1.6758]) False 1.0
tensor([-0.0865, -1.2003,  0.0425,  1.6758]) tensor([-0.1105, -1.3959,  0.0760,  1.9814]) False 1.0
tensor([-0.1105, -1.3

 17%|██████████████                                                                   | 522/3000 [00:07<00:37, 66.05it/s, control=0, cost=-20]

tensor([ 0.0738, -0.1617, -0.0451,  0.1477]) tensor([ 0.0706,  0.0341, -0.0421, -0.1589]) False 1.0
tensor([ 0.0706,  0.0341, -0.0421, -0.1589]) tensor([ 0.0713, -0.1604, -0.0453,  0.1202]) False 1.0
tensor([ 0.0713, -0.1604, -0.0453,  0.1202]) tensor([ 0.0681,  0.0353, -0.0429, -0.1864]) False 1.0
tensor([ 0.0681,  0.0353, -0.0429, -0.1864]) tensor([ 0.0688, -0.1592, -0.0466,  0.0924]) False 1.0
tensor([ 0.0688, -0.1592, -0.0466,  0.0924]) tensor([ 0.0656,  0.0366, -0.0448, -0.2146]) False 1.0
tensor([ 0.0656,  0.0366, -0.0448, -0.2146]) tensor([ 0.0663, -0.1579, -0.0490,  0.0637]) False 1.0
tensor([ 0.0663, -0.1579, -0.0490,  0.0637]) tensor([ 0.0632, -0.3522, -0.0478,  0.3405]) False 1.0
tensor([ 0.0632, -0.3522, -0.0478,  0.3405]) tensor([ 0.0561, -0.1565, -0.0410,  0.0331]) False 1.0
tensor([ 0.0561, -0.1565, -0.0410,  0.0331]) tensor([ 0.0530,  0.0392, -0.0403, -0.2722]) False 1.0
tensor([ 0.0530,  0.0392, -0.0403, -0.2722]) tensor([ 0.0538,  0.2349, -0.0457, -0.5773]) False 1.0


 18%|██████████████▎                                                                  | 529/3000 [00:07<00:40, 60.86it/s, control=0, cost=-20]

tensor([-0.0334, -0.3420, -0.0118,  0.4436]) tensor([-0.0402, -0.1467, -0.0029,  0.1472]) False 1.0
tensor([-0.0402, -0.1467, -0.0029,  0.1472]) tensor([-4.3168e-02,  4.8441e-02,  5.6248e-05, -1.4638e-01]) False 1.0
tensor([-4.3168e-02,  4.8441e-02,  5.6248e-05, -1.4638e-01]) tensor([-0.0422,  0.2436, -0.0029, -0.4390]) False 1.0
tensor([-0.0422,  0.2436, -0.0029, -0.4390]) tensor([-0.0373,  0.4387, -0.0117, -0.7326]) False 1.0
tensor([-0.0373,  0.4387, -0.0117, -0.7326]) tensor([-0.0286,  0.2438, -0.0263, -0.4436]) False 1.0
tensor([-0.0286,  0.2438, -0.0263, -0.4436]) tensor([-0.0237,  0.0490, -0.0352, -0.1594]) False 1.0
tensor([-0.0237,  0.0490, -0.0352, -0.1594]) tensor([-0.0227,  0.2446, -0.0384, -0.4629]) False 1.0
tensor([-0.0227,  0.2446, -0.0384, -0.4629]) tensor([-0.0178,  0.4403, -0.0476, -0.7675]) False 1.0
tensor([-0.0178,  0.4403, -0.0476, -0.7675]) tensor([-0.0090,  0.6360, -0.0630, -1.0747]) False 1.0
tensor([-0.0090,  0.6360, -0.0630, -1.0747]) tensor([ 0.0037,  0.831

 18%|██████████████▋                                                                  | 542/3000 [00:08<00:49, 49.88it/s, control=0, cost=-20]

tensor([ 0.1366,  0.6085, -0.0199, -0.3518]) tensor([ 0.1487,  0.8039, -0.0269, -0.6507]) False 1.0
tensor([ 0.1487,  0.8039, -0.0269, -0.6507]) tensor([ 0.1648,  0.6092, -0.0399, -0.3666]) False 1.0
tensor([ 0.1648,  0.6092, -0.0399, -0.3666]) tensor([ 0.1770,  0.8048, -0.0473, -0.6716]) False 1.0
tensor([ 0.1770,  0.8048, -0.0473, -0.6716]) tensor([ 0.1931,  0.6104, -0.0607, -0.3942]) False 1.0
tensor([ 0.1931,  0.6104, -0.0607, -0.3942]) tensor([ 0.2053,  0.4162, -0.0686, -0.1213]) False 1.0
tensor([ 0.2053,  0.4162, -0.0686, -0.1213]) tensor([ 0.2136,  0.6122, -0.0710, -0.4348]) False 1.0
tensor([ 0.2136,  0.6122, -0.0710, -0.4348]) tensor([ 0.2259,  0.4182, -0.0797, -0.1653]) False 1.0
tensor([ 0.2259,  0.4182, -0.0797, -0.1653]) tensor([ 0.2342,  0.6143, -0.0830, -0.4820]) False 1.0
tensor([ 0.2342,  0.6143, -0.0830, -0.4820]) tensor([ 0.2465,  0.4205, -0.0926, -0.2166]) False 1.0
tensor([ 0.2465,  0.4205, -0.0926, -0.2166]) tensor([ 0.2549,  0.6168, -0.0970, -0.5370]) False 1.0


 18%|██████████████▊                                                                  | 548/3000 [00:08<00:56, 43.12it/s, control=0, cost=-75]

tensor([-0.0267,  0.1412,  0.0596, -0.0033]) tensor([-0.0239, -0.0547,  0.0596,  0.3076]) False 1.0
tensor([-0.0239, -0.0547,  0.0596,  0.3076]) tensor([-0.0250,  0.1395,  0.0657,  0.0343]) False 1.0
tensor([-0.0250,  0.1395,  0.0657,  0.0343]) tensor([-0.0222, -0.0565,  0.0664,  0.3470]) False 1.0
tensor([-0.0222, -0.0565,  0.0664,  0.3470]) tensor([-0.0233,  0.1376,  0.0734,  0.0760]) False 1.0
tensor([-0.0233,  0.1376,  0.0734,  0.0760]) tensor([-0.0206, -0.0585,  0.0749,  0.3909]) False 1.0
tensor([-0.0206, -0.0585,  0.0749,  0.3909]) tensor([-0.0218, -0.2546,  0.0827,  0.7062]) False 1.0
tensor([-0.0218, -0.2546,  0.0827,  0.7062]) tensor([-0.0268, -0.0607,  0.0968,  0.4406]) False 1.0
tensor([-0.0268, -0.0607,  0.0968,  0.4406]) tensor([-0.0281, -0.2570,  0.1056,  0.7622]) False 1.0
tensor([-0.0281, -0.2570,  0.1056,  0.7622]) tensor([-0.0332, -0.0635,  0.1209,  0.5045]) False 1.0
tensor([-0.0332, -0.0635,  0.1209,  0.5045]) tensor([-0.0345,  0.1297,  0.1310,  0.2523]) False 1.0


 19%|███████████████▏                                                                 | 561/3000 [00:08<00:48, 50.42it/s, control=0, cost=-10]

tensor([-0.0709, -0.5828,  0.0901,  0.8943]) tensor([-0.0825, -0.7790,  0.1080,  1.2139]) False 1.0
tensor([-0.0825, -0.7790,  0.1080,  1.2139]) tensor([-0.0981, -0.9753,  0.1323,  1.5383]) False 1.0
tensor([-0.0981, -0.9753,  0.1323,  1.5383]) tensor([-0.1176, -1.1718,  0.1631,  1.8692]) False 1.0
tensor([-0.1176, -1.1718,  0.1631,  1.8692]) tensor([-0.1410, -1.3682,  0.2004,  2.2078]) False 1.0
tensor([-0.1410, -1.3682,  0.2004,  2.2078]) tensor([0., 0., 0., 0.]) True 1.0
tensor([-0.0302, -0.4193,  0.0205,  0.5775]) tensor([-0.0386, -0.6147,  0.0321,  0.8766]) False 1.0
tensor([-0.0386, -0.6147,  0.0321,  0.8766]) tensor([-0.0509, -0.4200,  0.0496,  0.5942]) False 1.0
tensor([-0.0509, -0.4200,  0.0496,  0.5942]) tensor([-0.0593, -0.2256,  0.0615,  0.3175]) False 1.0
tensor([-0.0593, -0.2256,  0.0615,  0.3175]) tensor([-0.0638, -0.4216,  0.0678,  0.6289]) False 1.0
tensor([-0.0638, -0.4216,  0.0678,  0.6289]) tensor([-0.0722, -0.6176,  0.0804,  0.9422]) False 1.0
tensor([-0.0722, -0.6

 19%|███████████████▍                                                                 | 573/3000 [00:08<00:45, 53.55it/s, control=0, cost=-50]

tensor([-0.1010,  0.1766,  0.1897,  0.2156]) False 1.0
tensor([-0.1010,  0.1766,  0.1897,  0.2156]) tensor([-0.0975,  0.3686,  0.1940, -0.0118]) False 1.0
tensor([-0.0975,  0.3686,  0.1940, -0.0118]) tensor([-0.0901,  0.1713,  0.1938,  0.3353]) False 1.0
tensor([-0.0901,  0.1713,  0.1938,  0.3353]) tensor([-0.0867, -0.0260,  0.2005,  0.6823]) False 1.0
tensor([-0.0867, -0.0260,  0.2005,  0.6823]) tensor([0., 0., 0., 0.]) True 1.0
tensor([-0.0181, -0.3620, -0.0197,  0.5832]) tensor([-0.0253, -0.5569, -0.0081,  0.8696]) False 1.0
tensor([-0.0253, -0.5569, -0.0081,  0.8696]) tensor([-0.0365, -0.7519,  0.0093,  1.1597]) False 1.0
tensor([-0.0365, -0.7519,  0.0093,  1.1597]) tensor([-0.0515, -0.5569,  0.0325,  0.8699]) False 1.0
tensor([-0.0515, -0.5569,  0.0325,  0.8699]) tensor([-0.0626, -0.3622,  0.0499,  0.5877]) False 1.0
tensor([-0.0626, -0.3622,  0.0499,  0.5877]) tensor([-0.0699, -0.5580,  0.0617,  0.8956]) False 1.0
tensor([-0.0699, -0.5580,  0.0617,  0.8956]) tensor([-0.0811, -0.7

 20%|███████████████▊                                                                 | 585/3000 [00:08<00:44, 54.25it/s, control=0, cost=-10]

tensor([ 0.4680,  1.7158, -0.0237, -1.1347]) tensor([ 0.5023,  1.9112, -0.0464, -1.4347]) False 1.0
tensor([ 0.5023,  1.9112, -0.0464, -1.4347]) tensor([ 0.5405,  1.7167, -0.0751, -1.1568]) False 1.0
tensor([ 0.5405,  1.7167, -0.0751, -1.1568]) tensor([ 0.5748,  1.9127, -0.0982, -1.4721]) False 1.0
tensor([ 0.5748,  1.9127, -0.0982, -1.4721]) tensor([ 0.6131,  1.7189, -0.1277, -1.2116]) False 1.0
tensor([ 0.6131,  1.7189, -0.1277, -1.2116]) tensor([ 0.6475,  1.9154, -0.1519, -1.5414]) False 1.0
tensor([ 0.6475,  1.9154, -0.1519, -1.5414]) tensor([ 0.6858,  1.7224, -0.1827, -1.2998]) False 1.0
tensor([ 0.6858,  1.7224, -0.1827, -1.2998]) tensor([ 0.7202,  1.5300, -0.2087, -1.0694]) False 1.0
tensor([ 0.7202,  1.5300, -0.2087, -1.0694]) tensor([0., 0., 0., 0.]) True 1.0
tensor([-0.0126,  0.3945, -0.0191, -0.5734]) tensor([-0.0047,  0.1997, -0.0305, -0.2868]) False 1.0
tensor([-0.0047,  0.1997, -0.0305, -0.2868]) tensor([-0.0007,  0.3952, -0.0363, -0.5890]) False 1.0
tensor([-0.0007,  0.3

 20%|████████████████▏                                                                | 598/3000 [00:09<00:42, 57.07it/s, control=0, cost=-10]

tensor([-0.1014, -0.8248,  0.0687,  1.1269]) tensor([-0.1179, -1.0208,  0.0912,  1.4403]) False 1.0
tensor([-0.1179, -1.0208,  0.0912,  1.4403]) tensor([-0.1383, -1.2169,  0.1201,  1.7600]) False 1.0
tensor([-0.1383, -1.2169,  0.1201,  1.7600]) tensor([-0.1626, -1.4132,  0.1553,  2.0875]) False 1.0
tensor([-0.1626, -1.4132,  0.1553,  2.0875]) tensor([-0.1909, -1.6095,  0.1970,  2.4239]) False 1.0
tensor([-0.1909, -1.6095,  0.1970,  2.4239]) tensor([0., 0., 0., 0.]) True 1.0
tensor([ 0.0102,  0.3416, -0.0139, -0.5975]) tensor([ 0.0170,  0.5369, -0.0259, -0.8945]) False 1.0
tensor([ 0.0170,  0.5369, -0.0259, -0.8945]) tensor([ 0.0278,  0.3422, -0.0437, -0.6100]) False 1.0
tensor([ 0.0278,  0.3422, -0.0437, -0.6100]) tensor([ 0.0346,  0.5379, -0.0559, -0.9162]) False 1.0
tensor([ 0.0346,  0.5379, -0.0559, -0.9162]) tensor([ 0.0454,  0.7337, -0.0743, -1.2259]) False 1.0
tensor([ 0.0454,  0.7337, -0.0743, -1.2259]) tensor([ 0.0600,  0.5396, -0.0988, -0.9574]) False 1.0
tensor([ 0.0600,  0.5

 20%|████████████████▎                                                                | 605/3000 [00:09<00:41, 57.46it/s, control=0, cost=-10]

tensor([-0.0140, -0.0077,  0.0557,  0.0453]) tensor([-0.0141,  0.1866,  0.0566, -0.2293]) False 1.0
tensor([-0.0141,  0.1866,  0.0566, -0.2293]) tensor([-0.0104, -0.0093,  0.0520,  0.0807]) False 1.0
tensor([-0.0104, -0.0093,  0.0520,  0.0807]) tensor([-0.0106, -0.2051,  0.0536,  0.3893]) False 1.0
tensor([-0.0106, -0.2051,  0.0536,  0.3893]) tensor([-0.0147, -0.4009,  0.0614,  0.6984]) False 1.0
tensor([-0.0147, -0.4009,  0.0614,  0.6984]) tensor([-0.0227, -0.5968,  0.0754,  1.0097]) False 1.0
tensor([-0.0227, -0.5968,  0.0754,  1.0097]) tensor([-0.0347, -0.7929,  0.0956,  1.3251]) False 1.0
tensor([-0.0347, -0.7929,  0.0956,  1.3251]) tensor([-0.0505, -0.5991,  0.1221,  1.0638]) False 1.0
tensor([-0.0505, -0.5991,  0.1221,  1.0638]) tensor([-0.0625, -0.7956,  0.1433,  1.3922]) False 1.0
tensor([-0.0625, -0.7956,  0.1433,  1.3922]) tensor([-0.0784, -0.6025,  0.1712,  1.1475]) False 1.0
tensor([-0.0784, -0.6025,  0.1712,  1.1475]) tensor([-0.0905, -0.7994,  0.1941,  1.4886]) False 1.0


 21%|████████████████▋                                                                | 619/3000 [00:09<00:39, 59.95it/s, control=0, cost=-20]

tensor([ 0.0292,  0.2226, -0.0549, -0.2771]) tensor([ 0.0337,  0.4185, -0.0604, -0.5866]) False 1.0
tensor([ 0.0337,  0.4185, -0.0604, -0.5866]) tensor([ 0.0421,  0.6144, -0.0722, -0.8977]) False 1.0
tensor([ 0.0421,  0.6144, -0.0722, -0.8977]) tensor([ 0.0544,  0.8104, -0.0901, -1.2122]) False 1.0
tensor([ 0.0544,  0.8104, -0.0901, -1.2122]) tensor([ 0.0706,  0.6166, -0.1144, -0.9490]) False 1.0
tensor([ 0.0706,  0.6166, -0.1144, -0.9490]) tensor([ 0.0829,  0.8130, -0.1334, -1.2754]) False 1.0
tensor([ 0.0829,  0.8130, -0.1334, -1.2754]) tensor([ 0.0992,  0.6198, -0.1589, -1.0272]) False 1.0
tensor([ 0.0992,  0.6198, -0.1589, -1.0272]) tensor([ 0.1116,  0.8167, -0.1794, -1.3653]) False 1.0
tensor([ 0.1116,  0.8167, -0.1794, -1.3653]) tensor([ 0.1279,  1.0135, -0.2067, -1.7083]) False 1.0
tensor([ 0.1279,  1.0135, -0.2067, -1.7083]) tensor([0., 0., 0., 0.]) True 1.0
tensor([-0.0066, -0.0109, -0.0442, -0.0146]) tensor([-0.0069, -0.2054, -0.0445,  0.2638]) False 1.0
tensor([-0.0069, -0.2

 21%|█████████████████                                                                | 634/3000 [00:09<00:38, 61.46it/s, control=0, cost=-30]

tensor([ 0.0430,  0.0454, -0.0158, -0.0565]) tensor([ 0.0439,  0.2408, -0.0169, -0.3541]) False 1.0
tensor([ 0.0439,  0.2408, -0.0169, -0.3541]) tensor([ 0.0488,  0.4361, -0.0240, -0.6520]) False 1.0
tensor([ 0.0488,  0.4361, -0.0240, -0.6520]) tensor([ 0.0575,  0.6316, -0.0370, -0.9522]) False 1.0
tensor([ 0.0575,  0.6316, -0.0370, -0.9522]) tensor([ 0.0701,  0.4370, -0.0561, -0.6714]) False 1.0
tensor([ 0.0701,  0.4370, -0.0561, -0.6714]) tensor([ 0.0789,  0.2427, -0.0695, -0.3969]) False 1.0
tensor([ 0.0789,  0.2427, -0.0695, -0.3969]) tensor([ 0.0837,  0.0486, -0.0775, -0.1269]) False 1.0
tensor([ 0.0837,  0.0486, -0.0775, -0.1269]) tensor([ 0.0847,  0.2447, -0.0800, -0.4430]) False 1.0
tensor([ 0.0847,  0.2447, -0.0800, -0.4430]) tensor([ 0.0896,  0.0508, -0.0888, -0.1765]) False 1.0
tensor([ 0.0896,  0.0508, -0.0888, -0.1765]) tensor([ 0.0906, -0.1429, -0.0924,  0.0869]) False 1.0
tensor([ 0.0906, -0.1429, -0.0924,  0.0869]) tensor([ 0.0877,  0.0534, -0.0906, -0.2335]) False 1.0


 21%|█████████████████▎                                                               | 641/3000 [00:09<00:40, 58.71it/s, control=0, cost=-20]

tensor([0.0931, 0.3901, 0.0197, 0.1638]) tensor([ 0.1009,  0.5849,  0.0230, -0.1226]) False 1.0
tensor([ 0.1009,  0.5849,  0.0230, -0.1226]) tensor([ 0.1126,  0.7797,  0.0206, -0.4079]) False 1.0
tensor([ 0.1126,  0.7797,  0.0206, -0.4079]) tensor([ 0.1282,  0.5843,  0.0124, -0.1088]) False 1.0
tensor([ 0.1282,  0.5843,  0.0124, -0.1088]) tensor([0.1399, 0.3890, 0.0102, 0.1878]) False 1.0
tensor([0.1399, 0.3890, 0.0102, 0.1878]) tensor([ 0.1477,  0.5840,  0.0140, -0.1017]) False 1.0
tensor([ 0.1477,  0.5840,  0.0140, -0.1017]) tensor([ 0.1594,  0.7789,  0.0120, -0.3899]) False 1.0
tensor([ 0.1594,  0.7789,  0.0120, -0.3899]) tensor([ 0.1750,  0.9738,  0.0042, -0.6788]) False 1.0
tensor([ 0.1750,  0.9738,  0.0042, -0.6788]) tensor([ 0.1944,  1.1689, -0.0094, -0.9702]) False 1.0
tensor([ 0.1944,  1.1689, -0.0094, -0.9702]) tensor([ 0.2178,  1.3641, -0.0288, -1.2658]) False 1.0
tensor([ 0.2178,  1.3641, -0.0288, -1.2658]) tensor([ 0.2451,  1.5596, -0.0541, -1.5674]) False 1.0
tensor([ 0.2

 22%|█████████████████▋                                                               | 655/3000 [00:10<00:39, 60.08it/s, control=0, cost=-30]

tensor([ 0.1616, -0.1862, -0.1981, -0.0854]) tensor([ 0.1579,  0.0111, -0.1998, -0.4335]) False 1.0
tensor([ 0.1579,  0.0111, -0.1998, -0.4335]) tensor([ 0.1581,  0.2085, -0.2084, -0.7819]) False 1.0
tensor([ 0.1581,  0.2085, -0.2084, -0.7819]) tensor([0., 0., 0., 0.]) True 1.0
tensor([ 0.0068,  0.3613,  0.0285, -0.5810]) tensor([ 0.0140,  0.5560,  0.0168, -0.8646]) False 1.0
tensor([ 0.0140,  0.5560,  0.0168, -0.8646]) tensor([ 2.5137e-02,  7.5090e-01, -4.4714e-04, -1.1519e+00]) False 1.0
tensor([ 2.5137e-02,  7.5090e-01, -4.4714e-04, -1.1519e+00]) tensor([ 0.0402,  0.9460, -0.0235, -1.4448]) False 1.0
tensor([ 0.0402,  0.9460, -0.0235, -1.4448]) tensor([ 0.0591,  1.1414, -0.0524, -1.7447]) False 1.0
tensor([ 0.0591,  1.1414, -0.0524, -1.7447]) tensor([ 0.0819,  1.3371, -0.0873, -2.0532]) False 1.0
tensor([ 0.0819,  1.3371, -0.0873, -2.0532]) tensor([ 0.1086,  1.5330, -0.1283, -2.3716]) False 1.0
tensor([ 0.1086,  1.5330, -0.1283, -2.3716]) tensor([ 0.1393,  1.3392, -0.1758, -2.1209])

 22%|██████████████████                                                               | 669/3000 [00:10<00:38, 60.12it/s, control=0, cost=-19]

tensor([ 0.1470,  1.0797,  0.1712, -0.2837]) False 1.0
tensor([ 0.1470,  1.0797,  0.1712, -0.2837]) tensor([0.1686, 0.8826, 0.1655, 0.0577]) False 1.0
tensor([0.1686, 0.8826, 0.1655, 0.0577]) tensor([0.1863, 0.6855, 0.1666, 0.3977]) False 1.0
tensor([0.1863, 0.6855, 0.1666, 0.3977]) tensor([0.2000, 0.4885, 0.1746, 0.7379]) False 1.0
tensor([0.2000, 0.4885, 0.1746, 0.7379]) tensor([0.2097, 0.6808, 0.1893, 0.5049]) False 1.0
tensor([0.2097, 0.6808, 0.1893, 0.5049]) tensor([0.2234, 0.8728, 0.1994, 0.2773]) False 1.0
tensor([0.2234, 0.8728, 0.1994, 0.2773]) tensor([0.2408, 0.6755, 0.2050, 0.6257]) False 1.0
tensor([0.2408, 0.6755, 0.2050, 0.6257]) tensor([0., 0., 0., 0.]) True 1.0
tensor([-0.0139,  0.0127,  0.0239,  0.0085]) tensor([-0.0136,  0.2074,  0.0240, -0.2765]) False 1.0
tensor([-0.0136,  0.2074,  0.0240, -0.2765]) tensor([-0.0095,  0.4022,  0.0185, -0.5615]) False 1.0
tensor([-0.0095,  0.4022,  0.0185, -0.5615]) tensor([-0.0015,  0.2068,  0.0073, -0.2631]) False 1.0
tensor([-0.001

 23%|██████████████████▍                                                              | 683/3000 [00:10<00:39, 58.21it/s, control=0, cost=-10]

tensor([ 0.0107, -0.0170, -0.0106,  0.0176]) tensor([ 0.0103,  0.1782, -0.0103, -0.2785]) False 1.0
tensor([ 0.0103,  0.1782, -0.0103, -0.2785]) tensor([ 0.0139,  0.3735, -0.0158, -0.5744]) False 1.0
tensor([ 0.0139,  0.3735, -0.0158, -0.5744]) tensor([ 0.0214,  0.5689, -0.0273, -0.8720]) False 1.0
tensor([ 0.0214,  0.5689, -0.0273, -0.8720]) tensor([ 0.0327,  0.3741, -0.0448, -0.5880]) False 1.0
tensor([ 0.0327,  0.3741, -0.0448, -0.5880]) tensor([ 0.0402,  0.5698, -0.0565, -0.8945]) False 1.0
tensor([ 0.0402,  0.5698, -0.0565, -0.8945]) tensor([ 0.0516,  0.7657, -0.0744, -1.2044]) False 1.0
tensor([ 0.0516,  0.7657, -0.0744, -1.2044]) tensor([ 0.0669,  0.9617, -0.0985, -1.5194]) False 1.0
tensor([ 0.0669,  0.9617, -0.0985, -1.5194]) tensor([ 0.0862,  1.1578, -0.1289, -1.8411]) False 1.0
tensor([ 0.0862,  1.1578, -0.1289, -1.8411]) tensor([ 0.1093,  1.3541, -0.1657, -2.1709]) False 1.0
tensor([ 0.1093,  1.3541, -0.1657, -2.1709]) tensor([ 0.1364,  1.1610, -0.2091, -1.9336]) False 1.0


 23%|██████████████████▌                                                              | 689/3000 [00:10<00:40, 57.31it/s, control=0, cost=-10]

tensor([ 0.0115,  0.5558,  0.0201, -0.7906]) tensor([ 0.0227,  0.3604,  0.0043, -0.4917]) False 1.0
tensor([ 0.0227,  0.3604,  0.0043, -0.4917]) tensor([ 0.0299,  0.1652, -0.0055, -0.1976]) False 1.0
tensor([ 0.0299,  0.1652, -0.0055, -0.1976]) tensor([ 0.0332, -0.0298, -0.0095,  0.0933]) False 1.0
tensor([ 0.0332, -0.0298, -0.0095,  0.0933]) tensor([ 0.0326,  0.1654, -0.0076, -0.2024]) False 1.0
tensor([ 0.0326,  0.1654, -0.0076, -0.2024]) tensor([ 0.0359,  0.3606, -0.0117, -0.4974]) False 1.0
tensor([ 0.0359,  0.3606, -0.0117, -0.4974]) tensor([ 0.0431,  0.5559, -0.0216, -0.7938]) False 1.0
tensor([ 0.0431,  0.5559, -0.0216, -0.7938]) tensor([ 0.0542,  0.3611, -0.0375, -0.5080]) False 1.0
tensor([ 0.0542,  0.3611, -0.0375, -0.5080]) tensor([ 0.0614,  0.5567, -0.0477, -0.8122]) False 1.0
tensor([ 0.0614,  0.5567, -0.0477, -0.8122]) tensor([ 0.0726,  0.3623, -0.0639, -0.5349]) False 1.0
tensor([ 0.0726,  0.3623, -0.0639, -0.5349]) tensor([ 0.0798,  0.1681, -0.0746, -0.2630]) False 1.0


 23%|██████████████████▉                                                              | 702/3000 [00:10<00:40, 57.17it/s, control=0, cost=-10]

tensor([ 0.0440,  0.1756, -0.0180, -0.0143]) tensor([ 0.0475,  0.3709, -0.0183, -0.3126]) False 1.0
tensor([ 0.0475,  0.3709, -0.0183, -0.3126]) tensor([ 0.0549,  0.5663, -0.0245, -0.6109]) False 1.0
tensor([ 0.0549,  0.5663, -0.0245, -0.6109]) tensor([ 0.0663,  0.3715, -0.0367, -0.3261]) False 1.0
tensor([ 0.0663,  0.3715, -0.0367, -0.3261]) tensor([ 0.0737,  0.1770, -0.0433, -0.0452]) False 1.0
tensor([ 0.0737,  0.1770, -0.0433, -0.0452]) tensor([ 0.0772,  0.3727, -0.0442, -0.3512]) False 1.0
tensor([ 0.0772,  0.3727, -0.0442, -0.3512]) tensor([ 0.0847,  0.5684, -0.0512, -0.6575]) False 1.0
tensor([ 0.0847,  0.5684, -0.0512, -0.6575]) tensor([ 0.0960,  0.3740, -0.0643, -0.3814]) False 1.0
tensor([ 0.0960,  0.3740, -0.0643, -0.3814]) tensor([ 0.1035,  0.5700, -0.0720, -0.6936]) False 1.0
tensor([ 0.1035,  0.5700, -0.0720, -0.6936]) tensor([ 0.1149,  0.3759, -0.0858, -0.4244]) False 1.0
tensor([ 0.1149,  0.3759, -0.0858, -0.4244]) tensor([ 0.1224,  0.5722, -0.0943, -0.7429]) False 1.0


 24%|███████████████████▎                                                             | 716/3000 [00:11<00:39, 57.90it/s, control=0, cost=-10]

tensor([ 0.0560,  0.8400, -0.1814, -1.4574]) tensor([0., 0., 0., 0.]) True 1.0
tensor([ 0.0046,  0.3746, -0.0351, -0.6038]) tensor([ 0.0120,  0.1800, -0.0472, -0.3224]) False 1.0
tensor([ 0.0120,  0.1800, -0.0472, -0.3224]) tensor([ 0.0156,  0.3758, -0.0536, -0.6296]) False 1.0
tensor([ 0.0156,  0.3758, -0.0536, -0.6296]) tensor([ 0.0232,  0.5716, -0.0662, -0.9387]) False 1.0
tensor([ 0.0232,  0.5716, -0.0662, -0.9387]) tensor([ 0.0346,  0.7675, -0.0850, -1.2514]) False 1.0
tensor([ 0.0346,  0.7675, -0.0850, -1.2514]) tensor([ 0.0499,  0.5736, -0.1100, -0.9865]) False 1.0
tensor([ 0.0499,  0.5736, -0.1100, -0.9865]) tensor([ 0.0614,  0.7700, -0.1298, -1.3116]) False 1.0
tensor([ 0.0614,  0.7700, -0.1298, -1.3116]) tensor([ 0.0768,  0.9665, -0.1560, -1.6419]) False 1.0
tensor([ 0.0768,  0.9665, -0.1560, -1.6419]) tensor([ 0.0961,  1.1631, -0.1888, -1.9789]) False 1.0
tensor([ 0.0961,  1.1631, -0.1888, -1.9789]) tensor([0., 0., 0., 0.]) True 1.0
tensor([-0.0194,  0.0118,  0.0525,  0.0674

 24%|███████████████████▍                                                             | 722/3000 [00:11<00:46, 48.48it/s, control=0, cost=-10]

tensor([ 0.1652,  1.1324, -0.1239, -1.6820]) tensor([ 0.1878,  1.3287, -0.1575, -2.0106]) False 1.0
tensor([ 0.1878,  1.3287, -0.1575, -2.0106]) tensor([ 0.2144,  1.5251, -0.1978, -2.3476]) False 1.0
tensor([ 0.2144,  1.5251, -0.1978, -2.3476]) tensor([0., 0., 0., 0.]) True 1.0
tensor([-0.0090, -0.4086,  0.0184,  0.6327]) tensor([-0.0171, -0.6040,  0.0310,  0.9311]) False 1.0
tensor([-0.0171, -0.6040,  0.0310,  0.9311]) tensor([-0.0292, -0.4093,  0.0496,  0.6484]) False 1.0
tensor([-0.0292, -0.4093,  0.0496,  0.6484]) tensor([-0.0374, -0.6051,  0.0626,  0.9563]) False 1.0
tensor([-0.0374, -0.6051,  0.0626,  0.9563]) tensor([-0.0495, -0.4109,  0.0817,  0.6839]) False 1.0
tensor([-0.0495, -0.4109,  0.0817,  0.6839]) tensor([-0.0577, -0.2170,  0.0954,  0.4180]) False 1.0
tensor([-0.0577, -0.2170,  0.0954,  0.4180]) tensor([-0.0620, -0.0233,  0.1038,  0.1569]) False 1.0
tensor([-0.0620, -0.0233,  0.1038,  0.1569]) tensor([-0.0625,  0.1702,  0.1069, -0.1014]) False 1.0
tensor([-0.0625,  0.1

 24%|███████████████████▋                                                             | 728/3000 [00:11<00:49, 45.48it/s, control=0, cost=-60]

tensor([-0.0915, -0.6185,  0.0994,  1.0087]) tensor([-0.1039, -0.4248,  0.1195,  0.7488]) False 1.0
tensor([-0.1039, -0.4248,  0.1195,  0.7488]) tensor([-0.1124, -0.6214,  0.1345,  1.0765]) False 1.0
tensor([-0.1124, -0.6214,  0.1345,  1.0765]) tensor([-0.1248, -0.4283,  0.1560,  0.8289]) False 1.0
tensor([-0.1248, -0.4283,  0.1560,  0.8289]) tensor([-0.1334, -0.2356,  0.1726,  0.5891]) False 1.0
tensor([-0.1334, -0.2356,  0.1726,  0.5891]) tensor([-0.1381, -0.4326,  0.1844,  0.9308]) False 1.0
tensor([-0.1381, -0.4326,  0.1844,  0.9308]) tensor([-0.1468, -0.6297,  0.2030,  1.2753]) False 1.0
tensor([-0.1468, -0.6297,  0.2030,  1.2753]) tensor([0., 0., 0., 0.]) True 1.0
tensor([-0.0463, -0.4208,  0.0364,  0.6018]) tensor([-0.0547, -0.2262,  0.0484,  0.3208]) False 1.0
tensor([-0.0547, -0.2262,  0.0484,  0.3208]) tensor([-0.0592, -0.4220,  0.0548,  0.6283]) False 1.0
tensor([-0.0592, -0.4220,  0.0548,  0.6283]) tensor([-0.0676, -0.2277,  0.0674,  0.3534]) False 1.0
tensor([-0.0676, -0.2

 25%|███████████████████▉                                                             | 738/3000 [00:11<00:50, 44.39it/s, control=0, cost=-22]

tensor([0.4678, 0.2787, 0.2085, 1.2420]) tensor([0., 0., 0., 0.]) True 1.0
tensor([-0.0101, -0.3946,  0.0562,  0.6226]) tensor([-0.0180, -0.5905,  0.0686,  0.9324]) False 1.0
tensor([-0.0180, -0.5905,  0.0686,  0.9324]) tensor([-0.0298, -0.7865,  0.0873,  1.2459]) False 1.0
tensor([-0.0298, -0.7865,  0.0873,  1.2459]) tensor([-0.0456, -0.5926,  0.1122,  0.9817]) False 1.0
tensor([-0.0456, -0.5926,  0.1122,  0.9817]) tensor([-0.0574, -0.7890,  0.1318,  1.3074]) False 1.0
tensor([-0.0574, -0.7890,  0.1318,  1.3074]) tensor([-0.0732, -0.5958,  0.1580,  1.0587]) False 1.0
tensor([-0.0732, -0.5958,  0.1580,  1.0587]) tensor([-0.0851, -0.7926,  0.1791,  1.3966]) False 1.0
tensor([-0.0851, -0.7926,  0.1791,  1.3966]) tensor([-0.1010, -0.9894,  0.2071,  1.7395]) False 1.0
tensor([-0.1010, -0.9894,  0.2071,  1.7395]) tensor([0., 0., 0., 0.]) True 1.0
tensor([ 2.9256e-03,  4.1227e-01, -1.2263e-04, -5.4369e-01]) tensor([ 0.0112,  0.2172, -0.0110, -0.2510]) False 1.0
tensor([ 0.0112,  0.2172, -0.0

 25%|████████████████████▎                                                            | 750/3000 [00:11<00:45, 49.98it/s, control=0, cost=-10]

tensor([ 0.0451,  0.1946,  0.0020, -0.2219]) tensor([ 0.0490,  0.3897, -0.0024, -0.5140]) False 1.0
tensor([ 0.0490,  0.3897, -0.0024, -0.5140]) tensor([ 0.0568,  0.5849, -0.0127, -0.8074]) False 1.0
tensor([ 0.0568,  0.5849, -0.0127, -0.8074]) tensor([ 0.0684,  0.7801, -0.0289, -1.1041]) False 1.0
tensor([ 0.0684,  0.7801, -0.0289, -1.1041]) tensor([ 0.0841,  0.5854, -0.0509, -0.8206]) False 1.0
tensor([ 0.0841,  0.5854, -0.0509, -0.8206]) tensor([ 0.0958,  0.3910, -0.0673, -0.5443]) False 1.0
tensor([ 0.0958,  0.3910, -0.0673, -0.5443]) tensor([ 0.1036,  0.5870, -0.0782, -0.8575]) False 1.0
tensor([ 0.1036,  0.5870, -0.0782, -0.8575]) tensor([ 0.1153,  0.7831, -0.0954, -1.1737]) False 1.0
tensor([ 0.1153,  0.7831, -0.0954, -1.1737]) tensor([ 0.1310,  0.5894, -0.1189, -0.9124]) False 1.0
tensor([ 0.1310,  0.5894, -0.1189, -0.9124]) tensor([ 0.1428,  0.7859, -0.1371, -1.2399]) False 1.0
tensor([ 0.1428,  0.7859, -0.1371, -1.2399]) tensor([ 0.1585,  0.5928, -0.1619, -0.9931]) False 1.0


 25%|████████████████████▌                                                            | 762/3000 [00:12<00:42, 52.58it/s, control=0, cost=-40]

tensor([ 0.0004,  0.1719, -0.0331, -0.3269]) tensor([ 0.0038,  0.3675, -0.0397, -0.6298]) False 1.0
tensor([ 0.0038,  0.3675, -0.0397, -0.6298]) tensor([ 0.0112,  0.5632, -0.0523, -0.9348]) False 1.0
tensor([ 0.0112,  0.5632, -0.0523, -0.9348]) tensor([ 0.0225,  0.3688, -0.0710, -0.6589]) False 1.0
tensor([ 0.0225,  0.3688, -0.0710, -0.6589]) tensor([ 0.0298,  0.1747, -0.0841, -0.3894]) False 1.0
tensor([ 0.0298,  0.1747, -0.0841, -0.3894]) tensor([ 0.0333, -0.0191, -0.0919, -0.1244]) False 1.0
tensor([ 0.0333, -0.0191, -0.0919, -0.1244]) tensor([ 0.0329, -0.2128, -0.0944,  0.1379]) False 1.0
tensor([ 0.0329, -0.2128, -0.0944,  0.1379]) tensor([ 0.0287, -0.0165, -0.0917, -0.1830]) False 1.0
tensor([ 0.0287, -0.0165, -0.0917, -0.1830]) tensor([ 0.0284, -0.2102, -0.0953,  0.0794]) False 1.0
tensor([ 0.0284, -0.2102, -0.0953,  0.0794]) tensor([ 0.0242, -0.0138, -0.0937, -0.2418]) False 1.0
tensor([ 0.0242, -0.0138, -0.0937, -0.2418]) tensor([ 0.0239, -0.2075, -0.0986,  0.0199]) False 1.0


 26%|████████████████████▉                                                            | 775/3000 [00:12<00:39, 56.46it/s, control=0, cost=-25]

tensor([ 0.0042,  0.3561,  0.0395, -0.5489]) tensor([ 0.0114,  0.5507,  0.0285, -0.8289]) False 1.0
tensor([ 0.0114,  0.5507,  0.0285, -0.8289]) tensor([ 0.0224,  0.7454,  0.0119, -1.1125]) False 1.0
tensor([ 0.0224,  0.7454,  0.0119, -1.1125]) tensor([ 0.0373,  0.9403, -0.0103, -1.4014]) False 1.0
tensor([ 0.0373,  0.9403, -0.0103, -1.4014]) tensor([ 0.0561,  1.1356, -0.0383, -1.6973]) False 1.0
tensor([ 0.0561,  1.1356, -0.0383, -1.6973]) tensor([ 0.0788,  1.3311, -0.0723, -2.0017]) False 1.0
tensor([ 0.0788,  1.3311, -0.0723, -2.0017]) tensor([ 0.1054,  1.1368, -0.1123, -1.7322]) False 1.0
tensor([ 0.1054,  1.1368, -0.1123, -1.7322]) tensor([ 0.1282,  1.3330, -0.1470, -2.0576]) False 1.0
tensor([ 0.1282,  1.3330, -0.1470, -2.0576]) tensor([ 0.1548,  1.1397, -0.1881, -1.8138]) False 1.0
tensor([ 0.1548,  1.1397, -0.1881, -1.8138]) tensor([0., 0., 0., 0.]) True 1.0
tensor([ 0.0073,  0.3412,  0.0381, -0.5286]) tensor([ 0.0141,  0.5358,  0.0276, -0.8090]) False 1.0
tensor([ 0.0141,  0.5

 26%|█████████████████████▎                                                           | 788/3000 [00:12<00:38, 57.19it/s, control=0, cost=-10]

tensor([-0.0318,  0.3505,  0.0370, -0.5380]) tensor([-0.0248,  0.5451,  0.0263, -0.8188]) False 1.0
tensor([-0.0248,  0.5451,  0.0263, -0.8188]) tensor([-0.0139,  0.3496,  0.0099, -0.5180]) False 1.0
tensor([-0.0139,  0.3496,  0.0099, -0.5180]) tensor([-6.9127e-03,  5.4463e-01, -4.6462e-04, -8.0752e-01]) False 1.0
tensor([-6.9127e-03,  5.4463e-01, -4.6462e-04, -8.0752e-01]) tensor([ 0.0040,  0.7398, -0.0166, -1.1004]) False 1.0
tensor([ 0.0040,  0.7398, -0.0166, -1.1004]) tensor([ 0.0188,  0.9351, -0.0386, -1.3982]) False 1.0
tensor([ 0.0188,  0.9351, -0.0386, -1.3982]) tensor([ 0.0375,  1.1307, -0.0666, -1.7027]) False 1.0
tensor([ 0.0375,  1.1307, -0.0666, -1.7027]) tensor([ 0.0601,  1.3265, -0.1006, -2.0153]) False 1.0
tensor([ 0.0601,  1.3265, -0.1006, -2.0153]) tensor([ 0.0866,  1.5225, -0.1409, -2.3374]) False 1.0
tensor([ 0.0866,  1.5225, -0.1409, -2.3374]) tensor([ 0.1171,  1.3289, -0.1877, -2.0912]) False 1.0
tensor([ 0.1171,  1.3289, -0.1877, -2.0912]) tensor([0., 0., 0., 0.]

 26%|█████████████████████▍                                                           | 794/3000 [00:12<00:40, 53.97it/s, control=0, cost=-10]

tensor([ 0.1015,  0.1749, -0.1190, -0.3979]) tensor([ 0.1050, -0.0184, -0.1270, -0.1450]) False 1.0
tensor([ 0.1050, -0.0184, -0.1270, -0.1450]) tensor([ 0.1046, -0.2115, -0.1299,  0.1051]) False 1.0
tensor([ 0.1046, -0.2115, -0.1299,  0.1051]) tensor([ 0.1004, -0.0148, -0.1278, -0.2256]) False 1.0
tensor([ 0.1004, -0.0148, -0.1278, -0.2256]) tensor([ 0.1001, -0.2078, -0.1323,  0.0242]) False 1.0
tensor([ 0.1001, -0.2078, -0.1323,  0.0242]) tensor([ 0.0959, -0.0111, -0.1318, -0.3071]) False 1.0
tensor([ 0.0959, -0.0111, -0.1318, -0.3071]) tensor([ 0.0957, -0.2041, -0.1380, -0.0588]) False 1.0
tensor([ 0.0957, -0.2041, -0.1380, -0.0588]) tensor([ 0.0916, -0.0073, -0.1391, -0.3916]) False 1.0
tensor([ 0.0916, -0.0073, -0.1391, -0.3916]) tensor([ 0.0915, -0.2002, -0.1470, -0.1458]) False 1.0
tensor([ 0.0915, -0.2002, -0.1470, -0.1458]) tensor([ 0.0875, -0.3930, -0.1499,  0.0971]) False 1.0
tensor([ 0.0875, -0.3930, -0.1499,  0.0971]) tensor([ 0.0796, -0.5857, -0.1479,  0.3390]) False 1.0


 27%|█████████████████████▊                                                           | 807/3000 [00:12<00:35, 62.33it/s, control=0, cost=-40]


tensor([0., 0., 0., 0.]) True 1.0
tensor([ 0.0496,  0.3640, -0.0513, -0.6333]) tensor([ 0.0569,  0.1696, -0.0639, -0.3571]) False 1.0
tensor([ 0.0569,  0.1696, -0.0639, -0.3571]) tensor([ 0.0603, -0.0246, -0.0711, -0.0853]) False 1.0
tensor([ 0.0603, -0.0246, -0.0711, -0.0853]) tensor([ 0.0598,  0.1715, -0.0728, -0.3995]) False 1.0
tensor([ 0.0598,  0.1715, -0.0728, -0.3995]) tensor([ 0.0632,  0.3676, -0.0808, -0.7142]) False 1.0
tensor([ 0.0632,  0.3676, -0.0808, -0.7142]) tensor([ 0.0705,  0.1737, -0.0951, -0.4480]) False 1.0
tensor([ 0.0705,  0.1737, -0.0951, -0.4480]) tensor([ 0.0740, -0.0200, -0.1040, -0.1868]) False 1.0
tensor([ 0.0740, -0.0200, -0.1040, -0.1868]) tensor([ 0.0736, -0.2135, -0.1078,  0.0714]) False 1.0
tensor([ 0.0736, -0.2135, -0.1078,  0.0714]) tensor([ 0.0694, -0.4069, -0.1063,  0.3282]) False 1.0
tensor([ 0.0694, -0.4069, -0.1063,  0.3282]) tensor([ 0.0612, -0.2105, -0.0998,  0.0040]) False 1.0
tensor([ 0.0612, -0.2105, -0.0998,  0.0040]) tensor([ 0.0570, -0.0


KeyboardInterrupt



In [None]:
# # save args and stats!  --  note that to save the args, we actually save the `get_args` function. we can print the 
# #                           source code later to see the hyperparameters we chose
# experiment.save(filename)

## Visualization
We keep track of useful information through `Stats` objects. A `Stats` object can `register()` a variable to track (its values are then recorded via calls to `update()`), and can be aggregated via `Stats.aggregate()` to obtain mean and variance statistics.

We define below a plotting arrangement that plots all the desired quantities from both the system and controller.

In [None]:
def plot_ppo(experiment: Experiment):
    """Plot the recorded statistics of every method in a finished PPO experiment.

    The figure is a two-column grid. The first five rows are panels shared by
    all methods (each method is drawn as one labelled line per panel); below
    them, every method gets its own panel showing the decomposition of its
    control into the 'K @ state', 'M \\cdot w' and 'M0' stats.

    Args:
        experiment: an experiment whose `stats` attribute maps a method name to
            that method's stats object (or to `None` if it produced no stats).

    Raises:
        AssertionError: if `experiment.stats` is `None`, i.e. nothing to plot.
    """
    assert experiment.stats is not None, 'cannot plot the results of an experiment that hasnt been run'
    all_stats = experiment.stats

    # clear the current figure, then size the grid: `n` shared rows plus one
    # half-row (a single panel) per method, rounded up to whole rows
    plt.clf()
    n = 5
    nrows = n + (len(all_stats) + 1) // 2
    _, ax = plt.subplots(nrows, 2, figsize=(16, 4 * nrows))

    # (grid position, stat key) for the panels shared by all methods
    shared_panels = (
        ((0, 0), 'eps_clip'),
        ((1, 0), 'us'),
        ((2, 0), 'rewards'),
        ((2, 1), 'avg costs since reset'),
        ((3, 0), '||A||_op'),
        ((3, 1), '||B||_F'),
        ((4, 0), '||A-BK||_op'),
        ((4, 1), 'lifter losses'),
        ((0, 1), 'disturbances'),
    )
    # stat keys drawn on each method's own "u decomp" panel; raw strings keep the
    # literal backslash in 'M \cdot w' without relying on an invalid `\c` escape
    decomp_keys = ('K @ state', r'M \cdot w', 'M0')

    # plot system and controller stats
    for i, (method, stats) in enumerate(all_stats.items()):
        if stats is None:
            print('WARNING: {} had no stats'.format(method))
            continue  # this method's own panel is left empty
        for pos, key in shared_panels:
            stats.plot(ax[pos], key, label=method)

        i_ax = ax[n + i // 2, i % 2]
        for key in decomp_keys:
            stats.plot(i_ax, key, label=key)
        i_ax.set_title('u decomp for {}'.format(method))
        i_ax.legend()

    # set titles and legends and limits and such
    # (note: `ylim()` is so useful! because sometimes one thing blows up and then autoscale messes up all plots)
    # NOTE(review): panel (0, 0) is titled 'state' but plots the 'eps_clip' stat, and panel (2, 1) is
    # titled 'avg rewards since reset' but plots 'avg costs since reset' -- confirm these titles are intended.
    panel_format = (
        ((0, 0), 'state', (-1, 2)),
        ((0, 1), 'disturbances', (-5, 5)),
        ((1, 0), 'controls', (-3, 3)),
        ((2, 0), 'instantaneous rewards', None),
        ((2, 1), 'avg rewards since reset', None),
        ((3, 0), '||A||_op', None),
        ((3, 1), '||B||_F', None),
        ((4, 0), '||A-BK||_op', None),
        ((4, 1), 'lifter losses', None),
    )
    for pos, title, limits in panel_format:
        _ax = ax[pos]
        _ax.set_title(title)
        _ax.legend()
        if limits is not None:
            ylim(_ax, *limits)

### Plot

In [None]:
# plot_ppo(experiment)
# Debug aid: print the `ts` attribute of the 'us' entry recorded for the method keyed '0.1'.
# NOTE(review): this reaches into the private `_stats` mapping and hard-codes the '0.1' key --
# confirm that key is present in `experiment.stats` for the experiment that was run.
print(experiment.stats['0.1']._stats['us'].ts)

### Dynamic Plot

In [None]:
# # dynamic plot
# anim = render(experiment, 'xs', 'rewards', sliderkey='us', save_path=None, duration=5)
# vid = anim.to_html5_video()
# HTML(vid)