### 1. Import Dependencies

In [1]:
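# Optional one-time setup: uncomment to install dependencies. `%pip` installs
# into the environment of the running kernel, which `!pip` does not guarantee.
# NOTE(review): section 4 imports stable_baselines3, which is not listed here --
# confirm the real dependency set and pin every version.
#%pip install tensorflow==2.7.0
#%pip install gym
#%pip install keras
#%pip install keras-rl2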

In [30]:
from collections import Counter
from VMAllocationEnvironment import VMAllocationEnvironment
import numpy as np

### 2. Create Environment

In [15]:
# Build the environment instance inspected in the next cells.
# NOTE(review): the meaning of the two positional arguments (10, 10) is defined
# in VMAllocationEnvironment.py, not in this notebook -- confirm before changing.
env = VMAllocationEnvironment(10, 10)

In [16]:
# Show the state array of the freshly constructed environment (bare last
# expression, so the notebook renders it as the cell output).
env.state

array([[ 0.        , 10.        , 10.        , 10.        ],
       [ 0.        , 10.        , 10.        , 10.        ],
       [ 0.        , 10.        , 10.        , 10.        ],
       [ 0.        , 10.        , 10.        , 10.        ],
       [ 0.        , 10.        , 10.        , 10.        ],
       [ 0.        , 10.        , 10.        , 10.        ],
       [ 0.        , 10.        , 10.        , 10.        ],
       [ 0.        , 10.        , 10.        , 10.        ],
       [ 0.        , 10.        , 10.        , 10.        ],
       [ 0.        , 10.        , 10.        , 10.        ],
       [ 0.08700111, -0.37007874, -0.04905854, -0.02919099]],
      dtype=float32)

In [17]:
# Draw one random sample from the declared observation space to eyeball its
# shape and value ranges next to the actual state shown above.
env.observation_space.sample()

array([[20.842003  ,  9.268549  ,  8.491825  ,  9.248245  ],
       [ 7.993942  ,  5.837442  ,  8.538009  ,  8.524099  ],
       [56.32744   ,  6.018296  ,  1.3318549 ,  5.052313  ],
       [17.787182  ,  8.201955  ,  0.91848487,  5.255874  ],
       [66.11823   ,  7.2543774 ,  9.299523  ,  1.6687855 ],
       [ 1.5231788 ,  2.648834  ,  5.2091193 ,  3.1549041 ],
       [42.98362   ,  9.342381  ,  5.484462  ,  5.595374  ],
       [53.622566  ,  8.072747  ,  0.47493726,  2.0717106 ],
       [89.79644   ,  6.46535   ,  3.8974352 ,  7.2549415 ],
       [11.811794  ,  3.754883  ,  8.440101  ,  5.153111  ],
       [25.798693  ,  3.2170134 ,  9.835156  ,  2.1679347 ]],
      dtype=float32)

### 3. Run control test

In [36]:
# Control test: drive the environment with uniformly random actions to get a
# baseline that a trained policy can be compared against.
MAX_STEPS = 1000  # hard cap on steps per episode, in case `done` is never set
episodes = 10

# Outcome counters reported per episode, in display order.
# NOTE(review): assumes `info` returned by env.step() maps these outcome names
# to per-step counts -- confirm against VMAllocationEnvironment.step.
OUTCOME_KEYS = ('placed', 'misplaced', 'discarded')

for episode in range(1, episodes + 1):
    state = env.reset()
    steps = 0
    done = False
    score = 0

    session_info = Counter(dict.fromkeys(OUTCOME_KEYS, 0))

    while not done and steps < MAX_STEPS:
        action = env.action_space.sample()
        n_state, reward, done, info = env.step(action)
        score += reward
        steps += 1
        # Counter.update() adds counts in place and keeps zero entries.
        # `counter += other` silently drops every key whose total is <= 0,
        # which removed outcomes that had not occurred (yet) and broke the
        # three-way unpacking below.
        session_info.update(info)

    print('Episode:{} Score:{}'.format(episode, score))
    print('Total steps: {}'.format(steps))

    # Read the counters in a fixed order instead of sorting whatever keys
    # happen to be present.
    ordered_info = [(key, session_info[key]) for key in OUTCOME_KEYS]

    placed, misplaced, discarded = ordered_info

    # Guard both ratios: an episode can end without any placement attempt.
    attempts = placed[1] + misplaced[1]
    accuracy = 100 * placed[1] / attempts if attempts else 0.0
    # '.1f' rather than '.3': three significant digits renders 100.0 as '1e+02'.
    print('Accuracy of placed items: {:.1f}%'.format(accuracy))

    total = attempts + discarded[1]
    first_time = 100 * placed[1] / total if total else 0.0
    print('Items placed correctly first time: {:.1f}%'.format(first_time))

    print(ordered_info)
    # Mean of column 0 over every row of the final state array.
    print(f'Average value: {np.mean(env.state[:, 0])}')

    print()

    # Disabled logging hooks, kept for reference:
    #print(env.logs)
    #control_data.log(env)
    #env.logs = { 'placed':0, 'misplaced':0, 'discarded':0 }

Episode:1 Score:-536.6743909384968
Total steps: 1000
Accuracy of placed items: 48.7%
Items placed correctly first time: 43.5%
[('placed', 435), ('misplaced', 458), ('discarded', 107)]
Average value: 2.5818777084350586

Episode:2 Score:-537.3261623392318
Total steps: 1000
Accuracy of placed items: 47.3%
Items placed correctly first time: 43.3%
[('placed', 433), ('misplaced', 483), ('discarded', 84)]
Average value: 2.6978847980499268

Episode:3 Score:-532.2549734605127
Total steps: 1000
Accuracy of placed items: 49.3%
Items placed correctly first time: 44.0%
[('placed', 440), ('misplaced', 453), ('discarded', 107)]
Average value: 2.529097080230713

Episode:4 Score:-527.4594754696154
Total steps: 1000
Accuracy of placed items: 49.1%
Items placed correctly first time: 44.3%
[('placed', 443), ('misplaced', 459), ('discarded', 98)]
Average value: 2.691879987716675

Episode:5 Score:-495.9547372696834
Total steps: 1000
Accuracy of placed items: 52.2%
Items placed correctly first time: 47.7%
[(

### 4. Train RL model

In [19]:
import os
import gym
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv

In [20]:
# Directory handed to PPO as `tensorboard_log`. Create it up front so the
# training cells never depend on the folder already existing on disk.
log_path = os.path.join('Training', 'Logs')
os.makedirs(log_path, exist_ok=True)

In [21]:
# Rebind `env` to a brand-new environment for training; this replaces the
# instance created in section 2, so no state from earlier cells carries over.
# NOTE(review): same opaque (10, 10) arguments as in section 2 -- keep them in sync.
env = VMAllocationEnvironment(10, 10)

In [22]:
# PPO agent with the default MLP policy; training metrics are written as
# TensorBoard event files under `log_path`.
model = PPO(
    policy='MlpPolicy',
    env=env,
    verbose=1,
    tensorboard_log=log_path,
)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [39]:
# Train for one million environment steps. With verbose=1 this prints one
# metrics table per PPO iteration, so expect a very long cell output.
model.learn(total_timesteps=1_000_000)

Logging to Training\Logs\PPO_3
-----------------------------
| time/              |      |
|    fps             | 1228 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.97e+03    |
|    ep_rew_mean          | -1.5e+03    |
| time/                   |             |
|    fps                  | 855         |
|    iterations           | 2           |
|    time_elapsed         | 4           |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.011542152 |
|    clip_fraction        | 0.112       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.13       |
|    explained_variance   | 0.998       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.143       |
|    n_updates            | 540         |
|  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.79e+03    |
|    ep_rew_mean          | -1.29e+03   |
| time/                   |             |
|    fps                  | 649         |
|    iterations           | 11          |
|    time_elapsed         | 34          |
|    total_timesteps      | 22528       |
| train/                  |             |
|    approx_kl            | 0.010097988 |
|    clip_fraction        | 0.129       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.12       |
|    explained_variance   | 0.972       |
|    learning_rate        | 0.0003      |
|    loss                 | 13.7        |
|    n_updates            | 630         |
|    policy_gradient_loss | -0.00575    |
|    value_loss           | 32          |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.74e+

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.68e+03    |
|    ep_rew_mean          | -1.19e+03   |
| time/                   |             |
|    fps                  | 614         |
|    iterations           | 21          |
|    time_elapsed         | 70          |
|    total_timesteps      | 43008       |
| train/                  |             |
|    approx_kl            | 0.013820198 |
|    clip_fraction        | 0.155       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.18       |
|    explained_variance   | 0.901       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.718       |
|    n_updates            | 730         |
|    policy_gradient_loss | -0.00746    |
|    value_loss           | 33.6        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.66e+

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.58e+03    |
|    ep_rew_mean          | -1.09e+03   |
| time/                   |             |
|    fps                  | 606         |
|    iterations           | 31          |
|    time_elapsed         | 104         |
|    total_timesteps      | 63488       |
| train/                  |             |
|    approx_kl            | 0.013115546 |
|    clip_fraction        | 0.14        |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.21       |
|    explained_variance   | 0.962       |
|    learning_rate        | 0.0003      |
|    loss                 | 6.3         |
|    n_updates            | 830         |
|    policy_gradient_loss | -0.00263    |
|    value_loss           | 32.8        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.59e+

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.57e+03    |
|    ep_rew_mean          | -1.08e+03   |
| time/                   |             |
|    fps                  | 597         |
|    iterations           | 41          |
|    time_elapsed         | 140         |
|    total_timesteps      | 83968       |
| train/                  |             |
|    approx_kl            | 0.011563594 |
|    clip_fraction        | 0.121       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.25       |
|    explained_variance   | 0.96        |
|    learning_rate        | 0.0003      |
|    loss                 | 2.71        |
|    n_updates            | 930         |
|    policy_gradient_loss | -0.00348    |
|    value_loss           | 30.6        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.55e+

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.53e+03    |
|    ep_rew_mean          | -1.04e+03   |
| time/                   |             |
|    fps                  | 590         |
|    iterations           | 51          |
|    time_elapsed         | 176         |
|    total_timesteps      | 104448      |
| train/                  |             |
|    approx_kl            | 0.015013839 |
|    clip_fraction        | 0.186       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.25       |
|    explained_variance   | 0.95        |
|    learning_rate        | 0.0003      |
|    loss                 | 1.3         |
|    n_updates            | 1030        |
|    policy_gradient_loss | -0.0103     |
|    value_loss           | 29.7        |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.52

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.53e+03    |
|    ep_rew_mean          | -1.04e+03   |
| time/                   |             |
|    fps                  | 588         |
|    iterations           | 61          |
|    time_elapsed         | 212         |
|    total_timesteps      | 124928      |
| train/                  |             |
|    approx_kl            | 0.008581348 |
|    clip_fraction        | 0.116       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.17       |
|    explained_variance   | 0.948       |
|    learning_rate        | 0.0003      |
|    loss                 | 7.73        |
|    n_updates            | 1130        |
|    policy_gradient_loss | -0.0064     |
|    value_loss           | 62.3        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.53e+

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.51e+03    |
|    ep_rew_mean          | -1.02e+03   |
| time/                   |             |
|    fps                  | 586         |
|    iterations           | 71          |
|    time_elapsed         | 247         |
|    total_timesteps      | 145408      |
| train/                  |             |
|    approx_kl            | 0.010774935 |
|    clip_fraction        | 0.173       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.21       |
|    explained_variance   | 0.959       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.931       |
|    n_updates            | 1230        |
|    policy_gradient_loss | -0.00414    |
|    value_loss           | 31.5        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.5e+0

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.45e+03    |
|    ep_rew_mean          | -961        |
| time/                   |             |
|    fps                  | 585         |
|    iterations           | 81          |
|    time_elapsed         | 283         |
|    total_timesteps      | 165888      |
| train/                  |             |
|    approx_kl            | 0.011902396 |
|    clip_fraction        | 0.143       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.15       |
|    explained_variance   | 0.955       |
|    learning_rate        | 0.0003      |
|    loss                 | 2.07        |
|    n_updates            | 1330        |
|    policy_gradient_loss | -0.00866    |
|    value_loss           | 30.4        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.45e+

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.44e+03     |
|    ep_rew_mean          | -950         |
| time/                   |              |
|    fps                  | 582          |
|    iterations           | 91           |
|    time_elapsed         | 319          |
|    total_timesteps      | 186368       |
| train/                  |              |
|    approx_kl            | 0.0061627305 |
|    clip_fraction        | 0.13         |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.03        |
|    explained_variance   | 0.961        |
|    learning_rate        | 0.0003       |
|    loss                 | 21           |
|    n_updates            | 1430         |
|    policy_gradient_loss | -0.0033      |
|    value_loss           | 30           |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.43e+03    |
|    ep_rew_mean          | -937        |
| time/                   |             |
|    fps                  | 581         |
|    iterations           | 101         |
|    time_elapsed         | 355         |
|    total_timesteps      | 206848      |
| train/                  |             |
|    approx_kl            | 0.008567811 |
|    clip_fraction        | 0.107       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.13       |
|    explained_variance   | 0.95        |
|    learning_rate        | 0.0003      |
|    loss                 | 4.47        |
|    n_updates            | 1530        |
|    policy_gradient_loss | -0.00561    |
|    value_loss           | 52.6        |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.43

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.43e+03    |
|    ep_rew_mean          | -933        |
| time/                   |             |
|    fps                  | 577         |
|    iterations           | 111         |
|    time_elapsed         | 393         |
|    total_timesteps      | 227328      |
| train/                  |             |
|    approx_kl            | 0.012158287 |
|    clip_fraction        | 0.176       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.1        |
|    explained_variance   | 0.965       |
|    learning_rate        | 0.0003      |
|    loss                 | 29.3        |
|    n_updates            | 1630        |
|    policy_gradient_loss | -0.00611    |
|    value_loss           | 30.6        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.44e+

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.47e+03    |
|    ep_rew_mean          | -973        |
| time/                   |             |
|    fps                  | 574         |
|    iterations           | 121         |
|    time_elapsed         | 431         |
|    total_timesteps      | 247808      |
| train/                  |             |
|    approx_kl            | 0.010672826 |
|    clip_fraction        | 0.0777      |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.17       |
|    explained_variance   | 0.945       |
|    learning_rate        | 0.0003      |
|    loss                 | 41.4        |
|    n_updates            | 1730        |
|    policy_gradient_loss | -0.00571    |
|    value_loss           | 51.9        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.47e+

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.47e+03    |
|    ep_rew_mean          | -971        |
| time/                   |             |
|    fps                  | 575         |
|    iterations           | 131         |
|    time_elapsed         | 466         |
|    total_timesteps      | 268288      |
| train/                  |             |
|    approx_kl            | 0.016199494 |
|    clip_fraction        | 0.219       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.07       |
|    explained_variance   | 0.97        |
|    learning_rate        | 0.0003      |
|    loss                 | 1.67        |
|    n_updates            | 1830        |
|    policy_gradient_loss | -0.00992    |
|    value_loss           | 31.9        |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.47

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.5e+03     |
|    ep_rew_mean          | -1.01e+03   |
| time/                   |             |
|    fps                  | 574         |
|    iterations           | 141         |
|    time_elapsed         | 502         |
|    total_timesteps      | 288768      |
| train/                  |             |
|    approx_kl            | 0.012758352 |
|    clip_fraction        | 0.14        |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.1        |
|    explained_variance   | 0.961       |
|    learning_rate        | 0.0003      |
|    loss                 | 97.5        |
|    n_updates            | 1930        |
|    policy_gradient_loss | -0.00814    |
|    value_loss           | 33.5        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.51e+

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.54e+03    |
|    ep_rew_mean          | -1.05e+03   |
| time/                   |             |
|    fps                  | 572         |
|    iterations           | 151         |
|    time_elapsed         | 539         |
|    total_timesteps      | 309248      |
| train/                  |             |
|    approx_kl            | 0.009526389 |
|    clip_fraction        | 0.0639      |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.24       |
|    explained_variance   | 0.942       |
|    learning_rate        | 0.0003      |
|    loss                 | 5.15        |
|    n_updates            | 2030        |
|    policy_gradient_loss | -0.00379    |
|    value_loss           | 48.9        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.54e+

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.51e+03   |
|    ep_rew_mean          | -1.02e+03  |
| time/                   |            |
|    fps                  | 571        |
|    iterations           | 161        |
|    time_elapsed         | 576        |
|    total_timesteps      | 329728     |
| train/                  |            |
|    approx_kl            | 0.01048037 |
|    clip_fraction        | 0.146      |
|    clip_range           | 0.2        |
|    entropy_loss         | -2.08      |
|    explained_variance   | 0.951      |
|    learning_rate        | 0.0003     |
|    loss                 | 19.8       |
|    n_updates            | 2130       |
|    policy_gradient_loss | -0.00965   |
|    value_loss           | 47.1       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.51e+03    |
|    ep_rew_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.5e+03     |
|    ep_rew_mean          | -1e+03      |
| time/                   |             |
|    fps                  | 570         |
|    iterations           | 171         |
|    time_elapsed         | 614         |
|    total_timesteps      | 350208      |
| train/                  |             |
|    approx_kl            | 0.013980387 |
|    clip_fraction        | 0.21        |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.1        |
|    explained_variance   | 0.963       |
|    learning_rate        | 0.0003      |
|    loss                 | 5.25        |
|    n_updates            | 2230        |
|    policy_gradient_loss | -0.00814    |
|    value_loss           | 30.6        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.5e+0

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.49e+03    |
|    ep_rew_mean          | -1e+03      |
| time/                   |             |
|    fps                  | 572         |
|    iterations           | 181         |
|    time_elapsed         | 647         |
|    total_timesteps      | 370688      |
| train/                  |             |
|    approx_kl            | 0.014080548 |
|    clip_fraction        | 0.16        |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.17       |
|    explained_variance   | 0.959       |
|    learning_rate        | 0.0003      |
|    loss                 | 37.7        |
|    n_updates            | 2330        |
|    policy_gradient_loss | -0.00452    |
|    value_loss           | 30.3        |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.5e

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.46e+03    |
|    ep_rew_mean          | -964        |
| time/                   |             |
|    fps                  | 574         |
|    iterations           | 191         |
|    time_elapsed         | 681         |
|    total_timesteps      | 391168      |
| train/                  |             |
|    approx_kl            | 0.011727452 |
|    clip_fraction        | 0.102       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.06       |
|    explained_variance   | 0.949       |
|    learning_rate        | 0.0003      |
|    loss                 | 42.8        |
|    n_updates            | 2430        |
|    policy_gradient_loss | -0.00493    |
|    value_loss           | 48.2        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.47e+

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.49e+03    |
|    ep_rew_mean          | -992        |
| time/                   |             |
|    fps                  | 576         |
|    iterations           | 201         |
|    time_elapsed         | 714         |
|    total_timesteps      | 411648      |
| train/                  |             |
|    approx_kl            | 0.011916712 |
|    clip_fraction        | 0.114       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.11       |
|    explained_variance   | 0.977       |
|    learning_rate        | 0.0003      |
|    loss                 | 68.9        |
|    n_updates            | 2530        |
|    policy_gradient_loss | -0.00307    |
|    value_loss           | 19.4        |
-----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.49e+03

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.49e+03    |
|    ep_rew_mean          | -990        |
| time/                   |             |
|    fps                  | 577         |
|    iterations           | 211         |
|    time_elapsed         | 747         |
|    total_timesteps      | 432128      |
| train/                  |             |
|    approx_kl            | 0.006627321 |
|    clip_fraction        | 0.0485      |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.11       |
|    explained_variance   | 0.928       |
|    learning_rate        | 0.0003      |
|    loss                 | 17.9        |
|    n_updates            | 2630        |
|    policy_gradient_loss | -0.00282    |
|    value_loss           | 44.3        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.49e+

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.48e+03   |
|    ep_rew_mean          | -990       |
| time/                   |            |
|    fps                  | 578        |
|    iterations           | 221        |
|    time_elapsed         | 781        |
|    total_timesteps      | 452608     |
| train/                  |            |
|    approx_kl            | 0.01727916 |
|    clip_fraction        | 0.2        |
|    clip_range           | 0.2        |
|    entropy_loss         | -2.21      |
|    explained_variance   | 0.974      |
|    learning_rate        | 0.0003     |
|    loss                 | 1.48       |
|    n_updates            | 2730       |
|    policy_gradient_loss | -0.00105   |
|    value_loss           | 22.2       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.48e+03    |
|    ep_rew_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.49e+03    |
|    ep_rew_mean          | -1e+03      |
| time/                   |             |
|    fps                  | 578         |
|    iterations           | 231         |
|    time_elapsed         | 817         |
|    total_timesteps      | 473088      |
| train/                  |             |
|    approx_kl            | 0.013104921 |
|    clip_fraction        | 0.201       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.22       |
|    explained_variance   | 0.996       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.206       |
|    n_updates            | 2830        |
|    policy_gradient_loss | -0.0076     |
|    value_loss           | 0.656       |
-----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.49e+03

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.5e+03     |
|    ep_rew_mean          | -1e+03      |
| time/                   |             |
|    fps                  | 575         |
|    iterations           | 241         |
|    time_elapsed         | 857         |
|    total_timesteps      | 493568      |
| train/                  |             |
|    approx_kl            | 0.009148577 |
|    clip_fraction        | 0.116       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2          |
|    explained_variance   | 0.94        |
|    learning_rate        | 0.0003      |
|    loss                 | 19.7        |
|    n_updates            | 2930        |
|    policy_gradient_loss | -0.00213    |
|    value_loss           | 38.2        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.49e+

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.5e+03     |
|    ep_rew_mean          | -1.01e+03   |
| time/                   |             |
|    fps                  | 574         |
|    iterations           | 251         |
|    time_elapsed         | 895         |
|    total_timesteps      | 514048      |
| train/                  |             |
|    approx_kl            | 0.020730138 |
|    clip_fraction        | 0.172       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.16       |
|    explained_variance   | 0.965       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.383       |
|    n_updates            | 3030        |
|    policy_gradient_loss | -0.00229    |
|    value_loss           | 21.3        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.51e+

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.52e+03   |
|    ep_rew_mean          | -1.03e+03  |
| time/                   |            |
|    fps                  | 572        |
|    iterations           | 261        |
|    time_elapsed         | 933        |
|    total_timesteps      | 534528     |
| train/                  |            |
|    approx_kl            | 0.01769873 |
|    clip_fraction        | 0.0967     |
|    clip_range           | 0.2        |
|    entropy_loss         | -2.05      |
|    explained_variance   | 0.965      |
|    learning_rate        | 0.0003     |
|    loss                 | 1.26       |
|    n_updates            | 3130       |
|    policy_gradient_loss | -0.000303  |
|    value_loss           | 38.2       |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.52e+03   |
|    ep_rew_mean

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.5e+03     |
|    ep_rew_mean          | -1e+03      |
| time/                   |             |
|    fps                  | 571         |
|    iterations           | 271         |
|    time_elapsed         | 971         |
|    total_timesteps      | 555008      |
| train/                  |             |
|    approx_kl            | 0.011977205 |
|    clip_fraction        | 0.173       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.06       |
|    explained_variance   | 0.96        |
|    learning_rate        | 0.0003      |
|    loss                 | 30.1        |
|    n_updates            | 3230        |
|    policy_gradient_loss | -0.00628    |
|    value_loss           | 26.9        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.5e+0

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.49e+03    |
|    ep_rew_mean          | -1e+03      |
| time/                   |             |
|    fps                  | 571         |
|    iterations           | 281         |
|    time_elapsed         | 1006        |
|    total_timesteps      | 575488      |
| train/                  |             |
|    approx_kl            | 0.012936113 |
|    clip_fraction        | 0.127       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.22       |
|    explained_variance   | 0.961       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.85        |
|    n_updates            | 3330        |
|    policy_gradient_loss | -0.00567    |
|    value_loss           | 25.2        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.49e+

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.49e+03    |
|    ep_rew_mean          | -994        |
| time/                   |             |
|    fps                  | 572         |
|    iterations           | 291         |
|    time_elapsed         | 1040        |
|    total_timesteps      | 595968      |
| train/                  |             |
|    approx_kl            | 0.008892373 |
|    clip_fraction        | 0.136       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.97       |
|    explained_variance   | 0.976       |
|    learning_rate        | 0.0003      |
|    loss                 | 7.32        |
|    n_updates            | 3430        |
|    policy_gradient_loss | -0.00696    |
|    value_loss           | 13.5        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.49e+

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.48e+03    |
|    ep_rew_mean          | -983        |
| time/                   |             |
|    fps                  | 573         |
|    iterations           | 301         |
|    time_elapsed         | 1075        |
|    total_timesteps      | 616448      |
| train/                  |             |
|    approx_kl            | 0.008918295 |
|    clip_fraction        | 0.0917      |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.2        |
|    explained_variance   | 0.956       |
|    learning_rate        | 0.0003      |
|    loss                 | 20.3        |
|    n_updates            | 3530        |
|    policy_gradient_loss | -0.00152    |
|    value_loss           | 49.5        |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.47

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.48e+03    |
|    ep_rew_mean          | -981        |
| time/                   |             |
|    fps                  | 574         |
|    iterations           | 311         |
|    time_elapsed         | 1108        |
|    total_timesteps      | 636928      |
| train/                  |             |
|    approx_kl            | 0.009186782 |
|    clip_fraction        | 0.185       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.15       |
|    explained_variance   | 0.963       |
|    learning_rate        | 0.0003      |
|    loss                 | 40.7        |
|    n_updates            | 3630        |
|    policy_gradient_loss | -0.00333    |
|    value_loss           | 22.4        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.47e+

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.43e+03    |
|    ep_rew_mean          | -936        |
| time/                   |             |
|    fps                  | 574         |
|    iterations           | 321         |
|    time_elapsed         | 1144        |
|    total_timesteps      | 657408      |
| train/                  |             |
|    approx_kl            | 0.011478623 |
|    clip_fraction        | 0.172       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.24       |
|    explained_variance   | 0.968       |
|    learning_rate        | 0.0003      |
|    loss                 | 4.75        |
|    n_updates            | 3730        |
|    policy_gradient_loss | -0.00572    |
|    value_loss           | 19.9        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.43e+

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.4e+03    |
|    ep_rew_mean          | -900       |
| time/                   |            |
|    fps                  | 572        |
|    iterations           | 331        |
|    time_elapsed         | 1183       |
|    total_timesteps      | 677888     |
| train/                  |            |
|    approx_kl            | 0.00915391 |
|    clip_fraction        | 0.11       |
|    clip_range           | 0.2        |
|    entropy_loss         | -2.01      |
|    explained_variance   | 0.956      |
|    learning_rate        | 0.0003     |
|    loss                 | 44.9       |
|    n_updates            | 3830       |
|    policy_gradient_loss | -0.00028   |
|    value_loss           | 22.9       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.39e+03    |
|    ep_rew_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.4e+03     |
|    ep_rew_mean          | -898        |
| time/                   |             |
|    fps                  | 571         |
|    iterations           | 341         |
|    time_elapsed         | 1221        |
|    total_timesteps      | 698368      |
| train/                  |             |
|    approx_kl            | 0.013217827 |
|    clip_fraction        | 0.149       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.94       |
|    explained_variance   | 0.952       |
|    learning_rate        | 0.0003      |
|    loss                 | 125         |
|    n_updates            | 3930        |
|    policy_gradient_loss | -0.00623    |
|    value_loss           | 51.3        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.39e+

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.36e+03   |
|    ep_rew_mean          | -857       |
| time/                   |            |
|    fps                  | 570        |
|    iterations           | 351        |
|    time_elapsed         | 1260       |
|    total_timesteps      | 718848     |
| train/                  |            |
|    approx_kl            | 0.01272328 |
|    clip_fraction        | 0.125      |
|    clip_range           | 0.2        |
|    entropy_loss         | -1.86      |
|    explained_variance   | 0.947      |
|    learning_rate        | 0.0003     |
|    loss                 | 1.51       |
|    n_updates            | 4030       |
|    policy_gradient_loss | -0.00492   |
|    value_loss           | 13.4       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.36e+03    |
|    ep_rew_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.32e+03    |
|    ep_rew_mean          | -814        |
| time/                   |             |
|    fps                  | 568         |
|    iterations           | 361         |
|    time_elapsed         | 1299        |
|    total_timesteps      | 739328      |
| train/                  |             |
|    approx_kl            | 0.015284738 |
|    clip_fraction        | 0.182       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.18       |
|    explained_variance   | 0.957       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.649       |
|    n_updates            | 4130        |
|    policy_gradient_loss | -0.00697    |
|    value_loss           | 25.6        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.32e+

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.32e+03   |
|    ep_rew_mean          | -813       |
| time/                   |            |
|    fps                  | 570        |
|    iterations           | 371        |
|    time_elapsed         | 1332       |
|    total_timesteps      | 759808     |
| train/                  |            |
|    approx_kl            | 0.01664269 |
|    clip_fraction        | 0.162      |
|    clip_range           | 0.2        |
|    entropy_loss         | -2.16      |
|    explained_variance   | 0.956      |
|    learning_rate        | 0.0003     |
|    loss                 | 15.6       |
|    n_updates            | 4230       |
|    policy_gradient_loss | -0.00916   |
|    value_loss           | 36         |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.31e+03    |
|    ep_rew_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.31e+03    |
|    ep_rew_mean          | -806        |
| time/                   |             |
|    fps                  | 570         |
|    iterations           | 381         |
|    time_elapsed         | 1368        |
|    total_timesteps      | 780288      |
| train/                  |             |
|    approx_kl            | 0.009994322 |
|    clip_fraction        | 0.0932      |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.05       |
|    explained_variance   | 0.974       |
|    learning_rate        | 0.0003      |
|    loss                 | 13.1        |
|    n_updates            | 4330        |
|    policy_gradient_loss | -0.00387    |
|    value_loss           | 10.5        |
-----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.31e+03

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.35e+03    |
|    ep_rew_mean          | -845        |
| time/                   |             |
|    fps                  | 567         |
|    iterations           | 391         |
|    time_elapsed         | 1410        |
|    total_timesteps      | 800768      |
| train/                  |             |
|    approx_kl            | 0.010532303 |
|    clip_fraction        | 0.137       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.13       |
|    explained_variance   | 0.935       |
|    learning_rate        | 0.0003      |
|    loss                 | 7.28        |
|    n_updates            | 4430        |
|    policy_gradient_loss | -0.00418    |
|    value_loss           | 45.4        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.35e+

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.37e+03    |
|    ep_rew_mean          | -862        |
| time/                   |             |
|    fps                  | 565         |
|    iterations           | 401         |
|    time_elapsed         | 1453        |
|    total_timesteps      | 821248      |
| train/                  |             |
|    approx_kl            | 0.008351936 |
|    clip_fraction        | 0.0768      |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.2        |
|    explained_variance   | 0.953       |
|    learning_rate        | 0.0003      |
|    loss                 | 49.4        |
|    n_updates            | 4530        |
|    policy_gradient_loss | -0.00422    |
|    value_loss           | 21.5        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.37e+

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.3e+03     |
|    ep_rew_mean          | -794        |
| time/                   |             |
|    fps                  | 562         |
|    iterations           | 411         |
|    time_elapsed         | 1496        |
|    total_timesteps      | 841728      |
| train/                  |             |
|    approx_kl            | 0.010251373 |
|    clip_fraction        | 0.167       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.15       |
|    explained_variance   | 0.957       |
|    learning_rate        | 0.0003      |
|    loss                 | 30          |
|    n_updates            | 4630        |
|    policy_gradient_loss | -0.00819    |
|    value_loss           | 38.1        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.3e+0

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.31e+03   |
|    ep_rew_mean          | -810       |
| time/                   |            |
|    fps                  | 561        |
|    iterations           | 421        |
|    time_elapsed         | 1536       |
|    total_timesteps      | 862208     |
| train/                  |            |
|    approx_kl            | 0.00813972 |
|    clip_fraction        | 0.124      |
|    clip_range           | 0.2        |
|    entropy_loss         | -2.16      |
|    explained_variance   | 0.964      |
|    learning_rate        | 0.0003     |
|    loss                 | 2.57       |
|    n_updates            | 4730       |
|    policy_gradient_loss | -0.00187   |
|    value_loss           | 32.1       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.31e+03    |
|    ep_rew_m

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.32e+03     |
|    ep_rew_mean          | -810         |
| time/                   |              |
|    fps                  | 559          |
|    iterations           | 431          |
|    time_elapsed         | 1576         |
|    total_timesteps      | 882688       |
| train/                  |              |
|    approx_kl            | 0.0125161875 |
|    clip_fraction        | 0.16         |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.05        |
|    explained_variance   | 0.976        |
|    learning_rate        | 0.0003       |
|    loss                 | 1.04         |
|    n_updates            | 4830         |
|    policy_gradient_loss | -0.000571    |
|    value_loss           | 20.5         |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.31e+03    |
|    ep_rew_mean          | -802        |
| time/                   |             |
|    fps                  | 559         |
|    iterations           | 441         |
|    time_elapsed         | 1614        |
|    total_timesteps      | 903168      |
| train/                  |             |
|    approx_kl            | 0.010466294 |
|    clip_fraction        | 0.135       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.16       |
|    explained_variance   | 0.954       |
|    learning_rate        | 0.0003      |
|    loss                 | 9.56        |
|    n_updates            | 4930        |
|    policy_gradient_loss | -0.00376    |
|    value_loss           | 40.8        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.32e+

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.34e+03    |
|    ep_rew_mean          | -828        |
| time/                   |             |
|    fps                  | 558         |
|    iterations           | 451         |
|    time_elapsed         | 1654        |
|    total_timesteps      | 923648      |
| train/                  |             |
|    approx_kl            | 0.008093296 |
|    clip_fraction        | 0.111       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.99       |
|    explained_variance   | 0.96        |
|    learning_rate        | 0.0003      |
|    loss                 | 18.6        |
|    n_updates            | 5030        |
|    policy_gradient_loss | 8.29e-05    |
|    value_loss           | 44.4        |
-----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.36e+03

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.33e+03    |
|    ep_rew_mean          | -822        |
| time/                   |             |
|    fps                  | 558         |
|    iterations           | 461         |
|    time_elapsed         | 1690        |
|    total_timesteps      | 944128      |
| train/                  |             |
|    approx_kl            | 0.010606836 |
|    clip_fraction        | 0.124       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.14       |
|    explained_variance   | 0.979       |
|    learning_rate        | 0.0003      |
|    loss                 | 5.04        |
|    n_updates            | 5130        |
|    policy_gradient_loss | -0.00554    |
|    value_loss           | 11.4        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.34e+

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.34e+03    |
|    ep_rew_mean          | -828        |
| time/                   |             |
|    fps                  | 559         |
|    iterations           | 471         |
|    time_elapsed         | 1724        |
|    total_timesteps      | 964608      |
| train/                  |             |
|    approx_kl            | 0.016080461 |
|    clip_fraction        | 0.183       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.22       |
|    explained_variance   | 0.959       |
|    learning_rate        | 0.0003      |
|    loss                 | 2.25        |
|    n_updates            | 5230        |
|    policy_gradient_loss | -0.00399    |
|    value_loss           | 25.7        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.35e+

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.41e+03    |
|    ep_rew_mean          | -902        |
| time/                   |             |
|    fps                  | 559         |
|    iterations           | 481         |
|    time_elapsed         | 1759        |
|    total_timesteps      | 985088      |
| train/                  |             |
|    approx_kl            | 0.015922472 |
|    clip_fraction        | 0.109       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.03       |
|    explained_variance   | 0.982       |
|    learning_rate        | 0.0003      |
|    loss                 | 3.77        |
|    n_updates            | 5330        |
|    policy_gradient_loss | -0.00746    |
|    value_loss           | 19.6        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.41e+

<stable_baselines3.ppo.ppo.PPO at 0x20601556a60>

### 5. Save model

In [40]:
# Relative location used for the saved PPO model, assembled from its path
# components with os.path.join.
PPO_Path = os.path.join(
    'Training',
    'Saved Models',
    'VM_Allocation',
)

In [41]:
# Save the trained model under the location held in PPO_Path.
model.save(PPO_Path)

### 6. Load model

In [26]:
# Reload the saved model from PPO_Path and bind it to the existing `env`.
# The cell output shows the loader wrapping `env` in Monitor and DummyVecEnv.
model = PPO.load(PPO_Path, env=env)

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


### 7. Test model

In [46]:
# Roll the loaded model out for a fixed number of episodes and print, per
# episode: the cumulative reward, the number of steps taken, two placement
# ratios derived from the per-step `info` dicts, the raw outcome tallies and
# the mean of column 0 of `env.state`.
MAX_STEPS = 1000
episodes = 10

# Outcome categories tallied from `info`, in the order they are reported.
outcome_keys = ('placed', 'misplaced', 'discarded')

for episode in range(1, episodes+1):
    obs = env.reset()
    steps = 0
    done = False
    score = 0

    # Every category starts at zero so it is always present in the tally.
    session_info = Counter(dict.fromkeys(outcome_keys, 0))

    # An episode ends when the environment reports `done` or after MAX_STEPS.
    while not done and steps < MAX_STEPS:
        action, _ = model.predict(obs)
        obs, reward, done, info = env.step(action)
        score += reward
        steps += 1
        # Counter.update() adds the counts in place and keeps zero-valued
        # entries. The in-place `+=` operator silently drops any key whose
        # total is not positive, which made the three-way unpacking below
        # fail whenever a category never occurred during an episode.
        session_info.update(info)

    print('Episode:{} Score:{}'.format(episode,score))
    print('Total steps: {}'.format(steps))

    # Read the categories by name instead of relying on a sort order, so the
    # result is correct regardless of insertion order or extra `info` keys.
    ordered_info = [(key, session_info[key]) for key in outcome_keys]
    placed, misplaced, discarded = ordered_info

    # Share of placement attempts that succeeded; 0.0 when there were none
    # (avoids ZeroDivisionError and keeps the float format spec valid).
    attempted = placed[1] + misplaced[1]
    accuracy = (placed[1] / attempted) * 100 if attempted else 0.0
    print('Accuracy of placed items: {:.3}%'.format(accuracy))

    # Share of all tallied outcomes that were successful placements.
    total = attempted + discarded[1]
    first_time = (placed[1] / total) * 100 if total else 0.0
    print('Items placed correctly first time: {:.3}%'.format(first_time))

    print(ordered_info)
    # Mean of column 0 taken over every row of env.state.
    print(f'Average value: {np.mean(np.take(env.state, 0, axis=1))}')

    print()
    #real_data.log(env)
    #env.logs = { 'placed':0, 'misplaced':0, 'discarded':0 }

Episode:1 Score:-561.0253037809598
Total steps: 1000
Accuracy of placed items: 45.2%
Items placed correctly first time: 41.2%
[('placed', 412), ('misplaced', 499), ('discarded', 89)]
Average value: 2.459066867828369

Episode:2 Score:-529.583324426887
Total steps: 984
Accuracy of placed items: 48.1%
Items placed correctly first time: 43.3%
[('placed', 426), ('misplaced', 460), ('discarded', 98)]
Average value: 2.5854508876800537

Episode:3 Score:-511.7999363955896
Total steps: 1000
Accuracy of placed items: 51.5%
Items placed correctly first time: 46.0%
[('placed', 460), ('misplaced', 434), ('discarded', 106)]
Average value: 2.57601261138916

Episode:4 Score:-505.14508586852753
Total steps: 1000
Accuracy of placed items: 51.5%
Items placed correctly first time: 46.8%
[('placed', 468), ('misplaced', 441), ('discarded', 91)]
Average value: 2.441592216491699

Episode:5 Score:-457.37128063393175
Total steps: 1000
Accuracy of placed items: 59.5%
Items placed correctly first time: 51.2%
[('pl

In [43]:
# Display the environment's current state array.
print(env.state)

[[ 3.3617446e+00  5.5115674e-02  7.8182378e+00  8.3119440e+00]
 [ 3.0016756e+00  7.8694038e-03  7.2675095e+00  8.2526436e+00]
 [ 2.4265597e+00  1.7322800e-01  8.4785309e+00  7.0630527e+00]
 [ 2.7154491e+00  7.8718476e-03  7.8180442e+00  8.5279608e+00]
 [ 2.5609779e+00  7.8721158e-03  8.0134430e+00  8.2185974e+00]
 [ 2.8158779e+00  1.5745211e-02  7.8718419e+00  8.8276234e+00]
 [ 2.3429713e+00  2.3621429e-02  8.3371201e+00  7.9077559e+00]
 [ 4.1386638e+00  7.8719035e-03  7.4967937e+00  7.6664705e+00]
 [ 3.3538477e+00  7.8715943e-03  7.1209106e+00  8.5036507e+00]
 [ 2.5640516e+00  7.8703351e-03  8.0867071e+00  7.7650552e+00]
 [ 2.0593639e-02 -1.1811024e-01 -8.0696810e-03 -2.9190991e-02]]
