# Resource Allocation Code Demo

The Food Bank of the Southern Tier (FBST) is a member of Feeding America, focused on providing food security for people with limited financial resources, and serves six counties and nearly 4,000 square miles in New York.  Under normal operations (non-COVID times), the Mobile Food Pantry program is among the main activities of the FBST.  The goal of the service is to make nutritious and healthy food more accessible to people in underserved communities.  Even in areas where other agencies provide assistance, clients may not always have access to food due to limited public transportation options, or because those agencies are only open a few hours or days per week.

Here we do a sample experiment testing out some of the existing and developed algorithms against a randomized heuristic.

In [1]:
import or_suite
import numpy as np

import copy

import os
from stable_baselines3.common.monitor import Monitor
from stable_baselines3 import PPO
from stable_baselines3.ppo import MlpPolicy
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.evaluation import evaluate_policy
import pandas as pd


import gym

In [2]:
# Environment configuration for the food-bank resource allocation problem.
# NOTE(review): the argument 3 presumably selects the number of rounds/locations — confirm.
# Alternative configuration:
# CONFIG = or_suite.envs.env_configs.resource_allocation_default_config
CONFIG = or_suite.envs.env_configs.resource_allocation_foodbank_config(3)

# Experiment size: the episode length is read from the environment configuration,
# while the number of episodes and iterations are fixed here.
epLen = CONFIG['num_rounds']
nEps, numIters = 1, 200

# Settings dictionary handed to the or_suite experiment runners.
DEFAULT_SETTINGS = {
    'seed': 1,
    'recFreq': 1,
    'dirPath': '../data/resource/',
    'deBug': False,
    'nEps': nEps,
    'numIters': numIters,
    'saveTrajectory': True,  # save trajectory for calculating additional metrics
    'epLen': epLen,
    'render': False,
    'pickle': False,  # indicator for pickling final information
}

# Create the environment once; mon_env wraps that same instance in a Monitor.
resource_env = gym.make('Resource-v0', config=CONFIG)
mon_env = Monitor(resource_env)


In [3]:
# Agents to compare, keyed by the label that is later used for result
# directories and plot legends. The three active entries are all
# hopeguardrailAgent instances that differ only in their third constructor
# argument, which is repeated in the label.
guardrail_agent = or_suite.agents.resource_allocation.hope_guardrail.hopeguardrailAgent

agents = {
    'Guardrail-0.5': guardrail_agent(epLen, CONFIG, 0.5),
    'Guardrail-0.3': guardrail_agent(epLen, CONFIG, 0.3),
    'Guardrail-0.25': guardrail_agent(epLen, CONFIG, 0.25),
}

# Disabled agents, kept for reference:
#   'SB PPO': PPO(MlpPolicy, mon_env, gamma=1, verbose=0, n_steps=epLen),
#   'Random': or_suite.agents.rl.random.randomAgent(),
#   'Equal': or_suite.agents.resource_allocation.equal_allocation.equalAllocationAgent(epLen, CONFIG),
#   'FixedThreshold': or_suite.agents.resource_allocation.fixed_threshold.fixedThresholdAgent(epLen, CONFIG),

Mean and variance endomwnets:
[[39.20587486 50.08853996 35.37559313]
 [46.55235012 59.84495274 42.59752066]
 [72.245288   90.96648334 62.16188918]] [[  92.94859855  162.47584338  589.93066379]
 [ 199.32553707  339.36510156  926.26811452]
 [ 988.84055289 1587.19440903 2584.57344204]]
Mean and variance endomwnets:
[[ 7.47731355 31.62648643 34.84660757]
 [ 8.92689891 37.88511292 42.68660077]
 [13.61348708 57.09146433 63.15584145]] [[   3.49659948   66.09740516  580.48954987]
 [   7.61656482  130.85082927  963.74654124]
 [  34.18610677  659.01948972 2581.91028622]]
Mean and variance endomwnets:
[[ 4.25584282 52.99767184 34.39917429]
 [ 5.15490496 65.31671668 40.29901029]
 [ 7.57768503 97.97768305 62.76702875]] [[1.12200259e+00 1.74004003e+02 6.08319619e+02]
 [2.40380921e+00 3.78433348e+02 8.86329133e+02]
 [1.14630121e+01 1.86728929e+03 2.75482193e+03]]


# Step 5: Run Simulations

Run the different heuristics in the environment

In [4]:
# Silence all Python warnings for the remainder of the notebook so that the
# simulation output below is not interleaved with warning messages.
import warnings

warnings.simplefilter(action='ignore')

In [5]:
# Run every configured agent, storing each agent's results in its own
# '../data/resource_<label>/' directory, then draw a line plot and a radar plot.
path_list_line, algo_list_line = [], []
path_list_radar, algo_list_radar = [], []

# Agent labels that are dispatched to the tuning runner.
tuned_agent_names = ('AdaQL', 'Unif QL', 'AdaMB', 'Unif MB')

for agent, algorithm in agents.items():
    print(agent)
    agent_label = str(agent)
    result_dir = '../data/resource_'+agent_label
    # The runners receive the output directory through the shared settings dict.
    DEFAULT_SETTINGS['dirPath'] = result_dir+'/'

    if agent == 'SB PPO':
        # The Stable-Baselines agent runs on the Monitor-wrapped environment.
        or_suite.utils.run_single_sb_algo(mon_env, algorithm, DEFAULT_SETTINGS)
    elif agent in tuned_agent_names:
        # NOTE(review): scaling_list is not defined in the cells shown here; it must
        # exist before any of the tuned agents is added to `agents`.
        or_suite.utils.run_single_algo_tune(resource_env, algorithm, scaling_list, DEFAULT_SETTINGS)
    else:
        or_suite.utils.run_single_algo(resource_env, algorithm, DEFAULT_SETTINGS)

    # The line plot gets the path without a trailing slash; the radar plot gets
    # it with a trailing slash and leaves out 'SB PPO'.
    path_list_line.append(result_dir)
    algo_list_line.append(agent_label)
    if agent == 'SB PPO':
        continue
    path_list_radar.append(result_dir+'/')
    algo_list_radar.append(agent_label)

fig_path = '../figures/'

# Line plot over all agents; the last argument evaluates to 1 while nEps < 40.
fig_name = 'resource'+'_line_plot'+'.pdf'
or_suite.plots.plot_line_plots(
    path_list_line,
    algo_list_line,
    fig_path,
    fig_name,
    int(nEps / 40) + 1,
)

# Extra metrics for the radar plot: each entry maps a saved trajectory to a value.
additional_metric = {
    'Efficiency': lambda traj: or_suite.utils.delta_EFFICIENCY(traj, CONFIG),
    'Hindsight Envy': lambda traj: or_suite.utils.delta_HINDSIGHT_ENVY(traj, CONFIG),
    'Counterfactual Envy': lambda traj: or_suite.utils.delta_COUNTERFACTUAL_ENVY(traj, CONFIG),
    'Budget': lambda traj: or_suite.utils.times_out_of_budget(traj, CONFIG),
}
# Disabled metrics, kept for reference:
#   'Prop': lambda traj : or_suite.utils.delta_PROP(traj, CONFIG),
#   'Exante Envy': lambda traj : or_suite.utils.delta_EXANTE_ENVY(traj, CONFIG)

# Radar plot over the agents collected in path_list_radar.
fig_name = 'resource'+'_radar_plot'+'.pdf'
or_suite.plots.plot_radar_plots(
    path_list_radar,
    algo_list_radar,
    fig_path,
    fig_name,
    additional_metric,
)

Guardrail-0.5

env reset!
starting_state [392.3      392.3      392.3      392.3      392.3       10.932322
   7.617694  10.668706]
Lower and Upper Solutions:
[[0.13917 0.14981 0.25356 0.15351 0.     ]
 [0.23113 0.21565 0.      0.21047 0.     ]
 [0.09738 0.10151 0.18536 0.10281 0.33458]]
[[0.3285  0.35372 0.60246 0.3625  0.     ]
 [0.54705 0.51015 0.      0.49778 0.     ]
 [0.23072 0.24067 0.43707 0.24381 0.79162]]
timestep:  0
new state [382.08      381.97922   381.04694   381.94394   383.84515     7.2902865
  13.422441    1.0207489]
reward:  -0.9003692079730182
timestep:  1
new state [372.10687  372.30737  376.20197  372.3709   383.02676   18.746086
  22.220646  32.167324]
reward:  -0.7707156249571203
timestep:  2
new state [263.3      263.3      263.3      263.3      263.3       34.28867
  41.664326  77.80902 ]
reward:  -0.8999260161235547

env reset!
starting_state [263.3       263.3       263.3       263.3       263.3         8.275917
   6.1462727  23.933739 ]
timestep:  0
new sta

timestep:  0
new state [777.6495  778.1482  791.07965 778.2958  827.68884  75.25718  84.63726
 166.01964]
reward:  -0.846738367822145
timestep:  1
new state [668.32263  668.3946   673.1357   668.40704  696.18445   28.833546
  56.928272 183.26923 ]
reward:  -0.9200261888742641
timestep:  2
new state [632.4      632.4      632.4      632.4      632.4       50.193836
  74.47444   70.67178 ]
reward:  -0.9548446090371185

env reset!
starting_state [632.4     632.4     632.4     632.4     632.4      68.8054   84.22174
 127.70535]
timestep:  0
new state [534.25977  534.36163  535.08923  534.3983   531.2294    67.63453
  54.094967 143.87057 ]
reward:  -0.902883538754635
timestep:  1
new state [449.25537  448.21606  431.43356  447.87634  417.27768   63.066257
  71.81964   87.96967 ]
reward:  -0.938002654433394
timestep:  2
new state [605.1      605.1      605.1      605.1      605.1        8.978854
   6.497436   7.013225]
reward:  -0.8893718790254584

env reset!
starting_state [605.1      605.1

timestep:  0
new state [260.7613   260.21527  297.5957   260.04694  286.53488   35.394726
  60.797546  69.90704 ]
reward:  -1.2115474344111565
timestep:  1
new state [199.74588  199.85506  245.68713  199.90851  231.14697   30.988016
  41.87631   13.387293]
reward:  -0.8822384412401337
timestep:  2
new state [477.8     477.8     477.8     477.8     477.8      29.97029  79.58301
  71.92901]
reward:  -0.8174208175723034

env reset!
starting_state [477.8      477.8      477.8      477.8      477.8       29.97012
  47.961544  19.105997]
timestep:  0
new state [437.3093    438.13312   451.36954   438.4033    462.63632    44.03313
  61.8908      7.9277773]
reward:  -0.8209004714661026
timestep:  1
new state [387.15796 389.07614 421.3454  389.7004  456.3076   71.95437  68.23424
 121.90351]
reward:  -0.7917693910691611
timestep:  2
new state [714.6      714.6      714.6      714.6      714.6       71.721085
  91.92644  256.4721  ]
reward:  -0.9139102138775407

env reset!
starting_state [714.6  

timestep:  1
new state [545.87335  545.7317   544.5032   545.6818   548.9283    46.625374
  40.536503  65.2993  ]
reward:  -0.8396585025478865
timestep:  2
new state [454.4     454.4     454.4     454.4     454.4      75.72425  94.26665
  65.86504]
reward:  -0.9076098209698429

env reset!
starting_state [454.4      454.4      454.4      454.4      454.4       16.239038
  26.541824  42.66182 ]
timestep:  0
new state [424.70282  424.84818  425.95715  424.89996  420.60666   18.141928
   6.920915  28.72264 ]
reward:  -0.9062699301163597
timestep:  1
new state [408.33023  407.98764  402.4701   407.87555  397.85672   20.395384
  17.43484   28.010838]
reward:  -0.9552226060752397
timestep:  2
new state [292.1     292.1     292.1     292.1     292.1      52.04399  50.57515
  69.5221 ]
reward:  -0.9074862298578679

env reset!
starting_state [292.1     292.1     292.1     292.1     292.1      44.41743  53.63957
  93.22526]
timestep:  0
new state [264.44244  264.41516  263.53046  264.40747  260.8

timestep:  2
new state [221.7       221.7       221.7       221.7       221.7         9.273838
   7.4962244   6.80624  ]
reward:  -0.8756082906627898

env reset!
starting_state [221.7      221.7      221.7      221.7      221.7       18.730669
  12.072046  12.40901 ]
timestep:  0
new state [215.09465  215.03098  214.64449  215.00807  217.53279   14.357786
  11.766319  30.252405]
reward:  -1.7465311532838894
timestep:  1
new state [196.96152  196.6689   192.76619  196.5705   207.39787    5.742756
   8.286935   8.829516]
reward:  -1.0382833444533834
timestep:  2
new state [170.       170.       170.       170.       170.        14.507949
  18.000618  64.7807  ]
reward:  -0.8784354063785779

env reset!
starting_state [170.        170.        170.        170.        170.          5.2659383
   8.845427    8.063645 ]
timestep:  0
new state [166.43745  166.48505  167.16566  166.5009   167.29501    5.69863
   7.238175  16.642036]
reward:  -1.727733961079378
timestep:  1
new state [162.35081  1

timestep:  1
new state [186.28113  188.6745   220.39314  189.4816   235.64359   89.50899
 101.893684  97.4443  ]
reward:  -0.8886655529086533
timestep:  2
new state [645.7      645.7      645.7      645.7      645.7       40.498337
  61.951805  56.39791 ]
reward:  -1.4303422330759046

env reset!
starting_state [645.7      645.7      645.7      645.7      645.7       60.89885
  93.65985   60.738705]
timestep:  0
new state [560.44446  561.7603   582.417    562.1935   597.5408    36.809868
  81.66664   84.302216]
reward:  -0.8467802631505602
timestep:  1
new state [484.22647  486.7887   523.3537   487.64417  530.7462    36.449165
  73.52696   85.32822 ]
reward:  -0.8726142318566452
timestep:  2
new state [651.5      651.5      651.5      651.5      651.5       56.152573
  43.663044 101.74984 ]
reward:  -0.8818529287381127

env reset!
starting_state [651.5      651.5      651.5      651.5      651.5       34.027843
  53.530132  50.63875 ]
timestep:  0
new state [599.3548   599.968    608.8

timestep:  2
new state [330.6      330.6      330.6      330.6      330.6       58.317585
  94.88584   52.729427]
reward:  -1.308347366084725

env reset!
starting_state [330.6      330.6      330.6      330.6      330.6       18.382423
  27.60437   15.548024]
timestep:  0
new state [305.87317 306.27347 312.71594 306.40472 318.2689    9.41874  17.17638
  26.21136]
reward:  -0.8395160469848071
timestep:  1
new state [287.3353   287.87106  295.57675  288.04977  297.50616   36.315716
  62.315144 154.86093 ]
reward:  -0.9023141544412717
timestep:  2
new state [348.7      348.7      348.7      348.7      348.7       24.402864
  22.88881   21.793228]
reward:  -0.937057424968226

env reset!
starting_state [348.7      348.7      348.7      348.7      348.7        5.652894
   3.737667   8.455528]
timestep:  0
new state [342.84747  342.7587   341.59683  342.72876  342.00174    8.592444
  11.411427   9.920372]
reward:  -0.9274669520350772
timestep:  1
new state [331.4934   331.5103   332.0786   33

timestep:  2
new state [340.5      340.5      340.5      340.5      340.5       56.902996
  37.050056  75.11639 ]
reward:  -0.8058171038832043

env reset!
starting_state [340.5      340.5      340.5      340.5      340.5       57.523327
  85.151634 216.79782 ]
timestep:  0
new state [291.7016   291.51233  285.68616  291.45874  267.89246   66.74836
  50.117027 102.25382 ]
reward:  -1.7993861316062063
timestep:  1
new state [218.76624   217.72546   200.7558    217.3847    233.62193     3.8319468
  12.39549    12.854346 ]
reward:  -1.0106804606042956
timestep:  2
new state [407.2      407.2      407.2      407.2      407.2       41.882137
  39.20108   12.969887]
reward:  -0.8704319708870958

env reset!
starting_state [407.2      407.2      407.2      407.2      407.2       61.706684
  85.55704  109.112495]
timestep:  0
new state [314.95096 315.4661  322.29163 315.64005 320.75073  72.93623 109.75057
 147.65816]
reward:  -0.890737451691481
timestep:  1
new state [196.88467  198.14095  213.7

timestep:  2
new state [796.3     796.3     796.3     796.3     796.3      96.69082 115.95339
 225.6768 ]
reward:  -0.8929002261425006

env reset!
starting_state [796.3      796.3      796.3      796.3      796.3       53.358135
  39.631744  37.793907]
timestep:  0
new state [748.37146  748.1122   747.6154   748.01526  766.3351    54.07917
  59.410973  96.28825 ]
reward:  -0.8798252178546391
timestep:  1
new state [675.8901  675.5011  672.9205  675.36194 690.0546   68.82898  72.75336
  51.34048]
reward:  -0.9075231336634181
timestep:  2
new state [439.6      439.6      439.6      439.6      439.6        8.235048
   6.769574  13.642383]
reward:  -0.8587220590016444

env reset!
starting_state [439.6      439.6      439.6      439.6      439.6       24.99323
  19.304762  52.84357 ]
timestep:  0
new state [408.637     408.1932    401.43658   408.04663   397.74582     7.6029677
  21.218925   16.681393 ]
reward:  -0.9395154208662543
timestep:  1
new state [390.68286 390.66434 389.55457 390.6

new state [347.21545  346.65747  335.0844   346.48343  311.9552    31.277657
  61.211998  37.902855]
reward:  -0.945664188757322
timestep:  1
new state [294.70978  295.24457  299.64407  295.43408  281.9043    45.752937
 104.93855  131.14943 ]
reward:  -0.8395568226754555
timestep:  2
new state [566.1     566.1     566.1     566.1     566.1      42.18442  34.97069
  90.92688]
reward:  -0.8866858638707033

env reset!
starting_state [566.1       566.1       566.1       566.1       566.1         7.4680643
   3.575111   19.022127 ]
timestep:  0
new state [557.3022   557.05646  553.285    556.9754   551.03613   10.686058
   7.467669  18.518513]
reward:  -0.9708737272188568
timestep:  1
new state [545.434    545.01013  538.74945  544.86945  536.36743   47.531406
  46.26605  139.86356 ]
reward:  -0.9330734014215745
timestep:  2
new state [454.6      454.6      454.6      454.6      454.6       82.09983
  64.70097   89.103966]
reward:  -0.9470355786629647

env reset!
starting_state [454.6      

timestep:  2
new state [369.3       369.3       369.3       369.3       369.3         9.954162
   6.2021093  11.368587 ]
reward:  -0.8844804183770909

env reset!
starting_state [369.3      369.3      369.3      369.3      369.3       68.656334
  57.28457   84.397415]
timestep:  0
new state [295.9367   295.47922  291.02106  295.32     340.99933   51.262974
  73.26143   38.579063]
reward:  -0.9788072159692502
timestep:  1
new state [230.1182    230.68735   243.23878   230.86314   310.3971      4.4276123
   7.0010343  14.069587 ]
reward:  -0.8372051517575029
timestep:  2
new state [405.9      405.9      405.9      405.9      405.9       35.008423
  64.534256  63.143757]
reward:  -0.9221084978169882

env reset!
starting_state [405.9      405.9      405.9      405.9      405.9       53.765198
  93.29372   57.67846 ]
timestep:  0
new state [323.89423  325.4069   348.25244  325.90778  360.16705   58.072224
  58.741592 165.6324  ]
reward:  -0.8417017944063266
timestep:  1
new state [234.4682  

timestep:  2
new state [491.9      491.9      491.9      491.9      491.9       46.427876
  87.64983   11.633492]
reward:  -0.21329867772702543

env reset!
starting_state [491.9       491.9       491.9       491.9       491.9         3.8670585
   6.420634   14.785695 ]
timestep:  0
new state [473.67764  473.60507  473.14514  473.5795   464.27502    9.356492
   9.539852  20.92689 ]
reward:  -0.12833529843612052
timestep:  1
new state [444.43353   444.16492   441.34174   444.07336   425.17395     7.4263606
  11.283441   20.330017 ]
reward:  -0.12371763169726434
timestep:  2
new state [88.7       88.7       88.7       88.7       88.7        7.0896263
  7.5003157 10.886014 ]
reward:  -0.11104111663680143

env reset!
starting_state [88.7      88.7      88.7      88.7      88.7      17.161901 18.034266
 41.1559  ]
timestep:  0
new state [73.03816  72.9457   71.63156  72.91485  67.09654  17.53796  23.187601
 35.562305]
reward:  -1.396540995398339
timestep:  1
new state [56.344723 56.302315 55

timestep:  1
new state [172.82887  172.58922  169.17245  172.50935  157.71289   43.378193
  44.497005  85.76919 ]
reward:  -1.3699374558425945
timestep:  2
new state [359.8       359.8       359.8       359.8       359.8        15.8702135
  26.540382   56.178318 ]
reward:  -1.3855935534625032

env reset!
starting_state [359.8      359.8      359.8      359.8      359.8       70.43213
  40.849953  44.25658 ]
timestep:  0
new state [324.78668  324.61813  321.54785  324.56384  336.53223   46.474483
  35.458675 173.63602 ]
reward:  -1.3558672824508977
timestep:  1
new state [277.98593   276.84564   259.47827   276.46707   245.4209      6.7724886
   8.5429325  10.706835 ]
reward:  -1.439145474473164
timestep:  2
new state [415.       415.       415.       415.       415.        20.721453
  57.89818   73.81426 ]
reward:  -0.08677986892045125

env reset!
starting_state [415.       415.       415.       415.       415.        68.53038
  69.694214 136.3754  ]
timestep:  0
new state [357.2779   

timestep:  1
new state [405.16583 405.69376 414.88867 405.86462 392.2445   78.61618  69.34076
   7.90992]
reward:  -0.11215937978440917
timestep:  2
new state [567.2      567.2      567.2      567.2      567.2       48.755356
  71.0898   147.91473 ]
reward:  -0.009883126728084933

env reset!
starting_state [567.2     567.2     567.2     567.2     567.2      47.41181  67.01342
  21.97061]
timestep:  0
new state [438.94492  441.95862  482.88086  442.96356  526.1015    39.510483
  69.15841   52.063046]
reward:  -0.013424524089683141
timestep:  1
new state [298.69992  304.2469   381.75403  306.09036  428.79288   66.06673
  58.279484  79.13802 ]
reward:  -0.05018645761071643
timestep:  2
new state [573.4      573.4      573.4      573.4      573.4       55.864265
  61.33183   25.775007]
reward:  -0.09394105856485205

env reset!
starting_state [573.4      573.4      573.4      573.4      573.4       52.335735
  46.926376 107.326164]
timestep:  0
new state [422.60504  421.29852  404.40305  42

timestep:  2
new state [323.9      323.9      323.9      323.9      323.9       75.30197
  49.787437 157.8408  ]
reward:  0.017225488049293775

env reset!
starting_state [323.9      323.9      323.9      323.9      323.9       48.901306
  35.69111   80.94333 ]
timestep:  0
new state [289.85925  289.53305  284.5347   289.42496  281.40372   75.08307
  35.721703 140.8654  ]
reward:  -1.3948178775140627
timestep:  1
new state [241.7153   240.37653  219.87427  239.93217  207.46582   49.725544
  55.137745  53.014866]
reward:  -1.4222650794874656
timestep:  2
new state [457.3       457.3       457.3       457.3       457.3        11.368492
   9.109175    3.2090178]
reward:  -1.341686529550217

env reset!
starting_state [457.3      457.3      457.3      457.3      457.3       35.169315
  48.360188 143.0966  ]
timestep:  0
new state [299.42203  297.71484  407.33575  297.13458  382.2055    43.604095
  53.093307  87.836815]
reward:  -0.5031091136593624
timestep:  1
new state [259.55634  257.8839 

timestep:  1
new state [491.76053  492.2436   498.65274  492.40555  507.75214    5.648247
  11.647231  20.138252]
reward:  -0.005274255797191212
timestep:  2
new state [113.3       113.3       113.3       113.3       113.3         9.192139
   9.424058    9.3811455]
reward:  -0.10797518501231433

env reset!
starting_state [113.3      113.3      113.3      113.3      113.3        9.077349
   5.139045  17.247025]
timestep:  0
new state [107.16837   107.061295  105.42705   107.025734  104.247       6.418506
   6.6911726   5.971604 ]
reward:  -1.4155026952121057
timestep:  1
new state [102.685555 102.61034  101.4454   102.58548  101.1084    74.924
  49.702732  99.698296]
reward:  -1.338267014256519
timestep:  2
new state [506.3      506.3      506.3      506.3      506.3       67.775085
  74.19544   67.98529 ]
reward:  -1.3873755467625568

env reset!
starting_state [506.3      506.3      506.3      506.3      506.3       46.976486
  42.63326   92.41359 ]
timestep:  0
new state [372.31396  3

timestep:  2
new state [308.7      308.7      308.7      308.7      308.7       49.596004
  27.82501   10.060733]
reward:  -0.07462422724580198

env reset!
starting_state [308.7      308.7      308.7      308.7      308.7       41.31072
  52.407177  95.68648 ]
timestep:  0
new state [268.41696  268.40387  268.47168  268.39926  258.46655   31.152412
  69.70468  113.64171 ]
reward:  -1.3821339149958904
timestep:  1
new state [221.76675  222.06784  227.54352  222.16664  198.81218   11.565746
  19.625528  35.917618]
reward:  -1.3736967431131257
timestep:  2
new state [288.7      288.7      288.7      288.7      288.7       20.998123
  27.793528  59.579525]
reward:  -0.250834290562425

env reset!
starting_state [288.7      288.7      288.7      288.7      288.7       52.72184
  55.522614  90.40519 ]
timestep:  0
new state [245.73233  245.70908  245.45064  245.7016   241.22928   45.581596
  79.116684  74.49585 ]
reward:  -1.3747107936184844
timestep:  1
new state [198.49472  199.11829  208.9

timestep:  1
new state [208.52097  208.41356  206.74391  208.3781   206.45645   13.55508
  20.255032  35.263794]
reward:  -1.3734185744302652
timestep:  2
new state [263.3      263.3      263.3      263.3      263.3       41.887405
  62.40885  118.15421 ]
reward:  -0.24092141695166666

env reset!
starting_state [263.3       263.3       263.3       263.3       263.3        11.28861
  15.4703665  26.756807 ]
timestep:  0
new state [251.81221   251.82793   252.1512    251.83307   249.25293     7.3725243
   8.209448    6.464558 ]
reward:  -1.3784176898663283
timestep:  1
new state [233.01556  233.2344   236.25452  233.30754  245.85454   31.913866
  60.579903  96.16475 ]
reward:  -0.13612553396364313
timestep:  2
new state [366.2      366.2      366.2      366.2      366.2       37.437077
  72.17125   80.789856]
reward:  -1.3719348870457204

env reset!
starting_state [366.2      366.2      366.2      366.2      366.2       33.118614
  43.576427  21.261703]
timestep:  0
new state [341.356   

reward:  -0.10091582437720789
timestep:  2
new state [343.3      343.3      343.3      343.3      343.3       18.386501
  39.33978   50.860443]
reward:  -0.16355426826473504

env reset!
starting_state [343.3      343.3      343.3      343.3      343.3       10.510422
   8.959895  20.583656]
timestep:  0
new state [314.0747   313.8053   310.24802  313.71548  332.49432    8.499482
   5.828298  14.759619]
reward:  -0.2591731100004413
timestep:  1
new state [293.12592  292.58466  285.24426  292.40472  304.91605   38.588608
  36.916855  28.731676]
reward:  -0.13082161833628142
timestep:  2
new state [373.1      373.1      373.1      373.1      373.1       47.148235
  38.63328   85.47568 ]
reward:  -0.06221312006046286

env reset!
starting_state [373.1     373.1     373.1     373.1     373.1     101.98864  95.16695
  64.32684]
timestep:  0
new state [310.07532 310.571   317.62976 310.73816 339.26263  75.12408 107.28938
 100.2036 ]
reward:  -1.3260265589315958
timestep:  1
new state [243.3562

new state [607.45044  608.0711   616.25696  608.27875  628.7539     8.194466
  13.246318  16.880304]
reward:  0.004603328380927538
timestep:  1
new state [576.61224  577.4883   589.7241   577.77954  597.21063    8.282765
   9.343883  10.975043]
reward:  -0.08645997461943172
timestep:  2
new state [88.4      88.4      88.4      88.4      88.4       7.745594  7.176407
  6.834895]
reward:  -0.08343951042920396

env reset!
starting_state [88.4       88.4       88.4       88.4       88.4        9.646631
  4.5748014 13.86835  ]
timestep:  0
new state [82.82854  82.72847  81.170944 82.69532  81.11908   7.150502 10.046663
 16.76197 ]
reward:  -1.4082917995634325
timestep:  1
new state [75.49433  75.41065  74.15643  75.3829   72.31899  10.249759 13.550874
 43.51639 ]
reward:  -1.375911872994358
timestep:  2
new state [220.       220.       220.       220.       220.        30.658995
  24.893032  54.17347 ]
reward:  -1.419485826559771

env reset!
starting_state [220.       220.       220.       

timestep:  2
new state [343.4       343.4       343.4       343.4       343.4         6.9706616
   5.670271    1.       ]
reward:  -0.1803118272607722

env reset!
starting_state [343.4      343.4      343.4      343.4      343.4       29.085903
  47.19737   89.94591 ]
timestep:  0
new state [308.27304  308.314    309.31522  308.32703  296.18613   44.505474
  41.59888   91.696526]
reward:  -1.3852030912646054
timestep:  1
new state [271.58572  271.38824  268.88724  271.322    248.04916   35.116917
  17.87208   52.06892 ]
reward:  -1.393949973070977
timestep:  2
new state [469.5      469.5      469.5      469.5      469.5       33.133133
  48.93146   74.913124]
reward:  -0.26501700361218283

env reset!
starting_state [469.5      469.5      469.5      469.5      469.5       25.460798
  44.391293  15.7513  ]
timestep:  0
new state [388.37582  390.53314  420.60217  391.25018  440.0415    33.008034
  49.12839   50.76525 ]
reward:  -0.00987635711695082
timestep:  1
new state [278.05975  281.4

Lower and Upper Solutions:
[[0.18104 0.19064 0.32842 0.19385 0.     ]
 [0.29582 0.27779 0.      0.27181 0.     ]
 [0.12554 0.13123 0.22564 0.1331  0.44553]]
[[0.75602 0.7944  1.36398 0.80696 0.     ]
 [1.23528 1.15498 0.      1.12855 0.     ]
 [0.51889 0.54715 0.94188 0.5565  1.8551 ]]
timestep:  0
new state [415.70886 415.12506 406.35205 414.93436 433.91086  38.91278  71.64389
   5.44837]
reward:  -0.08398238316131931
timestep:  1
new state [294.96265  298.48444  348.10828  299.64758  423.74832   18.102997
  21.45836   31.063812]
reward:  0.045668966475337594
timestep:  2
new state [344.7      344.7      344.7      344.7      344.7       24.134176
  29.325552  54.766094]
reward:  -0.08353946611052968

env reset!
starting_state [344.7       344.7       344.7       344.7       344.7         8.275917
   6.1462727  23.933739 ]
timestep:  0
new state [318.43192  317.93146  310.86603  317.76617  334.0296     6.277636
   7.163428  17.836748]
reward:  -0.3115442023198107
timestep:  1
new stat

timestep:  2
new state [415.       415.       415.       415.       415.        20.721453
  57.89818   73.81426 ]
reward:  -0.0737677669355793

env reset!
starting_state [415.       415.       415.       415.       415.        68.53038
  69.694214 136.3754  ]
timestep:  0
new state [364.85574  364.67847  361.68665  364.62024  354.17157   38.588337
  45.5311    41.92854 ]
reward:  -1.529781335187501
timestep:  1
new state [257.68222  258.49518  269.5385   258.7636   276.34787   16.848251
  18.003662  54.69818 ]
reward:  -0.05515232852009794
timestep:  2
new state [488.7     488.7     488.7     488.7     488.7      52.49309  80.51353
  86.61034]
reward:  -0.2962872455401173

env reset!
starting_state [488.7      488.7      488.7      488.7      488.7       24.715092
  26.12814   66.91871 ]
timestep:  0
new state [403.01587  402.2743   391.94666  402.02875  458.8603    28.025057
  29.4309    61.69561 ]
reward:  -0.27401629319336845
timestep:  1
new state [313.45972 312.26233 295.59647 311

timestep:  2
new state [204.5       204.5       204.5       204.5       204.5         6.953627
   4.6202626  11.862257 ]
reward:  -0.07671870164782679

env reset!
starting_state [204.5      204.5      204.5      204.5      204.5       30.500729
  49.106117  36.047768]
timestep:  0
new state [179.92615  180.3136   186.32458  180.44194  188.39984   44.25565
  55.641575  82.57135 ]
reward:  -1.4633330868336623
timestep:  1
new state [145.08821  145.5842   153.13092  145.7488   151.56187   39.81807
  55.147755  89.94631 ]
reward:  -1.5114020958384644
timestep:  2
new state [594.3      594.3      594.3      594.3      594.3       39.69959
  48.491627  27.505339]
reward:  -1.5175941056828461

env reset!
starting_state [594.3      594.3      594.3      594.3      594.3        9.560337
  18.160345  16.280468]
timestep:  0
new state [556.1913   556.8226   565.91656  557.0302   564.0842    12.147562
  12.095174  11.091602]
reward:  -0.04764519256184625
timestep:  1
new state [526.3112    527.134

timestep:  1
new state [220.68982  220.28252  213.8879   220.14766  210.75671    7.120119
  11.146607  21.087925]
reward:  -1.55916788873085
timestep:  2
new state [204.        204.        204.        204.        204.          5.396743
   6.1037455   5.957112 ]
reward:  -0.24598229676085778

env reset!
starting_state [204.       204.       204.       204.       204.         9.975209
  13.237509  11.401019]
timestep:  0
new state [196.84688   196.92493   198.14479   196.95074   198.9089      7.8532887
   8.1733885  12.701206 ]
reward:  -1.4759733419726582
timestep:  1
new state [191.41275  191.49051  192.69563  191.51625  193.24213    8.361709
   9.827693  14.049187]
reward:  -1.5149738260725085
timestep:  2
new state [237.8      237.8      237.8      237.8      237.8       32.564133
  44.00888   19.04667 ]
reward:  -0.2002787459994398

env reset!
starting_state [237.8       237.8       237.8       237.8       237.8         7.13509
   7.0053797   8.972207 ]
timestep:  0
new state [233.3

timestep:  0
new state [379.89883  378.45667  359.0279   377.97644  499.09198   70.727875
  63.94251   93.313   ]
reward:  -0.2984992178355986
timestep:  1
new state [336.46426  334.96506  314.71234  334.46567  325.91968   35.703125
  48.16002   61.922764]
reward:  -1.2310680388622672
timestep:  2
new state [568.2      568.2      568.2      568.2      568.2       53.196083
   9.91678   89.91135 ]
reward:  -0.07505746879200134

env reset!
starting_state [568.2     568.2     568.2     568.2     568.2      36.24253  64.35823
  59.30623]
timestep:  0
new state [430.5261   432.62708  462.8744   433.31833  458.13074   43.5855
  49.650078  51.289314]
reward:  -0.050258087204407505
timestep:  1
new state [309.62933  312.59497  355.09143  313.57147  362.93732   34.74719
  43.892017  81.34888 ]
reward:  -0.06244727613626208
timestep:  2
new state [563.4     563.4     563.4     563.4     563.4      88.398    97.89396
 199.51474]
reward:  -0.09994841829703129

env reset!
starting_state [563.4     

timestep:  2
new state [308.7      308.7      308.7      308.7      308.7       49.596004
  27.82501   10.060733]
reward:  -0.061612233726948164

env reset!
starting_state [308.7      308.7      308.7      308.7      308.7       41.31072
  52.407177  95.68648 ]
timestep:  0
new state [273.70554  273.7094   273.51584  273.71127  266.02197   31.152412
  69.70468  113.64171 ]
reward:  -1.525362289868019
timestep:  1
new state [233.17908  233.49405  237.60779  233.60023  215.34076   11.565746
  19.625528  35.917618]
reward:  -1.5169250468548707
timestep:  2
new state [288.7      288.7      288.7      288.7      288.7       20.998123
  27.793528  59.579525]
reward:  -0.24348709265338075

env reset!
starting_state [288.7      288.7      288.7      288.7      288.7       52.72184
  55.522614  90.40519 ]
timestep:  0
new state [251.38107  251.36162  250.95831  251.35535  248.36766   45.581596
  79.116684  74.49585 ]
reward:  -1.5179391806277718
timestep:  1
new state [210.37247  210.91803  219

  12.7448845  20.188892 ]
reward:  -1.5437081923597213
timestep:  1
new state [210.71292  210.61566  209.11418  210.58337  209.27496   13.55508
  20.255032  35.263794]
reward:  -1.5166469917628707
timestep:  2
new state [263.3      263.3      263.3      263.3      263.3       41.887405
  62.40885  118.15421 ]
reward:  -0.23331290611497735

env reset!
starting_state [263.3       263.3       263.3       263.3       263.3        11.28861
  15.4703665  26.756807 ]
timestep:  0
new state [253.3208    253.33911   253.54744   253.34537   251.36565     7.3725243
   8.209448    6.464558 ]
reward:  -1.5216460488166117
timestep:  1
new state [234.25166  234.46355  237.39853  234.53374  248.4777    31.913866
  60.579903  96.16475 ]
reward:  -0.1262173156251641
timestep:  2
new state [366.2      366.2      366.2      366.2      366.2       37.437077
  72.17125   80.789856]
reward:  -1.515163214011529

env reset!
starting_state [366.2      366.2      366.2      366.2      366.2       33.118614
  43.

timestep:  2
new state [343.3      343.3      343.3      343.3      343.3       18.386501
  39.33978   50.860443]
reward:  -0.1505416430704504

env reset!
starting_state [343.3      343.3      343.3      343.3      343.3       10.510422
   8.959895  20.583656]
timestep:  0
new state [313.60526  313.33966  309.57217  313.252    334.11963    8.499482
   5.828298  14.759619]
reward:  -0.2516003119559636
timestep:  1
new state [292.3213   291.7804   284.07434  291.60202  306.7319    38.588608
  36.916855  28.731676]
reward:  -0.11780935018740143
timestep:  2
new state [373.1      373.1      373.1      373.1      373.1       47.148235
  38.63328   85.47568 ]
reward:  -0.04920123339702264

env reset!
starting_state [373.1     373.1     373.1     373.1     373.1     101.98864  95.16695
  64.32684]
timestep:  0
new state [318.4081  318.77887 325.0426  318.90027 344.3419   75.12408 107.28938
 100.2036 ]
reward:  -1.4692549127573242
timestep:  1
new state [260.48975  261.50357  277.70676  261.83

timestep:  2
new state [632.1      632.1      632.1      632.1      632.1       85.00664
  49.54382   42.874012]
reward:  -1.519074505481724

env reset!
starting_state [632.1       632.1       632.1       632.1       632.1        10.489874
  13.050293    1.7849479]
timestep:  0
new state [607.12244  607.71735  616.10425  607.9138   628.777      8.194466
  13.246318  16.880304]
reward:  0.017614873586854027
timestep:  1
new state [575.8053   576.67236  589.0213   576.9582   597.4516     8.282765
   9.343883  10.975043]
reward:  -0.07344783109683554
timestep:  2
new state [88.4      88.4      88.4      88.4      88.4       7.745594  7.176407
  6.834895]
reward:  -0.07042745236267046

env reset!
starting_state [88.4       88.4       88.4       88.4       88.4        9.646631
  4.5748014 13.86835  ]
timestep:  0
new state [83.55923  83.47019  82.10031  83.44065  82.21413   7.150502 10.046663
 16.76197 ]
reward:  -1.5515203157316444
timestep:  1
new state [77.1884   77.116486 75.96475  77.0

  87.80043]
reward:  -1.5150209066404712
timestep:  1
new state [214.13147  214.08456  213.07684  214.06952  205.5704    44.73864
  42.23047   63.062866]
reward:  -1.5244633875849405
timestep:  2
new state [421.9      421.9      421.9      421.9      421.9       20.367168
  14.929051  28.97692 ]
reward:  -1.5127802083918778

env reset!
starting_state [421.9      421.9      421.9      421.9      421.9       42.704178
  53.84729   65.30015 ]
timestep:  0
new state [289.21472  290.05426  302.1205   290.33054  392.75854   48.31242
  51.394344  89.22318 ]
reward:  -0.18043155665007307
timestep:  1
new state [254.06369  254.85838  266.09573  255.12007  352.9571    30.889675
  28.536442  95.86964 ]
reward:  -1.522065837822788
timestep:  2
new state [532.7      532.7      532.7      532.7      532.7       29.215311
  29.297398  90.52053 ]
reward:  -0.3031324041755763

env reset!
starting_state [532.7      532.7      532.7      532.7      532.7       15.893612
  22.797962  31.012772]
timestep: 


iter 0
episode 0
step 0
oldState [346.37134  346.59894  350.8377   346.67175  357.542     10.932322
   7.617694  10.668706]
action [[3.2850e-01 3.5372e-01 6.0246e-01 3.6250e-01 5.0000e-04]
 [5.4705e-01 5.1015e-01 5.0000e-04 4.9778e-01 5.0000e-04]
 [2.3072e-01 2.4067e-01 4.3707e-01 2.4381e-01 7.9162e-01]]
reward -0.9003692079730182
newState [382.08      381.97922   381.04694   381.94394   383.84515     7.2902865
  13.422441    1.0207489]
info {'type': array([ 7.29028633, 13.42244012,  1.02074885])}

iter 0
episode 0
step 1
oldState [382.08      381.97922   381.04694   381.94394   383.84515     7.2902865
  13.422441    1.0207489]
action [[3.2850e-01 3.5372e-01 6.0246e-01 3.6250e-01 5.0000e-04]
 [5.4705e-01 5.1015e-01 5.0000e-04 4.9778e-01 5.0000e-04]
 [2.3072e-01 2.4067e-01 4.3707e-01 2.4381e-01 7.9162e-01]]
reward -0.7707156249571203
newState [372.10687  372.30737  376.20197  372.3709   383.02676   18.746086
  22.220646  32.167324]
info {'type': array([18.74608702, 22.22064651, 32.1673

episode 0
step 1
oldState [525.3296   526.94055  551.3126   527.4739   564.24945   45.255993
  48.724453  50.271637]
action [[3.2850e-01 3.5372e-01 6.0246e-01 3.6250e-01 5.0000e-04]
 [5.4705e-01 5.1015e-01 5.0000e-04 4.9778e-01 5.0000e-04]
 [2.3072e-01 2.4067e-01 4.3707e-01 2.4381e-01 7.9162e-01]]
reward -0.8793782206761427
newState [472.20963  473.97693  502.05112  474.5578   524.40643   35.702198
  55.011387  43.152588]
info {'type': array([35.70219698, 55.0113853 , 43.15258931])}

iter 30
episode 0
step 2
oldState [472.20963  473.97693  502.05112  474.5578   524.40643   35.702198
  55.011387  43.152588]
action [[3.2850e-01 3.5372e-01 6.0246e-01 3.6250e-01 5.0000e-04]
 [5.4705e-01 5.1015e-01 5.0000e-04 4.9778e-01 5.0000e-04]
 [2.3072e-01 2.4067e-01 4.3707e-01 2.4381e-01 7.9162e-01]]
reward -0.8580546083904961
newState [403.        403.        403.        403.        403.          5.858639
   4.8854113  20.20858  ]
info {'type': array([ 5.85863872,  4.88541134, 20.20857924])}

iter 31

iter 60
episode 0
step 2
oldState [518.358     519.35956   532.59515   519.69763   529.20447     7.408211
   5.7085104   5.003377 ]
action [[3.2850e-01 3.5372e-01 6.0246e-01 3.6250e-01 5.0000e-04]
 [5.4705e-01 5.1015e-01 5.0000e-04 4.9778e-01 5.0000e-04]
 [2.3072e-01 2.4067e-01 4.3707e-01 2.4381e-01 7.9162e-01]]
reward -0.8751740167183147
newState [442.8      442.8      442.8      442.8      442.8       40.825855
  86.69175  119.6945  ]
info {'type': array([ 40.82585612,  86.69175306, 119.69450615])}

iter 61
episode 0
step 0
oldState [197.99567  198.48665  199.29411  198.67172  249.08615   43.107773
  72.889244  96.54523 ]
action [[3.2850e-01 3.5372e-01 6.0246e-01 3.6250e-01 5.0000e-04]
 [5.4705e-01 5.1015e-01 5.0000e-04 4.9778e-01 5.0000e-04]
 [2.3072e-01 2.4067e-01 4.3707e-01 2.4381e-01 7.9162e-01]]
reward -0.892346687451813
newState [366.4901  367.13193 374.59583 367.35193 366.31485  59.03257  76.00735
 186.46773]
info {'type': array([ 59.03256864,  76.00734602, 186.46773425])}

it


iter 91
episode 0
step 0
oldState [421.00302  421.47806  426.879    421.64157  420.18964   42.640495
  66.17611   12.99464 ]
action [[3.2850e-01 3.5372e-01 6.0246e-01 3.6250e-01 5.0000e-04]
 [5.4705e-01 5.1015e-01 5.0000e-04 4.9778e-01 5.0000e-04]
 [2.3072e-01 2.4067e-01 4.3707e-01 2.4381e-01 7.9162e-01]]
reward -0.7968364281292858
newState [501.39282  502.63     523.1981   503.03342  544.2587    42.420963
  42.42093  149.11977 ]
info {'type': array([ 42.42096342,  42.42093047, 149.11976329])}

iter 91
episode 0
step 1
oldState [501.39282  502.63     523.1981   503.03342  544.2587    42.420963
  42.42093  149.11977 ]
action [[3.2850e-01 3.5372e-01 6.0246e-01 3.6250e-01 5.0000e-04]
 [5.4705e-01 5.1015e-01 5.0000e-04 4.9778e-01 5.0000e-04]
 [2.3072e-01 2.4067e-01 4.3707e-01 2.4381e-01 7.9162e-01]]
reward -0.9563161477492784
newState [429.84625   430.09515   432.4442    430.18262   426.1701      3.754453
  10.728432    7.5455666]
info {'type': array([ 3.75445302, 10.72843143,  7.54556638

info {'type': array([11.3449239 , 14.51943186, 32.22775095])}

iter 121
episode 0
step 1
oldState [472.20398  471.76077  465.91     471.61142  467.60263   11.344924
  14.519432  32.227753]
action [[3.2850e-01 3.5372e-01 6.0246e-01 3.6250e-01 5.0000e-04]
 [5.4705e-01 5.1015e-01 5.0000e-04 4.9778e-01 5.0000e-04]
 [2.3072e-01 2.4067e-01 4.3707e-01 2.4381e-01 7.9162e-01]]
reward -0.9285040263812026
newState [453.09872  452.5845   444.9821   452.41394  442.07758   58.592712
  95.66263  123.06715 ]
info {'type': array([ 58.59271254,  95.66262732, 123.06714403])}

iter 121
episode 0
step 2
oldState [453.09872  452.5845   444.9821   452.41394  442.07758   58.592712
  95.66263  123.06715 ]
action [[3.2850e-01 3.5372e-01 6.0246e-01 3.6250e-01 5.0000e-04]
 [5.4705e-01 5.1015e-01 5.0000e-04 4.9778e-01 5.0000e-04]
 [2.3072e-01 2.4067e-01 4.3707e-01 2.4381e-01 7.9162e-01]]
reward -0.8904486076543321
newState [443.5      443.5      443.5      443.5      443.5       29.078756
  30.836649 125.66759 ]
i

newState [390.68286 390.66434 389.55457 390.6611  384.5261   44.67073  99.16755
  58.75831]
info {'type': array([44.67073204, 99.16754622, 58.75830875])}

iter 151
episode 0
step 2
oldState [390.68286 390.66434 389.55457 390.6611  384.5261   44.67073  99.16755
  58.75831]
action [[3.2850e-01 3.5372e-01 6.0246e-01 3.6250e-01 5.0000e-04]
 [5.4705e-01 5.1015e-01 5.0000e-04 4.9778e-01 5.0000e-04]
 [2.3072e-01 2.4067e-01 4.3707e-01 2.4381e-01 7.9162e-01]]
reward -0.834591982534829
newState [364.3      364.3      364.3      364.3      364.3       11.45339
  22.747179  41.27174 ]
info {'type': array([11.45339017, 22.74717898, 41.27174019])}

iter 152
episode 0
step 0
oldState [138.36005  139.2631   144.91911  139.58904  170.7311    27.08266
  51.075848  47.723213]
action [[3.2850e-01 3.5372e-01 6.0246e-01 3.6250e-01 5.0000e-04]
 [5.4705e-01 5.1015e-01 5.0000e-04 4.9778e-01 5.0000e-04]
 [2.3072e-01 2.4067e-01 4.3707e-01 2.4381e-01 7.9162e-01]]
reward -0.8669552895099502
newState [316.4516   31

iter 180
episode 0
step 2
oldState [228.74069  228.35397  219.62927  228.23589  240.6359    35.116917
  17.87208   52.06892 ]
action [[3.2850e-01 3.5372e-01 6.0246e-01 3.6250e-01 5.0000e-04]
 [5.4705e-01 5.1015e-01 5.0000e-04 4.9778e-01 5.0000e-04]
 [2.3072e-01 2.4067e-01 4.3707e-01 2.4381e-01 7.9162e-01]]
reward -0.9399715713804938
newState [469.5      469.5      469.5      469.5      469.5       33.133133
  48.93146   74.913124]
info {'type': array([33.13313439, 48.93146102, 74.91312507])}

iter 181
episode 0
step 0
oldState [310.72058  311.82     329.34534  312.1814   343.79437   25.460798
  44.391293  15.7513  ]
action [[3.2850e-01 3.5372e-01 6.0246e-01 3.6250e-01 5.0000e-04]
 [5.4705e-01 5.1015e-01 5.0000e-04 4.9778e-01 5.0000e-04]
 [2.3072e-01 2.4067e-01 4.3707e-01 2.4381e-01 7.9162e-01]]
reward -0.8132498648373071
newState [433.21774  434.05692  447.25427  434.33304  456.99603   33.008034
  49.12839   50.76525 ]
info {'type': array([33.00803419, 49.12839024, 50.76525088])}

iter

ValueError: Parameter value must be nonnegative.

In [None]:
from IPython.display import IFrame

# Embed the line-plot PDF stored under ../figures inline in the notebook.
# The IFrame is the cell's last expression, so it is what gets rendered.
IFrame(
    src="../figures/resource_line_plot.pdf",
    width=600,
    height=280,
)

In [None]:
# Embed the radar-plot PDF stored under ../figures inline in the notebook.
# Relies on `IFrame` having been imported by the preceding cell.
IFrame(
    src="../figures/resource_radar_plot.pdf",
    width=600,
    height=500,
)