# Resource Allocation Code Demo

The Food Bank of the Southern Tier (FBST) is a member of Feeding America, focused on providing food security for people with limited financial resources, and serves six counties and nearly 4,000 square miles in New York.  Under normal operations (non-COVID times), the Mobile Food Pantry program is among the main activities of the FBST.  The goal of the service is to make nutritious and healthy food more accessible to people in underserved communities.  Even in areas where other agencies provide assistance, clients may not always have access to food due to limited public transportation options, or because those agencies are only open certain hours or days per week.

Here we do a sample experiment testing out some of the existing and developed algorithms against a randomized heuristic.

In [1]:
import or_suite
import numpy as np

import copy

import os
from stable_baselines3.common.monitor import Monitor
from stable_baselines3 import PPO
from stable_baselines3.ppo import MlpPolicy
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.evaluation import evaluate_policy
import pandas as pd


import gym

In [2]:
# Build the environment configuration from the food-bank config factory.
# NOTE(review): the argument 10 presumably sets how many locations are sampled
# (the cell output lists 10 indices) -- confirm against or_suite.envs.env_configs.
CONFIG = or_suite.envs.env_configs.resource_allocation_foodbank_config(10)
# Alternative: the library's default resource-allocation configuration.
# CONFIG = or_suite.envs.env_configs.resource_allocation_default_config


# Experiment sizing: episode length (taken from the config's 'num_rounds'),
# number of episodes, and number of iterations.
epLen = CONFIG['num_rounds']
nEps = 1
numIters = 5

# Settings dict passed to the or_suite run helpers for every agent.
# 'dirPath' is only an initial value; the simulation loop overwrites it per agent.
DEFAULT_SETTINGS = {'seed': 1, 
                    'recFreq': 1, 
                    'dirPath': '../data/resource/', 
                    'deBug': False, 
                    'nEps': nEps, 
                    'numIters': numIters, 
                    'saveTrajectory': True, # save trajectory for calculating additional metrics
                    'epLen' : epLen,
                    'render': False,
                    'pickle': False # indicator for pickling final information
                    }

# Create the resource-allocation environment from CONFIG, plus a
# stable-baselines3 Monitor wrapper around it (mon_env is what the
# 'SB PPO' agent and run_single_sb_algo are given).
resource_env = gym.make('Resource-v0', config=CONFIG)
mon_env = Monitor(resource_env)


index [16 48 24 38 14 34 66 42 64  3]
mean[[ 71.325  85.59  128.385]
 [  6.2     7.44   11.16 ]
 [ 30.6    36.72   55.08 ]
 [  6.475   7.77   11.655]
 [ 40.05   48.06   72.09 ]
 [ 34.725  41.67   62.505]
 [ 45.1    54.12   81.18 ]
 [ 19.3    23.16   34.74 ]
 [ 53.425  64.11   96.165]
 [ 42.675  51.21   76.815]]
std_dev[[17.83125 25.677   57.77325]
 [ 1.55     2.232    5.022  ]
 [ 7.65    11.016   24.786  ]
 [ 1.61875  2.331    5.24475]
 [10.0125  14.418   32.4405 ]
 [ 8.68125 12.501   28.12725]
 [11.275   16.236   36.531  ]
 [ 4.825    6.948   15.633  ]
 [13.35625 19.233   43.27425]
 [10.66875 15.363   34.56675]]


In [3]:
# Agents to compare. Each key is the label used to build that agent's output
# directory and is passed to the plotting helpers as the algorithm name.
# Uncomment an entry to include it in the experiment.
_resource_agents = or_suite.agents.resource_allocation
agents = {
    # 'SB PPO': PPO(MlpPolicy, mon_env, gamma=1, verbose=0, n_steps=epLen),
    # 'Random': or_suite.agents.rl.random.randomAgent(),
    # 'Equal': _resource_agents.equal_allocation.equalAllocationAgent(epLen, CONFIG),
    'FixedThreshold': _resource_agents.fixed_threshold.fixedThresholdAgent(epLen, CONFIG),
    'Guardrail-0.5': _resource_agents.hope_guardrail.hopeguardrailAgent(epLen, CONFIG, 0.5),
    'Guardrail-0.3': _resource_agents.hope_guardrail.hopeguardrailAgent(epLen, CONFIG, 0.3),
    # 'Guardrail-0.25': _resource_agents.hope_guardrail.hopeguardrailAgent(epLen, CONFIG, 0.25),
}

Mean and variance endomwnets:
[[ 70.52447487   6.14774399  30.83056399   6.41716215  40.04699456
   34.46719369  45.2261563   19.28374002  53.62048425  42.78775517]
 [ 88.16887231   7.43441336  36.43635158   7.74197741  48.04468423
   42.01459717  53.75854587  23.47482705  64.54384759  50.99761653]
 [127.89917729  11.34623172  55.7116382   11.51124433  71.75330113
   62.88872063  80.29007251  35.04408972  96.74579243  78.14582232]] [[2.97131268e+02 2.25707192e+00 5.86075944e+01 2.79873988e+00
  9.29322543e+01 7.53241726e+01 1.25906576e+02 2.38287214e+01
  1.69919255e+02 1.15873497e+02]
 [6.41851866e+02 4.83541940e+00 1.17258915e+02 5.30067127e+00
  2.04274751e+02 1.68105950e+02 2.57679860e+02 5.01423843e+01
  3.61413896e+02 2.52038503e+02]
 [3.15295309e+03 2.38902909e+01 5.78548071e+02 2.51725783e+01
  9.74731458e+02 7.06762672e+02 1.26593197e+03 2.30207230e+02
  1.73676533e+03 1.06478935e+03]]
Mean and variance endomwnets:
[[ 70.99311643   6.21673805  30.49619821   6.48429692  39.7062

# Step 5: Run Simulations

Run the different heuristics in the environment

In [4]:
import warnings
# Silence every warning for the rest of the session so the simulation output
# below stays readable. This also hides deprecation warnings, so drop this
# line when debugging.
warnings.simplefilter('ignore')

In [5]:
# Output locations and labels handed to the plotting helpers.
# The radar lists leave out 'SB PPO'; the line lists include every agent.
path_list_line = []
algo_list_line = []
path_list_radar = []
algo_list_radar = []

# Labels that are run through the tuning helper instead of the plain runner.
tuned_agents = ('AdaQL', 'Unif QL', 'AdaMB', 'Unif MB')

for agent, algo in agents.items():
    print(agent)
    label = str(agent)
    out_dir = '../data/resource_' + label

    # Point the shared settings at this agent's own directory before running.
    DEFAULT_SETTINGS['dirPath'] = out_dir + '/'

    if agent == 'SB PPO':
        or_suite.utils.run_single_sb_algo(mon_env, algo, DEFAULT_SETTINGS)
    elif agent in tuned_agents:
        # NOTE(review): scaling_list is not defined in the visible cells; it
        # must exist before any of the tuned agents is added to `agents`.
        or_suite.utils.run_single_algo_tune(
            resource_env, algo, scaling_list, DEFAULT_SETTINGS
        )
    else:
        or_suite.utils.run_single_algo(resource_env, algo, DEFAULT_SETTINGS)

    # The line-plot path has no trailing slash; the radar-plot path has one.
    path_list_line.append(out_dir)
    algo_list_line.append(label)
    if agent == 'SB PPO':
        continue
    path_list_radar.append(out_dir + '/')
    algo_list_radar.append(label)

# Line plots across all agents.
fig_path = '../figures/'
fig_name = 'resource' + '_line_plot' + '.pdf'
line_plot_arg = int(nEps / 40) + 1
or_suite.plots.plot_line_plots(
    path_list_line,
    algo_list_line,
    fig_path,
    fig_name,
    line_plot_arg,
)

# Extra per-trajectory metrics for the radar plot; each callable receives a
# trajectory and evaluates it against CONFIG. Commented entries are optional.
additional_metric = {
    'Efficiency': lambda traj: or_suite.utils.delta_EFFICIENCY(traj, CONFIG),
    'Hindsight Envy': lambda traj: or_suite.utils.delta_HINDSIGHT_ENVY(traj, CONFIG),
    'Counterfactual Envy': lambda traj: or_suite.utils.delta_COUNTERFACTUAL_ENVY(traj, CONFIG),
    # 'Budget': lambda traj: or_suite.utils.times_out_of_budget(traj, CONFIG),
    # 'Prop': lambda traj: or_suite.utils.delta_PROP(traj, CONFIG),
    # 'Exante Envy': lambda traj: or_suite.utils.delta_EXANTE_ENVY(traj, CONFIG),
}

# Radar plots for the agents collected in the radar lists.
fig_name = 'resource' + '_radar_plot' + '.pdf'
or_suite.plots.plot_radar_plots(
    path_list_radar,
    algo_list_radar,
    fig_path,
    fig_name,
    additional_metric,
)

FixedThreshold
index [29  5 40 19 22 59 45 39 14 58]
mean[[ 57.65   69.18  103.77 ]
 [ 52.7    63.24   94.86 ]
 [  8.45   10.14   15.21 ]
 [ 31.5    37.8    56.7  ]
 [ 41.5    49.8    74.7  ]
 [  5.875   7.05   10.575]
 [  6.175   7.41   11.115]
 [ 28.     33.6    50.4  ]
 [ 40.05   48.06   72.09 ]
 [  8.575  10.29   15.435]]
std_dev[[14.4125  20.754   46.6965 ]
 [13.175   18.972   42.687  ]
 [ 2.1125   3.042    6.8445 ]
 [ 7.875   11.34    25.515  ]
 [10.375   14.94    33.615  ]
 [ 1.46875  2.115    4.75875]
 [ 1.54375  2.223    5.00175]
 [ 7.      10.08    22.68   ]
 [10.0125  14.418   32.4405 ]
 [ 2.14375  3.087    6.94575]]
Lower and Upper Solutions:
[[0.31983 0.346   0.57647 0.35525 0.     ]
 [0.53    0.49527 0.      0.48363 0.     ]
 [0.22661 0.23468 0.42958 0.23713 0.75146]]
index [ 7 35 40 47 36 32 54  3 22 26]
mean[[42.125 50.55  75.825]
 [ 7.525  9.03  13.545]
 [ 8.45  10.14  15.21 ]
 [ 8.2    9.84  14.76 ]
 [16.725 20.07  30.105]
 [37.075 44.49  66.735]
 [ 6.1    7.32  10.98

In [6]:
from IPython.display import IFrame
# Embed the line-plot PDF inline; the path is fig_path + the line-plot fig_name
# from the simulation cell ('../figures/' + 'resource_line_plot.pdf').
IFrame("../figures/resource_line_plot.pdf", width=600, height=280)

In [7]:
# Embed the radar-plot PDF inline; the path is fig_path + the radar-plot
# fig_name from the simulation cell ('../figures/' + 'resource_radar_plot.pdf').
IFrame("../figures/resource_radar_plot.pdf", width=600, height=500)