# Resource Allocation Code Demo

The Food Bank of the Southern Tier (FBST) is a member of Feeding America, focused on providing food security for people with limited financial resources, and serves six counties and nearly 4,000 square miles in New York.  Under normal operations (non-COVID times), the Mobile Food Pantry program is among the main activities of the FBST.  The goal of the service is to make nutritious and healthy food more accessible to people in underserved communities.  Even in areas where other agencies provide assistance, clients may not always have access to food due to limited public transportation options, or because those agencies are only open limited hours or days per week.

Here we do a sample experiment testing out some of the existing and developed algorithms against a randomized heuristic.

In [1]:
import or_suite
import numpy as np

import copy

import os
from stable_baselines3.common.monitor import Monitor
from stable_baselines3 import PPO
from stable_baselines3.ppo import MlpPolicy
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.evaluation import evaluate_policy
import pandas as pd


import gym

In [2]:
# Environment configuration: the food-bank preset, called with argument 6.
# Alternative preset:
#   CONFIG = or_suite.envs.env_configs.resource_allocation_default_config
CONFIG = or_suite.envs.env_configs.resource_allocation_foodbank_config(6)

# Experiment sizing: episode length comes from the environment configuration;
# the number of episodes and the number of iterations are fixed here.
epLen = CONFIG['num_rounds']
nEps = 1
numIters = 50

# Settings dictionary handed to the or_suite experiment runners.
DEFAULT_SETTINGS = {
    'seed': 1,
    'recFreq': 1,
    'dirPath': '../data/resource/',
    'deBug': False,
    'nEps': nEps,
    'numIters': numIters,
    'saveTrajectory': True,  # save trajectory for calculating additional metrics
    'epLen': epLen,
    'render': False,
    'pickle': False,  # indicator for pickling final information
}

# The environment itself, plus the same environment wrapped in a
# stable-baselines3 Monitor.
resource_env = gym.make('Resource-v0', config=CONFIG)
mon_env = Monitor(resource_env)


In [3]:
# Agents to compare, keyed by display label. Iteration follows insertion
# order, so the entries run in the order they are listed here.
agents = {
    # Disabled alternatives (uncomment to include them in the comparison):
    # 'SB PPO': PPO(MlpPolicy, mon_env, gamma=1, verbose=0, n_steps=epLen),
    # 'Random': or_suite.agents.rl.random.randomAgent(),
    # 'Equal': or_suite.agents.resource_allocation.equal_allocation.equalAllocationAgent(epLen, CONFIG),
    'FixedThreshold': or_suite.agents.resource_allocation.fixed_threshold.fixedThresholdAgent(
        epLen, CONFIG
    ),
    'Guardrail-0.5': or_suite.agents.resource_allocation.hope_guardrail.hopeguardrailAgent(
        epLen, CONFIG, 0.5
    ),
    'Guardrail-0.3': or_suite.agents.resource_allocation.hope_guardrail.hopeguardrailAgent(
        epLen, CONFIG, 0.3
    ),
    # 'Guardrail-0.25': or_suite.agents.resource_allocation.hope_guardrail.hopeguardrailAgent(epLen, CONFIG, 0.25),
}

Mean and variance endomwnets:
[[ 78.26375893   6.0727015    6.44300652  45.77261221  44.09344226
   65.08365485]
 [ 95.46955096   7.22226892   7.79065549  54.01600952  52.40468424
   78.47564501]
 [143.91159334  10.74500626  11.99429086  82.83258243  78.44525594
  122.24599817]] [[3.64072981e+02 2.33741060e+00 2.57228850e+00 1.15437516e+02
  1.18667423e+02 2.38134618e+02]
 [7.94745501e+02 5.05359566e+00 5.24406535e+00 2.59898915e+02
  2.39934924e+02 5.14491414e+02]
 [4.02164318e+03 2.28563550e+01 2.64385547e+01 1.36285195e+03
  1.23836885e+03 2.72751318e+03]]
Mean and variance endomwnets:
[[ 78.41886444   6.08126054   6.39750545  45.47523737  43.64117171
   65.20269258]
 [ 92.97936165   7.36984864   7.76322388  53.98400493  53.45615034
   78.02872186]
 [144.72413253  11.09143577  11.94231468  80.29985241  80.27982003
  116.85010458]] [[3.75543122e+02 2.24332816e+00 2.63604673e+00 1.25118369e+02
  1.21187605e+02 2.72275097e+02]
 [7.87113421e+02 4.65884021e+00 5.12908280e+00 2.60890199e+

# Run Simulations

Run the different heuristics in the environment.

In [4]:
import warnings
warnings.simplefilter('ignore')

In [5]:
# Run every configured agent on the environment, writing each agent's results
# to its own directory, then draw the line-plot and radar-plot comparisons.
path_list_line = []
algo_list_line = []
path_list_radar = []
algo_list_radar = []

for agent in agents:
    print(agent)
    agent_dir = '../data/resource_' + str(agent)

    # Give each run its own settings dict instead of overwriting
    # DEFAULT_SETTINGS['dirPath'] in place: the shared configuration keeps the
    # values it was defined with, and re-running this cell starts from the same
    # state every time.
    agent_settings = dict(DEFAULT_SETTINGS, dirPath=agent_dir + '/')

    if agent == 'SB PPO':
        # The stable-baselines agent is run against the Monitor-wrapped env.
        or_suite.utils.run_single_sb_algo(mon_env, agents[agent], agent_settings)
    elif agent in ('AdaQL', 'Unif QL', 'AdaMB', 'Unif MB'):
        # NOTE(review): `scaling_list` is not defined in the visible part of
        # this notebook. Define it before adding any of these agents to
        # `agents`, otherwise this branch raises NameError.
        or_suite.utils.run_single_algo_tune(resource_env, agents[agent], scaling_list, agent_settings)
    else:
        or_suite.utils.run_single_algo(resource_env, agents[agent], agent_settings)

    # The line-plot helper is given the directory without a trailing slash and
    # the radar-plot helper with one; both forms are passed on unchanged.
    path_list_line.append(agent_dir)
    algo_list_line.append(str(agent))
    if agent != 'SB PPO':
        path_list_radar.append(agent_dir + '/')
        algo_list_radar.append(str(agent))

fig_path = '../figures/'
fig_name = 'resource_line_plot.pdf'
or_suite.plots.plot_line_plots(path_list_line, algo_list_line, fig_path, fig_name, int(nEps / 40) + 1)

# Extra metrics for the radar plot; each entry maps a display label to a
# function of the saved trajectory.
additional_metric = {
    'Efficiency': lambda traj: or_suite.utils.delta_EFFICIENCY(traj, CONFIG),
    'Hindsight Envy': lambda traj: or_suite.utils.delta_HINDSIGHT_ENVY(traj, CONFIG),
    'Counterfactual Envy': lambda traj: or_suite.utils.delta_COUNTERFACTUAL_ENVY(traj, CONFIG),
    # Disabled alternatives:
    # 'Budget': lambda traj: or_suite.utils.times_out_of_budget(traj, CONFIG),
    # 'Prop': lambda traj: or_suite.utils.delta_PROP(traj, CONFIG),
    # 'Exante Envy': lambda traj: or_suite.utils.delta_EXANTE_ENVY(traj, CONFIG),
}
fig_name = 'resource_radar_plot.pdf'
or_suite.plots.plot_radar_plots(
    path_list_radar,
    algo_list_radar,
    fig_path,
    fig_name,
    additional_metric,
)

FixedThreshold
Lower Solutions:
[[0.32854 0.34986 0.55642 0.35732 0.     ]
 [0.52429 0.49707 0.      0.48804 0.     ]
 [0.23534 0.23951 0.41909 0.24067 0.73371]]
Writing to file data.csv
Guardrail-0.5
Lower and Upper Solutions:
[[0.32854 0.34986 0.55642 0.35732 0.     ]
 [0.52429 0.49707 0.      0.48804 0.     ]
 [0.23534 0.23951 0.41909 0.24067 0.73371]]
[[0.55367 0.58995 0.94496 0.60261 0.     ]
 [0.88643 0.83982 0.      0.82429 0.     ]
 [0.39829 0.40557 0.70558 0.40766 1.23989]]
Writing to file data.csv
Guardrail-0.3
Lower and Upper Solutions:
[[0.32854 0.34986 0.55642 0.35732 0.     ]
 [0.52429 0.49707 0.      0.48804 0.     ]
 [0.23534 0.23951 0.41909 0.24067 0.73371]]
[[0.78633 0.83699 1.35296 0.85437 0.     ]
 [1.26498 1.19346 0.      1.16997 0.     ]
 [0.56566 0.57966 0.99953 0.58384 1.76452]]
Writing to file data.csv
        Algorithm     Reward      Time     Space     Efficiency  \
0  FixedThreshold -44.802121  1.021070 -35583.46 -211437.667358   
1   Guardrail-0.5 -44.80212

In [6]:
from IPython.display import IFrame

# Embed the saved line-plot PDF; the bare expression on the last line is what
# the notebook renders as this cell's output.
IFrame(src="../figures/resource_line_plot.pdf", width=600, height=280)

In [7]:
# Embed the saved radar-plot PDF (IFrame is imported in the preceding cell).
IFrame(src="../figures/resource_radar_plot.pdf", width=600, height=500)