In [11]:
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
from IPython import display
from utils import plotting  

Inside covid19_components.py: 1 GPUs are available.
Inside covid19_env.py: 1 GPUs are available.




In [5]:
!pip install lz4

Collecting lz4
  Downloading lz4-4.3.3-cp39-cp39-win_amd64.whl (99 kB)
Installing collected packages: lz4
Successfully installed lz4-4.3.3


In [7]:
!pip install pycryptodome

Collecting pycryptodome
  Downloading pycryptodome-3.21.0-cp36-abi3-win_amd64.whl (1.8 MB)
Installing collected packages: pycryptodome
Successfully installed pycryptodome-3.21.0


In [8]:
!pip install Crypto

Collecting Crypto
  Downloading crypto-1.4.1-py2.py3-none-any.whl (18 kB)
Collecting Naked
  Downloading Naked-0.1.32-py2.py3-none-any.whl (587 kB)
Collecting shellescape
  Downloading shellescape-3.8.1-py2.py3-none-any.whl (3.1 kB)
Installing collected packages: shellescape, Naked, Crypto
Successfully installed Crypto-1.4.1 Naked-0.1.32 shellescape-3.8.1


In [9]:
!pip install GPUtil

Collecting GPUtil
  Downloading GPUtil-1.4.0.tar.gz (5.5 kB)
Building wheels for collected packages: GPUtil
  Building wheel for GPUtil (setup.py): started
  Building wheel for GPUtil (setup.py): finished with status 'done'
  Created wheel for GPUtil: filename=GPUtil-1.4.0-py3-none-any.whl size=7410 sha256=7a177316b5506edd43b34fbda932f8e35fc9f5809100e148b080a28f4c09ac54
  Stored in directory: c:\users\chetna\appdata\local\pip\cache\wheels\2b\b5\24\fbb56595c286984f7315ee31821d6121e1b9828436021a88b3
Successfully built GPUtil
Installing collected packages: GPUtil
Successfully installed GPUtil-1.4.0


In [12]:
# Configuration dictionary describing the environment that will be built.

env_config = {
    # ----- Scenario class -----
    # Name of the Scenario class to instantiate, as registered in the Scenario Registry
    # (foundation.scenarios). The environment object is an instance of that class.
    "scenario_name": "layout_from_file/simple_wood_and_stone",

    # ----- Components -----
    # List of ("component_name", {component_kwargs}) tuples:
    #   * "component_name" is the Component class's name in the Component Registry
    #     (foundation.components);
    #   * {component_kwargs} are the kwargs handed to that Component class.
    # Components reset, step, and generate observations in the order listed here.
    "components": [
        # (1) Building houses
        (
            "Build",
            {"skill_dist": "pareto", "payment_max_skill_multiplier": 3},
        ),
        # (2) Trading collectible resources
        (
            "ContinuousDoubleAuction",
            {"max_num_orders": 5},
        ),
        # (3) Movement and resource collection
        (
            "Gather",
            {},
        ),
    ],

    # ----- Scenario class arguments -----
    # Optional kwargs introduced by the Scenario class itself (not defined in BaseEnvironment).
    "env_layout_file": "quadrant_25x25_20each_30clump.txt",
    "starting_agent_coin": 10,
    "fixed_four_skill_and_loc": True,

    # ----- Standard arguments -----
    # kwargs shared by every Scenario class (defined in BaseEnvironment).
    "n_agents": 4,  # Number of non-planner agents (must be > 1)
    "world_size": [25, 25],  # [Height, Width] of the env world
    "episode_length": 1000,  # Number of timesteps per episode

    # With multi-action mode on, the policy picks one action per action subspace
    # (subspaces are defined in component code); with it off, the policy picks a single action.
    "multi_action_mode_agents": False,
    "multi_action_mode_planner": True,

    # If True, scalar and vector observations are concatenated before being output;
    # if False, observations are returned with minimal processing.
    "flatten_observations": False,
    # If True, the masks of all action subspaces are concatenated into a single array.
    # Note: flatten_masks = True is required for masking action logits in the code below.
    "flatten_masks": True,
}

In [14]:
# Note: The code for sampling actions (this cell) and for playing an episode (below) is general.
# That is, it doesn't depend on the Scenario and Component classes used in the environment!

def _mask_to_probs(mask):
    """Turn a mask of non-negative weights into sampling probabilities.

    Raises:
        ValueError: If the mask has no positive total, i.e. nothing can be sampled.
    """
    total = mask.sum()
    # `not total > 0` (rather than `total <= 0`) also rejects a NaN total.
    if not total > 0:
        raise ValueError(
            "Cannot sample an action: every action is masked out "
            "(mask of length {} sums to {}).".format(len(mask), total)
        )
    return mask / total


def sample_random_action(agent, mask):
    """Sample random UNMASKED action(s) for agent.

    Args:
        agent: Object exposing ``multi_action_mode`` and ``action_spaces``. In
            multi-action mode ``action_spaces`` must support ``.cumsum()`` and is used
            to split ``mask`` into one piece per action subspace; otherwise it is
            passed to ``np.arange`` as the number of available actions.
        mask: Flat array of weights over actions; entries equal to 0 are never sampled
            and the remaining entries are sampled in proportion to their weight.

    Returns:
        In multi-action mode, a list with one sampled action index per action subspace.
        Otherwise, a single sampled action index.

    Raises:
        ValueError: If the mask (or any subspace's piece of it) has no unmasked entry.
    """
    mask = np.asarray(mask)

    # Return a list of actions: 1 for each action subspace
    if agent.multi_action_mode:
        # The cumulative sizes (minus the last) are the split points between subspaces.
        split_masks = np.split(mask, agent.action_spaces.cumsum()[:-1])
        return [
            np.random.choice(np.arange(len(m_)), p=_mask_to_probs(m_))
            for m_ in split_masks
        ]

    # Return a single action
    return np.random.choice(np.arange(agent.action_spaces), p=_mask_to_probs(mask))

def sample_random_actions(env, obs):
    """Draw random UNMASKED action(s) for every agent that appears in obs.

    Args:
        env: Environment object; ``env.get_agent(idx)`` is used to look up each agent.
        obs: Mapping from agent index to that agent's observation dict, which must
            contain an ``'action_mask'`` entry.

    Returns:
        Dict mapping each agent index in ``obs`` to the result of
        ``sample_random_action`` for that agent.
    """
    sampled = {}
    for agent_idx, agent_obs in obs.items():
        agent = env.get_agent(agent_idx)
        sampled[agent_idx] = sample_random_action(agent, agent_obs['action_mask'])
    return sampled