In [1]:
# One-time setup: installs the "stockmarket" branch of the ai-economist fork in editable mode.
# Kept commented out so that re-running the notebook does not reinstall the package.
# If it has to be run from the notebook, prefer the `%pip` magic so the install targets
# the environment of the running kernel.
#!pip install -e git+https://github.com/casperbroch/ai-economist@stockmarket#egg=ai-economist

In [2]:
# Configuration of the environment that will be built.
#
# Built with keyword arguments; every keyword becomes a string key of the
# resulting dictionary, in the order written here.
env_config_dict = dict(
    # ===== SCENARIO CLASS =====
    # Name of the Scenario class in the Scenario Registry (foundation.scenarios).
    # The environment object will be an instance of that class.
    scenario_name='stock_market_simulation',

    # ===== COMPONENTS =====
    # List of ("component_name", {component_kwargs}) tuples:
    #   "component_name" is the Component class's name in the Component Registry
    #   (foundation.components); {component_kwargs} are passed to that class.
    # Components reset, step, and generate observations in the order listed.
    components=[
        # (1) BuyOrSellStocks component, default kwargs
        ('BuyOrSellStocks', {}),
        # (2) ExecCircuitBreaker component, default kwargs
        ('ExecCircuitBreaker', {}),
    ],

    # ===== STANDARD ARGUMENTS ======
    # kwargs used by every Scenario class (i.e. defined in BaseEnvironment)
    n_agents=4,            # Number of non-planner agents (must be > 1)
    world_size=[1, 1],     # [Height, Width] of the env world
    episode_length=100,    # Number of timesteps per episode

    # In multi-action mode the policy selects one action per action subspace
    # (defined in component code); otherwise it selects a single action.
    multi_action_mode_agents=False,
    multi_action_mode_planner=False,

    # True: concatenate scalar & vector observations before output.
    # False: return observations with minimal processing.
    flatten_observations=False,
    # True: concatenate each action subspace mask into a single array
    # (needed when action logits are masked with the flattened mask).
    flatten_masks=True,
    dense_log_frequency=1,
)

In [3]:
from rllib.env_wrapper import RLlibEnvWrapper
# Build one wrapped environment in the notebook process. Later cells read its
# observation/action spaces to declare the policies, and use it for rollouts.
# With verbose=True the recorded output lists the observation and action spaces.
env_obj = RLlibEnvWrapper({"env_config_dict": env_config_dict}, verbose=True)

Inside covid19_components.py: 0 GPUs are available.
No GPUs found! Running the simulation on a CPU.
Inside covid19_env.py: 0 GPUs are available.
No GPUs found! Running the simulation on a CPU.
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


[*********************100%***********************]  1 of 1 completed
[EnvWrapper] Spaces
[EnvWrapper] Obs (a)   
BuyOrSellStocks-stock_price: (1,)
action_mask    : (21,)
time           : (1,)
world-Endogenous-AbleToBuy: (1,)
world-Endogenous-AbleToSell: (1,)
world-Endogenous-AvailableFunds: (1,)
world-Endogenous-Demand: (1,)
world-Endogenous-Labor: (1,)
world-Endogenous-NumberOfStocks: (1,)
world-Endogenous-StockPrice: (1,)
world-Endogenous-StockPriceHistory: (1,)
world-Endogenous-Supply: (1,)
world-Endogenous-TotalBalance: (1,)


[EnvWrapper] Obs (p)   
action_mask    : (3,)
time           : (1,)
world-Prices_History: (1,)
world-Total_Demand: (1,)
world-Total_Supply: (1,)
world-Volumes  : (1,)


[EnvWrapper] Action (a) Discrete(21)
[EnvWrapper] Action (p) Discrete(3)


In [4]:
import ray
from ray.rllib.agents.ppo import PPOTrainer

In [5]:
# One policy spec per role, each a 4-tuple:
#   (policy class or None for the default policy,
#    observation space, action space, per-policy config overrides)
# "a" is shared by the agents, "p" is the planner's policy.
policies = {
    "a": (None, env_obj.observation_space, env_obj.action_space, {}),
    "p": (None, env_obj.observation_space_pl, env_obj.action_space_pl, {}),
}

def policy_mapping_fun(i):
    """Map an agent id to the id of the policy that controls it.

    In foundation, all the agents have integer ids and the social planner has
    an id of "p": any id whose string form is all digits maps to "a",
    everything else maps to "p".
    """
    if str(i).isdigit():
        return "a"
    return "p"


# Both policies are optimised during training.
policies_to_train = ["a", "p"]

In [6]:
# Base trainer configuration: only the multi-agent section is set here;
# later cells add the remaining keys to this same dictionary.
trainer_config = dict(
    multiagent=dict(
        policies=policies,
        policies_to_train=policies_to_train,
        policy_mapping_fn=policy_mapping_fun,
    ),
)

In [7]:
# Add rollout-worker and optimisation settings to the trainer configuration.
trainer_config.update(
    num_workers=2,
    num_envs_per_worker=2,
    # Other training parameters
    train_batch_size=4000,
    sgd_minibatch_size=4000,
    num_sgd_iter=1,
)

In [8]:
# Configuration handed to the environment wrapper. Besides the environment
# settings it carries "num_envs_per_worker", which the wrapper uses to index
# the environments.
env_config = dict(
    env_config_dict=env_config_dict,
    num_envs_per_worker=trainer_config.get('num_envs_per_worker'),
)

# Register the environment configuration with the trainer configuration.
trainer_config["env_config"] = env_config

In [9]:
# Initialize Ray, with the dashboard bound to localhost.
# ignore_reinit_error=True keeps this cell re-runnable: without it, calling
# ray.init() a second time in the same kernel raises instead of reusing the
# Ray instance that is already running.
ray.init(webui_host="127.0.0.1", ignore_reinit_error=True)

2024-05-02 15:51:12,733	INFO resource_spec.py:212 -- Starting Ray with 4.49 GiB memory available for workers and up to 2.25 GiB for objects. You can adjust these settings with ray.init(memory=<bytes>, object_store_memory=<bytes>).
2024-05-02 15:51:13,648	INFO services.py:1165 -- View the Ray dashboard at [1m[32m127.0.0.1:8265[39m[22m


{'node_ip_address': '192.168.1.41',
 'raylet_ip_address': '192.168.1.41',
 'redis_address': '192.168.1.41:6379',
 'object_store_address': 'tcp://127.0.0.1:55667',
 'raylet_socket_name': 'tcp://127.0.0.1:61497',
 'webui_url': '127.0.0.1:8265',
 'session_dir': 'C:\\Users\\caspe\\AppData\\Local\\Temp\\ray\\session_2024-05-02_15-51-12_733374_12360'}

In [10]:
# Create the PPO trainer: RLlib instantiates RLlibEnvWrapper itself, using the
# "env_config" entry of trainer_config.
trainer = PPOTrainer(env=RLlibEnvWrapper, config=trainer_config)

2024-05-02 15:51:18,904	ERROR syncer.py:46 -- Log sync requires rsync to be installed.
2024-05-02 15:51:18,907	INFO trainer.py:585 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
2024-05-02 15:51:18,908	INFO trainer.py:612 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[2m[36m(pid=16364)[0m   _np_qint8 = np.dtype([("qint8", np.int8, 1)])
[2m[36m(pid=16364)[0m   _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
[2m[36m(pid=16364)[0m   _np_qint16 = np.dtype([("qint16", np.int16, 1)])
[2m[36m(pid=16364)[0m   _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
[2m[36m(pid=16364)[0m   _np_qint32 = np.dtype([("qint32", np.int32, 1)])
[2m[36m(pid=16364)[0m   np_resource = np.dtype([("resource", np.ubyte, 1)])
[2m[36m(pid=3712)[0m   _np_qint8 = np.dtype([("qint8", np.int8, 1)])
[2m[36m(pid=3712)[0m   _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
[2m[36m(pid=3712)[0m   _np_qint16 = np.dtype([("qint16", np.int16, 1)])
[2m[36m(pid=3712)[0m   _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
[2m[36m(pid=3712)[0m   _np_qint32 = np.dtype([("qint32", np.int32, 1)])
[2m[36m(pid=3712)[0m   np

[2m[36m(pid=3712)[0m Inside covid19_components.py: 0 GPUs are available.
[2m[36m(pid=3712)[0m No GPUs found! Running the simulation on a CPU.
[2m[36m(pid=3712)[0m Inside covid19_env.py: 0 GPUs are available.
[2m[36m(pid=3712)[0m No GPUs found! Running the simulation on a CPU.
[2m[36m(pid=16364)[0m Inside covid19_components.py: 0 GPUs are available.
[2m[36m(pid=16364)[0m No GPUs found! Running the simulation on a CPU.
[2m[36m(pid=16364)[0m Inside covid19_env.py: 0 GPUs are available.
[2m[36m(pid=16364)[0m No GPUs found! Running the simulation on a CPU.
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


In [11]:
# Run NUM_ITERS training iterations and print the mean episode reward after each one.
#
# NOTE(review): in the recorded run this cell failed during the first iteration with
#   ValueError: could not broadcast input array from shape (2,) into shape (1,)
# raised from RLlib's observation preprocessor while a rollout worker was sampling,
# i.e. an observation held 2 values where a 1-element array was expected.
# Presumably one of the environment's observations changes size after the spaces
# are built -- TODO confirm in the scenario/component code before re-running.
NUM_ITERS = 10
for iteration in range(NUM_ITERS):
    print(f'********** Iter : {iteration} **********')
    result = trainer.train()
    print(f'''episode_reward_mean: {result.get('episode_reward_mean')}''')

********** Iter : 0 **********
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


RayTaskError(ValueError): [36mray::RolloutWorker.par_iter_next()[39m (pid=16364, ip=192.168.1.41)
  File "python\ray\_raylet.pyx", line 446, in ray._raylet.execute_task
  File "python\ray\_raylet.pyx", line 400, in ray._raylet.execute_task.function_executor
  File "C:\Users\caspe\anaconda3\envs\ai-economist\lib\site-packages\ray\function_manager.py", line 567, in actor_method_executor
    raise e
  File "C:\Users\caspe\anaconda3\envs\ai-economist\lib\site-packages\ray\function_manager.py", line 559, in actor_method_executor
    method_returns = method(actor, *args, **kwargs)
  File "C:\Users\caspe\anaconda3\envs\ai-economist\lib\site-packages\ray\util\iter.py", line 1125, in par_iter_next
    return next(self.local_it)
  File "C:\Users\caspe\anaconda3\envs\ai-economist\lib\site-packages\ray\rllib\evaluation\rollout_worker.py", line 263, in gen_rollouts
    yield self.sample()
  File "C:\Users\caspe\anaconda3\envs\ai-economist\lib\site-packages\ray\rllib\evaluation\rollout_worker.py", line 528, in sample
    batches = [self.input_reader.next()]
  File "C:\Users\caspe\anaconda3\envs\ai-economist\lib\site-packages\ray\rllib\evaluation\sampler.py", line 59, in next
    batches = [self.get_data()]
  File "C:\Users\caspe\anaconda3\envs\ai-economist\lib\site-packages\ray\rllib\evaluation\sampler.py", line 164, in get_data
    item = next(self.rollout_provider)
  File "C:\Users\caspe\anaconda3\envs\ai-economist\lib\site-packages\ray\rllib\evaluation\sampler.py", line 489, in _env_runner
    observation_fn=observation_fn)
  File "C:\Users\caspe\anaconda3\envs\ai-economist\lib\site-packages\ray\rllib\evaluation\sampler.py", line 641, in _process_observations
    policy_id).transform(raw_obs)
  File "C:\Users\caspe\anaconda3\envs\ai-economist\lib\site-packages\ray\rllib\models\preprocessors.py", line 234, in transform
    self.write(observation, array, 0)
  File "C:\Users\caspe\anaconda3\envs\ai-economist\lib\site-packages\ray\rllib\models\preprocessors.py", line 244, in write
    p.write(o, array, offset)
  File "C:\Users\caspe\anaconda3\envs\ai-economist\lib\site-packages\ray\rllib\models\preprocessors.py", line 170, in write
    observation, copy=False).ravel()
ValueError: could not broadcast input array from shape (2,) into shape (1,)

In [None]:
# Inspect the state of the agent with id 3 (with n_agents = 4, the last agent).
env_obj.env.get_agent(3).state

In [None]:
def _sample_action(trainer, observation, state, policy_id):
    """Sample one action and return ``(action, next_state)``.

    ``trainer.compute_action`` returns the bare action when ``state`` is empty
    and ``full_fetch`` is False; with a non-empty (recurrent) state it returns
    an ``(action, state_out, info)`` tuple. Both shapes are normalised here so
    the caller always receives a plain action plus the state to use next.
    """
    result = trainer.compute_action(
        observation,
        state,
        policy_id=policy_id,
        full_fetch=False,
    )
    if state:
        action, next_state, _ = result
        return action, next_state
    return result, state


def generate_rollout_from_current_trainer_policy(
    trainer,
    env_obj,
    num_dense_logs=1
):
    """Play episodes with the trainer's current policies and collect dense logs.

    Args:
        trainer: object exposing ``get_policy(policy_id)`` and
            ``compute_action(obs, state, policy_id=..., full_fetch=...)``.
        env_obj: environment wrapper exposing ``reset(force_dense_logging=True)``,
            ``step(actions)`` and an ``env`` attribute with ``n_agents``,
            ``episode_length`` and ``dense_log``.
        num_dense_logs: number of episodes to play (one dense log per episode).

    Returns:
        dict mapping the episode index (0 .. num_dense_logs - 1) to
        ``env_obj.env.dense_log`` as read at the end of that episode.
    """
    dense_logs = {}
    for idx in range(num_dense_logs):
        agent_ids = [str(agent_idx) for agent_idx in range(env_obj.env.n_agents)]

        # Set initial states (empty for non-recurrent policies).
        agent_policy = trainer.get_policy("a")
        agent_states = {
            agent_id: agent_policy.get_initial_state() for agent_id in agent_ids
        }
        planner_state = trainer.get_policy("p").get_initial_state()

        # Play out the episode.
        obs = env_obj.reset(force_dense_logging=True)
        for _ in range(env_obj.env.episode_length):
            actions = {}
            for agent_id in agent_ids:
                # Sample with the shared agent policy and carry the policy
                # state forward to the next timestep.
                actions[agent_id], agent_states[agent_id] = _sample_action(
                    trainer, obs[agent_id], agent_states[agent_id], "a"
                )

            # Action sampling for the planner.
            actions["p"], planner_state = _sample_action(
                trainer, obs["p"], planner_state, "p"
            )

            obs, _, done, _ = env_obj.step(actions)
            if done['__all__']:
                break
        dense_logs[idx] = env_obj.env.dense_log
    return dense_logs

In [None]:
# Play a single episode with the current policies and keep its dense log.
dense_logs = generate_rollout_from_current_trainer_policy(trainer, env_obj, num_dense_logs=1)

In [None]:
# Inspect the first entry of the 'states' list in the first dense log.
dense_logs[0]['states'][0]

In [None]:
# Collect the planner's "GreenScoreImportance" value from every logged state.
# NOTE(review): the planner observations printed by the env wrapper earlier in this
# notebook (Prices_History, Total_Demand, Total_Supply, Volumes) contain no field
# with this name -- confirm that the stock-market scenario's dense log actually
# stores it, otherwise this lookup raises KeyError.
planner_gr_score_importances = [log["p"]["GreenScoreImportance"] for log in dense_logs[0]['states']]

In [None]:
# Per-agent "GreenScore" series: one list per agent id, one value per logged state.
# NOTE(review): "GreenScore" is not among the endogenous fields listed in the env
# wrapper's printed observation spaces (AbleToBuy, AvailableFunds, StockPrice, ...)
# -- confirm the key exists in this scenario's dense log.
agent_0_green_scores, agent_1_green_scores, agent_2_green_scores, agent_3_green_scores = [
    [state[agent_id]["endogenous"]["GreenScore"] for state in dense_logs[0]['states']]
    for agent_id in ("0", "1", "2", "3")
]

In [None]:
# Per-agent "ReliabilityScore" series: one list per agent id, one value per logged state.
# NOTE(review): "ReliabilityScore" is not among the endogenous fields listed in the env
# wrapper's printed observation spaces -- confirm the key exists in this scenario's
# dense log.
agent_0_rel_scores, agent_1_rel_scores, agent_2_rel_scores, agent_3_rel_scores = [
    [state[agent_id]["endogenous"]["ReliabilityScore"] for state in dense_logs[0]['states']]
    for agent_id in ("0", "1", "2", "3")
]

In [None]:
# Per-agent "TotalData" series: one list per agent id, one value per logged state.
# NOTE(review): "TotalData" is not among the endogenous fields listed in the env
# wrapper's printed observation spaces -- confirm the key exists in this scenario's
# dense log.
agent_0_data, agent_1_data, agent_2_data, agent_3_data = [
    [state[agent_id]["endogenous"]["TotalData"] for state in dense_logs[0]['states']]
    for agent_id in ("0", "1", "2", "3")
]

In [None]:
# Number of logged states in the rollout (the length of every per-agent series).
len(agent_0_green_scores)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Plot every agent's green-score series from the rollout.
# The x-axis is derived from each series' own length instead of the former
# hard-coded np.arange(0, 1001): plt.plot needs x and y of equal length, and a
# fixed 1001-point axis only fits a series of exactly 1001 values, while this
# notebook configures episode_length = 100.
for agent_label, agent_series in (
    ("Agent 0", agent_0_green_scores),
    ("Agent 1", agent_1_green_scores),
    ("Agent 2", agent_2_green_scores),
    ("Agent 3", agent_3_green_scores),
):
    plt.plot(np.arange(len(agent_series)), agent_series, label=agent_label)
plt.title('Miner Green Scores')
plt.legend()
plt.xlabel('Timesteps')
plt.ylabel('Green Scores')
# Save before show(): the figure is written to the working directory.
plt.savefig("miners_green_scores.png")
plt.show()

In [None]:
# Plot every agent's reliability-score series from the rollout.
# The x-axis is derived from each series' own length instead of the former
# hard-coded np.arange(0, 1001): plt.plot needs x and y of equal length, and a
# fixed 1001-point axis only fits a series of exactly 1001 values, while this
# notebook configures episode_length = 100.
for agent_label, agent_series in (
    ("Agent 0", agent_0_rel_scores),
    ("Agent 1", agent_1_rel_scores),
    ("Agent 2", agent_2_rel_scores),
    ("Agent 3", agent_3_rel_scores),
):
    plt.plot(np.arange(len(agent_series)), agent_series, label=agent_label)
plt.title('Miner Reliability Scores')
plt.legend()
plt.xlabel('Timesteps')
plt.ylabel('Reliability Scores')
# Save before show(): the figure is written to the working directory.
plt.savefig("miners_reliability_scores.png")
plt.show()

In [None]:
# Plot every agent's total-data series from the rollout.
# The x-axis is derived from each series' own length instead of the former
# hard-coded np.arange(0, 1001): plt.plot needs x and y of equal length, and a
# fixed 1001-point axis only fits a series of exactly 1001 values, while this
# notebook configures episode_length = 100.
for agent_label, agent_series in (
    ("Agent 0", agent_0_data),
    ("Agent 1", agent_1_data),
    ("Agent 2", agent_2_data),
    ("Agent 3", agent_3_data),
):
    plt.plot(np.arange(len(agent_series)), agent_series, label=agent_label)
plt.title('Total Data Stored By Each Miner')
plt.legend()
plt.xlabel('Timesteps')
plt.ylabel('Data Storage')
# Save before show(): the figure is written to the working directory.
plt.savefig("miners_data_stored.png")
plt.show()

In [None]:
# Plot the planner's green-score-importance series from the rollout.
# The x-axis is derived from the series length instead of the former hard-coded
# np.arange(0, 1001): plt.plot needs x and y of equal length, and a fixed
# 1001-point axis only fits a series of exactly 1001 values, while this
# notebook configures episode_length = 100.
plt.plot(np.arange(len(planner_gr_score_importances)), planner_gr_score_importances)
plt.title('Green Score Importance over Time')
plt.xlabel('Timesteps')
plt.ylabel('Green Score Importance')
# Save before show(): the figure is written to the working directory.
plt.savefig("planner_policy.png")
plt.show()

In [None]:
# Shut down Ray once training and rollouts are finished; the Ray instance
# started by ray.init() earlier in this notebook otherwise keeps running.
ray.shutdown()