In [1]:
#!pip install -e git+https://github.com/casperbroch/ai-economist@stockmarket#egg=ai-economist

In [2]:
import matplotlib.pyplot as plt
import numpy as np
import time

In [3]:
# Define the configuration of the environment that will be built.
# This dict is handed to RLlibEnvWrapper under the "env_config_dict" key.

env_config_dict = {
    # ===== SCENARIO CLASS =====
    # Which Scenario class to use: the class's name in the Scenario Registry (foundation.scenarios).
    # The environment object will be an instance of the Scenario class.
    'scenario_name': 'stock_market_simulation',
    
    # ===== COMPONENTS =====
    # Which components to use (specified as list of ("component_name", {component_kwargs}) tuples).
    #   "component_name" refers to the Component class's name in the Component Registry (foundation.components)
    #   {component_kwargs} is a dictionary of kwargs passed to the Component class
    # The order in which components reset, step, and generate obs follows their listed order below.
    'components': [
        # (1) Stock trading component, used with its default kwargs
        #     (presumably lets agents buy or sell stocks -- confirm against the component code)
        ('BuyOrSellStocks', {}),
        # (2) Circuit breaker component -- currently disabled
        #('ExecCircuitBreaker', {}),
    ],
    

    # ===== STANDARD ARGUMENTS ======
    # kwargs that are used by every Scenario class (i.e. defined in BaseEnvironment)
    'n_agents': 4,          # Number of non-planner agents (must be > 1)
    'world_size': [1, 1], # [Height, Width] of the env world
    'episode_length': 100, # Number of timesteps per episode
    
    # In multi-action-mode, the policy selects an action for each action subspace (defined in component code).
    # Otherwise, the policy selects only 1 action.
    'multi_action_mode_agents': False,
    'multi_action_mode_planner': False,
    
    # When flattening observations, concatenate scalar & vector observations before output.
    # Otherwise, return observations with minimal processing.
    'flatten_observations': False,
    # When Flattening masks, concatenate each action subspace mask into a single array.
    # NOTE(review): the original note said flatten_masks = True is required for masking action
    # logits "in the code below", but this notebook contains no explicit masking code --
    # confirm whether the requirement still applies here.
    'flatten_masks': True,
    # NOTE(review): presumably the dense log is written every N-th episode (1 = every episode) -- confirm.
    'dense_log_frequency': 1
}

In [4]:
from rllib.env_wrapper import RLlibEnvWrapper

# Build one wrapped environment up front: its observation/action spaces feed
# the policy specs, and the same instance is reused for the rollout.
env_obj = RLlibEnvWrapper(dict(env_config_dict=env_config_dict), verbose=True)

Inside covid19_components.py: 0 GPUs are available.
No GPUs found! Running the simulation on a CPU.
Inside covid19_env.py: 0 GPUs are available.
No GPUs found! Running the simulation on a CPU.
393.21212536803733
404.3730059230947
411.6573555546809
414.0867794993427
419.5042225470454
426.6708238923922
430.52610270846355
432.9751468157967
439.0487366723079
449.34949646477884
458.78614158795
465.312412907865
476.36540048222184
482.1867882819715
489.82740714346113
501.04168357287796
504.7767789937467
515.7770252880972
523.5395270054714
527.475451038204
529.0205581123629
541.4680836293566
554.103408778064
554.7187086632741
567.9270865394728
578.169376162053
587.994361436971
610.3531007598178
622.7508908163234
636.529229203614
650.8621877351387
662.0297662465912
675.264144422278
682.5683836456033
685.4080919543869
701.777234987004
710.5455420408365
723.0978338459996
734.7213626069272
764.5457610383585
780.6205133017714
785.7588660266674
802.8133462464006
825.7449156505076
848.3999180436668
8

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


KeyboardInterrupt: 

In [None]:
import ray
from ray.rllib.agents.ppo import PPOTrainer

In [None]:
# Policy specs keyed by policy id ("a" for the agents, "p" for the planner).
# Each spec is a 4-tuple:
#   (policy class, observation space, action space, policy config)
policies = dict(
    a=(
        None,  # uses default policy
        env_obj.observation_space,
        env_obj.action_space,
        {},  # custom agent policy configuration (none set)
    ),
    p=(
        None,  # uses default policy
        env_obj.observation_space_pl,
        env_obj.action_space_pl,
        {},  # custom planner policy configuration (none set)
    ),
)

# In foundation, all the agents have integer ids and the social planner has an id of "p"
def policy_mapping_fun(i):
    """Map an environment agent id to the id of the policy that controls it.

    Ids whose string form consists only of digits are routed to the agent
    policy "a"; every other id is routed to the planner policy "p".
    """
    if str(i).isdigit():
        return "a"
    return "p"


# Both the agent policy and the planner policy are trained.
policies_to_train = ["a", "p"]

In [None]:
# Multi-agent section of the trainer config: the available policy specs,
# which of them are trained, and how agent ids are mapped onto them.
trainer_config = dict(
    multiagent=dict(
        policies=policies,
        policies_to_train=policies_to_train,
        policy_mapping_fn=policy_mapping_fun,
    )
)

In [None]:
# Rollout worker settings plus the remaining training parameters.
trainer_config.update(
    num_workers=2,
    num_envs_per_worker=2,
    # Other training parameters
    train_batch_size=4000,
    sgd_minibatch_size=4000,
    num_sgd_iter=1,
)

In [None]:
# The env wrapper also receives "num_envs_per_worker" so it can index the environments.
env_config = dict(
    env_config_dict=env_config_dict,
    num_envs_per_worker=trainer_config.get("num_envs_per_worker"),
)

# Attach the environment settings to the trainer configuration.
trainer_config["env_config"] = env_config

In [None]:
# Initialize Ray
# NOTE(review): `webui_host` appears to be the older Ray keyword for the dashboard
# bind address -- confirm it matches the Ray version this notebook is pinned to.
ray.init(webui_host="127.0.0.1")

In [None]:
# Create the PPO trainer; RLlib receives the wrapper class itself and builds
# its environments from trainer_config["env_config"].
trainer = PPOTrainer(env=RLlibEnvWrapper, config=trainer_config)

In [None]:
# Number of trainer.train() calls to run.
NUM_ITERS = 10

# Elapsed times are measured with time.perf_counter(): it is monotonic, so the
# differences below cannot be skewed by system clock adjustments the way
# time.time() differences can.
start_train = time.perf_counter()

for iteration in range(NUM_ITERS):
    print(f'********** Iter : {iteration} **********')
    start = time.perf_counter()
    result = trainer.train()
    length = time.perf_counter() - start
    # .get() prints None instead of raising if the key is missing from the result.
    print(f"episode_reward_mean: {result.get('episode_reward_mean')}")
    print(f"it_time_taken: {length}")

length_train = time.perf_counter() - start_train
print("Training took", length_train, " seconds.")

In [None]:
def generate_rollout_from_current_trainer_policy(
    trainer,
    env_obj,
    num_dense_logs=1
):
    """Play out episodes with the trainer's current policies and collect dense logs.

    Args:
        trainer: object exposing ``get_policy(policy_id)`` and
            ``compute_action(obs, state, policy_id=..., full_fetch=...)``.
        env_obj: wrapped environment exposing ``reset``, ``step`` and an inner
            ``env`` with ``n_agents``, ``episode_length`` and ``dense_log``.
        num_dense_logs: number of episodes to play out.

    Returns:
        dict mapping the episode index (0 .. num_dense_logs - 1) to the value of
        ``env_obj.env.dense_log`` read after that episode finished.
    """
    collected_logs = {}
    for episode_idx in range(num_dense_logs):
        agent_ids = [str(k) for k in range(env_obj.env.n_agents)]

        # Initial policy states; they are passed unchanged to every
        # compute_action call below and never updated inside this function.
        agent_states = {
            agent_id: trainer.get_policy("a").get_initial_state()
            for agent_id in agent_ids
        }
        planner_states = trainer.get_policy("p").get_initial_state()

        # Play out the episode
        obs = env_obj.reset(force_dense_logging=True)
        for _step in range(env_obj.env.episode_length):
            # Sample one action per agent from the shared agent policy ...
            actions = {
                agent_id: trainer.compute_action(
                    obs[agent_id],
                    agent_states[agent_id],
                    policy_id="a",
                    full_fetch=False
                )
                for agent_id in agent_ids
            }
            # ... and one action for the planner.
            actions["p"] = trainer.compute_action(
                obs["p"],
                planner_states,
                policy_id="p",
                full_fetch=False
            )

            obs, _rew, done, _info = env_obj.step(actions)
            if done["__all__"]:
                break

        collected_logs[episode_idx] = env_obj.env.dense_log
    return collected_logs

In [None]:
# Roll out a single episode with the trained policies and keep its dense log.
dense_logs = generate_rollout_from_current_trainer_policy(trainer, env_obj, num_dense_logs=1)

In [None]:
agent_0_total_balance = [log["0"]["endogenous"]["TotalBalance"] for log in dense_logs[0]['states']]
agent_1_total_balance = [log["1"]["endogenous"]["TotalBalance"] for log in dense_logs[0]['states']]
agent_2_total_balance = [log["2"]["endogenous"]["TotalBalance"] for log in dense_logs[0]['states']]
agent_3_total_balance = [log["3"]["endogenous"]["TotalBalance"] for log in dense_logs[0]['states']]

stock_prices = [log["0"]["endogenous"]["StockPrice"] for log in dense_logs[0]['states']]

range = np.arange(0, 100 + 1, 2.0)

In [None]:
plt.figure(figsize=(16, 6))
plt.plot(np.arange(0,101),agent_0_total_balance, label="Agent 0")
plt.plot(np.arange(0,101),agent_1_total_balance, label="Agent 1")
plt.plot(np.arange(0,101),agent_2_total_balance, label="Agent 2")
plt.plot(np.arange(0,101),agent_3_total_balance, label="Agent 3")
plt.title('Stock Broker Total Balance')
plt.legend()
plt.xlabel('Days')
plt.ylabel('Total Balance')
plt.xticks(range)

# Adding vertical lines at each xtick position
for xtick in range:
    plt.axvline(x=xtick, color='gray', linestyle='--', linewidth=0.5)
    
#plt.savefig("miners_green_scores.png")
plt.show()

In [None]:
plt.figure(figsize=(16, 6))
plt.plot(np.arange(0, 101), stock_prices, label="Stock Price")
plt.title('Stock Price Over Time')
plt.legend()
plt.xlabel('Days')
plt.ylabel('Stock Price')
plt.xticks(range)

# Adding vertical lines at each xtick position
for xtick in range:
    plt.axvline(x=xtick, color='gray', linestyle='--', linewidth=0.5)
    
plt.show()

In [None]:
# Per-timestep state snapshots recorded during the first rollout.
summary_states = dense_logs[0]['states']

# Market-wide series are read from agent "0"'s endogenous state.
volumes = [log["0"]["endogenous"]["Volumes"] for log in summary_states]
prices = [log["0"]["endogenous"]["StockPriceHistory"] for log in summary_states]

agent_0_demand = [log["0"]["endogenous"]["Demand"] for log in summary_states]
agent_1_demand = [log["1"]["endogenous"]["Demand"] for log in summary_states]
agent_2_demand = [log["2"]["endogenous"]["Demand"] for log in summary_states]
agent_3_demand = [log["3"]["endogenous"]["Demand"] for log in summary_states]

agent_0_supply = [log["0"]["endogenous"]["Supply"] for log in summary_states]
agent_1_supply = [log["1"]["endogenous"]["Supply"] for log in summary_states]
agent_2_supply = [log["2"]["endogenous"]["Supply"] for log in summary_states]
agent_3_supply = [log["3"]["endogenous"]["Supply"] for log in summary_states]

# Total demand / supply over the four agents for each logged snapshot.
agents_demands = [x + y + z + f for x, y, z, f in zip(agent_0_demand, agent_1_demand, agent_2_demand, agent_3_demand)]
agents_supplies = [x + y + z + f for x, y, z, f in zip(agent_0_supply, agent_1_supply, agent_2_supply, agent_3_supply)]

# The entries stored in the last snapshot are indexed by day below.
final_prices = prices[-1]
final_volumes = volumes[-1]

# Walk over the days that were actually logged instead of a hard-coded 101, so
# the summary stays in step with the length of the recorded episode.
for day, (demand, supply) in enumerate(zip(agents_demands, agents_supplies)):
    print("--- Summary of Day", day, "---")
    print("Price: ", final_prices[day])
    print("Volume: ", final_volumes[day])
    print("Demand: ", demand, "  Supply: ", supply)
    print("")



In [None]:
# Remaining stock quantity per logged snapshot, read from agent "0"'s endogenous state.
stocks_left = [log["0"]["endogenous"]["StocksLeft"] for log in dense_logs[0]['states']]

# Derive the x axis from the data so it always matches the series length.
stock_days = np.arange(len(stocks_left))

plt.plot(stock_days, stocks_left, label="Stocks Quantity Left")
plt.title('Stock Quantity Over Time')
plt.legend()
plt.xlabel('Days')
# The series is a quantity, not a price (the label previously read 'Stock Price').
plt.ylabel('Stocks Left')
plt.xticks(np.arange(0, len(stocks_left), 5.0))
plt.show()

In [None]:
# Shut down the Ray runtime that was started with ray.init() earlier in the notebook.
ray.shutdown()

In [None]:
#planner_gr_score_importances = [log["p"]["GreenScoreImportance"] for log in dense_logs[0]['states']]

In [None]:
#dense_logs[0]['states'][0]