In [1]:
import os


In [2]:
agent_dir = "/Users/lorecampa/Desktop/Projects/ICAIF24-challenge/agents"

ppo_agent_dir = f'{agent_dir}/ppo/first_trial'
agent_paths = [os.path.join(ppo_agent_dir, agent) for agent in os.listdir(ppo_agent_dir)]
assert all([os.path.exists(agent_path) for agent_path in agent_paths]), "Some agent paths do not exist"

AGENTS = [{"type": "ppo", "file": agent_path} for agent_path in agent_paths]


In [3]:
import glob


policy_list = glob.glob(f'{agent_dir}/fqi/trial_2_window_stap_gap_2/Policy_iter3.pkl')
# fqi_policy = policy_list[0].split(agent_dir+'/')[1]
fqi_policy = policy_list[0]
AGENTS.append({"type": "fqi", "file": fqi_policy})
AGENTS

[{'type': 'ppo',
  'file': '/Users/lorecampa/Desktop/Projects/ICAIF24-challenge/agents/ppo/first_trial/PPO_window_12_12'},
 {'type': 'ppo',
  'file': '/Users/lorecampa/Desktop/Projects/ICAIF24-challenge/agents/ppo/first_trial/PPO_window_11_11'},
 {'type': 'ppo',
  'file': '/Users/lorecampa/Desktop/Projects/ICAIF24-challenge/agents/ppo/first_trial/PPO_window_9_9'},
 {'type': 'ppo',
  'file': '/Users/lorecampa/Desktop/Projects/ICAIF24-challenge/agents/ppo/first_trial/PPO_window_10_10'},
 {'type': 'fqi',
  'file': '/Users/lorecampa/Desktop/Projects/ICAIF24-challenge/agents/fqi/trial_2_window_stap_gap_2/Policy_iter3.pkl'}]

In [4]:
from agent.factory import AgentsFactory


agents = [AgentsFactory.load_agent(agent) for agent in AGENTS]
for agent in agents:
    agent.load()
agents


Loading 1 seeds, ['seed_3657626357.zip']
Loading 1 seeds, ['seed_1299339297.zip']
Loading 1 seeds, ['seed_356811696.zip']
Loading 1 seeds, ['seed_445177589.zip']


[<agent.online_rl.AgentOnlineRl at 0x108fad810>,
 <agent.online_rl.AgentOnlineRl at 0x108fafe20>,
 <agent.online_rl.AgentOnlineRl at 0x1756e5060>,
 <agent.online_rl.AgentOnlineRl at 0x1756e7ac0>,
 <agent.fqi.AgentFQI at 0x1756e50f0>]

In [5]:
from agent.base import AgentBase
from erl_config import build_env
import torch as th
from trade_simulator import EvalTradeSimulator
import numpy as np


def evaluate_agent(agent: AgentBase, args, eval_sequential: bool = False):
    num_eval_sims = args.get("num_sims", 1)
    print('Num eval sims: ', num_eval_sims)
    device = th.device("cpu")

    eval_env = build_env(args["env_class"], args, gpu_id=-1)
    seed = args.get("seed", None)
    

    state, _ = eval_env.reset(seed=seed, eval_sequential=eval_sequential)
    print('State shape: ', state.shape)
    
    total_reward = th.zeros(num_eval_sims, dtype=th.float32, device=device)
    rewards = th.empty((0, num_eval_sims), dtype=th.float32, device=device)
    
    
    print('Max step: ', eval_env.max_step)
    
    for i in range(eval_env.max_step):
        
        action = agent.action(state)
        print('Action: ', action.shape)
        # action = th.tensor(action).to(device)
        action = th.from_numpy(action).to(device)            
        state, reward, terminated, truncated, _ = eval_env.step(action=action)
        
        rewards = th.cat((rewards, reward.unsqueeze(0)), dim=0)
            
        total_reward += reward

        if terminated.any() or truncated:
            break
    
    print(f'Steps: {i}')
    # print(total_reward)
    
    mean_total_reward = total_reward.mean().item()
    std_total_reward = total_reward.std().item() if num_eval_sims > 1 else 0.
    mean_std_reward = rewards.std(dim=0).mean().item()

    print(f'Sims mean: {mean_total_reward} Sims std: {std_total_reward}, Mean std reward: {mean_std_reward}')
    
    
    return mean_total_reward


In [7]:
from agent.base import AgentBase


def model_selection(agents: AgentBase):
    agent = agents[-1]
    num_sims = 1
    
    eval_env_args = {
        "env_name": "TradeSimulator-v0",
        "num_envs": 1,
        "num_sims": num_sims,
        "state_dim": 10,
        "action_dim": 3,
        "if_discrete": True,
        "max_position": 1,
        "slippage": 7e-7,
        "step_gap": 2,
        "eval_sequential": False,
        "env_class": EvalTradeSimulator,
        "max_step": 480,
        "days": [10, 10]
    }
    mean_total_reward = evaluate_agent(agent, eval_env_args)
    
    
    return mean_total_reward
    
    
    
    
    
    
model_selection(agents)

Num eval sims:  1
State shape:  torch.Size([1, 10])
Max step:  480


AttributeError: 'int' object has no attribute 'shape'