In [1]:
import os

# The `src` package imported further down is expected to sit in the parent
# directory. Only step up when `src` is not already visible from the current
# working directory, so re-running this cell does not climb one level higher
# each time.
if not os.path.isdir('src'):
    os.chdir('..')

# Imports

In [2]:
from tqdm import tqdm

In [3]:
from src.agent_interface import *
from src.agent import *
from src.env import *
from src.price_data import *
from src.prior_measure import *
from src.q import *
from src.robust import *

# Initialize Environment

In [None]:
# Date window and batch width reused by both parameter sets below.
start_date, end_date = '1995-01-01', '2024-12-31'
batch_size = 32

# Keyword arguments for the price-data loader.
stock_params = dict(symbol='SPY', start_date=start_date, end_date=end_date)

# Keyword arguments for the environment constructor.
env_params = dict(
    start_date=start_date,
    end_date=end_date,
    rf_rate=0.024,      # presumably an annualised risk-free rate -- TODO confirm units
    trans_cost=0.005,   # presumably a proportional transaction cost -- TODO confirm
    batch_size=batch_size,
    logging=True,
    seed=42,
)

In [10]:
# Run the price-data pipeline for the configured symbol and date window.
yf = YahooFinance(**stock_params)
df = yf.pipeline()

# Keep only the rows with a defined log return and hand them to the
# environment as a plain NumPy array.
log_return_series = df['log_return'].dropna()
asset_log_returns = log_return_series.to_numpy()

env = PortfolioEnv(asset_log_returns=asset_log_returns, **env_params)

[*********************100%***********************]  1 of 1 completed


# Initialize Agent

In [None]:
# Use the GPU when CUDA is available; otherwise fall back to the CPU so the
# notebook still runs on machines without a CUDA device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Values reused by several of the parameter sets below.
n_updates = 1
state_dim = 63
action_dim = 1

# Keyword arguments for TrainingController.
training_controller_params = dict(
    train_steps=1,
    clone_steps=50,
    batch_size=batch_size,
    n_batches=n_updates,
)

# Keyword arguments for DualityHQOperator.
duality_params = dict(
    discount_rate=0.99,
    delta=1e-4,
    sinkhorn_radius=0.003,
)

# Keyword arguments for QFunc: input width, hidden-layer widths, output width.
q_params = dict(
    input_size=state_dim,
    hidden_size=[64, 64],
    output_size=action_dim,
)

# Keyword arguments forwarded to PORDQN alongside its component objects.
dqn_params = dict(
    state_dim=state_dim,
    action_dim=action_dim,
    batch_size=batch_size,
    # NOTE(review): literal 10 here while the shared `n_updates` above is 1 --
    # confirm the two are meant to differ.
    n_updates=10,
    epsilon=0.1,
    device=device,
    seed=123,
)

In [7]:
# Build each component from its parameter dict, in the same order as before.
training_controller = TrainingController(**training_controller_params)
prior_measure = PriorStudentDistribution(device=device)
duality_operator = DualityHQOperator(**duality_params)
q = QFunc(**q_params)

# Gather the components under the keyword names PORDQN is called with, then
# combine them with the scalar settings in `dqn_params`.
agent_components = {
    'training_controller': training_controller,
    'prior_measure': prior_measure,
    'duality_operator': duality_operator,
    'qfunc': q,
}
agent = PORDQN(**agent_components, **dqn_params)

# Training

In [None]:
# NOTE(review): the saved output of this cell ends in a torch RuntimeError
# (size 128 vs 32) before the first progress-bar update; the failing call is
# inside the project code and is not visible from this cell.
n_epochs = 1
for epoch in range(1, n_epochs + 1):
    # Running reward total, one row per rollout in the batch.
    cum_rewards = np.zeros((batch_size, 1))

    observation, _ = env.reset()
    action_idx = agent.agent_start(observation)
    steps = env.action_steps
    done = np.zeros(batch_size, dtype=bool)

    with tqdm(total=steps, desc=f"Episode {epoch}", mininterval=2) as step_bar:
        # Keep stepping until any rollout in the batch reports done.
        while not done.any():
            # NOTE(review): `truncated` is unpacked but never consulted.
            next_state, reward, done, truncated, info = env.step(action_idx)
            cum_rewards += reward

            transition = dict(reward=reward, observation=next_state, info=info)
            if not done.any():
                action_idx = agent.agent_step(**transition)
            else:
                agent.agent_end(**transition)

            step_bar.update(1)

Episode 1:   0%|          | 0/7492 [00:00<?, ?it/s]


RuntimeError: The expanded size of the tensor (128) must match the existing size (32) at non-singleton dimension 0.  Target sizes: [128].  Tensor sizes: [32]