In [7]:
import json
import os
from typing import Optional, Tuple
from datetime import datetime
import fire

import numpy as np
from sb3_contrib.ppo_mask import MaskablePPO
from stable_baselines3.common.callbacks import BaseCallback
from alphagen.data.calculator import AlphaCalculator

from alphagen.data.expression import *
from alphagen.models.alpha_pool import AlphaPool, AlphaPoolBase
from alphagen.rl.env.wrapper import AlphaEnv
from alphagen.rl.policy import LSTMSharedNet, TransformerSharedNet
from alphagen.utils.random import reseed_everything
from alphagen.rl.env.core import AlphaEnvCore
from alphagen_qlib.calculator import QLibStockDataCalculator

In [10]:
from tfm_train_ppo import CustomCallback
from alphagen.utils import reseed_everything


def objective(trial):
    seed = 0
    instruments = "csi500"
    pool_capacity = 10
    steps = 20_000

    reseed_everything(seed)

    device = torch.device('cuda:0')
    close = Feature(FeatureType.CLOSE)
    target = Ref(close, -1) / close - 1

    data_train = StockData(instrument=instruments,
                           start_time='2022-01-01',
                           end_time='2022-12-31')
    data_valid = StockData(instrument=instruments,
                           start_time='2023-01-01',
                           end_time='2023-03-31')
    data_test = StockData(instrument=instruments,
                          start_time='2023-04-01',
                          end_time='2023-06-30')
    calculator_train = QLibStockDataCalculator(data_train, target)
    calculator_valid = QLibStockDataCalculator(data_valid, target)
    calculator_test = QLibStockDataCalculator(data_test, target)

    pool = AlphaPool(
        capacity=pool_capacity,
        calculator=calculator_train,
        ic_lower_bound=0.5,
        l1_alpha=5e-3
    )
    env = AlphaEnv(pool=pool, device=device, print_expr=True)

    name_prefix = f"new_{instruments}_{pool_capacity}_{seed}"
    timestamp = datetime.now().strftime('%Y%m%d%H%M%S')

    checkpoint_callback = CustomCallback(
        save_freq=10000,
        show_freq=10000,
        save_path='/save',
        valid_calculator=calculator_valid,
        test_calculator=calculator_test,
        name_prefix=name_prefix,
        timestamp=timestamp,
        verbose=1,
    )

    ent_coef = trial.suggest_loguniform('ent_coef', 1e-3, 1e-1)
    learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-3)

    model = MaskablePPO(
        'MlpPolicy',
        env,
        policy_kwargs=dict(
            features_extractor_class=TransformerSharedNet,
            features_extractor_kwargs=dict(
                n_encoder_layers=6,
                d_model=128,
                n_head=4,
                d_ffn=2048,
                dropout=0.1,
                device=device,
            ),
        ),
        gamma=1.,
        ent_coef=ent_coef,
        batch_size=256,
        learning_rate=learning_rate,
        tensorboard_log='/tensorboard',
        device=device,
        verbose=1,
    )
    model.learn(
        total_timesteps=steps,
        tb_log_name=f'{name_prefix}_{timestamp}',
    )

    ic_test, rank_ic_test, ir = pool.test_ensemble(calculator_test)
    return ic_test

In [11]:
import optuna
from optuna.samplers import TPESampler

def main_optuna(trials: int = 50):
    study = optuna.create_study(direction='maximize', sampler=TPESampler())
    study.optimize(objective, n_trials=trials)

    print("Best trial:")
    trial = study.best_trial

    print("  Value: {}".format(trial.value))

    print("  Params: ")
    for key, value in trial.params.items():
        print("    {}: {}".format(key, value))

main_optuna()

[I 2024-08-08 11:42:18,343] A new study created in memory with name: no-name-ab949174-4171-4842-871c-7ac5403c4026
  ent_coef = trial.suggest_loguniform('ent_coef', 1e-3, 1e-1)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-3)


Using cuda:0 device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


[W 2024-08-08 11:43:06,268] Trial 0 failed with parameters: {'ent_coef': 0.037743271115619176, 'learning_rate': 0.00023522256997318374} because of the following error: ValueError('Expected file or str, got <colorama.ansitowin32.StreamWrapper object at 0x000001A4ED761D90>').
Traceback (most recent call last):
  File "C:\Users\liush\anaconda3\envs\alphagen\lib\site-packages\optuna\study\_optimize.py", line 196, in _run_trial
    value_or_values = func(trial)
  File "C:\Users\liush\AppData\Local\Temp\ipykernel_26468\2655582327.py", line 77, in objective
    model.learn(
  File "C:\Users\liush\anaconda3\envs\alphagen\lib\site-packages\sb3_contrib\ppo_mask\ppo_mask.py", line 514, in learn
    total_timesteps, callback = self._setup_learn(
  File "C:\Users\liush\anaconda3\envs\alphagen\lib\site-packages\sb3_contrib\ppo_mask\ppo_mask.py", line 247, in _setup_learn
    self._logger = utils.configure_logger(self.verbose, self.tensorboard_log, tb_log_name, reset_num_timesteps)
  File "C:\Users\l

ValueError: Expected file or str, got <colorama.ansitowin32.StreamWrapper object at 0x000001A4ED761D90>