# Import packages

In [None]:
%pip install -r requirements.txt -i http://pypi.douban.com/simple/ --trusted-host pypi.douban.com

In [None]:
import os
import random
import time
import typing
from typing import Any, Callable, Dict, List, Optional, Tuple, Type
import warnings

from boruta import BorutaPy
import numpy as np
import optuna
from optuna.visualization import plot_contour, plot_edf, \
    plot_intermediate_values, plot_optimization_history, plot_parallel_coordinate, \
    plot_param_importances, plot_slice
import pandas as pd
import requests
from sb3_contrib import MaskablePPO
from sb3_contrib.common.maskable.callbacks import MaskableEvalCallback
from sb3_contrib.common.maskable.evaluation import evaluate_policy
from sklearn.ensemble import RandomForestRegressor
from stable_baselines3.common.base_class import BaseAlgorithm
from stable_baselines3.common.callbacks import StopTrainingOnNoModelImprovement
from stable_baselines3.common.logger import configure
from stable_baselines3.common.monitor import Monitor
import stockstats
import tushare
import yfinance as yf

from environment.SingleStockTradingEnv import SingleStockTradingEnv
from maskable.MaskableDQN import MaskableDQN
from maskable.MaskableIQN import MaskableIQN
from maskable.MaskableQRDQN import MaskableQRDQN
from utils.sample_funcs import *
from utils.utils import *

# Config

In [2]:
# Silence pandas/NumPy FutureWarnings so the long download/cleaning logs stay readable.
warnings.simplefilter(action='ignore', category=FutureWarning)

# Setup directories (all relative to the notebook's working directory)
DATA_SAVE_DIR = 'datasets'
MODEL_DIR = 'models'
TENSORBOARD_LOG_DIR = 'tensorboard_log'
RAW_DATA_DIR = os.path.join(DATA_SAVE_DIR, 'raw')
CLEAN_DATA_DIR = os.path.join(DATA_SAVE_DIR, 'clean')
PREPROCESSED_DATA_DIR = os.path.join(DATA_SAVE_DIR, 'preprocessed')

check_and_make_directories([
    DATA_SAVE_DIR, MODEL_DIR, TENSORBOARD_LOG_DIR,
    RAW_DATA_DIR, CLEAN_DATA_DIR, PREPROCESSED_DATA_DIR,
])

# Date boundaries (inclusive, 'YYYY-MM-DD') of the train / test / trade periods.
TRAIN_START_DAY = '2008-01-01'
TRAIN_END_DAY = '2016-12-31'
TEST_START_DAY = '2017-01-01'
TEST_END_DAY = '2019-12-31'
TRADE_START_DAY = '2020-01-01'
TRADE_END_DAY = '2022-12-31'

# Tushare API token. It is read from the environment so the secret is never
# stored in (or shared with) the notebook. Export TUSHARE_TOKEN before starting
# the kernel. NOTE(review): a token that was previously committed in this cell
# should be considered leaked and be regenerated on the Tushare site.
tushare_token = os.environ.get('TUSHARE_TOKEN', '')
if not tushare_token:
    warnings.warn(
        'TUSHARE_TOKEN is not set; get_calendar_with_tushare() will fail to authenticate.',
        RuntimeWarning,
    )

# Download data

## Download CSI300-components ticker list

In [4]:
def download_csi300_component_ticker_list(url: str, download_dir: str) -> List[str]:
    """Download the CSI300 constituent spreadsheet (once) and return Yahoo-style tickers.

    The spreadsheet is cached in ``download_dir`` under the last path segment of
    ``url``; an existing file is reused without touching the network.

    Args:
        url: Location of the constituent spreadsheet.
        download_dir: Directory in which the spreadsheet is cached.

    Returns:
        One ticker per spreadsheet row, formatted as ``<6-digit code>.<SS|SZ>``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If a row names an exchange other than Shanghai or Shenzhen.
    """
    download_file_name = url.split('/')[-1]
    download_file_path = os.path.join(download_dir, download_file_name)
    if not os.path.exists(download_file_path):
        response = requests.get(url, timeout=30)
        # Fail before touching the disk: otherwise an HTML error page would be
        # cached under the spreadsheet's name and break every later run.
        response.raise_for_status()
        with open(download_file_path, 'wb') as f:
            f.write(response.content)

    df = pd.read_excel(download_file_path)

    # Map the exchange names used by the spreadsheet to Yahoo Finance suffixes.
    suffix_by_exchange = {'上海证券交易所': 'SS', '深圳证券交易所': 'SZ'}
    suffixes = df['交易所Exchange'].map(suffix_by_exchange)
    if suffixes.isna().any():
        unknown = sorted(df.loc[suffixes.isna(), '交易所Exchange'].astype(str).unique())
        raise ValueError(f'Unsupported exchange name(s) in {download_file_path}: {unknown}')

    # int() keeps the zero-padding working whether the code column was parsed
    # as integers or as strings.
    return [
        f'{int(code):06d}.{suffix}'
        for code, suffix in zip(df['成分券代码Constituent Code'], suffixes)
    ]

In [43]:
# Spreadsheet with the index constituents; it is cached in DATA_SAVE_DIR after the first run.
url = (
    r'https://csi-web-dev.oss-cn-shanghai-finance-1-pub.aliyuncs.com'
    r'/static/html/csindex/public/uploads/file/autofile/cons/000300cons.xls'
)
tic_list = download_csi300_component_ticker_list(url=url, download_dir=DATA_SAVE_DIR)

## Download CSI300 tickers with yfinance

In [75]:
def download_ticker_with_yfince(tic_list: List[str], download_dir: str) -> List[str]:
    """Fetch the full price history of every ticker and store it as ``<ticker>.csv``.

    Tickers whose CSV already exists in ``download_dir`` are skipped. A ticker
    for which yfinance returns no rows is not written to disk; it is collected
    and handed back so the caller can try again.

    Args:
        tic_list: Ticker symbols to download.
        download_dir: Directory receiving one CSV per ticker.

    Returns:
        The tickers that produced an empty history.
    """
    failed = []
    for symbol in tic_list:
        target = os.path.join(download_dir, f'{symbol}.csv')
        if os.path.exists(target):
            print(f'File {target} already exist. Skip')
            continue

        history = yf.Ticker(symbol).history(period='max')
        if history.shape[0] <= 0:
            failed.append(symbol)
            continue

        history.to_csv(target)
        print(f'Download {symbol}.csv')
        # Short pause between successful downloads.
        time.sleep(0.1)

    return failed

In [77]:
# First pass over every constituent; tickers that returned no data are kept for a retry.
retry_list = download_ticker_with_yfince(tic_list=tic_list, download_dir=RAW_DATA_DIR)

Download 300601.SZ.csv
Download 300628.SZ.csv
Download 603659.SS.csv
Failed to get ticker '002916.SZ' reason: HTTPSConnectionPool(host='query2.finance.yahoo.com', port=443): Max retries exceeded with url: /v8/finance/chart/002916.SZ?range=1d&interval=1d (Caused by ProxyError('Cannot connect to proxy.', timeout('_ssl.c:1112: The handshake operation timed out')))
- 002916.SZ: No timezone found, symbol may be delisted
Download 002916.SZ.csv
600036.SS: No data found for this date range, symbol may be delisted
Download 600036.SS.csv
Download 000776.SZ.csv
Download 600089.SS.csv
Download 600884.SS.csv
Download 600085.SS.csv
Download 601360.SS.csv
Download 603259.SS.csv
Download 300454.SZ.csv
Download 601066.SS.csv
Download 300760.SZ.csv
Download 300751.SZ.csv
Download 601838.SS.csv
Download 000651.SZ.csv
Download 000661.SZ.csv
Download 000733.SZ.csv
Download 000858.SZ.csv
Download 002050.SZ.csv
Download 600048.SS.csv
Download 002236.SZ.csv
Download 601111.SS.csv
Download 300763.SZ.csv
Downlo

In [None]:
# Retry pass: re-run this cell until retry_list stops shrinking (or becomes empty).
retry_list = download_ticker_with_yfince(tic_list=retry_list, download_dir=RAW_DATA_DIR)

# Clean data

In [159]:
def get_calendar_with_tushare(start: str, end: str) -> pd.Series:
    """Return the trading days between ``start`` and ``end`` as a Series named ``date``.

    The open days of the Shanghai (``SSE``) and Shenzhen (``SZSE``) exchanges are
    queried from Tushare and merged into their union, so a day on which either
    exchange traded is part of the calendar.

    Args:
        start: First day of the range, formatted ``YYYY-MM-DD``.
        end: Last day of the range, formatted ``YYYY-MM-DD``.

    Returns:
        A datetime Series named ``'date'`` with unique values in ascending order.
    """
    # Tushare expects compact YYYYMMDD strings.
    start = start.replace('-', '')
    end = end.replace('-', '')

    tushare.set_token(tushare_token)
    tu_pro = tushare.pro_api()
    exchange_dates = [
        tu_pro.trade_cal(exchange=exchange, start_date=start, end_date=end, is_open=1).cal_date
        for exchange in ('SSE', 'SZSE')
    ]

    # Always build the union. The previous length check compared the Shanghai
    # calendar with itself, so Shenzhen-only days could never be included (and
    # two calendars of equal length may still contain different days).
    calendar = pd.to_datetime(pd.concat(exchange_dates, ignore_index=True), format='%Y%m%d')
    # Ascending order regardless of the order the API returns the rows in.
    calendar = calendar.drop_duplicates().sort_values().reset_index(drop=True)

    return calendar.rename('date')

In [160]:
def clean_data_from_yfinance(data: pd.DataFrame, calendar: Optional[pd.Series] = None) -> pd.DataFrame:
    """Normalise a raw yfinance price table and optionally align it to a trading calendar.

    The input frame is left untouched; a new frame is returned.

    Args:
        data: Raw table with the columns ``Date, Open, High, Low, Close, Volume``
            and, optionally, ``Dividends`` / ``Stock Splits``.
        calendar: Trading days as a datetime Series named ``'date'``. When given,
            the result has exactly one row per calendar day and days without a
            quote hold NaN. When omitted, the cleaned rows are returned as they are.

    Returns:
        A frame with the columns ``date, open, high, low, close, volume``.
    """
    # TODO: calculate adjusted price.
    cleaned = (
        data
        # errors='ignore': not every download carries the corporate-action columns.
        .drop(columns=['Dividends', 'Stock Splits'], errors='ignore')
        .rename(columns={
            'Date': 'date',
            'Open': 'open',
            'High': 'high',
            'Low': 'low',
            'Close': 'close',
            'Volume': 'volume',
        })
    )
    # Keep only the calendar-date part ('2020-01-02 00:00:00+08:00' -> '2020-01-02');
    # astype(str) makes this work for already-parsed datetime columns as well.
    date_text = cleaned['date'].astype(str).str.split(' ').str[0]
    cleaned['date'] = pd.to_datetime(date_text, format='%Y-%m-%d')

    if calendar is None:
        return cleaned

    # Left join on the calendar: every trading day appears once, missing quotes become NaN.
    return pd.merge(calendar, cleaned, how='left', on='date')

In [280]:
# Largest tolerated share of calendar days without a usable row.
NA_THRESHOLD = 0.1

calendar = get_calendar_with_tushare(TRAIN_START_DAY, TRADE_END_DAY)
# Fewest rows a ticker must retain after cleaning to be kept.
min_rows = len(calendar) * (1 - NA_THRESHOLD)

na_list = []
_, _, files = next(os.walk(RAW_DATA_DIR))
for file in files:
    result_path = os.path.join(CLEAN_DATA_DIR, file)
    if not os.path.exists(result_path):
        raw = pd.read_csv(os.path.join(RAW_DATA_DIR, file), index_col=False)
        aligned = clean_data_from_yfinance(raw, calendar)

        # Remove calendar days without a quote, then rows repeating an earlier OHLCV tuple.
        kept = aligned.dropna().drop_duplicates(['open', 'high', 'low', 'close', 'volume'])
        print(f'{aligned.shape[0] - kept.shape[0]} rows droped from {file}.')

        if kept.shape[0] < min_rows:
            na_list.append(file)
            print(f'{file}: too many NaNs, discard.')
        else:
            kept.to_csv(result_path, index=False)

71 rows droped from 000001.SZ.csv.
151 rows droped from 000002.SZ.csv.
81 rows droped from 000063.SZ.csv.
66 rows droped from 000069.SZ.csv.
207 rows droped from 000100.SZ.csv.
64 rows droped from 000157.SZ.csv.
255 rows droped from 000166.SZ.csv.
179 rows droped from 000301.SZ.csv.
1445 rows droped from 000333.SZ.csv.
Too many NaN for data in 000333.SZ.csv.
44 rows droped from 000338.SZ.csv.
389 rows droped from 000408.SZ.csv.
Too many NaN for data in 000408.SZ.csv.
83 rows droped from 000425.SZ.csv.
170 rows droped from 000538.SZ.csv.
33 rows droped from 000568.SZ.csv.
9 rows droped from 000596.SZ.csv.
176 rows droped from 000625.SZ.csv.
165 rows droped from 000651.SZ.csv.
39 rows droped from 000661.SZ.csv.
109 rows droped from 000708.SZ.csv.
343 rows droped from 000723.SZ.csv.
21 rows droped from 000725.SZ.csv.
34 rows droped from 000733.SZ.csv.
60 rows droped from 000768.SZ.csv.
534 rows droped from 000776.SZ.csv.
Too many NaN for data in 000776.SZ.csv.
150 rows droped from 000786.

In [281]:
# Number of raw files discarded by the cleaning cell, followed by their names
# (the joined string is the cell's displayed value).
print(len(na_list))
' '.join(na_list)

165


'000333.SZ.csv 000408.SZ.csv 000776.SZ.csv 000792.SZ.csv 001289.SZ.csv 002129.SZ.csv 002252.SZ.csv 002304.SZ.csv 002311.SZ.csv 002352.SZ.csv 002371.SZ.csv 002410.SZ.csv 002414.SZ.csv 002415.SZ.csv 002459.SZ.csv 002460.SZ.csv 002466.SZ.csv 002475.SZ.csv 002493.SZ.csv 002555.SZ.csv 002594.SZ.csv 002600.SZ.csv 002601.SZ.csv 002602.SZ.csv 002648.SZ.csv 002709.SZ.csv 002714.SZ.csv 002736.SZ.csv 002756.SZ.csv 002812.SZ.csv 002821.SZ.csv 002841.SZ.csv 002916.SZ.csv 002920.SZ.csv 002938.SZ.csv 003816.SZ.csv 300014.SZ.csv 300015.SZ.csv 300033.SZ.csv 300059.SZ.csv 300122.SZ.csv 300124.SZ.csv 300142.SZ.csv 300207.SZ.csv 300223.SZ.csv 300274.SZ.csv 300316.SZ.csv 300347.SZ.csv 300408.SZ.csv 300413.SZ.csv 300433.SZ.csv 300450.SZ.csv 300454.SZ.csv 300496.SZ.csv 300498.SZ.csv 300529.SZ.csv 300595.SZ.csv 300601.SZ.csv 300628.SZ.csv 300661.SZ.csv 300750.SZ.csv 300751.SZ.csv 300759.SZ.csv 300760.SZ.csv 300763.SZ.csv 300769.SZ.csv 300782.SZ.csv 300896.SZ.csv 300919.SZ.csv 300957.SZ.csv 300979.SZ.csv 30099

# Feature engineering

In [178]:
# columns after init_all()
# columns after init_all()
# Use the configured clean-data directory instead of a hard-coded relative path so
# this cell follows the directory constants like every other cell.
df = pd.read_csv(os.path.join(CLEAN_DATA_DIR, '000001.SZ.csv'), index_col=False)
stats = stockstats.StockDataFrame.retype(df)
stats.init_all()
stats.columns

Index(['open', 'high', 'low', 'close', 'volume', 'change', 'rs_14', 'rsi',
       'rsi_14', 'stochrsi', 'rate', 'middle', 'tp', 'boll', 'boll_ub',
       'boll_lb', 'macd', 'macds', 'macdh', 'ppo', 'ppos', 'ppoh', 'rsv_9',
       'kdjk_9', 'kdjk', 'kdjd_9', 'kdjd', 'kdjj_9', 'kdjj', 'cr', 'cr-ma1',
       'cr-ma2', 'cr-ma3', 'cci', 'tr', 'atr', 'high_delta', 'um', 'low_delta',
       'dm', 'pdm', 'pdm_14_ema', 'pdm_14', 'atr_14', 'pdi_14', 'pdi', 'mdm',
       'mdm_14_ema', 'mdm_14', 'mdi_14', 'mdi', 'dx_14', 'dx', 'adx', 'adxr',
       'trix', 'tema', 'vr', 'close_10_sma', 'close_50_sma', 'dma', 'vwma',
       'chop', 'log-ret', 'mfi', 'wt1', 'wt2', 'wr', 'supertrend_ub',
       'supertrend_lb', 'supertrend'],
      dtype='object')

In [283]:
X_y_filename = 'x_y.csv'
X_y_path = os.path.join(DATA_SAVE_DIR, X_y_filename)

if os.path.exists(X_y_path):
    # Cached feature table: load it instead of recomputing. (Previously this path
    # left X_y undefined and the trailing to_csv raised NameError on a fresh kernel.)
    X_y = pd.read_csv(X_y_path, index_col=False)
else:
    # One feature frame per ticker; concatenated once after the loop.
    per_ticker_frames = []

    _, _, files = next(os.walk(CLEAN_DATA_DIR))
    for file in files:
        file_path = os.path.join(CLEAN_DATA_DIR, file)
        df = pd.read_csv(file_path, index_col=False)
        stats = stockstats.StockDataFrame.retype(df)
        stats.init_all()

        # drop duplicated columns
        stats.drop_column(['rsi', 'kdjk', 'kdjd', 'kdjj'], inplace=True)

        # add additional indicators: close_14_smma, close_14_mstd, close_14_mvar,
        # close_5_sma, wr_6, rsi_6 (accessing a column is what adds it to the frame),
        # log differential of high, low, open and volume
        # and log2(close / open)
        for indicator in ('close_14_smma', 'close_14_mstd', 'close_14_mvar',
                          'close_5_sma', 'wr_6', 'rsi_6'):
            stats[indicator]
        stats['log_diff_high'] = np.log2(stats['high'] / stats['high_-1_s'])
        stats['log_diff_low'] = np.log2(stats['low'] / stats['low_-1_s'])
        stats['log_diff_open'] = np.log2(stats['open'] / stats['open_-1_s'])
        stats['log_diff_vol'] = np.log2(stats['volume'] / stats['volume_-1_s'])
        stats['log_close/open'] = np.log2(stats['close'] / stats['open'])
        # NOTE(review): 'volume_-1_s' is not dropped here, unlike the other shifted
        # helper columns, so it stays in X_y as a feature - confirm this is intended.
        stats.drop_column(['high_-1_s', 'low_-1_s', 'open_-1_s'], inplace=True)

        # regression target: the 'log-ret' column shifted by one step
        stats['log-ret_1_s']
        stats.rename(columns={'log-ret_1_s': 'y'}, inplace=True)

        # drop date
        stats.reset_index(drop=True, inplace=True)

        # deal with nan
        stats.dropna(inplace=True)

        per_ticker_frames.append(stats.copy())
        print(f'Add {file} to X_y.')

    if not per_ticker_frames:
        raise FileNotFoundError(f'No cleaned CSV files found in {CLEAN_DATA_DIR}.')

    X_y = pd.concat(per_ticker_frames)
    X_y.to_csv(X_y_path, index=False)

Added 000001.SZ.csv.
Added 000002.SZ.csv.
Added 000063.SZ.csv.
Added 000069.SZ.csv.
Added 000100.SZ.csv.
Added 000157.SZ.csv.
Added 000166.SZ.csv.
Added 000301.SZ.csv.
Added 000338.SZ.csv.
Added 000425.SZ.csv.
Added 000538.SZ.csv.
Added 000568.SZ.csv.
Added 000596.SZ.csv.
Added 000625.SZ.csv.
Added 000651.SZ.csv.
Added 000661.SZ.csv.
Added 000708.SZ.csv.
Added 000723.SZ.csv.
Added 000725.SZ.csv.
Added 000733.SZ.csv.
Added 000768.SZ.csv.
Added 000786.SZ.csv.
Added 000800.SZ.csv.
Added 000858.SZ.csv.
Added 000876.SZ.csv.
Added 000877.SZ.csv.
Added 000895.SZ.csv.
Added 000938.SZ.csv.
Added 000963.SZ.csv.
Added 000977.SZ.csv.
Added 001979.SZ.csv.
Added 002001.SZ.csv.
Added 002007.SZ.csv.
Added 002008.SZ.csv.
Added 002027.SZ.csv.
Added 002032.SZ.csv.
Added 002049.SZ.csv.
Added 002050.SZ.csv.
Added 002064.SZ.csv.
Added 002074.SZ.csv.
Added 002120.SZ.csv.
Added 002142.SZ.csv.
Added 002179.SZ.csv.
Added 002180.SZ.csv.
Added 002202.SZ.csv.
Added 002230.SZ.csv.
Added 002236.SZ.csv.
Added 002241.

# Feature selection

In [None]:
X_y_filename = 'x_y.csv'
X_y_path = os.path.join(DATA_SAVE_DIR, X_y_filename)
# X_y only exists when the feature-engineering cell ran in this kernel session.
# Looking it up through globals() avoids the NameError that a bare
# `X_y is None` raises on a fresh kernel.
if globals().get('X_y') is None:
    X_y = pd.read_csv(X_y_path, index_col=False)

In [None]:
# Boruta feature selection with a shallow random forest as the importance estimator.
model = RandomForestRegressor(n_estimators=100, max_depth=5, random_state=42)

feat_selector = BorutaPy(
    verbose=2,
    estimator=model,
    n_estimators='auto',
    max_iter=10,
    # Seed Boruta as well as the forest so the selection is repeatable.
    random_state=42,
)

# Every column except the target 'y' is a candidate feature. The names are kept
# separately because BorutaPy is fitted on plain NumPy arrays, which carry no
# column labels.
feature_names = X_y.columns.drop('y')
X = X_y[feature_names].to_numpy()
y = X_y['y'].to_numpy()
feat_selector.fit(X, y)

# print support and ranking for each feature
print("\n------Support and Ranking for each feature------")
for name, supported, rank in zip(feature_names, feat_selector.support_, feat_selector.ranking_):
    if supported:
        print("Passes the test: ", name,
              " - Ranking: ", rank)
    else:
        print("Doesn't pass the test: ",
              name, " - Ranking: ", rank)

Features:
* volume
* ppo
* cr-ma3
* trix 
* log_diff_high
* log_diff_low
* log_diff_open
* log_close/open

# Preprocess data

In [7]:
# Build the model-input table (close price plus the selected features) for every
# cleaned ticker file and cache it as a CSV in PREPROCESSED_DATA_DIR.
# Only files processed in this run end up in df_dict; files whose preprocessed
# CSV already exists are skipped entirely.
df_dict = {}

_, _, files = next(os.walk(CLEAN_DATA_DIR))
for file in files:
    # skip if already exists
    processed_file_path = os.path.join(PREPROCESSED_DATA_DIR, file)
    if os.path.exists(processed_file_path):
        continue
    
    # load
    clean_file_path = os.path.join(CLEAN_DATA_DIR, file)
    stats = pd.read_csv(clean_file_path, index_col=False)
    # Capture the dates as the output index before `stats` is handed to stockstats.
    df = pd.DataFrame(index=stats['date'])

    # NOTE(review): the column assignments below align on the index, so this relies
    # on retype() indexing `stats` by its 'date' column - confirm for the installed
    # stockstats version.
    stats = stockstats.StockDataFrame.retype(stats)
    df['close'] = stats['close']

    # add indicators
    df['ppo'] = stats['ppo']
    df['cr-ma3'] = stats['cr-ma3']
    df['trix'] = stats['trix']

    # add differential features
    # ('<col>_-1_s' is read as a shifted copy of <col>; presumably the previous
    # row's value - verify against stockstats' shift convention)
    df['log_close/open'] = np.log2(stats['close'] / stats['open'])
    df['log-ret'] = stats['log-ret']
    df['log_diff_high'] = np.log2(stats['high'] / stats['high_-1_s'])
    df['log_diff_low'] = np.log2(stats['low'] / stats['low_-1_s'])
    df['log_diff_open'] = np.log2(stats['open'] / stats['open_-1_s'])

    # clean: drop rows with any NaN, then turn the date index back into a column
    df.dropna(inplace=True)
    df.reset_index(inplace=True)

    # save
    df.to_csv(processed_file_path, index=False)
    # Key by the text before the first dot, e.g. '000001' for '000001.SZ.csv'.
    tic = file.split('.')[0]
    df_dict[tic] = df.copy()

# Setup environment

In [3]:
def env_factory(
    dfs: List[pd.DataFrame],
    initial_balance: int = 5_000_000,
    stack_frame: int = 10,
) -> 'Monitor':
    """Create a single-stock trading environment wrapped in a ``Monitor``.

    Args:
        dfs: Per-ticker data frames handed to ``SingleStockTradingEnv``.
        initial_balance: Value passed as the environment's second positional
            argument (presumably the starting cash - confirm against
            ``SingleStockTradingEnv``). Defaults to the previously hard-coded
            5,000,000.
        stack_frame: Value of the environment's ``stack_frame`` argument.
            Defaults to the previously hard-coded 10.

    Returns:
        The ``Monitor`` wrapping the newly created environment (the function
        never returned a bare ``SingleStockTradingEnv``).
    """
    return Monitor(SingleStockTradingEnv(dfs, initial_balance, stack_frame=stack_frame))

In [4]:
# Just load data
# Just load data: one frame per preprocessed CSV, keyed by the text before the
# first dot of the file name.
df_dict = {}
_, _, files = next(os.walk(PREPROCESSED_DATA_DIR))
for file in files:
    frame = pd.read_csv(os.path.join(PREPROCESSED_DATA_DIR, file), index_col=False)
    # Preprocessed files are expected to be NaN-free.
    assert frame.isna().sum().sum() == 0, f'Nan found in {file}.'
    df_dict[file.split('.')[0]] = frame.copy()

In [5]:
# Split data
# Split data
# The boundary constants are converted to timestamps so they compare against the date column.
TEST_START_DAY = pd.to_datetime(TEST_START_DAY, format='%Y-%m-%d')
TRADE_START_DAY = pd.to_datetime(TRADE_START_DAY, format='%Y-%m-%d')

df_dict_train, df_dict_test, df_dict_trade = {}, {}, {}

for tic, df in df_dict.items():
    df.date = pd.to_datetime(df.date, format='%Y-%m-%d')
    # train: before the test start; test: [test start, trade start); trade: from the trade start on
    for target, mask in (
        (df_dict_train, df.date < TEST_START_DAY),
        (df_dict_test, (df.date >= TEST_START_DAY) & (df.date < TRADE_START_DAY)),
        (df_dict_trade, df.date >= TRADE_START_DAY),
    ):
        target[tic] = df.loc[mask].sort_index(ascending=True).copy()

# create env
env_train, env_test, env_trade = (
    env_factory(list(split.values()))
    for split in (df_dict_train, df_dict_test, df_dict_trade)
)

# Hyperparameter tuning

In [6]:
# Verbosity level passed to the early-stop and evaluation callbacks during tuning.
VERBOSE = 0

In [11]:
def objective_factory(
    model_name: str,
    model_class: Type[BaseAlgorithm],
    sample_param_func: Callable[[optuna.Trial], Tuple[Dict, int]],
    ) -> Callable[[optuna.Trial], float]:
    """Build the Optuna objective for one algorithm.

    Args:
        model_name: Name used for the model and TensorBoard sub-directories and
            for the TensorBoard run names.
        model_class: Algorithm class; instantiated as
            ``model_class('MlpPolicy', env_train, **hyperparameters)``.
        sample_param_func: Maps a trial to ``(hyperparameters, total_timesteps)``.

    Returns:
        A function that trains one model for the given trial on ``env_train``
        and returns its mean reward over 3 evaluation episodes on ``env_test``.
    """

    def objective(trial: optuna.Trial):
        model_path = os.path.join(MODEL_DIR, model_name)
        model_path = os.path.join(model_path, f'trial_{trial.number}_best_model')
        tb_log_path = os.path.join(TENSORBOARD_LOG_DIR, model_name)
        check_and_make_directories([model_path, tb_log_path])

        # Create model with sampled hyperparameters and
        # train it with early stop callback
        hyperparameters, total_timesteps = sample_param_func(trial)
        # Log below the configured TensorBoard directory created above; the former
        # hard-coded '/root/tf-logs/' only exists on one particular machine.
        hyperparameters['tensorboard_log'] = tb_log_path

        model = model_class('MlpPolicy', env_train, **hyperparameters)

        # Invoked after each evaluation (every 10000 steps, 3 episodes each);
        # configured with max_no_improvement_evals=4 and min_evals=2.
        stop_train_callback = StopTrainingOnNoModelImprovement(
            max_no_improvement_evals=4, min_evals=2, verbose=VERBOSE)
        eval_callback = MaskableEvalCallback(
            env_test,
            callback_after_eval=stop_train_callback,
            n_eval_episodes=3,
            eval_freq=10000,
            best_model_save_path=model_path,
            verbose=VERBOSE
            )
        try:
            model.learn(total_timesteps=total_timesteps,
                tb_log_name=f'{model_name}_{trial.number}', callback=eval_callback)
        except ValueError as e:
            # A ValueError during training is reported as a very poor score instead
            # of aborting the study (e.g. the NaN-logits error of diverged runs).
            print(e)
            return -999

        # validation
        # NOTE(review): this scores the final model, not the best checkpoint that
        # the evaluation callback saved to model_path.
        mean_reward, _ = evaluate_policy(model, env_test, n_eval_episodes=3)

        return mean_reward

    return objective

In [12]:
def tune(
    model_name: str,
    model_class: Type[BaseAlgorithm],
    sample_param_func: Callable[[optuna.Trial], Any],
    n_trials: int = 100,
    callbacks: Optional[List[Callable]] = None,
    seed: Optional[int] = None,
    ) -> optuna.Study:
    """Run an Optuna study that maximises the objective built by ``objective_factory``.

    Args:
        model_name: Used for the study name (``'<model_name>_study'``) and passed
            on to ``objective_factory``.
        model_class: Algorithm class to tune.
        sample_param_func: Samples the hyperparameters for a trial.
        n_trials: Number of trials to run.
        callbacks: Optional Optuna callbacks forwarded to ``study.optimize``.
        seed: Seed for the TPE sampler. ``None`` (the default, and the previous
            behaviour) leaves the sampler unseeded; pass an integer to make the
            sequence of sampled hyperparameters repeatable.

    Returns:
        The study after optimisation.
    """
    sampler = optuna.samplers.TPESampler(seed=seed)
    objective = objective_factory(model_name, model_class, sample_param_func)

    # NOTE(review): the objective built above never reports intermediate values,
    # so the Hyperband pruner has nothing to act on.
    study = optuna.create_study(
        study_name=f'{model_name}_study',
        direction='maximize',
        sampler=sampler,
        pruner=optuna.pruners.HyperbandPruner()
        )
    study.optimize(
        objective,
        n_trials=n_trials,
        callbacks=callbacks,
        )

    return study

In [9]:
# TODO: test with strict condition
# TODO: test with strict condition
# NOTE(review): PruneCallback is not defined in this notebook; presumably it comes
# from one of the `utils` star imports - confirm. The instance is not passed to
# tune() in the tuning cell below, so it currently has no effect on the study.
early_stop_callback = PruneCallback(
    threshold=1,
    patience=1,
    trial_number=1
    )

In [13]:
# study_mppo = tune('MaskablePPO', MaskablePPO, \
#     sample_param_func=sample_mppo_param)
study_mdqn = tune('MaskableDQN', MaskableDQN, \
    sample_param_func=sample_mdqn_param)
# study_mppo = tune('MaskableQRDQN', MaskableQRDQN, \
#     sample_param_func=sample_mqrdqn_param)
# study_mdqn = tune('MaskableIQN', MaskableIQN, \
#     sample_param_func=sample_miqn_param)

plot_optimization_history(study_mdqn)
plot_param_importances(study_mdqn)

[32m[I 2023-01-31 13:00:33,568][0m A new study created in memory with name: MaskableDQN_study[0m
[33m[W 2023-01-31 13:03:33,532][0m Trial 0 failed because of the following error: TypeError("cannot pickle 'tensorflow.python.lib.io._pywrap_file_io.WritableFile' object")[0m
Traceback (most recent call last):
  File "/root/miniconda3/lib/python3.8/site-packages/optuna/study/_optimize.py", line 196, in _run_trial
    value_or_values = func(trial)
  File "/tmp/ipykernel_643908/1085890602.py", line 31, in objective
    model.learn(total_timesteps=total_timesteps,
  File "/root/Experiment/StockTrading/maskable/MaskableDQN.py", line 118, in learn
    super().learn(*args, **kwargs)
  File "/root/miniconda3/lib/python3.8/site-packages/stable_baselines3/dqn/dqn.py", line 265, in learn
    return super().learn(
  File "/root/miniconda3/lib/python3.8/site-packages/stable_baselines3/common/off_policy_algorithm.py", line 334, in learn
    rollout = self.collect_rollouts(
  File "/root/miniconda3

TypeError: cannot pickle 'tensorflow.python.lib.io._pywrap_file_io.WritableFile' object

# Train models

In [17]:
# Train a MaskablePPO agent with its default hyperparameters on the training environment.
model = MaskablePPO(policy='MlpPolicy', env=env_train)
model.learn(total_timesteps=10000)

<sb3_contrib.ppo_mask.ppo_mask.MaskablePPO at 0x23c6647a220>

# Backtest

In [None]:
# df_t = dfs_test[3]
# list_asset, actions = simulate_trading_masked(env_factory([df_t]), model)
# sr_asset = pd.Series(list_asset)
# sr_return = get_daily_return(sr_asset)
# backtest_stats(sr_return)
# sr_baseline_return = get_daily_return(df_t.close).dropna()
# sr_baseline_return = sr_baseline_return[len(sr_baseline_return) - len(sr_asset):]
# backtest_stats(sr_baseline_return)
# %matplotlib inline
# sr_date = df_t.date
# sr_date = sr_date[len(sr_date) - len(sr_asset):]
# sr_return.set_axis(sr_date, inplace=True)
# sr_baseline_return.set_axis(sr_date, inplace=True)
# backtest_plot(sr_return, sr_baseline_return)
# sum(actions)

# Plot

# Result

## Hyperparameter tuning for maskable PPO

[I 2023-01-30 11:35:09,452] A new study created in memory with name: MaskablePPO_study
[I 2023-01-30 11:45:22,296] Trial 0 finished with value: -1.0710616666666666 and parameters: {'learning_rate_3_exp': -4, 'n_steps_2exp': 2, 'batch_size_2exp': 5, 'n_epochs': 3, 'net_arch_dim_2exp': 10, 'net_arch_layers': 4}. Best is trial 0 with value: -1.0710616666666666.
[I 2023-01-30 11:54:56,256] Trial 1 finished with value: -0.2739243333333334 and parameters: {'learning_rate_3_exp': -4, 'n_steps_2exp': 5, 'batch_size_2exp': 7, 'n_epochs': 2, 'net_arch_dim_2exp': 7, 'net_arch_layers': 3}. Best is trial 1 with value: -0.2739243333333334.
[I 2023-01-30 11:59:04,286] Trial 2 finished with value: -0.7405313333333333 and parameters: {'learning_rate_3_exp': -5, 'n_steps_2exp': 7, 'batch_size_2exp': 8, 'n_epochs': 1, 'net_arch_dim_2exp': 7, 'net_arch_layers': 3}. Best is trial 1 with value: -0.2739243333333334.
[I 2023-01-30 12:07:29,519] Trial 3 finished with value: -0.3807473333333333 and parameters: {'learning_rate_3_exp': -5, 'n_steps_2exp': 3, 'batch_size_2exp': 8, 'n_epochs': 5, 'net_arch_dim_2exp': 6, 'net_arch_layers': 3}. Best is trial 1 with value: -0.2739243333333334.
[I 2023-01-30 12:14:18,185] Trial 4 finished with value: -1.4296143333333333 and parameters: {'learning_rate_3_exp': -3, 'n_steps_2exp': 5, 'batch_size_2exp': 6, 'n_epochs': 4, 'net_arch_dim_2exp': 9, 'net_arch_layers': 5}. Best is trial 1 with value: -0.2739243333333334.
[I 2023-01-30 12:27:19,143] Trial 5 finished with value: -1.0494126666666668 and parameters: {'learning_rate_3_exp': -3, 'n_steps_2exp': 4, 'batch_size_2exp': 7, 'n_epochs': 4, 'net_arch_dim_2exp': 7, 'net_arch_layers': 4}. Best is trial 1 with value: -0.2739243333333334.
[I 2023-01-30 12:34:46,817] Trial 6 finished with value: -0.2175826666666667 and parameters: {'learning_rate_3_exp': -5, 'n_steps_2exp': 3, 'batch_size_2exp': 7, 'n_epochs': 5, 'net_arch_dim_2exp': 10, 'net_arch_layers': 3}. Best is trial 6 with value: -0.2175826666666667.
[I 2023-01-30 12:34:46,989] Trial 7 finished with value: -999.0 and parameters: {'learning_rate_3_exp': -3, 'n_steps_2exp': 0, 'batch_size_2exp': 5, 'n_epochs': 1, 'net_arch_dim_2exp': 8, 'net_arch_layers': 4}. Best is trial 6 with value: -0.2175826666666667.
Expected parameter logits (Tensor of shape (1, 3)) of distribution MaskableCategorical(logits: torch.Size([1, 3])) to satisfy the constraint IndependentConstraint(Real(), 1), but found invalid values:
tensor([[nan, nan, nan]], device='cuda:0')
[I 2023-01-30 12:40:39,196] Trial 8 finished with value: 0.139528 and parameters: {'learning_rate_3_exp': -3, 'n_steps_2exp': 8, 'batch_size_2exp': 6, 'n_epochs': 5, 'net_arch_dim_2exp': 7, 'net_arch_layers': 5}. Best is trial 8 with value: 0.139528.
[I 2023-01-30 12:44:18,919] Trial 9 finished with value: -0.8220639999999998 and parameters: {'learning_rate_3_exp': -4, 'n_steps_2exp': 7, 'batch_size_2exp': 8, 'n_epochs': 2, 'net_arch_dim_2exp': 7, 'net_arch_layers': 3}. Best is trial 8 with value: 0.139528.
[I 2023-01-30 12:48:21,233] Trial 10 finished with value: 0.005568666666666666 and parameters: {'learning_rate_3_exp': -3, 'n_steps_2exp': 8, 'batch_size_2exp': 6, 'n_epochs': 4, 'net_arch_dim_2exp': 6, 'net_arch_layers': 5}. Best is trial 8 with value: 0.139528.
[I 2023-01-30 12:58:03,468] Trial 11 finished with value: -0.8772513333333333 and parameters: {'learning_rate_3_exp': -3, 'n_steps_2exp': 8, 'batch_size_2exp': 6, 'n_epochs': 4, 'net_arch_dim_2exp': 6, 'net_arch_layers': 5}. Best is trial 8 with value: 0.139528.
[I 2023-01-30 13:05:05,425] Trial 12 finished with value: -0.5665723333333333 and parameters: {'learning_rate_3_exp': -3, 'n_steps_2exp': 8, 'batch_size_2exp': 6, 'n_epochs': 5, 'net_arch_dim_2exp': 6, 'net_arch_layers': 5}. Best is trial 8 with value: 0.139528.
[I 2023-01-30 13:09:48,089] Trial 13 finished with value: -0.25151299999999993 and parameters: {'learning_rate_3_exp': -3, 'n_steps_2exp': 6, 'batch_size_2exp': 6, 'n_epochs': 4, 'net_arch_dim_2exp': 8, 'net_arch_layers': 5}. Best is trial 8 with value: 0.139528.
[I 2023-01-30 13:18:46,623] Trial 14 finished with value: -1.2781556666666667 and parameters: {'learning_rate_3_exp': -4, 'n_steps_2exp': 8, 'batch_size_2exp': 5, 'n_epochs': 3, 'net_arch_dim_2exp': 6, 'net_arch_layers': 5}. Best is trial 8 with value: 0.139528.
[I 2023-01-30 13:25:25,358] Trial 15 finished with value: -1.2174103333333333 and parameters: {'learning_rate_3_exp': -3, 'n_steps_2exp': 6, 'batch_size_2exp': 6, 'n_epochs': 5, 'net_arch_dim_2exp': 7, 'net_arch_layers': 5}. Best is trial 8 with value: 0.139528.
[I 2023-01-30 13:29:13,929] Trial 16 finished with value: -0.38875166666666666 and parameters: {'learning_rate_3_exp': -4, 'n_steps_2exp': 7, 'batch_size_2exp': 7, 'n_epochs': 4, 'net_arch_dim_2exp': 8, 'net_arch_layers': 4}. Best is trial 8 with value: 0.139528.
[I 2023-01-30 13:29:14,213] Trial 17 finished with value: -999.0 and parameters: {'learning_rate_3_exp': -3, 'n_steps_2exp': 0, 'batch_size_2exp': 5, 'n_epochs': 5, 'net_arch_dim_2exp': 9, 'net_arch_layers': 5}. Best is trial 8 with value: 0.139528.
Expected parameter logits (Tensor of shape (1, 3)) of distribution MaskableCategorical(logits: torch.Size([1, 3])) to satisfy the constraint IndependentConstraint(Real(), 1), but found invalid values:
tensor([[nan, nan, nan]], device='cuda:0', grad_fn=<SubBackward0>)
[I 2023-01-30 13:33:41,079] Trial 18 finished with value: -0.515313 and parameters: {'learning_rate_3_exp': -3, 'n_steps_2exp': 6, 'batch_size_2exp': 6, 'n_epochs': 3, 'net_arch_dim_2exp': 6, 'net_arch_layers': 4}. Best is trial 8 with value: 0.139528.
[I 2023-01-30 13:37:33,111] Trial 19 finished with value: -0.7629226666666667 and parameters: {'learning_rate_3_exp': -4, 'n_steps_2exp': 8, 'batch_size_2exp': 7, 'n_epochs': 4, 'net_arch_dim_2exp': 8, 'net_arch_layers': 5}. Best is trial 8 with value: 0.139528.
[I 2023-01-30 13:41:35,566] Trial 20 finished with value: -0.6497763333333334 and parameters: {'learning_rate_3_exp': -4, 'n_steps_2exp': 5, 'batch_size_2exp': 5, 'n_epochs': 2, 'net_arch_dim_2exp': 7, 'net_arch_layers': 4}. Best is trial 8 with value: 0.139528.
[I 2023-01-30 14:00:16,185] Trial 21 finished with value: -0.3992253333333333 and parameters: {'learning_rate_3_exp': -5, 'n_steps_2exp': 2, 'batch_size_2exp': 7, 'n_epochs': 5, 'net_arch_dim_2exp': 10, 'net_arch_layers': 4}. Best is trial 8 with value: 0.139528.
[I 2023-01-30 14:08:04,275] Trial 22 finished with value: -1.0690483333333332 and parameters: {'learning_rate_3_exp': -5, 'n_steps_2exp': 4, 'batch_size_2exp': 6, 'n_epochs': 5, 'net_arch_dim_2exp': 9, 'net_arch_layers': 3}. Best is trial 8 with value: 0.139528.
[I 2023-01-30 14:25:59,409] Trial 23 finished with value: -0.030084666666666666 and parameters: {'learning_rate_3_exp': -5, 'n_steps_2exp': 2, 'batch_size_2exp': 7, 'n_epochs': 5, 'net_arch_dim_2exp': 10, 'net_arch_layers': 5}. Best is trial 8 with value: 0.139528.
[I 2023-01-30 14:45:40,132] Trial 24 finished with value: -1.3744766666666666 and parameters: {'learning_rate_3_exp': -4, 'n_steps_2exp': 1, 'batch_size_2exp': 6, 'n_epochs': 4, 'net_arch_dim_2exp': 6, 'net_arch_layers': 5}. Best is trial 8 with value: 0.139528.
[I 2023-01-30 15:23:03,830] Trial 25 finished with value: -1.7653846666666666 and parameters: {'learning_rate_3_exp': -3, 'n_steps_2exp': 1, 'batch_size_2exp': 7, 'n_epochs': 5, 'net_arch_dim_2exp': 9, 'net_arch_layers': 5}. Best is trial 8 with value: 0.139528.
[I 2023-01-30 15:28:30,068] Trial 26 finished with value: -0.7855256666666666 and parameters: {'learning_rate_3_exp': -5, 'n_steps_2exp': 7, 'batch_size_2exp': 6, 'n_epochs': 4, 'net_arch_dim_2exp': 8, 'net_arch_layers': 5}. Best is trial 8 with value: 0.139528.
[I 2023-01-30 15:38:54,512] Trial 27 finished with value: -0.6224313333333333 and parameters: {'learning_rate_3_exp': -3, 'n_steps_2exp': 3, 'batch_size_2exp': 7, 'n_epochs': 5, 'net_arch_dim_2exp': 7, 'net_arch_layers': 5}. Best is trial 8 with value: 0.139528.
[I 2023-01-30 15:49:41,475] Trial 28 finished with value: -0.3991933333333333 and parameters: {'learning_rate_3_exp': -5, 'n_steps_2exp': 2, 'batch_size_2exp': 8, 'n_epochs': 4, 'net_arch_dim_2exp': 6, 'net_arch_layers': 5}. Best is trial 8 with value: 0.139528.
[I 2023-01-30 16:20:58,663] Trial 29 finished with value: -1.0415336666666666 and parameters: {'learning_rate_3_exp': -4, 'n_steps_2exp': 1, 'batch_size_2exp': 5, 'n_epochs': 3, 'net_arch_dim_2exp': 10, 'net_arch_layers': 4}. Best is trial 8 with value: 0.139528.
[I 2023-01-30 16:30:02,294] Trial 30 finished with value: -1.4698586666666669 and parameters: {'learning_rate_3_exp': -3, 'n_steps_2exp': 4, 'batch_size_2exp': 6, 'n_epochs': 5, 'net_arch_dim_2exp': 10, 'net_arch_layers': 5}. Best is trial 8 with value: 0.139528.
[I 2023-01-30 16:43:21,658] Trial 31 finished with value: 0.02088833333333333 and parameters: {'learning_rate_3_exp': -5, 'n_steps_2exp': 3, 'batch_size_2exp': 7, 'n_epochs': 5, 'net_arch_dim_2exp': 10, 'net_arch_layers': 3}. Best is trial 8 with value: 0.139528.
[I 2023-01-30 17:00:20,585] Trial 32 finished with value: -0.8848716666666666 and parameters: {'learning_rate_3_exp': -5, 'n_steps_2exp': 2, 'batch_size_2exp': 7, 'n_epochs': 5, 'net_arch_dim_2exp': 10, 'net_arch_layers': 5}. Best is trial 8 with value: 0.139528.
[I 2023-01-30 17:15:36,529] Trial 33 finished with value: -0.019853 and parameters: {'learning_rate_3_exp': -5, 'n_steps_2exp': 3, 'batch_size_2exp': 7, 'n_epochs': 5, 'net_arch_dim_2exp': 9, 'net_arch_layers': 3}. Best is trial 8 with value: 0.139528.
[I 2023-01-30 17:22:31,901] Trial 34 finished with value: -0.44696099999999994 and parameters: {'learning_rate_3_exp': -5, 'n_steps_2exp': 3, 'batch_size_2exp': 8, 'n_epochs': 4, 'net_arch_dim_2exp': 9, 'net_arch_layers': 3}. Best is trial 8 with value: 0.139528.
[I 2023-01-30 17:31:47,434] Trial 35 finished with value: -0.17429333333333333 and parameters: {'learning_rate_3_exp': -5, 'n_steps_2exp': 5, 'batch_size_2exp': 7, 'n_epochs': 5, 'net_arch_dim_2exp': 9, 'net_arch_layers': 3}. Best is trial 8 with value: 0.139528.
[I 2023-01-30 17:37:08,776] Trial 36 finished with value: -0.413024 and parameters: {'learning_rate_3_exp': -5, 'n_steps_2exp': 4, 'batch_size_2exp': 6, 'n_epochs': 4, 'net_arch_dim_2exp': 9, 'net_arch_layers': 3}. Best is trial 8 with value: 0.139528.
[I 2023-01-30 17:45:36,550] Trial 37 finished with value: -0.5693386666666667 and parameters: {'learning_rate_3_exp': -4, 'n_steps_2exp': 3, 'batch_size_2exp': 7, 'n_epochs': 5, 'net_arch_dim_2exp': 8, 'net_arch_layers': 3}. Best is trial 8 with value: 0.139528.
[I 2023-01-30 17:52:27,447] Trial 38 finished with value: -0.32163899999999995 and parameters: {'learning_rate_3_exp': -5, 'n_steps_2exp': 6, 'batch_size_2exp': 6, 'n_epochs': 4, 'net_arch_dim_2exp': 7, 'net_arch_layers': 3}. Best is trial 8 with value: 0.139528.
[I 2023-01-30 17:58:10,626] Trial 39 finished with value: -0.39240733333333333 and parameters: {'learning_rate_3_exp': -4, 'n_steps_2exp': 7, 'batch_size_2exp': 8, 'n_epochs': 1, 'net_arch_dim_2exp': 7, 'net_arch_layers': 3}. Best is trial 8 with value: 0.139528.
[I 2023-01-30 18:02:35,272] Trial 40 finished with value: -0.5711713333333334 and parameters: {'learning_rate_3_exp': -3, 'n_steps_2exp': 5, 'batch_size_2exp': 7, 'n_epochs': 3, 'net_arch_dim_2exp': 10, 'net_arch_layers': 4}. Best is trial 8 with value: 0.139528.
[I 2023-01-30 18:12:29,331] Trial 41 finished with value: -0.09938799999999999 and parameters: {'learning_rate_3_exp': -5, 'n_steps_2exp': 3, 'batch_size_2exp': 7, 'n_epochs': 5, 'net_arch_dim_2exp': 10, 'net_arch_layers': 3}. Best is trial 8 with value: 0.139528.
[I 2023-01-30 18:30:15,883] Trial 42 finished with value: -0.28991866666666666 and parameters: {'learning_rate_3_exp': -5, 'n_steps_2exp': 2, 'batch_size_2exp': 7, 'n_epochs': 5, 'net_arch_dim_2exp': 10, 'net_arch_layers': 4}. Best is trial 8 with value: 0.139528.
[I 2023-01-30 18:38:42,726] Trial 43 finished with value: -0.4875106666666667 and parameters: {'learning_rate_3_exp': -5, 'n_steps_2exp': 3, 'batch_size_2exp': 7, 'n_epochs': 5, 'net_arch_dim_2exp': 9, 'net_arch_layers': 5}. Best is trial 8 with value: 0.139528.
[I 2023-01-30 18:49:52,209] Trial 44 finished with value: -1.0828693333333332 and parameters: {'learning_rate_3_exp': -5, 'n_steps_2exp': 4, 'batch_size_2exp': 6, 'n_epochs': 5, 'net_arch_dim_2exp': 10, 'net_arch_layers': 4}. Best is trial 8 with value: 0.139528.
[I 2023-01-30 19:35:10,493] Trial 45 finished with value: -0.6904150000000001 and parameters: {'learning_rate_3_exp': -5, 'n_steps_2exp': 1, 'batch_size_2exp': 7, 'n_epochs': 5, 'net_arch_dim_2exp': 10, 'net_arch_layers': 5}. Best is trial 8 with value: 0.139528.
[I 2023-01-30 19:45:15,404] Trial 46 finished with value: -0.8048216666666667 and parameters: {'learning_rate_3_exp': -5, 'n_steps_2exp': 2, 'batch_size_2exp': 8, 'n_epochs': 4, 'net_arch_dim_2exp': 8, 'net_arch_layers': 3}. Best is trial 8 with value: 0.139528.
[I 2023-01-30 19:49:33,910] Trial 47 finished with value: 0.20815533333333333 and parameters: {'learning_rate_3_exp': -3, 'n_steps_2exp': 8, 'batch_size_2exp': 6, 'n_epochs': 5, 'net_arch_dim_2exp': 7, 'net_arch_layers': 5}. Best is trial 47 with value: 0.20815533333333333.
[I 2023-01-30 19:57:19,009] Trial 48 finished with value: -0.47554466666666667 and parameters: {'learning_rate_3_exp': -3, 'n_steps_2exp': 8, 'batch_size_2exp': 6, 'n_epochs': 5, 'net_arch_dim_2exp': 7, 'net_arch_layers': 4}. Best is trial 47 with value: 0.20815533333333333.
[I 2023-01-30 20:04:59,025] Trial 49 finished with value: -0.7375536666666668 and parameters: {'learning_rate_3_exp': -3, 'n_steps_2exp': 8, 'batch_size_2exp': 6, 'n_epochs': 4, 'net_arch_dim_2exp': 7, 'net_arch_layers': 5}. Best is trial 47 with value: 0.20815533333333333.
[I 2023-01-30 20:11:10,333] Trial 50 finished with value: -0.9862456666666667 and parameters: {'learning_rate_3_exp': -3, 'n_steps_2exp': 7, 'batch_size_2exp': 5, 'n_epochs': 4, 'net_arch_dim_2exp': 6, 'net_arch_layers': 3}. Best is trial 47 with value: 0.20815533333333333.
[I 2023-01-30 20:19:04,843] Trial 51 finished with value: -0.14732266666666669 and parameters: {'learning_rate_3_exp': -3, 'n_steps_2exp': 8, 'batch_size_2exp': 6, 'n_epochs': 5, 'net_arch_dim_2exp': 6, 'net_arch_layers': 5}. Best is trial 47 with value: 0.20815533333333333.
[I 2023-01-30 20:26:01,710] Trial 52 finished with value: -0.7422023333333333 and parameters: {'learning_rate_3_exp': -3, 'n_steps_2exp': 7, 'batch_size_2exp': 6, 'n_epochs': 5, 'net_arch_dim_2exp': 8, 'net_arch_layers': 5}. Best is trial 47 with value: 0.20815533333333333.
[I 2023-01-30 20:30:08,180] Trial 53 finished with value: -0.5128739999999999 and parameters: {'learning_rate_3_exp': -3, 'n_steps_2exp': 8, 'batch_size_2exp': 7, 'n_epochs': 5, 'net_arch_dim_2exp': 9, 'net_arch_layers': 5}. Best is trial 47 with value: 0.20815533333333333.
[I 2023-01-30 20:37:32,129] Trial 54 finished with value: -0.9192929999999998 and parameters: {'learning_rate_3_exp': -3, 'n_steps_2exp': 8, 'batch_size_2exp': 6, 'n_epochs': 5, 'net_arch_dim_2exp': 7, 'net_arch_layers': 5}. Best is trial 47 with value: 0.20815533333333333.
[I 2023-01-30 20:52:52,159] Trial 55 finished with value: -0.131528 and parameters: {'learning_rate_3_exp': -4, 'n_steps_2exp': 3, 'batch_size_2exp': 7, 'n_epochs': 5, 'net_arch_dim_2exp': 8, 'net_arch_layers': 5}. Best is trial 47 with value: 0.20815533333333333.
[I 2023-01-30 20:59:14,149] Trial 56 finished with value: -1.0313139999999998 and parameters: {'learning_rate_3_exp': -4, 'n_steps_2exp': 7, 'batch_size_2exp': 6, 'n_epochs': 2, 'net_arch_dim_2exp': 6, 'net_arch_layers': 5}. Best is trial 47 with value: 0.20815533333333333.
[I 2023-01-30 21:10:03,209] Trial 57 finished with value: -1.6049653333333334 and parameters: {'learning_rate_3_exp': -3, 'n_steps_2exp': 4, 'batch_size_2exp': 7, 'n_epochs': 5, 'net_arch_dim_2exp': 10, 'net_arch_layers': 5}. Best is trial 47 with value: 0.20815533333333333.
[I 2023-01-30 21:17:36,516] Trial 58 finished with value: -0.27793833333333334 and parameters: {'learning_rate_3_exp': -5, 'n_steps_2exp': 6, 'batch_size_2exp': 6, 'n_epochs': 5, 'net_arch_dim_2exp': 9, 'net_arch_layers': 5}. Best is trial 47 with value: 0.20815533333333333.
[I 2023-01-30 21:17:36,563] Trial 59 finished with value: -999.0 and parameters: {'learning_rate_3_exp': -3, 'n_steps_2exp': 0, 'batch_size_2exp': 7, 'n_epochs': 4, 'net_arch_dim_2exp': 7, 'net_arch_layers': 5}. Best is trial 47 with value: 0.20815533333333333.
Expected parameter logits (Tensor of shape (1, 3)) of distribution MaskableCategorical(logits: torch.Size([1, 3])) to satisfy the constraint IndependentConstraint(Real(), 1), but found invalid values:
tensor([[nan, nan, nan]], device='cuda:0', grad_fn=<SubBackward0>)
[I 2023-01-30 21:46:16,107] Trial 60 finished with value: -0.49509800000000004 and parameters: {'learning_rate_3_exp': -5, 'n_steps_2exp': 2, 'batch_size_2exp': 6, 'n_epochs': 5, 'net_arch_dim_2exp': 6, 'net_arch_layers': 3}. Best is trial 47 with value: 0.20815533333333333.
[I 2023-01-30 21:56:03,564] Trial 61 finished with value: -0.6322530000000001 and parameters: {'learning_rate_3_exp': -5, 'n_steps_2exp': 3, 'batch_size_2exp': 7, 'n_epochs': 5, 'net_arch_dim_2exp': 10, 'net_arch_layers': 3}. Best is trial 47 with value: 0.20815533333333333.
[I 2023-01-30 22:12:22,781] Trial 62 finished with value: -0.7224483333333332 and parameters: {'learning_rate_3_exp': -5, 'n_steps_2exp': 3, 'batch_size_2exp': 7, 'n_epochs': 5, 'net_arch_dim_2exp': 10, 'net_arch_layers': 3}. Best is trial 47 with value: 0.20815533333333333.
[I 2023-01-30 22:30:54,019] Trial 63 finished with value: 0.04822266666666666 and parameters: {'learning_rate_3_exp': -5, 'n_steps_2exp': 3, 'batch_size_2exp': 7, 'n_epochs': 5, 'net_arch_dim_2exp': 10, 'net_arch_layers': 3}. Best is trial 47 with value: 0.20815533333333333.
[I 2023-01-30 22:42:31,342] Trial 64 finished with value: 0.003159666666666665 and parameters: {'learning_rate_3_exp': -5, 'n_steps_2exp': 2, 'batch_size_2exp': 7, 'n_epochs': 5, 'net_arch_dim_2exp': 10, 'net_arch_layers': 3}. Best is trial 47 with value: 0.20815533333333333.
[I 2023-01-30 22:47:32,895] Trial 65 finished with value: -0.40266699999999994 and parameters: {'learning_rate_3_exp': -5, 'n_steps_2exp': 8, 'batch_size_2exp': 7, 'n_epochs': 5, 'net_arch_dim_2exp': 10, 'net_arch_layers': 3}. Best is trial 47 with value: 0.20815533333333333.
[I 2023-01-30 22:53:58,820] Trial 66 finished with value: -0.477679 and parameters: {'learning_rate_3_exp': -5, 'n_steps_2exp': 4, 'batch_size_2exp': 7, 'n_epochs': 5, 'net_arch_dim_2exp': 9, 'net_arch_layers': 3}. Best is trial 47 with value: 0.20815533333333333.
[I 2023-01-30 23:32:58,481] Trial 67 finished with value: -1.2847836666666668 and parameters: {'learning_rate_3_exp': -3, 'n_steps_2exp': 1, 'batch_size_2exp': 6, 'n_epochs': 5, 'net_arch_dim_2exp': 10, 'net_arch_layers': 3}. Best is trial 47 with value: 0.20815533333333333.
[I 2023-01-30 23:42:40,603] Trial 68 finished with value: -0.48326399999999997 and parameters: {'learning_rate_3_exp': -4, 'n_steps_2exp': 2, 'batch_size_2exp': 5, 'n_epochs': 4, 'net_arch_dim_2exp': 8, 'net_arch_layers': 3}. Best is trial 47 with value: 0.20815533333333333.
[I 2023-01-30 23:54:21,760] Trial 69 finished with value: -0.39726733333333336 and parameters: {'learning_rate_3_exp': -5, 'n_steps_2exp': 3, 'batch_size_2exp': 7, 'n_epochs': 4, 'net_arch_dim_2exp': 10, 'net_arch_layers': 3}. Best is trial 47 with value: 0.20815533333333333.
[I 2023-01-30 23:59:47,350] Trial 70 finished with value: -0.4848543333333333 and parameters: {'learning_rate_3_exp': -5, 'n_steps_2exp': 4, 'batch_size_2exp': 6, 'n_epochs': 5, 'net_arch_dim_2exp': 7, 'net_arch_layers': 3}. Best is trial 47 with value: 0.20815533333333333.
[I 2023-01-31 00:16:09,006] Trial 71 finished with value: -0.9602633333333334 and parameters: {'learning_rate_3_exp': -5, 'n_steps_2exp': 2, 'batch_size_2exp': 7, 'n_epochs': 5, 'net_arch_dim_2exp': 10, 'net_arch_layers': 3}. Best is trial 47 with value: 0.20815533333333333.
[I 2023-01-31 00:35:12,061] Trial 72 finished with value: -0.3500816666666666 and parameters: {'learning_rate_3_exp': -5, 'n_steps_2exp': 2, 'batch_size_2exp': 7, 'n_epochs': 5, 'net_arch_dim_2exp': 10, 'net_arch_layers': 4}. Best is trial 47 with value: 0.20815533333333333.
[I 2023-01-31 00:43:32,720] Trial 73 finished with value: 0.007844333333333333 and parameters: {'learning_rate_3_exp': -5, 'n_steps_2exp': 3, 'batch_size_2exp': 8, 'n_epochs': 5, 'net_arch_dim_2exp': 10, 'net_arch_layers': 5}. Best is trial 47 with value: 0.20815533333333333.
[I 2023-01-31 00:52:07,215] Trial 74 finished with value: -0.342135 and parameters: {'learning_rate_3_exp': -5, 'n_steps_2exp': 3, 'batch_size_2exp': 8, 'n_epochs': 5, 'net_arch_dim_2exp': 9, 'net_arch_layers': 3}. Best is trial 47 with value: 0.20815533333333333.
[I 2023-01-31 00:56:10,199] Trial 75 finished with value: -0.7481516666666667 and parameters: {'learning_rate_3_exp': -5, 'n_steps_2exp': 7, 'batch_size_2exp': 8, 'n_epochs': 5, 'net_arch_dim_2exp': 10, 'net_arch_layers': 5}. Best is trial 47 with value: 0.20815533333333333.
[I 2023-01-31 01:03:47,370] Trial 76 finished with value: -0.5838169999999999 and parameters: {'learning_rate_3_exp': -5, 'n_steps_2exp': 3, 'batch_size_2exp': 8, 'n_epochs': 3, 'net_arch_dim_2exp': 10, 'net_arch_layers': 5}. Best is trial 47 with value: 0.20815533333333333.
[I 2023-01-31 01:13:26,577] Trial 77 finished with value: -0.4263326666666667 and parameters: {'learning_rate_3_exp': -5, 'n_steps_2exp': 3, 'batch_size_2exp': 8, 'n_epochs': 5, 'net_arch_dim_2exp': 9, 'net_arch_layers': 3}. Best is trial 47 with value: 0.20815533333333333.
[I 2023-01-31 01:19:16,797] Trial 78 finished with value: -0.5381776666666668 and parameters: {'learning_rate_3_exp': -3, 'n_steps_2exp': 5, 'batch_size_2exp': 6, 'n_epochs': 4, 'net_arch_dim_2exp': 10, 'net_arch_layers': 4}. Best is trial 47 with value: 0.20815533333333333.
[I 2023-01-31 01:23:02,532] Trial 79 finished with value: 0.19475166666666666 and parameters: {'learning_rate_3_exp': -5, 'n_steps_2exp': 8, 'batch_size_2exp': 8, 'n_epochs': 5, 'net_arch_dim_2exp': 6, 'net_arch_layers': 3}. Best is trial 47 with value: 0.20815533333333333.
[I 2023-01-31 01:26:53,095] Trial 80 finished with value: -0.8212086666666667 and parameters: {'learning_rate_3_exp': -4, 'n_steps_2exp': 8, 'batch_size_2exp': 8, 'n_epochs': 5, 'net_arch_dim_2exp': 6, 'net_arch_layers': 5}. Best is trial 47 with value: 0.20815533333333333.
[I 2023-01-31 01:31:07,702] Trial 81 finished with value: -0.6204623333333333 and parameters: {'learning_rate_3_exp': -5, 'n_steps_2exp': 8, 'batch_size_2exp': 8, 'n_epochs': 5, 'net_arch_dim_2exp': 6, 'net_arch_layers': 3}. Best is trial 47 with value: 0.20815533333333333.
[I 2023-01-31 01:34:51,587] Trial 82 finished with value: -0.5214996666666667 and parameters: {'learning_rate_3_exp': -5, 'n_steps_2exp': 8, 'batch_size_2exp': 8, 'n_epochs': 5, 'net_arch_dim_2exp': 6, 'net_arch_layers': 3}. Best is trial 47 with value: 0.20815533333333333.
[I 2023-01-31 01:40:38,826] Trial 83 finished with value: -1.095369 and parameters: {'learning_rate_3_exp': -5, 'n_steps_2exp': 8, 'batch_size_2exp': 8, 'n_epochs': 5, 'net_arch_dim_2exp': 6, 'net_arch_layers': 3}. Best is trial 47 with value: 0.20815533333333333.
[I 2023-01-31 01:44:33,952] Trial 84 finished with value: -0.8564336666666666 and parameters: {'learning_rate_3_exp': -5, 'n_steps_2exp': 7, 'batch_size_2exp': 8, 'n_epochs': 5, 'net_arch_dim_2exp': 7, 'net_arch_layers': 3}. Best is trial 47 with value: 0.20815533333333333.
[I 2023-01-31 01:56:47,320] Trial 85 finished with value: -0.3596026666666667 and parameters: {'learning_rate_3_exp': -5, 'n_steps_2exp': 4, 'batch_size_2exp': 6, 'n_epochs': 5, 'net_arch_dim_2exp': 6, 'net_arch_layers': 3}. Best is trial 47 with value: 0.20815533333333333.
[I 2023-01-31 02:02:02,910] Trial 86 finished with value: -0.9681890000000001 and parameters: {'learning_rate_3_exp': -5, 'n_steps_2exp': 8, 'batch_size_2exp': 6, 'n_epochs': 5, 'net_arch_dim_2exp': 10, 'net_arch_layers': 3}. Best is trial 47 with value: 0.20815533333333333.
[I 2023-01-31 02:08:41,924] Trial 87 finished with value: -0.26260633333333333 and parameters: {'learning_rate_3_exp': -3, 'n_steps_2exp': 3, 'batch_size_2exp': 7, 'n_epochs': 4, 'net_arch_dim_2exp': 8, 'net_arch_layers': 3}. Best is trial 47 with value: 0.20815533333333333.
[I 2023-01-31 02:12:41,794] Trial 88 finished with value: -0.786614 and parameters: {'learning_rate_3_exp': -5, 'n_steps_2exp': 7, 'batch_size_2exp': 7, 'n_epochs': 5, 'net_arch_dim_2exp': 7, 'net_arch_layers': 5}. Best is trial 47 with value: 0.20815533333333333.
[I 2023-01-31 02:23:07,728] Trial 89 finished with value: -0.8378396666666666 and parameters: {'learning_rate_3_exp': -3, 'n_steps_2exp': 4, 'batch_size_2exp': 8, 'n_epochs': 5, 'net_arch_dim_2exp': 6, 'net_arch_layers': 4}. Best is trial 47 with value: 0.20815533333333333.
[I 2023-01-31 02:48:17,021] Trial 90 finished with value: 0.007555333333333334 and parameters: {'learning_rate_3_exp': -5, 'n_steps_2exp': 1, 'batch_size_2exp': 5, 'n_epochs': 5, 'net_arch_dim_2exp': 10, 'net_arch_layers': 5}. Best is trial 47 with value: 0.20815533333333333.
[I 2023-01-31 02:48:17,695] Trial 91 finished with value: -999.0 and parameters: {'learning_rate_3_exp': -5, 'n_steps_2exp': 0, 'batch_size_2exp': 5, 'n_epochs': 5, 'net_arch_dim_2exp': 10, 'net_arch_layers': 5}. Best is trial 47 with value: 0.20815533333333333.
Expected parameter logits (Tensor of shape (1, 3)) of distribution MaskableCategorical(logits: torch.Size([1, 3])) to satisfy the constraint IndependentConstraint(Real(), 1), but found invalid values:
tensor([[nan, nan, nan]], device='cuda:0', grad_fn=<SubBackward0>)
[I 2023-01-31 03:04:46,258] Trial 92 finished with value: -0.009927333333333335 and parameters: {'learning_rate_3_exp': -5, 'n_steps_2exp': 2, 'batch_size_2exp': 5, 'n_epochs': 5, 'net_arch_dim_2exp': 10, 'net_arch_layers': 5}. Best is trial 47 with value: 0.20815533333333333.
[I 2023-01-31 03:27:55,914] Trial 93 finished with value: -0.1371403333333333 and parameters: {'learning_rate_3_exp': -5, 'n_steps_2exp': 1, 'batch_size_2exp': 5, 'n_epochs': 5, 'net_arch_dim_2exp': 10, 'net_arch_layers': 5}. Best is trial 47 with value: 0.20815533333333333.
[I 2023-01-31 04:02:10,640] Trial 94 finished with value: -0.0028663333333333336 and parameters: {'learning_rate_3_exp': -5, 'n_steps_2exp': 1, 'batch_size_2exp': 5, 'n_epochs': 5, 'net_arch_dim_2exp': 10, 'net_arch_layers': 5}. Best is trial 47 with value: 0.20815533333333333.
[I 2023-01-31 04:47:43,578] Trial 95 finished with value: -0.26596166666666665 and parameters: {'learning_rate_3_exp': -5, 'n_steps_2exp': 1, 'batch_size_2exp': 5, 'n_epochs': 5, 'net_arch_dim_2exp': 10, 'net_arch_layers': 5}. Best is trial 47 with value: 0.20815533333333333.
[I 2023-01-31 05:30:27,088] Trial 96 finished with value: -0.004809666666666667 and parameters: {'learning_rate_3_exp': -5, 'n_steps_2exp': 1, 'batch_size_2exp': 5, 'n_epochs': 5, 'net_arch_dim_2exp': 10, 'net_arch_layers': 5}. Best is trial 47 with value: 0.20815533333333333.
[I 2023-01-31 05:30:27,700] Trial 97 finished with value: -999.0 and parameters: {'learning_rate_3_exp': -3, 'n_steps_2exp': 0, 'batch_size_2exp': 5, 'n_epochs': 5, 'net_arch_dim_2exp': 10, 'net_arch_layers': 5}. Best is trial 47 with value: 0.20815533333333333.
Expected parameter logits (Tensor of shape (1, 3)) of distribution MaskableCategorical(logits: torch.Size([1, 3])) to satisfy the constraint IndependentConstraint(Real(), 1), but found invalid values:
tensor([[nan, nan, nan]], device='cuda:0', grad_fn=<SubBackward0>)
[I 2023-01-31 05:48:56,050] Trial 98 finished with value: -0.37979466666666667 and parameters: {'learning_rate_3_exp': -5, 'n_steps_2exp': 2, 'batch_size_2exp': 5, 'n_epochs': 4, 'net_arch_dim_2exp': 10, 'net_arch_layers': 5}. Best is trial 47 with value: 0.20815533333333333.