In [1]:
import numpy as np
import pandas as pd
from utils import misc
from utils.misc import get_attr
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm

import matplotlib
import matplotlib.pyplot as plt
from collections import namedtuple, deque
from itertools import count

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

from agent import DQNagent
from gym_env import TradingEnv
from trainer import Trainer

import optuna

# set up matplotlib
is_ipython = 'inline' in matplotlib.get_backend()
if is_ipython:
    from IPython import display

plt.ion()

# if gpu is to be used
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device


pd.set_option('mode.chained_assignment', None)

In [2]:
spy_df = pd.read_csv('datasets/price_series/SPY.csv')
spy_df = misc.clean_df(spy_df)
spy_df

Unnamed: 0,date,open,high,low,close,adjclose,volume,ticker
0,1993-01-29,43.968750,43.968750,43.750000,43.937500,25.218206,1003200,SPY
1,1993-02-01,43.968750,44.250000,43.968750,44.250000,25.397572,480500,SPY
2,1993-02-02,44.218750,44.375000,44.125000,44.343750,25.451397,201300,SPY
3,1993-02-03,44.406250,44.843750,44.375000,44.812500,25.720430,529400,SPY
4,1993-02-04,44.968750,45.093750,44.468750,45.000000,25.828049,531500,SPY
...,...,...,...,...,...,...,...,...
7581,2023-03-09,399.739990,401.480011,390.529999,391.559998,391.559998,111945300,SPY
7582,2023-03-10,390.989990,393.160004,384.320007,385.910004,385.910004,189105300,SPY
7583,2023-03-13,381.809998,390.390015,380.649994,385.359985,385.359985,157790000,SPY
7584,2023-03-14,390.500000,393.450012,387.049988,391.730011,391.730011,149752400,SPY


In [3]:
df = spy_df.copy()

# generate moving average technical indicator
col_names = []
for i in range(3, 21):
    col_name = 'ma' + str(i)
    df[col_name] = df['close'].rolling(i).mean()
    col_names.append(col_name)

df['returns'] = np.insert(df['close'].to_numpy()[1:] / df['close'].to_numpy()[:-1], 1, 0) - 1
df['volatility'] = df['returns'].rolling(30).std()

df = df.dropna()
df



Unnamed: 0,date,open,high,low,close,adjclose,volume,ticker,ma3,ma4,...,ma13,ma14,ma15,ma16,ma17,ma18,ma19,ma20,returns,volatility
29,1993-03-12,45.187500,45.218750,44.812500,45.093750,25.881849,643600,SPY,45.447917,45.484375,...,44.973558,44.881696,44.804167,44.726562,44.648897,44.581597,44.523026,44.526562,-0.010288,0.182937
30,1993-03-15,45.062500,45.312500,45.062500,45.312500,26.007406,310800,SPY,45.322917,45.414062,...,45.055288,44.997768,44.910417,44.835938,44.761029,44.685764,44.620066,44.562500,0.004851,0.182920
31,1993-03-16,45.312500,45.437500,45.312500,45.312500,26.007406,30800,SPY,45.239583,45.320312,...,45.129808,45.073661,45.018750,44.935547,44.863971,44.791667,44.718750,44.654688,0.000000,0.008342
32,1993-03-17,45.250000,45.250000,44.968750,45.031250,25.845985,21800,SPY,45.218750,45.187500,...,45.177885,45.122768,45.070833,45.019531,44.941176,44.873264,44.804276,44.734375,-0.006207,0.008435
33,1993-03-18,45.218750,45.500000,45.218750,45.312500,26.007406,59300,SPY,45.218750,45.242188,...,45.257212,45.187500,45.135417,45.085938,45.036765,44.961806,44.896382,44.829687,0.006246,0.008294
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7581,2023-03-09,399.739990,401.480011,390.529999,391.559998,391.559998,111945300,SPY,396.250000,398.305000,...,398.355387,398.991431,399.610669,400.508753,401.222356,401.867225,402.192108,402.437003,-0.018450,0.010437
7582,2023-03-10,390.989990,393.160004,384.320007,385.910004,385.910004,189105300,SPY,392.130005,393.665001,...,397.341541,397.466431,398.119336,398.754377,399.650003,400.371670,401.027371,401.378003,-0.014429,0.010488
7583,2023-03-13,381.809998,390.390015,380.649994,385.359985,385.359985,157790000,SPY,387.609996,390.437500,...,396.327693,396.485716,396.659334,397.321877,397.966472,398.856113,399.581582,400.244002,-0.001425,0.010463
7584,2023-03-14,390.500000,393.450012,387.049988,391.730011,391.730011,149752400,SPY,387.666667,388.639999,...,395.640771,395.999287,396.168669,396.351252,396.992943,397.620002,398.481055,399.189003,0.016530,0.010763


In [5]:
class DQN(nn.Module):
    
    def __init__(self, n_observations, n_actions):
        super(DQN, self).__init__()
        self.layer1 = nn.Linear(n_observations, 128)
        self.layer2 = nn.Linear(128, 128)
        self.layer3 = nn.Linear(128, n_actions)

    def forward(self, x):
        x = F.relu(self.layer1(x))
        x = F.relu(self.layer2(x))
        return self.layer3(x)
    

In [6]:
input_feature_list = [
    'open',
    'high',
    'low',
    'close',
    'adjclose',
    'volume',
    'volatility',
]

input_feature_list += col_names

trader_state_list = [
    'cash',
    'position',
    'position_value',
    'portfolio_value',
    # 'leverage',
    # 'portfolio_volatility',
]

params = {
    # agent params
    'action_size': 9,
    'state_size': len(input_feature_list) + len(trader_state_list),
    'epsilon': 1.0,
    'epsilon_decay': 0.9995,
    'epsilon_min': 0.01,
    'replay_memory_size': 100000,
    'batch_size': 200,
    'gamma': 0.99,
    'new_model': True,
    'save_model_path': 'dqn.pth',
    'load_model_path': 'dqn.pth',
    # env_params
    'initial_capital': 1000000,
    'hindsight_weight': 0.5,
    'lookback_period': 20,
    'lookforward_period': 20,
    'render_window_size': 10,
    'input_feature_list': input_feature_list,
    # trainer_params
    'num_episodes': 20,
    'record_var_list': ['portfolio_return'],
    'learning_rate': 0.001,
    'target_update_threshold': 10,
}

train_df = df[-500:-100]
test_df = df[-100:]

train_env = TradingEnv(df=train_df, **params)
test_env = TradingEnv(df=test_df, **params)

agent = DQNagent(DQN, **params)

  logger.warn(
  logger.warn(


In [7]:
trainer = Trainer(agent=agent, **params)

record, trade_record = trainer.train(train_df)

plt.figure(figsize=(12, 6))
colors = plt.cm.viridis(np.linspace(0, 1, 20))

for r in range(len(record)):
    plt.plot(record[r]['portfolio_return'], label=r, color=colors[r])

plt.legend()
plt.show()

In [8]:
# Define the objective function to be optimized by Optuna
def objective(trial):
    params = {
        # agent params
        'action_size': 9,
        'state_size': len(input_feature_list) + len(trader_state_list),
        'epsilon': 1.0,
        'epsilon_decay': trial.suggest_float('epsilon_decay', 0.8, 0.9995),
        'epsilon_min': trial.suggest_float('epsilon_min', 0.005, 0.05),
        'replay_memory_size': trial.suggest_int('replay_memory_size', 1000, 100000),
        'batch_size': trial.suggest_int('batch_size', 100, 1000),
        'gamma': trial.suggest_float('gamma', 0.01, 0.99),
        'new_model': True,
        'save_model_path': 'dqn.pth',
        'load_model_path': 'dqn.pth',
        # env_params
        'initial_capital': 1000000,
        'hindsight_weight': trial.suggest_float('hindsight_weight', 0.1, 1.0),
        'lookback_period': 20,
        'lookforward_period': 20,
        'render_window_size': 10,
        'input_feature_list': input_feature_list,
        # trainer_params
        'num_episodes': 10,
        'record_var_list': ['portfolio_return'],
        'learning_rate': trial.suggest_float('learning_rate', 0.0005, 0.1),
        'target_update_threshold': 9,
    }

    # Train the model using the current set of hyperparameters
    obj_agent = DQNagent(DQN, **params)
    obj_trainer = Trainer(obj_agent, **params)
    obj_trainer.train(train_df)
    obj_env = obj_trainer.test(test_df)

    # Compute the validation loss
    score = obj_env.sharpe

    return score

# Create the Optuna study
study = optuna.create_study(direction='maximize')

# Run the optimization
study.optimize(objective, n_trials=50)

[I 2023-03-30 15:54:50,534] A new study created in memory with name: no-name-3e3f6e21-d8e2-4a47-b4e3-111314b1c8bb
  logger.warn(


  0%|          | 0/10 [00:00<?, ?it/s]

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  logger.warn(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
[I 2023-03-30 15:55:18,673] Trial 0 finished with value: 0.0594929128328543 and parameters: {'epsilon_decay': 0.8754540183687379, 'epsilon_min': 0.025034483144752352, 'replay_memory_size': 93395, 'batch_size': 811, 'gamma': 0.0727279716770231, 'hindsight_weight': 0.9797991793186216, 'learning_rate': 0.09045014942811247}. Best is trial 0 with value: 0.0594929128328543.
  logger.warn(


  0%|          | 0/10 [00:00<?, ?it/s]

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  logger.warn(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
[I 2023-03-30 15:55:40,763] Trial 1 finished with value: -0.21356014361159922 and parameters: {'epsilon_decay': 0.8290752461144345, 'epsilon_min': 0.01759529143873715, 'replay_memory_size': 80063, 'batch_size': 477, 'gamma': 0.5643360031647766, 'hindsight_weight': 0.40661183543217716, 'learning_rate': 0.07679088631062618}. Best is trial 0 with value: 0.0594929128328543.
  logger.warn(


  0%|          | 0/10 [00:00<?, ?it/s]

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  logger.warn(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
[I 2023-03-30 15:56:05,735] Trial 2 finished with value: -0.20466084504409562 and parameters: {'epsilon_decay': 0.8706343578420179, 'epsilon_min': 0.02498914790229423, 'replay_memory_size': 32107, 'batch_size': 658, 'gamma': 0.6583264528953695, 'hindsight_weight': 0.6333928635283658, 'learning_rate': 0.006044121496159675}. Best is trial 0 with value: 0.0594929128328543.
  logger.warn(


  0%|          | 0/10 [00:00<?, ?it/s]

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  logger.warn(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
[I 2023-03-30 15:56:28,154] Trial 3 finished with value: 0.15121865292444261 and parameters: {'epsilon_decay': 0.8518653211904107, 'epsilon_min': 0.02317218867147426, 'replay_memory_size': 29078, 'batch_size': 543, 'gamma': 0.5888124594344127, 'hindsight_weight': 0.8152537798882845, 'learning_rate': 0.0041382183373910916}. Best is trial 3 with value: 0.15121865292444261.
  logger.warn(


  0%|          | 0/10 [00:00<?, ?it/s]

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  logger.warn(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  self.sharpe = self.total_return / self.volatility
  self.calmar = self.total_return / self.max_drawdown
[W 2023-03-30 15:56:49,219] Trial 4 failed with parameters: {'epsilon_decay': 0.9205251664966149, 'epsilon_min': 0.03731178354086964, 'replay_memory_size': 86585, 'batch_size': 397, 'gamma': 0.7985882877484318, 'hindsight_weight': 0.11596895286879237, 'learning_rate': 0.09505063589139749} because of the following error: The value nan is not acceptable..
[W 2023-03-30 15:56:49,220] Trial 4 failed with value nan.
  logger.warn(


  0%|          | 0/10 [00:00<?, ?it/s]

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  logger.warn(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
[I 2023-03-30 15:57:16,352] Trial 5 finished with value: 0.1301028334709071 and parameters: {'epsilon_decay': 0.9255881494058259, 'epsilon_min': 0.04321888138481784, 'replay_memory_size': 36505, 'batch_size': 743, 'gamma': 0.01214474816153547, 'hindsight_weight': 0.634716508401763, 'learning_rate': 0.08896590484112564}. Best is trial 3 with value: 0.15121865292444261.
  logger.warn(


  0%|          | 0/10 [00:00<?, ?it/s]

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  logger.warn(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
[I 2023-03-30 15:57:32,792] Trial 6 finished with value: -0.15299054031862172 and parameters: {'epsilon_decay': 0.8021820396069713, 'epsilon_min': 0.046188063103377204, 'replay_memory_size': 37433, 'batch_size': 244, 'gamma': 0.07412420742425563, 'hindsight_weight': 0.9549592561334588, 'learning_rate': 0.014325504378050446}. Best is trial 3 with value: 0.15121865292444261.
  logger.warn(


  0%|          | 0/10 [00:00<?, ?it/s]

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  logger.warn(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
[I 2023-03-30 15:57:57,069] Trial 7 finished with value: 0.032860367635225554 and parameters: {'epsilon_decay': 0.9477028984620095, 'epsilon_min': 0.02984483534262588, 'replay_memory_size': 53421, 'batch_size': 672, 'gamma': 0.17475361876596274, 'hindsight_weight': 0.4137650130924486, 'learning_rate': 0.05303994670419017}. Best is trial 3 with value: 0.15121865292444261.
  logger.warn(


  0%|          | 0/10 [00:00<?, ?it/s]

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  logger.warn(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  self.sharpe = self.total_return / self.volatility
  self.calmar = self.total_return / self.max_drawdown
[W 2023-03-30 15:58:13,779] Trial 8 failed with parameters: {'epsilon_decay': 0.9456322227678692, 'epsilon_min': 0.028011596459730514, 'replay_memory_size': 22158, 'batch_size': 255, 'gamma': 0.9847527706117779, 'hindsight_weight': 0.1696496276497957, 'learning_rate': 0.0103121820288883} because of the following error: The value nan is not acceptable..
[W 2023-03-30 15:58:13,780] Trial 8 failed with value nan.
  logger.warn(


  0%|          | 0/10 [00:00<?, ?it/s]

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  logger.warn(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
[I 2023-03-30 15:58:25,253] Trial 9 finished with value: 0.16033764289733793 and parameters: {'epsilon_decay': 0.9036195401529717, 'epsilon_min': 0.049858275969869505, 'replay_memory_size': 65695, 'batch_size': 108, 'gamma': 0.6047184999124839, 'hindsight_weight': 0.6195666739170871, 'learning_rate': 0.04159993452789358}. Best is trial 9 with value: 0.16033764289733793.
  logger.warn(


  0%|          | 0/10 [00:00<?, ?it/s]

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  logger.warn(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
[I 2023-03-30 15:58:51,499] Trial 10 finished with value: 0.06282610556812125 and parameters: {'epsilon_decay': 0.8682901767211527, 'epsilon_min': 0.00865671808123491, 'replay_memory_size': 22585, 'batch_size': 768, 'gamma': 0.7835385624320994, 'hindsight_weight': 0.7523840747474052, 'learning_rate': 0.07434908445356067}. Best is trial 9 with value: 0.16033764289733793.
  logger.warn(


  0%|          | 0/10 [00:00<?, ?it/s]

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  logger.warn(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
[I 2023-03-30 15:59:03,576] Trial 11 finished with value: 0.03361267183661883 and parameters: {'epsilon_decay': 0.9198853576758221, 'epsilon_min': 0.03576589726753611, 'replay_memory_size': 53412, 'batch_size': 139, 'gamma': 0.8409929663608712, 'hindsight_weight': 0.11279137018470965, 'learning_rate': 0.005968910343392269}. Best is trial 9 with value: 0.16033764289733793.
  logger.warn(


  0%|          | 0/10 [00:00<?, ?it/s]

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  logger.warn(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
[I 2023-03-30 15:59:21,918] Trial 12 finished with value: 0.1203018622731813 and parameters: {'epsilon_decay': 0.9987522081172125, 'epsilon_min': 0.04953473455023018, 'replay_memory_size': 3037, 'batch_size': 337, 'gamma': 0.9884223757231229, 'hindsight_weight': 0.4809821673178795, 'learning_rate': 0.032903499369741844}. Best is trial 9 with value: 0.16033764289733793.
  logger.warn(


  0%|          | 0/10 [00:00<?, ?it/s]

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  logger.warn(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
[I 2023-03-30 15:59:58,064] Trial 13 finished with value: -0.20141158095038098 and parameters: {'epsilon_decay': 0.838425365649841, 'epsilon_min': 0.03646544535676401, 'replay_memory_size': 66671, 'batch_size': 987, 'gamma': 0.448572903030835, 'hindsight_weight': 0.7534273717737276, 'learning_rate': 0.02895374877421502}. Best is trial 9 with value: 0.16033764289733793.
  logger.warn(


  0%|          | 0/10 [00:00<?, ?it/s]

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  logger.warn(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
[I 2023-03-30 16:00:19,232] Trial 14 finished with value: -0.1385643227229216 and parameters: {'epsilon_decay': 0.8954428288073493, 'epsilon_min': 0.040209984555283365, 'replay_memory_size': 13539, 'batch_size': 465, 'gamma': 0.42385379598472683, 'hindsight_weight': 0.8150313269152613, 'learning_rate': 0.04512661983502013}. Best is trial 9 with value: 0.16033764289733793.
  logger.warn(


  0%|          | 0/10 [00:00<?, ?it/s]

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  logger.warn(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
[I 2023-03-30 16:00:31,235] Trial 15 finished with value: -0.19619482832250618 and parameters: {'epsilon_decay': 0.8432101218110025, 'epsilon_min': 0.03213684213484491, 'replay_memory_size': 69360, 'batch_size': 134, 'gamma': 0.5889779972315039, 'hindsight_weight': 0.8205842253600121, 'learning_rate': 0.02076743469355727}. Best is trial 9 with value: 0.16033764289733793.
  logger.warn(


  0%|          | 0/10 [00:00<?, ?it/s]

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  logger.warn(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
[I 2023-03-30 16:00:49,201] Trial 16 finished with value: -0.16581560202154189 and parameters: {'epsilon_decay': 0.8977015987626579, 'epsilon_min': 0.04130699655738475, 'replay_memory_size': 45137, 'batch_size': 347, 'gamma': 0.3308881502668606, 'hindsight_weight': 0.5926828114274086, 'learning_rate': 0.0007742590473674136}. Best is trial 9 with value: 0.16033764289733793.
  logger.warn(


  0%|          | 0/10 [00:00<?, ?it/s]

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  logger.warn(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
[I 2023-03-30 16:01:19,054] Trial 17 finished with value: -0.16581560202154189 and parameters: {'epsilon_decay': 0.857071769086149, 'epsilon_min': 0.049211390946377526, 'replay_memory_size': 72017, 'batch_size': 950, 'gamma': 0.6748368575787786, 'hindsight_weight': 0.8931054187312318, 'learning_rate': 0.045952135304489085}. Best is trial 9 with value: 0.16033764289733793.
  logger.warn(


  0%|          | 0/10 [00:00<?, ?it/s]

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  logger.warn(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
[I 2023-03-30 16:01:42,084] Trial 18 finished with value: 0.18790649137843535 and parameters: {'epsilon_decay': 0.8068205659295349, 'epsilon_min': 0.01953663177930925, 'replay_memory_size': 92232, 'batch_size': 573, 'gamma': 0.4982415704249186, 'hindsight_weight': 0.7018667290498203, 'learning_rate': 0.01901092795674175}. Best is trial 18 with value: 0.18790649137843535.
  logger.warn(


  0%|          | 0/10 [00:00<?, ?it/s]

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  logger.warn(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
[I 2023-03-30 16:02:00,538] Trial 19 finished with value: -0.2327582918848867 and parameters: {'epsilon_decay': 0.80640155102455, 'epsilon_min': 0.018945579643788225, 'replay_memory_size': 97808, 'batch_size': 381, 'gamma': 0.3334169898316256, 'hindsight_weight': 0.6600892116893087, 'learning_rate': 0.03446413577121366}. Best is trial 18 with value: 0.18790649137843535.
  logger.warn(


  0%|          | 0/10 [00:00<?, ?it/s]

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  logger.warn(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
[I 2023-03-30 16:02:23,940] Trial 20 finished with value: 0.013086306952231449 and parameters: {'epsilon_decay': 0.8172266284330376, 'epsilon_min': 0.03279877731126485, 'replay_memory_size': 85606, 'batch_size': 602, 'gamma': 0.47534980926011494, 'hindsight_weight': 0.5168541819202045, 'learning_rate': 0.0198566934004089}. Best is trial 18 with value: 0.18790649137843535.
  logger.warn(


  0%|          | 0/10 [00:00<?, ?it/s]

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  logger.warn(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
[I 2023-03-30 16:02:52,248] Trial 21 finished with value: 0.012583012526862452 and parameters: {'epsilon_decay': 0.8252657303908717, 'epsilon_min': 0.028560947451348552, 'replay_memory_size': 60838, 'batch_size': 859, 'gamma': 0.34767052641212903, 'hindsight_weight': 0.5516837565370684, 'learning_rate': 0.05795671401576814}. Best is trial 18 with value: 0.18790649137843535.
  logger.warn(


  0%|          | 0/10 [00:00<?, ?it/s]

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  logger.warn(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
[I 2023-03-30 16:03:07,917] Trial 22 finished with value: -0.2300546095539179 and parameters: {'epsilon_decay': 0.8831247613463832, 'epsilon_min': 0.0444958011016338, 'replay_memory_size': 83507, 'batch_size': 275, 'gamma': 0.5112696476315991, 'hindsight_weight': 0.7174857230406869, 'learning_rate': 0.035509446342090384}. Best is trial 18 with value: 0.18790649137843535.
  logger.warn(


  0%|          | 0/10 [00:00<?, ?it/s]

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  logger.warn(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
[I 2023-03-30 16:03:29,857] Trial 23 finished with value: 0.06522104096131678 and parameters: {'epsilon_decay': 0.8430769907995984, 'epsilon_min': 0.020523622335119576, 'replay_memory_size': 25996, 'batch_size': 503, 'gamma': 0.639089912182746, 'hindsight_weight': 0.8553576135063855, 'learning_rate': 0.012474073795011809}. Best is trial 18 with value: 0.18790649137843535.
  logger.warn(


  0%|          | 0/10 [00:00<?, ?it/s]

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  logger.warn(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
[I 2023-03-30 16:03:52,504] Trial 24 finished with value: 0.024250983378795332 and parameters: {'epsilon_decay': 0.8553826971993395, 'epsilon_min': 0.013870444215635814, 'replay_memory_size': 44969, 'batch_size': 569, 'gamma': 0.5444619516295314, 'hindsight_weight': 0.6950964784520716, 'learning_rate': 0.024407880164809728}. Best is trial 18 with value: 0.18790649137843535.
  logger.warn(


  0%|          | 0/10 [00:00<?, ?it/s]

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  logger.warn(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
[I 2023-03-30 16:04:07,647] Trial 25 finished with value: 0.031224949405297565 and parameters: {'epsilon_decay': 0.8137247060126416, 'epsilon_min': 0.02220598742374336, 'replay_memory_size': 77524, 'batch_size': 205, 'gamma': 0.7353209175694333, 'hindsight_weight': 0.7815883279900111, 'learning_rate': 0.013559757764972081}. Best is trial 18 with value: 0.18790649137843535.
  logger.warn(


  0%|          | 0/10 [00:00<?, ?it/s]

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  logger.warn(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
[I 2023-03-30 16:04:26,599] Trial 26 finished with value: -0.16581560202154189 and parameters: {'epsilon_decay': 0.8020007118686968, 'epsilon_min': 0.026030506720105276, 'replay_memory_size': 91587, 'batch_size': 407, 'gamma': 0.5977858251802828, 'hindsight_weight': 0.8925236865294742, 'learning_rate': 0.024941031551939816}. Best is trial 18 with value: 0.18790649137843535.
  logger.warn(


  0%|          | 0/10 [00:00<?, ?it/s]

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  logger.warn(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
[I 2023-03-30 16:04:51,394] Trial 27 finished with value: 0.1635535962347374 and parameters: {'epsilon_decay': 0.8274130913859361, 'epsilon_min': 0.01446843739134506, 'replay_memory_size': 60293, 'batch_size': 647, 'gamma': 0.5257871461520296, 'hindsight_weight': 0.6907495420163698, 'learning_rate': 0.0016809276230695565}. Best is trial 18 with value: 0.18790649137843535.
  logger.warn(


  0%|          | 0/10 [00:00<?, ?it/s]

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  logger.warn(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
[I 2023-03-30 16:05:15,996] Trial 28 finished with value: -0.16221776579640507 and parameters: {'epsilon_decay': 0.8309042436199002, 'epsilon_min': 0.015992365795767047, 'replay_memory_size': 66310, 'batch_size': 655, 'gamma': 0.40380523554422343, 'hindsight_weight': 0.675083289909701, 'learning_rate': 0.013883800396470707}. Best is trial 18 with value: 0.18790649137843535.
  logger.warn(


  0%|          | 0/10 [00:00<?, ?it/s]

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  logger.warn(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
[I 2023-03-30 16:05:42,197] Trial 29 finished with value: -0.08876836192052223 and parameters: {'epsilon_decay': 0.8234400486758072, 'epsilon_min': 0.012150934649451378, 'replay_memory_size': 59837, 'batch_size': 729, 'gamma': 0.4591730415389279, 'hindsight_weight': 0.5986272043700445, 'learning_rate': 0.0006917684022129612}. Best is trial 18 with value: 0.18790649137843535.
  logger.warn(


  0%|          | 0/10 [00:00<?, ?it/s]

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  logger.warn(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
[I 2023-03-30 16:06:09,985] Trial 30 finished with value: 0.05658218483846833 and parameters: {'epsilon_decay': 0.8155704491925386, 'epsilon_min': 0.008913358012029075, 'replay_memory_size': 73050, 'batch_size': 863, 'gamma': 0.5319462470083651, 'hindsight_weight': 0.7183079881381332, 'learning_rate': 0.03677099048138098}. Best is trial 18 with value: 0.18790649137843535.
  logger.warn(


  0%|          | 0/10 [00:00<?, ?it/s]

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  logger.warn(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  self.sharpe = self.total_return / self.volatility
  self.calmar = self.total_return / self.max_drawdown
[W 2023-03-30 16:06:33,070] Trial 31 failed with parameters: {'epsilon_decay': 0.8840229514964283, 'epsilon_min': 0.020322740770109624, 'replay_memory_size': 90702, 'batch_size': 611, 'gamma': 0.7113608251975923, 'hindsight_weight': 0.9594168720115375, 'learning_rate': 0.020630181029633175} because of the following error: The value nan is not acceptable..
[W 2023-03-30 16:06:33,071] Trial 31 failed with value nan.
  logger.warn(


  0%|          | 0/10 [00:00<?, ?it/s]

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  logger.warn(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
[I 2023-03-30 16:06:56,551] Trial 32 finished with value: -0.22741162182293012 and parameters: {'epsilon_decay': 0.8863215768886923, 'epsilon_min': 0.020816277053880457, 'replay_memory_size': 89819, 'batch_size': 615, 'gamma': 0.5088317921091057, 'hindsight_weight': 0.9873313794754999, 'learning_rate': 0.019965059971910126}. Best is trial 18 with value: 0.18790649137843535.
  logger.warn(


  0%|          | 0/10 [00:00<?, ?it/s]

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  logger.warn(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
[I 2023-03-30 16:07:16,297] Trial 33 finished with value: -0.17945289486237387 and parameters: {'epsilon_decay': 0.8014081780918293, 'epsilon_min': 0.006709841855503447, 'replay_memory_size': 96393, 'batch_size': 426, 'gamma': 0.26173658892303175, 'hindsight_weight': 0.7291854727275262, 'learning_rate': 0.02863091272509183}. Best is trial 18 with value: 0.18790649137843535.
  logger.warn(


  0%|          | 0/10 [00:00<?, ?it/s]

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  logger.warn(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
[I 2023-03-30 16:07:38,006] Trial 34 finished with value: -0.16406173726717957 and parameters: {'epsilon_decay': 0.8526810814738085, 'epsilon_min': 0.022633254849882132, 'replay_memory_size': 60381, 'batch_size': 524, 'gamma': 0.6548711457294344, 'hindsight_weight': 0.7648187170329468, 'learning_rate': 0.007694826022798057}. Best is trial 18 with value: 0.18790649137843535.
  logger.warn(


  0%|          | 0/10 [00:00<?, ?it/s]

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  logger.warn(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
[I 2023-03-30 16:08:01,904] Trial 35 finished with value: 0.19550776612230739 and parameters: {'epsilon_decay': 0.8338976430774938, 'epsilon_min': 0.01595261801485989, 'replay_memory_size': 47741, 'batch_size': 570, 'gamma': 0.5751228752339957, 'hindsight_weight': 0.8112782941038195, 'learning_rate': 0.008316617400271873}. Best is trial 35 with value: 0.19550776612230739.
  logger.warn(


  0%|          | 0/10 [00:00<?, ?it/s]

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  logger.warn(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
[I 2023-03-30 16:08:27,917] Trial 36 finished with value: 0.23164869799734486 and parameters: {'epsilon_decay': 0.8290239394640275, 'epsilon_min': 0.017168378276654614, 'replay_memory_size': 44575, 'batch_size': 680, 'gamma': 0.5310553533344482, 'hindsight_weight': 0.6678958845727966, 'learning_rate': 0.010507420421229695}. Best is trial 36 with value: 0.23164869799734486.
  logger.warn(


  0%|          | 0/10 [00:00<?, ?it/s]

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  logger.warn(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
[I 2023-03-30 16:08:54,269] Trial 37 finished with value: 0.1295429060477006 and parameters: {'epsilon_decay': 0.8339086215360573, 'epsilon_min': 0.01674215012541518, 'replay_memory_size': 46084, 'batch_size': 688, 'gamma': 0.5303083032928086, 'hindsight_weight': 0.6747865955331959, 'learning_rate': 0.00825376971559035}. Best is trial 36 with value: 0.23164869799734486.
  logger.warn(


  0%|          | 0/10 [00:00<?, ?it/s]

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  logger.warn(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
[I 2023-03-30 16:09:18,413] Trial 38 finished with value: -0.16581807485545996 and parameters: {'epsilon_decay': 0.8267362913707026, 'epsilon_min': 0.01833887271487601, 'replay_memory_size': 40013, 'batch_size': 595, 'gamma': 0.4845927342579793, 'hindsight_weight': 0.7984375856887874, 'learning_rate': 0.009143297212228294}. Best is trial 36 with value: 0.23164869799734486.
  logger.warn(


  0%|          | 0/10 [00:00<?, ?it/s]

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  logger.warn(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
[I 2023-03-30 16:09:49,833] Trial 39 finished with value: 0.1858597422505205 and parameters: {'epsilon_decay': 0.8189766696238752, 'epsilon_min': 0.014134213842015787, 'replay_memory_size': 50242, 'batch_size': 801, 'gamma': 0.5612200781566726, 'hindsight_weight': 0.6553532570506662, 'learning_rate': 0.0023471857730168657}. Best is trial 36 with value: 0.23164869799734486.
  logger.warn(


  0%|          | 0/10 [00:00<?, ?it/s]

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  logger.warn(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
[I 2023-03-30 16:10:19,627] Trial 40 finished with value: -0.22505802080410497 and parameters: {'epsilon_decay': 0.839628644021797, 'epsilon_min': 0.011386495258369636, 'replay_memory_size': 32918, 'batch_size': 795, 'gamma': 0.42065294567092126, 'hindsight_weight': 0.6293505750543278, 'learning_rate': 0.01669133301577621}. Best is trial 36 with value: 0.23164869799734486.
  logger.warn(


  0%|          | 0/10 [00:00<?, ?it/s]

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  logger.warn(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
[I 2023-03-30 16:10:48,371] Trial 41 finished with value: 0.09030834821783663 and parameters: {'epsilon_decay': 0.8128427921253085, 'epsilon_min': 0.02436608423148636, 'replay_memory_size': 50780, 'batch_size': 835, 'gamma': 0.5618849341926612, 'hindsight_weight': 0.5790522444137756, 'learning_rate': 0.010008675263557166}. Best is trial 36 with value: 0.23164869799734486.
  logger.warn(


  0%|          | 0/10 [00:00<?, ?it/s]

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  logger.warn(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
[I 2023-03-30 16:11:14,950] Trial 42 finished with value: 0.10949378010973064 and parameters: {'epsilon_decay': 0.8121125637853979, 'epsilon_min': 0.017352975813441213, 'replay_memory_size': 55055, 'batch_size': 701, 'gamma': 0.6977972987163737, 'hindsight_weight': 0.8622882101852071, 'learning_rate': 0.004970596923968138}. Best is trial 36 with value: 0.23164869799734486.
  logger.warn(


  0%|          | 0/10 [00:00<?, ?it/s]

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  logger.warn(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
[I 2023-03-30 16:11:44,910] Trial 43 finished with value: -0.19984035911110692 and parameters: {'epsilon_decay': 0.8621286187255565, 'epsilon_min': 0.019527209498528927, 'replay_memory_size': 40168, 'batch_size': 895, 'gamma': 0.6153602911677051, 'hindsight_weight': 0.762756153498928, 'learning_rate': 0.017573517899649435}. Best is trial 36 with value: 0.23164869799734486.
  logger.warn(


  0%|          | 0/10 [00:00<?, ?it/s]

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  logger.warn(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
[I 2023-03-30 16:12:11,678] Trial 44 finished with value: 0.16153716169604107 and parameters: {'epsilon_decay': 0.8241887502865313, 'epsilon_min': 0.015282804368444945, 'replay_memory_size': 48566, 'batch_size': 652, 'gamma': 0.5787977861847506, 'hindsight_weight': 0.6469946129149862, 'learning_rate': 0.002174845865216944}. Best is trial 36 with value: 0.23164869799734486.
  logger.warn(


  0%|          | 0/10 [00:00<?, ?it/s]

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  logger.warn(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
[I 2023-03-30 16:12:39,280] Trial 45 finished with value: 0.16270508142236853 and parameters: {'epsilon_decay': 0.8443456040032535, 'epsilon_min': 0.013363181341632207, 'replay_memory_size': 33225, 'batch_size': 745, 'gamma': 0.5448618520047906, 'hindsight_weight': 0.7119196205630178, 'learning_rate': 0.005241268882770175}. Best is trial 36 with value: 0.23164869799734486.
  logger.warn(


  0%|          | 0/10 [00:00<?, ?it/s]

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  logger.warn(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
[I 2023-03-30 16:13:02,613] Trial 46 finished with value: 0.24577021240293784 and parameters: {'epsilon_decay': 0.8306813359067265, 'epsilon_min': 0.01444127970152943, 'replay_memory_size': 19639, 'batch_size': 571, 'gamma': 0.631254580290752, 'hindsight_weight': 0.6764271893051359, 'learning_rate': 0.011191048800987747}. Best is trial 46 with value: 0.24577021240293784.
  logger.warn(


  0%|          | 0/10 [00:00<?, ?it/s]

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  logger.warn(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
[I 2023-03-30 16:13:24,364] Trial 47 finished with value: -0.19749601029748254 and parameters: {'epsilon_decay': 0.8335089376122773, 'epsilon_min': 0.016901903717369667, 'replay_memory_size': 9293, 'batch_size': 463, 'gamma': 0.6139551069013232, 'hindsight_weight': 0.6339592596893786, 'learning_rate': 0.011376951859164994}. Best is trial 46 with value: 0.24577021240293784.
  logger.warn(


  0%|          | 0/10 [00:00<?, ?it/s]

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  logger.warn(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
[I 2023-03-30 16:13:48,492] Trial 48 finished with value: 0.06791298126501229 and parameters: {'epsilon_decay': 0.848441763759144, 'epsilon_min': 0.010740738245734354, 'replay_memory_size': 18749, 'batch_size': 555, 'gamma': 0.6858652809321945, 'hindsight_weight': 0.5444423724972708, 'learning_rate': 0.01536915724745845}. Best is trial 46 with value: 0.24577021240293784.
  logger.warn(


  0%|          | 0/10 [00:00<?, ?it/s]

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  logger.warn(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
[I 2023-03-30 16:14:16,080] Trial 49 finished with value: -0.16496882059858564 and parameters: {'epsilon_decay': 0.8000500208654341, 'epsilon_min': 0.015160561233279364, 'replay_memory_size': 26385, 'batch_size': 778, 'gamma': 0.6307618907244371, 'hindsight_weight': 0.7389773615484151, 'learning_rate': 0.007339691366692687}. Best is trial 46 with value: 0.24577021240293784.


Best trial: {'epsilon_decay': 0.8306813359067265, 'epsilon_min': 0.01444127970152943, 'replay_memory_size': 19639, 'batch_size': 571, 'gamma': 0.631254580290752, 'hindsight_weight': 0.6764271893051359, 'learning_rate': 0.011191048800987747}


In [9]:
# Print the best set of hyperparameters
print(f'Best trial: {study.best_trial.params}')

Best trial: {'epsilon_decay': 0.8306813359067265, 'epsilon_min': 0.01444127970152943, 'replay_memory_size': 19639, 'batch_size': 571, 'gamma': 0.631254580290752, 'hindsight_weight': 0.6764271893051359, 'learning_rate': 0.011191048800987747}


In [10]:
best_trial = {
    'epsilon_decay': 0.8306813359067265, 
    'epsilon_min': 0.01444127970152943, 
    'replay_memory_size': 19639, 
    'batch_size': 571, 
    'gamma': 0.631254580290752, 
    'hindsight_weight': 0.6764271893051359, 
    'learning_rate': 0.011191048800987747
}