In [1]:
import tensortrade.env.default as default
from tensortrade.oms.exchanges import Exchange
from tensortrade.feed import Stream
from tensortrade.oms.services.execution.simulated import execute_order
from tensortrade.feed.core import Stream, DataFeed, NameSpace
from tensortrade.oms.wallets import Wallet, Portfolio
from tensortrade.oms.instruments import Instrument
from tensortrade.agents import DQNAgent
from tensortrade.env.default.actions import BSH, ManagedRiskOrders
from tensortrade.env.default.rewards import RiskAdjustedReturns
from tensortrade.env.default.renderers import PlotlyTradingChart
from all_indicators import get_all_stock_indicators
from sklearn.preprocessing import MinMaxScaler
import multiprocessing
import pandas as pd
import numpy as np

pd.options.mode.use_inf_as_na = True
RMS_data = get_all_stock_indicators('RMS.PA')
AIR_data = get_all_stock_indicators('AIR.PA')



dropped Index(['trend_psar_up', 'trend_psar_down', 'alpha26', 'alpha45', 'alpha85'], dtype='object')
dropped Index(['trend_psar_up', 'trend_psar_down', 'alpha26', 'alpha94'], dtype='object')


In [2]:
from sklearn.model_selection import train_test_split

def split_data(data):
    X = data.copy().drop(columns=['Log Returns','Returns'])
    y = data.copy()['Returns']

    X_train_test, X_valid, y_train_test, y_valid = \
        train_test_split(X, y, train_size=0.8, test_size=0.2, shuffle=False)

    X_train, X_test, y_train, y_test = \
        train_test_split(X_train_test, y_train_test, train_size=0.7, test_size=0.3, shuffle=False)

    return X_train, X_test, X_valid, y_train, y_test, y_valid

RMS_X_train, RMS_X_test, RMS_X_valid, RMS_y_train, RMS_y_test, RMS_y_valid = split_data(RMS_data)
AIR_X_train, AIR_X_test, AIR_X_valid, AIR_y_train, AIR_y_test, AIR_y_valid = split_data(AIR_data)

In [3]:

RMS_X_train.to_pickle('RMS_X_train.pickle')
RMS_X_test.to_pickle('RMS_X_test.pickle')
RMS_X_valid.to_pickle('RMS_X_valid.pickle')
AIR_X_train.to_pickle('AIR_X_train.pickle')
AIR_X_test.to_pickle('AIR_X_test.pickle')
AIR_X_valid.to_pickle('AIR_X_valid.pickle')

In [4]:
import os
import pickle
cwd = os.getcwd()
RMS_train_pickle = os.path.join(cwd, 'RMS_train.pickle')
RMS_test_pickle = os.path.join(cwd, 'RMS_test.pickle')
RMS_valid_pickle = os.path.join(cwd, 'RMS_valid.pickle')

AIR_train_pickle = os.path.join(cwd, 'AIR_train.pickle')
AIR_test_pickle = os.path.join(cwd, 'AIR_test.pickle')
AIR_valid_pickle = os.path.join(cwd, 'AIR_valid.pickle')

RMS_X_train.to_pickle(RMS_train_pickle)
RMS_X_test.to_pickle(RMS_test_pickle)
RMS_X_valid.to_pickle(RMS_valid_pickle)
AIR_X_train.to_pickle(AIR_train_pickle)
AIR_X_test.to_pickle(AIR_test_pickle)
AIR_X_valid.to_pickle(AIR_valid_pickle)

In [5]:
def separate_render_features(stock_data, ticker):
    #ohlc prices for render
    stock_ohlc = stock_data[['Open','High','Low','Close','Volume']].copy()
    stock_ohlc['date'] = stock_ohlc.index + pd.DateOffset(hours=2)
    stock_ohlc = stock_ohlc.add_prefix(f"{ticker}:")
    
    #all features to train from + minmax scaling
    scaler = MinMaxScaler()
    stock_features = stock_data.copy()
    stock_features = stock_features.add_prefix(f"{ticker}:")
    scaler.fit(stock_features)
    stock_features_scaled = pd.DataFrame(scaler.fit_transform(stock_features), columns = stock_features.columns, index = stock_features.index)
    return stock_ohlc, stock_features_scaled

def get_price_stream(stock_renders, tickers): #list
    stock_price_stream_list = []
    for i in range(len(stock_renders)):
        stock_price_stream_list.append(Stream.source(list(stock_renders[i][f"{tickers[i]}:Close"]), dtype="float").rename(f"EUR-{tickers[i].replace('.PA', '')}"))
    return stock_price_stream_list

In [6]:
def create_env(config):
    RMS_data = pd.read_pickle(config['RMS_filename'])
    AIR_data = pd.read_pickle(config['AIR_filename'])
    rms_render, rms_scaled_features = separate_render_features(RMS_data, 'RMS.PA')
    air_render, air_scaled_features = separate_render_features(AIR_data, 'AIR.PA')
    price_streams = get_price_stream([rms_render, air_render], ["RMS.PA", "AIR.PA"])
    euronext = Exchange('euronext', service=execute_order)(price_streams[0], price_streams[1])

    all_scaled_features = pd.concat([rms_scaled_features, air_scaled_features], axis=1)
    with NameSpace("euronext"):
        features = [Stream.source(list(all_scaled_features[feature]), dtype="float").rename(feature) for feature in all_scaled_features.columns]
    all_features_feed = DataFeed(features)
    all_features_feed.compile()

    EUR = Instrument('EUR', 4, 'Euro')
    RMS = Instrument('RMS', 4, 'Hermes')
    AIR = Instrument('AIR', 4, 'Airbus')

    cash = Wallet(euronext, 10000 * EUR)
    asset_rms = Wallet(euronext, 0 * RMS)
    asset_air = Wallet(euronext, 0 * AIR)

    portfolio = Portfolio(EUR, [cash, asset_rms, asset_air])

    reward_scheme = RiskAdjustedReturns(return_algorithm='sortino', window_size=100)
    action_scheme = ManagedRiskOrders()

    chart_renderer = PlotlyTradingChart(
        display=True,  # show the chart on screen (default)
        height=800,  # affects both displayed and saved file height. None for 100% height.
        save_format="html",  # save the chart to an HTML file
        auto_open_html=True,  # open the saved HTML chart in a new browser tab
    )
    
    renderer_feed = DataFeed([
        Stream.source(list(rms_render["RMS.PA:date"])).rename("date"),
        Stream.source(list(rms_render["RMS.PA:Open"]), dtype="float").rename("open"),
        Stream.source(list(rms_render["RMS.PA:High"]), dtype="float").rename("high"),
        Stream.source(list(rms_render["RMS.PA:Low"]), dtype="float").rename("low"),
        Stream.source(list(rms_render["RMS.PA:Close"]), dtype="float").rename("close"), 
        Stream.source(list(rms_render["RMS.PA:Volume"]), dtype="float").rename("volume"),
        Stream.source(list(air_render["AIR.PA:date"])).rename("date"),
        Stream.source(list(air_render["AIR.PA:Open"]), dtype="float").rename("open"),
        Stream.source(list(air_render["AIR.PA:High"]), dtype="float").rename("high"),
        Stream.source(list(air_render["AIR.PA:Low"]), dtype="float").rename("low"),
        Stream.source(list(air_render["AIR.PA:Close"]), dtype="float").rename("close"), 
        Stream.source(list(air_render["AIR.PA:Volume"]), dtype="float").rename("volume"),
    ])

    env = default.create(
        portfolio=portfolio,
        action_scheme=action_scheme,
        reward_scheme=reward_scheme,
        feed=all_features_feed,
        renderer_feed=renderer_feed,
        renderer=chart_renderer,
        window_size=config["window_size"],
        max_allowed_loss=config["max_allowed_loss"]
    )
    
    return env


In [7]:
import ray
from ray import tune
from ray.tune.registry import register_env

ray.init(num_cpus=6,
         include_dashboard=True,
         ignore_reinit_error=True)

register_env("TradingEnv", create_env)

2022-12-13 00:11:00,048	INFO worker.py:1519 -- Started a local Ray instance. View the dashboard at [1m[32m127.0.0.1:8265 [39m[22m


In [10]:
env1 = create_env({
    "RMS_filename": "RMS_X_test.pickle", 
    "AIR_filename": "AIR_X_test.pickle",  
    "max_allowed_loss": 0.10, 
    "window_size": 30 
})

In [8]:
from ray.tune.schedulers import ASHAScheduler
from ray.tune.suggest import ConcurrencyLimiter
from ray.tune.suggest.optuna import OptunaSearch

LR = tune.loguniform(1e-5, 1e-2)
GAMMA = tune.uniform(0.8, 0.9999)
LAMBDA = tune.uniform(0.1, 0.8)
VF_LOSS_COEFF = tune.uniform(0.01, 1.0)
ENTROPY_COEFF = tune.uniform(1e-8, 1e-1)

checkpoint_metric = 'episode_reward_mean'

# Specific configuration keys that will be used during training
env_config_training = {
    "RMS_filename": RMS_train_pickle, 
    "AIR_filename": AIR_train_pickle,  
    "max_allowed_loss": 0.10, 
    "window_size": 30 
}
# Specific configuration keys that will be used during evaluation (only the overridden ones)
env_config_evaluation = {
    "RMS_filename": RMS_test_pickle, 
    "AIR_filename": AIR_test_pickle,  
    "max_allowed_loss": 1.0, 
    "window_size": 30 
}

search_alg = OptunaSearch()
search_alg = ConcurrencyLimiter(search_alg, max_concurrent=4)

scheduler = ASHAScheduler()

import time
start = time.time()
analysis = tune.run(
    "PPO",
    stop={
        "episode_reward_mean": 5000,
        "training_iteration": 35,
    },
    verbose=3,
    config={
        "env": "TradingEnv",
        "env_config": env_config_training,
        "log_level": "ERROR",
        #"log_level": "INFO",
        #"log_level": "DEBUG",
        "framework": "torch",
        "ignore_worker_failures": True,
        "num_workers": 2,
        "num_gpus": 0,
        "clip_rewards": True,
        "lr": LR,
        "lr_schedule": [
            [0, 1e-1],
            [int(1e2), 1e-2],
            [int(1e3), 1e-3],
            [int(1e4), 1e-4],
            [int(1e5), 1e-5],
            [int(1e6), 1e-6],
            [int(1e7), 1e-7]
        ],
        "model": {
            "use_lstm": True,
            "lstm_cell_size": 512
        },
        "gamma": GAMMA,
        "observation_filter": "MeanStdFilter",
        "lambda": LAMBDA,
        "vf_share_layers": True,
        "vf_loss_coeff": VF_LOSS_COEFF,
        "entropy_coeff": ENTROPY_COEFF,
        "evaluation_interval": 1,  # Run evaluation on every iteration
        "evaluation_config": {
            "env_config": env_config_evaluation,  # The dictionary we built before (only the overriding keys to use in evaluation)
            "explore": False,  # We don't want to explore during evaluation. All actions have to be repeatable.
        },
    },
    metric=checkpoint_metric,
    mode="max",
    search_alg=search_alg,
    scheduler=scheduler,
    num_samples=10,  # Samples per hyperparameter combination. More averages out randomness. Less runs faster
    #resources_per_trial={"cpu": 2},
    keep_checkpoints_num=10,  # Keep the last 10 checkpoints
    checkpoint_freq=1,  # Do a checkpoint on each iteration (slower but you can pick more finely the checkpoint to use later)
    resume="AUTO",
)
taken = time.time() - start
print(f"Time taken: {taken:.2f} seconds.")
print(f"Best config: {analysis.best_config}")

  from ray.tune.suggest import ConcurrencyLimiter
  from ray.tune.suggest.optuna import OptunaSearch
[32m[I 2022-12-13 00:11:04,600][0m A new study created in memory with name: optuna[0m
2022-12-13 00:11:04,625	INFO trial_runner.py:596 -- No local checkpoint was found. Ray Tune will now start a new experiment.


0,1
Current time:,2022-12-13 01:10:53
Running for:,00:59:48.41
Memory:,11.7/16.0 GiB

Trial name,# failures,error file
PPO_TradingEnv_4d0ed2cc,1,"C:\Users\remik\ray_results\PPO\PPO_TradingEnv_4d0ed2cc_2_clip_rewards=True,entropy_coeff=0.0489,env=TradingEnv,AIR_filename=D_Trading_ACP_ProjetFinance_AIR_train_2022-12-13_00-11-25\error.txt"

Trial name,status,loc,entropy_coeff,gamma,lambda,lr,vf_loss_coeff,iter,total time (s),ts,reward,num_recreated_worker s,episode_reward_max,episode_reward_min
PPO_TradingEnv_408e835f,RUNNING,127.0.0.1:26548,0.0854735,0.986189,0.103988,0.000109374,0.543246,2.0,2409.33,8000.0,-3050450.0,0.0,7.44193,-11631600.0
PPO_TradingEnv_59d1c2bb,RUNNING,127.0.0.1:19488,0.0369116,0.94655,0.104093,3.41156e-05,0.701,2.0,2398.46,8000.0,-2027760.0,0.0,6.13659,-8274080.0
PPO_TradingEnv_5be00eaa,PENDING,,0.0490548,0.832678,0.767172,0.000261807,0.0576734,,,,,,,
PPO_TradingEnv_4d0ed2cc,ERROR,,0.0488513,0.844165,0.52905,1.15522e-05,0.0973891,,,,,,,


[2m[36m(pid=26548)[0m   "class": algorithms.Blowfish,
[2m[36m(PPO pid=26548)[0m 2022-12-13 00:11:13,823	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(PPO pid=26548)[0m 2022-12-13 00:11:13,824	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=13512)[0m   "class": algorithms.Blowfish,
[2m[36m(pid=27692)[0m   "class": algorithms.Blowfish,
[2m[36m(PPO pid=26548)[0m 2022-12-13 00:11:25,586	INFO trainable.py:164 -- Trainable.setup took 11.764 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=25552)[0m   "class": algorithms.Blowfish,
[2m[36m(PPO pid=25552)[0m 2022-12-13 00:11:35,469	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequ

Trial name,agent_timesteps_total,counters,custom_metrics,date,done,episode_len_mean,episode_media,episode_reward_max,episode_reward_mean,episode_reward_min,episodes_this_iter,episodes_total,evaluation,experiment_id,hostname,info,iterations_since_restore,node_ip,num_agent_steps_sampled,num_agent_steps_trained,num_env_steps_sampled,num_env_steps_sampled_this_iter,num_env_steps_trained,num_env_steps_trained_this_iter,num_faulty_episodes,num_healthy_workers,num_recreated_workers,num_steps_trained_this_iter,perf,pid,policy_reward_max,policy_reward_mean,policy_reward_min,sampler_perf,sampler_results,time_since_restore,time_this_iter_s,time_total_s,timers,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
PPO_TradingEnv_408e835f,8000.0,"{'num_env_steps_sampled': 8000, 'num_env_steps_trained': 8000, 'num_agent_steps_sampled': 8000, 'num_agent_steps_trained': 8000}",{},2022-12-13_00-51-35,False,425.48,{},7.441926928082996,-3050453.180261421,-11631630.718981689,7.0,25.0,"{'episode_reward_max': 2874961.647152625, 'episode_reward_min': 2874961.280107353, 'episode_reward_mean': 2874961.5146105425, 'episode_len_mean': 247.0, 'episode_media': {}, 'episodes_this_iter': 10, 'policy_reward_min': {}, 'policy_reward_max': {}, 'policy_reward_mean': {}, 'custom_metrics': {}, 'hist_stats': {'episode_reward': [2874961.280107353, 2874961.280107353, 2874961.5118537107, 2874961.5125085483, 2874961.5125085483, 2874961.647152625, 2874961.567875443, 2874961.6200812827, 2874961.6200812827, 2874961.5938292732], 'episode_lengths': [247, 247, 247, 247, 247, 247, 247, 247, 247, 247]}, 'sampler_perf': {'mean_raw_obs_processing_ms': 0.8131210872987532, 'mean_inference_ms': 6.398076930043187, 'mean_action_processing_ms': 0.08385067067149196, 'mean_env_wait_ms': 6.087499020963853, 'mean_env_render_ms': 0.0}, 'num_faulty_episodes': 0, 'num_agent_steps_sampled_this_iter': 2470, 'num_env_steps_sampled_this_iter': 2470, 'timesteps_this_iter': 2470, 'num_recreated_workers': 0, 'num_healthy_workers': 0}",716f36943a7d4b2aa3551fcad7b01be7,DESKTOP-N80GTVU,"{'learner': {'default_policy': {'learner_stats': {'allreduce_latency': 0.0, 'grad_gnorm': 1.5568364645204236, 'cur_kl_coeff': 0.3, 'cur_lr': 0.0007, 'total_loss': -0.2565026796973681, 'policy_loss': -0.2046326133846906, 'vf_loss': 0.6790757979696957, 'vf_explained_var': 0.7116904679165091, 'kl': 0.0598310494737234, 'entropy': 5.132875284071892, 'entropy_coeff': 0.08547349511979327}, 'model': {}, 'custom_metrics': {}, 'num_agent_steps_trained': 127.74193548387096}}, 'num_env_steps_sampled': 8000, 'num_env_steps_trained': 8000, 'num_agent_steps_sampled': 8000, 'num_agent_steps_trained': 8000}",2.0,127.0.0.1,8000.0,8000.0,8000.0,4000.0,8000.0,4000.0,0.0,2.0,0.0,4000.0,"{'cpu_util_percent': 30.325523255813955, 'ram_util_percent': 76.58046511627907}",26548.0,{},{},{},"{'mean_raw_obs_processing_ms': 0.9200927464102733, 'mean_inference_ms': 6.473511124939162, 'mean_action_processing_ms': 0.07447941028169294, 'mean_env_wait_ms': 6.586953694261012, 'mean_env_render_ms': 0.0}","{'episode_reward_max': 7.441926928082996, 'episode_reward_min': -11631630.718981687, 'episode_reward_mean': -3050453.180261421, 'episode_len_mean': 425.48, 'episode_media': {}, 'episodes_this_iter': 7, 'policy_reward_min': {}, 'policy_reward_max': {}, 'policy_reward_mean': {}, 'custom_metrics': {}, 'hist_stats': {'episode_reward': [6.1711061061498445, -0.6620572318665305, 0.5326610905857091, -4651728.184518445, 2.0991519395673808, -11631630.718981687, 2.3475598954021004, 2.0856845751698883, -2068929.2055733777, 2.634524256837848, -6979901.224279572, -4651728.7742153285, -10343014.430424992, 2.8458967073470665, -9308061.949862866, 3.4893686250969562, 0.5228201197781077, -4651729.79185633, -2068930.5141288668, 2.150881913245517, 2.4218768852991235, -8274082.260583306, -11631629.005057614, 7.441926928082996, 2.4715450385075752], 'episode_lengths': [325, 325, 575, 325, 321, 343, 325, 325, 575, 325, 335, 321, 575, 325, 575, 325, 319, 323, 325, 575, 575, 575, 575, 575, 575]}, 'sampler_perf': {'mean_raw_obs_processing_ms': 0.9200927464102733, 'mean_inference_ms': 6.473511124939162, 'mean_action_processing_ms': 0.07447941028169294, 'mean_env_wait_ms': 6.586953694261012, 'mean_env_render_ms': 0.0}, 'num_faulty_episodes': 0}",2409.327080488205,1214.0182073116302,2409.327080488205,"{'training_iteration_time_ms': 388473.993, 'load_time_ms': 322.794, 'load_throughput': 12391.806, 'learn_time_ms': 1131260.902, 'learn_throughput': 3.536, 'synch_weights_time_ms': 14.513}",1670889095.0,0.0,8000.0,2.0,408e835f,11.771716833114624
PPO_TradingEnv_4d0ed2cc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4d0ed2cc,
PPO_TradingEnv_59d1c2bb,8000.0,"{'num_env_steps_sampled': 8000, 'num_env_steps_trained': 8000, 'num_agent_steps_sampled': 8000, 'num_agent_steps_trained': 8000}",{},2022-12-13_00-52-11,False,413.7894736842105,{},6.136594977255372,-2027756.4462592253,-8274082.3232594915,10.0,19.0,"{'episode_reward_max': 5.599820783492932, 'episode_reward_min': 5.121473929234047, 'episode_reward_mean': 5.33214835661636, 'episode_len_mean': 247.0, 'episode_media': {}, 'episodes_this_iter': 10, 'policy_reward_min': {}, 'policy_reward_max': {}, 'policy_reward_mean': {}, 'custom_metrics': {}, 'hist_stats': {'episode_reward': [5.121473929234047, 5.174163527305629, 5.206237891395798, 5.236207885788572, 5.236207885788572, 5.28375116673052, 5.28375116673052, 5.599820783492932, 5.589934664848499, 5.589934664848499], 'episode_lengths': [247, 247, 247, 247, 247, 247, 247, 247, 247, 247]}, 'sampler_perf': {'mean_raw_obs_processing_ms': 0.8026340957885569, 'mean_inference_ms': 6.296351621373417, 'mean_action_processing_ms': 0.07798386546576754, 'mean_env_wait_ms': 6.1441930386007275, 'mean_env_render_ms': 0.0}, 'num_faulty_episodes': 0, 'num_agent_steps_sampled_this_iter': 2470, 'num_env_steps_sampled_this_iter': 2470, 'timesteps_this_iter': 2470, 'num_recreated_workers': 0, 'num_healthy_workers': 0}",22fbe648900b4eb9bac8bc146dbac667,DESKTOP-N80GTVU,"{'learner': {'default_policy': {'learner_stats': {'allreduce_latency': 0.0, 'grad_gnorm': 1.5872482278975106, 'cur_kl_coeff': 0.20000000000000004, 'cur_lr': 0.0007, 'total_loss': -0.014148166479783193, 'policy_loss': -0.22188980504645095, 'vf_loss': 0.5499479574171365, 'vf_explained_var': 0.72735713111457, 'kl': 0.05880349262702649, 'entropy': 5.134777959187826, 'entropy_coeff': 0.03691156725999125}, 'model': {}, 'custom_metrics': {}, 'num_agent_steps_trained': 127.7741935483871}}, 'num_env_steps_sampled': 8000, 'num_env_steps_trained': 8000, 'num_agent_steps_sampled': 8000, 'num_agent_steps_trained': 8000}",2.0,127.0.0.1,8000.0,8000.0,8000.0,4000.0,8000.0,4000.0,0.0,2.0,0.0,4000.0,"{'cpu_util_percent': 30.200172314761634, 'ram_util_percent': 76.26145893164849}",19488.0,{},{},{},"{'mean_raw_obs_processing_ms': 0.9064599668067209, 'mean_inference_ms': 6.633228678474638, 'mean_action_processing_ms': 0.08757720347380242, 'mean_env_wait_ms': 6.786386828845582, 'mean_env_render_ms': 0.0}","{'episode_reward_max': 6.136594977255372, 'episode_reward_min': -8274082.3232594915, 'episode_reward_mean': -2027756.4462592253, 'episode_len_mean': 413.7894736842105, 'episode_media': {}, 'episodes_this_iter': 10, 'policy_reward_min': {}, 'policy_reward_max': {}, 'policy_reward_mean': {}, 'custom_metrics': {}, 'hist_stats': {'episode_reward': [-6979895.965585376, 3.5442274316651483, 1.0736803464997873, 1.1671275555686482, 6.136594977255372, 4.1123708223731175, -2068931.006749893, -8274082.3232594915, 1.6941433745359873, -8274081.847377121, 3.5268939962330825, -4137040.5332635352, 1.820926095364526, 1.335278818444932, -2328165.328150551, 3.0014048560899034, 3.400024818913275, -2328165.6410944136, -4137040.646117991], 'episode_lengths': [321, 575, 419, 321, 575, 323, 419, 335, 323, 430, 343, 335, 321, 321, 575, 360, 575, 575, 416]}, 'sampler_perf': {'mean_raw_obs_processing_ms': 0.9064599668067209, 'mean_inference_ms': 6.633228678474638, 'mean_action_processing_ms': 0.08757720347380242, 'mean_env_wait_ms': 6.786386828845582, 'mean_env_render_ms': 0.0}, 'num_faulty_episodes': 0}",2398.4574801921844,1228.5524322986605,2398.4574801921844,"{'training_iteration_time_ms': 1166208.698, 'load_time_ms': 326.298, 'load_throughput': 12258.751, 'learn_time_ms': 1136337.847, 'learn_throughput': 3.52, 'synch_weights_time_ms': 12.007}",1670889131.0,0.0,8000.0,2.0,59d1c2bb,12.56243062019348


2022-12-13 00:11:47,051	ERROR ray_trial_executor.py:111 -- An exception occurred when trying to stop the Ray actor:Traceback (most recent call last):
  File "C:\Users\remik\AppData\Roaming\Python\Python39\site-packages\ray\tune\execution\ray_trial_executor.py", line 102, in _post_stop_cleanup
    ray.get(future, timeout=timeout)
  File "C:\Users\remik\AppData\Roaming\Python\Python39\site-packages\ray\_private\client_mode_hook.py", line 105, in wrapper
    return func(*args, **kwargs)
  File "C:\Users\remik\AppData\Roaming\Python\Python39\site-packages\ray\_private\worker.py", line 2291, in get
    raise value
ray.exceptions.RayActorError: The actor died because of an error raised in its creation task, [36mray::PPO.__init__()[39m (pid=25552, ip=127.0.0.1, repr=PPO)
  File "C:\Users\remik\AppData\Roaming\Python\Python39\site-packages\ray\rllib\evaluation\worker_set.py", line 139, in __init__
    self.add_workers(
  File "C:\Users\remik\AppData\Roaming\Python\Python39\site-packages\ray\

Time taken: 3589.83 seconds.
Best config: {'env': 'TradingEnv', 'env_config': {'RMS_filename': 'D:\\Trading\\ACP\\ProjetFinance\\RMS_train.pickle', 'AIR_filename': 'D:\\Trading\\ACP\\ProjetFinance\\AIR_train.pickle', 'max_allowed_loss': 0.1, 'window_size': 30}, 'log_level': 'ERROR', 'framework': 'torch', 'ignore_worker_failures': True, 'num_workers': 2, 'num_gpus': 0, 'clip_rewards': True, 'lr': 3.411557990509073e-05, 'lr_schedule': [[0, 0.1], [100, 0.01], [1000, 0.001], [10000, 0.0001], [100000, 1e-05], [1000000, 1e-06], [10000000, 1e-07]], 'model': {'use_lstm': True, 'lstm_cell_size': 512}, 'gamma': 0.9465503483672058, 'observation_filter': 'MeanStdFilter', 'lambda': 0.10409330402664757, 'vf_share_layers': True, 'vf_loss_coeff': 0.7010002326205678, 'entropy_coeff': 0.03691156725999126, 'evaluation_interval': 1, 'evaluation_config': {'env_config': {'RMS_filename': 'D:\\Trading\\ACP\\ProjetFinance\\RMS_test.pickle', 'AIR_filename': 'D:\\Trading\\ACP\\ProjetFinance\\AIR_test.pickle', 'm

In [77]:
window_size = 30
#1028
n_steps = 101
def get_optimal_batch_size(window_size=30, n_steps=101, batch_factor=4, stride=1):
    """
    lookback = 30          # Days of past data (also named window_size).
    batch_factor = 4       # batch_size = (sample_size - lookback - stride) // batch_factor
    stride = 1             # Time series shift into the future.
    """
    lookback = window_size
    sample_size = n_steps
    batch_size = ((sample_size - lookback - stride) // batch_factor)
    return batch_size

batch_size = get_optimal_batch_size(window_size=window_size, n_steps=n_steps, batch_factor=4)
batch_size

17

In [78]:
 #(1029 days - 4 trading years)
memory_capacity = n_steps * 10
n_bins = 5             # Number of bins to partition the dataset evenly in order to evaluate class sparsity.
seed = 1337
commission = 0.001
save_path = 'agents/'

agent = DQNAgent(env1)

agent.train(batch_size=batch_size, 
            n_steps=n_steps, 
            n_episodes=1, 
            memory_capacity=memory_capacity, 
            save_path=save_path)


====      AGENT ID: 26285cf4-fd3c-4a4f-9848-124eda32e3b7      ====


  0%|          | 0/1 [00:00<?, ?it/s]

FigureWidget({
    'data': [{'name': 'Price',
              'showlegend': False,
              'type': 'candle…

FigureWidget({
    'data': [{'close': array([ 97.72879791,  98.58650208,  98.58650208, 100.34134674,  99.57237…

FigureWidget({
    'data': [{'close': array([ 97.72879791,  98.58650208,  98.58650208, 100.34134674,  99.57237…

FigureWidget({
    'data': [{'close': array([ 97.72879791,  98.58650208,  98.58650208, 100.34134674,  99.57237…

FigureWidget({
    'data': [{'close': array([ 97.72879791,  98.58650208,  98.58650208, 100.34134674,  99.57237…

FigureWidget({
    'data': [{'close': array([ 97.72879791,  98.58650208,  98.58650208, 100.34134674,  99.57237…

FigureWidget({
    'data': [{'close': array([ 97.72879791,  98.58650208,  98.58650208, 100.34134674,  99.57237…

FigureWidget({
    'data': [{'close': array([ 97.72879791,  98.58650208,  98.58650208, 100.34134674,  99.57237…

FigureWidget({
    'data': [{'close': array([ 97.72879791,  98.58650208,  98.58650208, 100.34134674,  99.57237…



100%|██████████| 1/1 [00:48<00:00, 49.00s/it]


-5895.084236606733

In [None]:
#https://www.tensortrade.org/en/latest/examples/setup_environment_tutorial.html
#https://github.com/tensortrade-org/tensortrade/blob/master/examples/train_and_evaluate.ipynb
https://levelup.gitconnected.com/portfolio-allocation-with-tensortrade-part-2-2-9ac30a6bcbfe
https://www.tensortrade.org/en/latest/agents/overview.html#stable-baselines
https://levelup.gitconnected.com/portfolio-allocation-with-tensortrade-part-2-2-9ac30a6bcbfe
    https://github.com/Tomas0413/tensortrade-experiments/blob/main/TensorTrade%20-%20Sinewave%20with%20SimpleProfit%20and%20ManagedRiskOrders.ipynb
    https://www.google.com/search?q=feature_engine&sourceid=chrome&ie=UTF-8
        https://github.com/tensortrade-org/tensortrade/blob/master/examples/train_and_evaluate.ipynb