In [2]:
import pandas as pd
import backtrader as bt
from pathlib import Path
import numpy as np
from config import config
import tensorflow as tf
from datetime import datetime
from utils import create_tr_bt_feed, progress_info
import os
from drl_agent import DRLAgent
from pathlib import  Path
from strategies import  DRLStrategy
from backtrader.sizers import PercentSizer
from statistics import mean

## Clear the TensorFlow session and set the random seed for reproducible results

In [3]:
# Start from a clean Keras/TensorFlow state and seed the random generators
# so that a fresh "Restart Kernel -> Run All" is repeatable.
tf.keras.backend.clear_session()
np.random.seed(config['seed'])
# Seeding NumPy does not seed TensorFlow's own global generator, so seed it
# explicitly as well (after clear_session, before any network is built).
# NOTE(review): if DRLAgent also draws from Python's `random` module, that
# generator needs seeding too - confirm against drl_agent.py.
tf.random.set_seed(config['seed'])

## Read tickers

In [4]:
# Load the ticker symbols from the column labelled '0' of the filtered
# NASDAQ index list and keep them as a plain Python list.
tic_filename = Path('data') / 'nasdaq_index_list_filtered.csv'
tickers = pd.read_csv(tic_filename).loc[:, '0'].tolist()

## Create DRL agent

In [5]:
# Build the DRL agent with its default constructor arguments. The training
# cell below hands this single instance to the strategy of every episode,
# reads `agent.losses` / `agent.epsilon` from it, and saves the weights of
# `agent.online_network`.
agent = DRLAgent()

## Train the model and save weights

In [6]:
# Train the agent. Each episode runs one backtrader backtest of DRLStrategy on
# a randomly selected ticker, appends that episode's per-step results to
# results/training_results.csv, and saves the online network's weights.
np.random.seed(config['seed'])
max_episodes = config['max_episodes']
trading_days = config['trading_days']
tickers_len = len(tickers)
results_fname = Path('results/training_results.csv')
Path('results').mkdir(exist_ok=True)
start = datetime.now()
yf_filename = os.path.abspath('data/ohlcv/{}.csv')  # Template for filename for a ticker symbol
weights_fname = os.path.abspath('results/model.weights.h5')  # Loop-invariant, so resolved once

# Accumulate data since the last progress report (every 100 episodes) to track performance
run_diffs_100 = []
run_actions_100 = []
run_losses_100 = []

for episode in range(1, max_episodes + 1):
    ticker = tickers[np.random.randint(0, tickers_len)]  # Select a random ticker from the filtered list
    cerebro = bt.Cerebro()
    cerebro.addstrategy(
        DRLStrategy,
        agent=agent,
    )

    # `first_close` is returned alongside the feed but is not used in this cell.
    feed, first_close = create_tr_bt_feed(ticker, filename=yf_filename)
    cerebro.adddata(feed)

    # Prevent the rejection of orders, which could cause instability in the DNN learning process.
    cerebro.broker.setcash(10000)
    # Sizing does not influence the rewards, and thus the learning process.
    cerebro.addsizer(PercentSizer, percents=10.0)

    cerebro.broker.setcommission(commission=.0)
    cerebro.broker.set_slippage_perc(config['slippage_pct'] / 100)
    cerebro.broker.set_shortcash(False)  # Do not allow for short trades

    thestrat = cerebro.run()[0]

    # Extend in place instead of rebuilding each list by concatenation.
    # NOTE(review): `agent.losses` is read after every episode; if the agent
    # never resets it between episodes, earlier losses are counted again here
    # - confirm against drl_agent.py.
    run_diffs_100.extend(thestrat.diffs)
    run_actions_100.extend(thestrat.actions)
    run_losses_100.extend(agent.losses)
    run_diffs = pd.Series(thestrat.diffs)
    run_market_returns = pd.Series(thestrat.market_returns)
    run_strategy_returns = pd.Series(thestrat.strategy_returns)
    run_actions = pd.Series(thestrat.actions)
    run_losses = pd.Series(agent.losses)

    # Check for the file once so that the write mode and the header flag
    # cannot disagree: the first write creates the file with a header, every
    # later write appends without one.
    is_new_file = not results_fname.exists()
    pd.DataFrame({
        'diffs': run_diffs,
        'market_returns': run_market_returns,
        'strategy_returns': run_strategy_returns,
        'actions': run_actions,
        # The mean loss is repeated on every row that has an action.
        'losses': pd.Series([run_losses.mean()] * run_actions.count())
    }).to_csv(results_fname, mode='x' if is_new_file else 'a', header=is_new_file)

    # Print status of the training on the first, every 100-th and the final episode.
    # (The range ends at `max_episodes`, so the last episode is compared against
    # that value; `max_episodes + 1` is never reached.)
    if episode == 1 or episode % 100 == 0 or episode == max_episodes:
        n_diffs = run_diffs.count()
        # Share of this episode's diffs that are positive; 0.0 when there are none.
        positive_diff_pct = run_diffs[run_diffs > 0].count() / n_diffs * 100 if n_diffs else 0.0
        progress_info(
            episode, max_episodes, start,
            mean(run_diffs_100) if run_diffs_100 else 0.0,
            mean(map(float, run_actions_100)) if run_actions_100 else 0.0,
            mean(run_losses_100) if run_losses_100 else 0.0,
            agent.epsilon, episode, positive_diff_pct,
            thestrat.buy_count, thestrat.sell_count)
        run_diffs_100 = []
        run_actions_100 = []
        run_losses_100 = []

    # Lower epsilon once the replay buffer is large enough to allow experience replay.
    if episode * trading_days >= config['batch_size']:
        agent.epsilon_decay_step()

    # Save weights after every episode. If the training converges early, it can be
    # stopped and the weights restored from this file.
    agent.online_network.save_weights(weights_fname)
    


Episode: 1    | Diff (MA100): 0.02% | Losses (MA100): 0.000000 | Epsilon: 1.00 | Market beats: 15.48% | Actions (MA100): 4.90 | Buys: 75        | Sells: 75       | Progress: 0.10% | Est. time left: 117.43 sec     | ETA: 2024-09-12 14:20:59      
Episode: 100  | Diff (MA100): -0.04% | Losses (MA100): 0.036068 | Epsilon: 0.67 | Market beats: 13.49% | Actions (MA100): 5.22 | Buys: 69        | Sells: 69       | Progress: 10.00% | Est. time left: 11525.68 sec   | ETA: 2024-09-12 17:52:28      
Episode: 200  | Diff (MA100): -0.04% | Losses (MA100): 0.036086 | Epsilon: 0.28 | Market beats: 13.10% | Actions (MA100): 6.14 | Buys: 59        | Sells: 59       | Progress: 20.00% | Est. time left: 11598.70 sec   | ETA: 2024-09-12 18:20:40      
Episode: 300  | Diff (MA100): -0.01% | Losses (MA100): 0.035448 | Epsilon: 0.01 | Market beats: 12.30% | Actions (MA100): 7.15 | Buys: 63        | Sells: 63       | Progress: 30.00% | Est. time left: 11017.81 sec   | ETA: 2024-09-12 18:41:21      
Episode: 4