#### Imports

In [1]:
import os
import pandas as pd
import pandas_ta as ta
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.logger import configure
from stable_baselines3 import PPO

#### Local imports

In [2]:
from add_indicators import add_indicators, normalize_features
from trading_env import CustomStocksWithIndicatorsEnv

from evaluation import EvalProfitCallback, ProfitToTensorboardCallback, make_eval_env

#### DF Setup

In [3]:
# Load the 1-minute BTC candles, attach indicators, then switch to capitalised
# OHLCV column names with "Date" as the index.
# NOTE(review): parse_dates names 'timestamp' while the index is built from a
# 'date' column — presumably add_indicators provides it; verify.
ohlcv_names = {
    "date": "Date",
    "open": "Open",
    "high": "High",
    "low": "Low",
    "close": "Close",
    "volume": "Volume",
}
df = (
    add_indicators(pd.read_csv("data/btc_1m.csv", parse_dates=['timestamp']))
    .rename(columns=ohlcv_names)
    .set_index("Date")
)

# Feature columns handed to the environments; the order is kept as-is.
indicators = [
    'ema_9', 'ema_21', 'macd', 'rsi_7',
    'ema_9_slope', 'ema_21_slope',
    'gap', 'high_percent', 'low_percent',
    'atr_14', 'boll', 'boll_ub', 'boll_lb', 'vwap',
    'stoch_k', 'stoch_d', 'adx',
    'obv', 'cmf', 'zscore_close_20', 'rvol', 'cci',
]
# NOTE(review): normalization runs on the full frame before the split; if
# normalize_features fits global statistics, eval data leaks into the training
# features — confirm against its implementation.
df = normalize_features(df, indicators)

# Chronological 90/10 split. The eval frame starts 60 rows before the split point,
# so it overlaps the tail of the training frame by 60 rows.
train_fraction = 0.9
warmup_rows = 60
split_index = int(len(df) * train_fraction)
df_train = df.iloc[:split_index].copy()
df_eval = df.iloc[split_index - warmup_rows:].copy()

In [4]:
# Row counts of the train and eval frames.
df_train.shape[0], df_eval.shape[0]

(1201899, 133605)

#### Env setup

In [5]:
# Training environment over the train split (window of 60 rows, max_episode_steps = 2**10).
train_env = CustomStocksWithIndicatorsEnv(
    df=df_train,
    window_size=60,
    frame_bound=(60, len(df_train) - 1),
    max_episode_steps=2 ** 10,
    features=indicators,
)
# Fees are overridden on the instance after construction, same value for both sides.
train_env.trade_fee_ask_percent = 0.0004
train_env.trade_fee_bid_percent = 0.0004

# Wrap last, so the fee overrides above are applied to the underlying environment.
env = Monitor(train_env)

#### Model

In [6]:
# Root directory for TensorBoard logs; also handed to PPO as tensorboard_log.
log_dir = "./ppo_btc_logs/"

# Fixed seed so that a Restart & Run All reproduces the same training run
# (policy initialisation and action sampling are otherwise random).
SEED = 42

# PPO agent with an MLP policy acting on the training environment.
model = PPO(
    "MlpPolicy",
    env,
    n_steps=1024,
    batch_size=128,
    learning_rate=1e-5,
    gamma=0.9995,
    gae_lambda=0.99,
    ent_coef=0.05,
    vf_coef=0.1,
    max_grad_norm=0.9,
    clip_range=0.2,
    tensorboard_log=log_dir,
    verbose=1,
    normalize_advantage=True,
    seed=SEED,
)

Using cpu device
Wrapping the env in a DummyVecEnv.


#### Callbacks

In [7]:
# Evaluation environment built from the eval split with the same feature list
# that the training environment receives.
eval_env = make_eval_env(df_eval, features=indicators)

eval_callback = EvalProfitCallback(
    eval_env,
    eval_freq=100_000,
    n_eval_episodes=1,
)

#### Logging

In [8]:
# Give every execution of this cell its own log directory: <root>/run_<n>/.
#
# The root is read from the model (the value passed as tensorboard_log=...) instead
# of from `log_dir`, because this cell rebinds `log_dir` to the run directory.
# Re-running the cell therefore starts a sibling run rather than nesting
# run_<n>/run_1/ inside the previous one.
log_root = model.tensorboard_log
os.makedirs(log_root, exist_ok=True)

# Next index is max(existing run index) + 1, not a directory count: a deleted run
# or an unrelated sub-folder can then never make the new name collide with (and
# silently write into) an existing run directory.
prefix = "run_"
run_indices = [
    int(entry[len(prefix):])
    for entry in os.listdir(log_root)
    if entry.startswith(prefix)
    and entry[len(prefix):].isdecimal()
    and os.path.isdir(os.path.join(log_root, entry))
]
run_number = max(run_indices, default=0) + 1

# The trailing "" keeps the path ending in a separator, as it did before.
log_dir = os.path.join(log_root, f"{prefix}{run_number}", "")
os.makedirs(log_dir, exist_ok=True)

# Route the model's training logs to stdout and to TensorBoard files in the run directory.
logger = configure(log_dir, ["stdout", "tensorboard"])
model.set_logger(logger)


Logging to ./ppo_btc_logs/run_13/


#### Learning

In [None]:
# Callbacks passed to training: the evaluation callback built earlier, followed by
# a ProfitToTensorboardCallback instance.
profit_logger = ProfitToTensorboardCallback(verbose=0)
callback = [eval_callback, profit_logger]

# Train for 5M timesteps with the progress bar disabled.
model.learn(
    total_timesteps=5_000_000,
    callback=callback,
    progress_bar=False,
)

---------------------------------
| rollout/           |          |
|    buy_and_hold    | 1.63     |
|    ep_len_mean     | 887      |
|    ep_rew_mean     | -87.1    |
|    profit_delta    | -0.934   |
|    total_profit    | 0.7      |
| time/              |          |
|    fps             | 351      |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 1024     |
---------------------------------
------------------------------------------
| rollout/                |              |
|    buy_and_hold         | 1.79         |
|    ep_len_mean          | 878          |
|    ep_rew_mean          | -83.3        |
|    profit_delta         | -1.09        |
|    total_profit         | 0.699        |
| time/                   |              |
|    fps                  | 319          |
|    iterations           | 2            |
|    time_elapsed         | 6            |
|    total_timesteps      | 2048         |
| train/                  |              |