In [None]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import pandas as pd
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)
import feature_manager as fma
import rl.env_simple_crypto_trade as env
import importlib
import rl.models as rla
import config as cf
import numpy as np
import os
import time

In [None]:
# Reload feature_manager so edits to the module are picked up without a kernel restart.
importlib.reload(fma)

# A single symbol is loaded at three timeframes: trading (4h), macro (1d), super (1w).
symbol = "BTCUSDT"

fm = fma.FeatureManager(target_col="trade_signal")
fm.import_trading_data(symbol=symbol, trade_timeframe="4h")
fm.import_macro_data(symbol=symbol, macro_timeframe="1d")
fm.import_super_data(symbol=symbol, super_timeframe="1w")

# Indicator lists come from config; the candlestick indicators are appended to
# the list of every timeframe.
trading_feature_names = (
    cf.BITCOIN_EXTERNAL_INDICATORS
    + cf.TRADING_TA_INDICATORS
    + cf.CANDLESTICK_INDICATORS
)
macro_feature_names = cf.MACRO_TA_INDICATORS + cf.CANDLESTICK_INDICATORS
super_feature_names = cf.SUPER_TA_INDICATORS + cf.CANDLESTICK_INDICATORS

# Build the feature set with a lag of 1 for each of the three timeframes.
fm.build_features(
    lags=1,
    macro_lags=1,
    super_lags=1,
    features=trading_feature_names,
    macro_features=macro_feature_names,
    super_features=super_feature_names,
)

In [None]:
# Reload the environment module so edits to it are picked up without a kernel restart.
importlib.reload(env)

env_kwargs = cf.TRADE_ENV_PARAMETER
# State vector size: one slot per feature column plus 6 extra entries.
# NOTE(review): the meaning of the 6 extra entries is defined by the environment — confirm.
state_space = 6 + len(fm.cols)

# Positional split of the feature frame: the last 2000 rows are the trade set,
# the 2000 rows before them are the train set.
train = fm.df.iloc[-4000:-2000]
trade = fm.df.iloc[-2000:]


def _build_env(frame):
    """Return a CryptoTradingEnv over `frame` using the shared 4h configuration."""
    return env.CryptoTradingEnv(
        trade_timeframe="4h",
        df=frame,
        state_space=state_space,
        indicators=fm.cols,
        **env_kwargs)


# Three environments that differ only in the slice of data they cover.
train_env = _build_env(train)
trade_env = _build_env(trade)
full_env = _build_env(fm.df)

In [None]:
# Reload rl.models so edits to the module are picked up without a kernel restart.
importlib.reload(rla)
# Create the trading agent around full_env, the environment built on the whole fm.df frame.
agent = rla.DRLTradeAgent(env=full_env)

In [None]:
test_env = full_env
test_laps = 3

profit_list = []
std_list = []

%store profit_list 
%store std_list

start_timestep = 950_000
end_timestep = 1_000_000 + 1
frequency = 5_000
catalog_name= "gamma_999e-3_rwd_advance"

for step in range(start_timestep,end_timestep,frequency):
    print(f"Timestep {step}:")

    checkpoint_path = f"{catalog_name}/rl_model_{step}_steps"
    path = cf.CHECKPOINT_CALLBACK["save_dir"]+checkpoint_path+".zip"

    while(not os.path.exists(path)):
        time.sleep(10)
        continue    
    
    model = agent.load_model_from_checkpoint("ppo",checkpoint_path)

    profits = []
    costs = []
    for i in range(0,test_laps):
        agent.predict(model,test_env,render=False)
        result = agent.make_result_data()
        agent.describe_trades()
        agent.plot_multiple(dpi=240)
        profit = result.iloc[-1,:]["cumsum_trade_profit"]
        profits.append(profit)
 
    mean_profit = np.array(profits).mean()
    std_profit = np.array(profits).std()
    profit_list.append(mean_profit)
    std_list.append(std_profit)
    print(f"\t Average profit: {mean_profit}, sdt: {std_profit}")
    %store profit_list 
    %store std_list