In [None]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import pandas as pd
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)
import feature_manager as fma
import rl.env_simple_crypto_trade as env
import importlib
import rl.models as rla
import config as cf
import tr_utils

In [None]:
# Re-import feature_manager so edits to the module take effect without
# restarting the kernel.
importlib.reload(fma)

SYMBOL = "BTCUSDT"

fm = fma.FeatureManager(target_col="trade_signal")

# Load the same symbol at three timeframes: 4h (trading), 1d (macro), 1w (super).
fm.import_trading_data(symbol=SYMBOL, trade_timeframe="4h")
fm.import_macro_data(symbol=SYMBOL, macro_timeframe="1d")
fm.import_super_data(symbol=SYMBOL, super_timeframe="1w")

# Indicator selections come from config; the candlestick set is combined
# (via `+`) with each timeframe's own indicators.
candlestick = cf.CANDLESTICK_INDICATORS
feature_spec = {
    "lags": 1,
    "macro_lags": 1,
    "super_lags": 1,
    "features": cf.BITCOIN_EXTERNAL_INDICATORS + cf.TRADING_TA_INDICATORS + candlestick,
    "macro_features": cf.MACRO_TA_INDICATORS + candlestick,
    "super_features": cf.SUPER_TA_INDICATORS + candlestick,
}
fm.build_features(**feature_spec)

In [None]:
# Split by position from the end of fm.df: the last TRADE_ROWS rows form the
# `trade` window and the TRAIN_ROWS rows immediately before it form `train`.
TRADE_ROWS = 2000
TRAIN_ROWS = 2000

train = fm.df.iloc[-(TRAIN_ROWS + TRADE_ROWS):-TRADE_ROWS]
trade = fm.df.iloc[-TRADE_ROWS:]

In [None]:
# Reload the environment and config modules so local edits are picked up
# without restarting the kernel.
importlib.reload(env)
importlib.reload(cf)

env_kwargs = cf.TRADE_ENV_PARAMETER
# 6 fixed entries plus one per feature column.
# NOTE(review): what the 6 fixed entries represent is defined by the
# environment implementation — confirm against CryptoTradingEnv.
state_space = 6 + len(fm.cols)


def make_trading_env(df, trade_timeframe="4h"):
    """Build a CryptoTradingEnv over ``df`` using the notebook's shared settings.

    Keeps the three environments below consistent by passing the same
    ``state_space``, ``indicators`` and ``env_kwargs`` to each; only the
    data window differs.

    Args:
        df: Data window the environment should run on.
        trade_timeframe: Timeframe label forwarded to the environment.

    Returns:
        A new ``env.CryptoTradingEnv`` instance.

    Note:
        ``state_space``, ``env_kwargs`` and ``fm`` are read from the notebook
        namespace at call time, so re-run this cell after changing them.
    """
    return env.CryptoTradingEnv(
        trade_timeframe=trade_timeframe,
        df=df,
        state_space=state_space,
        indicators=fm.cols,
        **env_kwargs
    )


train_env = make_trading_env(train)
trade_env = make_trading_env(trade)
full_env = make_trading_env(fm.df)

In [None]:
# Reload rl.models so edits to the agent code take effect in this kernel.
importlib.reload(rla)
# NOTE(review): the agent is bound to full_env, which is built from all of
# fm.df and therefore also contains the rows used for the `trade` window —
# confirm this is intended rather than train_env.
agent = rla.DRLTradeAgent(env=full_env)

In [None]:
# Display the length of full_env.df as a quick sanity check of the data size.
len(full_env.df)

In [None]:
# Reload rl.models again so the latest module code is in place before the
# model is built.
importlib.reload(rla)

# Hyperparameters handed to the "ppo" model; the same mapping is used to
# derive the catalog name below.
KWARGS = dict(
    gamma=0.999,
    n_steps=8_000,
    ent_coef=0.01,
    learning_rate=0.00025,
    batch_size=800,
)

catalog_name = tr_utils.get_name_with_kwargs("advance_reward", KWARGS)

# The catalog name doubles as the tensorboard log target.
ppo_settings = dict(
    model_name="ppo",
    model_kwargs=KWARGS,
    seed=100,
    tensorboard_log=catalog_name,
)
model_ppo = agent.get_model(**ppo_settings)

In [None]:
# Choose which model to train; change the right-hand side to train another.
selected_model = model_ppo

# Options forwarded to agent.train_model alongside the selected model.
training_options = {
    "total_timesteps": 2_000_000,
    "checkpoint": True,
    "catalog_name": catalog_name,
    "save_frequency": 10_000,
    "progress_bar": True,
}
selected_model = agent.train_model(selected_model, **training_options)