In [None]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import pandas as pd
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)
import feature_manager as fma
import rl.env_simple_crypto_trade as env
import importlib
import rl.models as rla
import config as cf
import numpy as np

In [None]:
# Re-import feature_manager so edits to its source are picked up without
# restarting the kernel.
importlib.reload(fma)

# One symbol is loaded at three timeframes: 4h (trading), 1d (macro), 1w (super).
SYMBOL = "BTCUSDT"

fm = fma.FeatureManager(target_col="trade_signal")
fm.import_trading_data(symbol=SYMBOL, trade_timeframe="4h")
fm.import_macro_data(symbol=SYMBOL, macro_timeframe="1d")
fm.import_super_data(symbol=SYMBOL, super_timeframe="1w")

# Every timeframe uses a lag setting of 1 and shares the candlestick indicator
# set; the trading timeframe additionally gets the external Bitcoin indicators.
fm.build_features(
    lags=1,
    macro_lags=1,
    super_lags=1,
    features=(
        cf.BITCOIN_EXTERNAL_INDICATORS
        + cf.TRADING_TA_INDICATORS
        + cf.CANDLESTICK_INDICATORS
    ),
    macro_features=cf.MACRO_TA_INDICATORS + cf.CANDLESTICK_INDICATORS,
    super_features=cf.SUPER_TA_INDICATORS + cf.CANDLESTICK_INDICATORS,
)

In [None]:
# Window sizes, counted in rows from the end of fm.df. The two windows are
# adjacent and do not overlap: `trade` is the most recent TRADE_ROWS rows and
# `train` is the TRAIN_ROWS rows immediately before it.
TRADE_ROWS = 2000
TRAIN_ROWS = 2000

trade = fm.df.iloc[-TRADE_ROWS:]
train = fm.df.iloc[-(TRADE_ROWS + TRAIN_ROWS):-TRADE_ROWS]

In [None]:
# Re-import rl.env_simple_crypto_trade so edits to its source are picked up
# without restarting the kernel. Environments created before this call keep
# using the previously loaded class.
importlib.reload(env)

In [None]:
# Settings from the project config that are forwarded unchanged to every
# environment.
env_kwargs = cf.TRADE_ENV_PARAMETER

# Value passed as `state_space`: 6 plus one entry per feature column.
# NOTE(review): what the 6 fixed entries are is defined inside
# CryptoTradingEnv, which is not shown here — confirm before changing.
state_space = 6 + len(fm.cols)


def _make_trading_env(df):
    """Build a CryptoTradingEnv over `df` with the settings shared by all splits.

    Only the data frame differs between the train, trade and full
    environments; the timeframe, state_space, indicator columns and
    config keyword arguments are identical, so they are set in one place.

    Args:
        df: The slice of fm.df (or all of it) the environment should use.

    Returns:
        A new env.CryptoTradingEnv instance.
    """
    return env.CryptoTradingEnv(
        trade_timeframe="4h",
        df=df,
        state_space=state_space,
        indicators=fm.cols,
        **env_kwargs)


train_env = _make_trading_env(train)
trade_env = _make_trading_env(trade)
full_env = _make_trading_env(fm.df)

In [None]:
# Re-import rl.models so edits to its source are picked up, then create the
# agent wrapper around the environment built over the full data frame.
importlib.reload(rla)
agent = rla.DRLTradeAgent(env=full_env)

In [None]:
# NOTE(review): KWARGS is not passed to any call in this cell. Presumably these
# are the hyperparameters the checkpoint was trained with — confirm.
# catalog_name below repeats gamma and n_steps by hand, so keep the two in sync.
KWARGS = dict(
    gamma=0.999,
    n_steps=8000,
    ent_coef=0.01,
    learning_rate=0.00025,
    batch_size=800,
)

# Which saved checkpoint to load: directory name and training step count.
timestep = 1900000
catalog_name = "gamma_0.999_n_steps_8000"
checkpoint_path = f"{catalog_name}/rl_model_{timestep}_steps"

selected_model = agent.load_model_from_checkpoint("ppo", checkpoint_path)

# Run the loaded model over the full-data environment without rendering.
agent.predict(model=selected_model, environment=full_env, render=False)

In [None]:
# Plot the agent's rewards with log=False at dpi=920.
# NOTE(review): 920 dpi is very high for an inline figure — confirm it is intended.
agent.plot_reward(log=False,dpi=920)

In [69]:
# Display the list the environment stored in asset_value_change_memory. In the
# saved output it starts with a run of zeros followed by small signed fractions.
# NOTE(review): this prints the whole list; consider slicing or summarising it
# to keep the notebook output short.
full_env.asset_value_change_memory

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 -0.023299753188172734,
 -0.014411119175166806,
 0.06059610119898882,
 -0.007537688442211055,
 -0.015254852320675142,
 0.0002485168258745124,
 0.003023441308484473,
 0.008370759740403948,
 -0.055885347577638064,
 0.0007976191651107038,
 -0.015404462390269085,
 0.017506576233777982,
 0.005256375838926155,
 -0.010877054302485237,
 -0.01267329940467349,
 0.043180353540739874,
 -0.005275919768971075,
 -0.0012543844161182105,
 0.01798889865160208,
 -0.006996523337872079,
 -0.020607586654532134,
 -0.009544715981624869,
 0.004479305411671892,
 0.011189401901421422,
 -0.008982453661594919,
 0.022419087233060523,
 -0.005921120289205896,
 -0.005251511453379872,
 0.006540435065404274,
 0.04065040650406505,
 -0.004398896068928282,
 0.02983726205406788,
 0.00435186864463465,
 0.034066805026718765,
 -0.018238667207141977,
 -0.013601927127474203,
 0.006822942122488278,
 0.01259387665294628,
 0.0038960425715481883,
 0.033895089134453854,
 -0.

In [None]:
# Gather the agent's three *_memory sequences into one frame, one column per
# sequence, named after the attribute without its "_memory" suffix.
df = pd.DataFrame({
    name: getattr(agent, f"{name}_memory")
    for name in ("reward", "trade_profit", "unallocated_reward")
})

In [None]:
# Add a "<column>_dir" column holding the sign (-1, 0 or 1) of each series.
for source_col in ("reward", "trade_profit", "unallocated_reward"):
    df[f"{source_col}_dir"] = np.sign(df[source_col])

In [None]:
# Difference between the reward's sign and each other series' sign.
# 0 means the two signs agree; a non-zero value means they differ.
sign_comparisons = {
    "reward-profit": "trade_profit_dir",
    "reward-unallocated": "unallocated_reward_dir",
}
for diff_col, other_dir_col in sign_comparisons.items():
    df[diff_col] = df["reward_dir"] - df[other_dir_col]

In [None]:
# Count how often each sign difference between reward and trade_profit occurs;
# the count for 0 is the number of rows where the two signs agree.
df["reward-profit"].value_counts()

In [None]:
# Count how often each sign difference between reward and unallocated_reward
# occurs; the count for 0 is the number of rows where the two signs agree.
df["reward-unallocated"].value_counts()

In [None]:
# Share of the combined (reward + unallocated_reward) amount that is reward.
# Rows where the combined amount is zero produce inf or NaN rather than an error.
combined_reward = df["reward"] + df["unallocated_reward"]
df["allocate-reward-ratio"] = df["reward"].div(combined_reward)

In [None]:
import numpy as np

In [None]:
# Plot the natural log of the ratio. Ratios that are zero or negative have no
# finite log (they become -inf or NaN) and leave gaps in the line.
log_ratio = np.log(df["allocate-reward-ratio"])
log_ratio.to_frame().plot()

$R = r_1 \cdot r_2 \cdot r_3 \cdot r_4 \cdots$
