In [57]:
import pandas as pd
import matplotlib.pyplot as plt
import argparse

from finrl.meta.preprocessor.preprocessors import data_split
from finrl.config import INDICATORS
from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv
from finrl.agents.stablebaselines3.models import DRLAgent
from stable_baselines3 import PPO
from finrl.main import check_and_make_directories
from finrl.config import INDICATORS, TRAINED_MODEL_DIR
from finrl.config import INDICATORS
from finrl.plot import backtest_stats

# Contestants are welcome to split the data in their own way for model tuning
TRADE_START_DATE = '2018-01-01'
TRADE_END_DATE = '2022-01-01'
FILE_PATH = 'AAPL.csv'


# PPO configs
PPO_PARAMS = {
    "n_steps": 2048,
    "ent_coef": 0.01,
    "learning_rate": 0.0003,
    "batch_size": 128,
}


In [58]:
# We will use unseen, post-deadline data for testing
parser = argparse.ArgumentParser(description='Description of program')
parser.add_argument('--start_date', default=TRADE_START_DATE, help='Trade start date (default: {})'.format(TRADE_START_DATE))
parser.add_argument('--end_date', default=TRADE_END_DATE, help='Trade end date (default: {})'.format(TRADE_END_DATE))
parser.add_argument('--data_file', default=FILE_PATH, help='Trade data file')

args = parser.parse_known_args()[0]
TRADE_START_DATE = args.start_date
TRADE_END_DATE = args.end_date

processed_full = pd.read_csv(args.data_file)
trade = data_split(processed_full, TRADE_START_DATE, TRADE_END_DATE)

stock_dimension = len(trade.tic.unique())
state_space = 1 + 2*stock_dimension + len(INDICATORS)*stock_dimension
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

buy_cost_list = sell_cost_list = [0.001] * stock_dimension
num_stock_shares = [0] * stock_dimension

# please do not change initial_amount
env_kwargs = {
    "hmax": 100,
    "initial_amount": 1000000,
    "num_stock_shares": num_stock_shares,
    "buy_cost_pct": buy_cost_list,
    "sell_cost_pct": sell_cost_list,
    "state_space": state_space,
    "stock_dim": stock_dimension,
    "tech_indicator_list": INDICATORS,
    "action_space": stock_dimension,
    "reward_scaling": 1e-4
}


Stock Dimension: 1, State Space: 11


In [59]:
INDICATORS,len(INDICATORS)

(['macd',
  'boll_ub',
  'boll_lb',
  'rsi_30',
  'cci_30',
  'dx_30',
  'close_30_sma',
  'close_60_sma'],
 8)

In [60]:

check_and_make_directories([TRAINED_MODEL_DIR])

# Environment
e_trade_gym = StockTradingEnv(df = trade, **env_kwargs)


In [61]:

# PPO agent
agent = DRLAgent(env = e_trade_gym)
model_ppo = agent.get_model("ppo", model_kwargs = PPO_PARAMS)
trained_ppo = PPO.load(TRAINED_MODEL_DIR + '/trained_ppo')

# Backtesting
df_result_ppo, df_actions_ppo = DRLAgent.DRL_prediction(model=trained_ppo, environment = e_trade_gym)
print("==============Get Backtest Results===========")
perf_stats_all = backtest_stats(account_value=df_result_ppo)

"""Plotting"""
plt.rcParams["figure.figsize"] = (15,5)
plt.figure()

df_result_ppo.plot()
plt.savefig("plot.png")


{'n_steps': 2048, 'ent_coef': 0.01, 'learning_rate': 0.0003, 'batch_size': 128}
Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
hit end!
Annual return          0.382462
Cumulative returns     2.652686
Annual volatility      0.282966
Sharpe ratio           1.287771
Calmar ratio           1.217146
Stability              0.892507
Max drawdown          -0.314228
Omega ratio            1.301201
Sortino ratio          1.926447
Skew                        NaN
Kurtosis                    NaN
Tail ratio             1.108345
Daily value at risk   -0.034204
dtype: float64


In [19]:

df_result_ppo.to_csv("results.csv", index=False)
df_actions_ppo.to_csv("actions.csv", index=False)

In [24]:
df_result_ppo

Unnamed: 0,date,account_value
0,2018-01-02,1.000000e+06
1,2018-01-03,9.999980e+05
2,2018-01-04,1.000012e+06
3,2018-01-05,1.000068e+06
4,2018-01-08,1.000041e+06
...,...,...
1003,2021-12-27,3.709461e+06
1004,2021-12-28,3.688067e+06
1005,2021-12-29,3.689919e+06
1006,2021-12-30,3.665645e+06


In [62]:
import numpy as np

In [97]:
obs, info = e_trade_gym.reset()

act,_=trained_ppo.predict(obs,deterministic=True)
obs, reward, done, info, _=e_trade_gym.step(act)

In [98]:
print(obs,reward,done,info)

[998326.4908518982, 40.7694206237793, 41, 0.097252029031793, 41.79277009100359, 39.67199049859602, 54.84428224237349, 32.02815957909118, 8.972580382927342, 40.74543825785319, 39.801144727071126] -0.00019629025268368423 False False


In [83]:
trade.iloc[1:5][INDICATORS]

Unnamed: 0,macd,boll_ub,boll_lb,rsi_30,cci_30,dx_30,close_30_sma,close_60_sma
1,0.097252,41.79277,39.67199,54.844282,32.02816,8.97258,40.745438,39.801145
2,0.108549,41.801378,39.74363,55.752192,26.627213,8.97258,40.769505,39.871166
3,0.153362,41.845289,39.84151,57.907959,81.805295,17.562463,40.784181,39.948723
4,0.174451,41.854196,39.951671,56.960774,87.168362,18.59969,40.779368,40.021161


In [56]:
trade.iloc[0:5]

Unnamed: 0.1,Unnamed: 0,date,tic,open,high,low,close,volume,day,macd,boll_ub,boll_lb,rsi_30,cci_30,dx_30,close_30_sma,close_60_sma,vix,turbulence
0,79518,2018-01-02,AAPL,42.540001,43.075001,42.314999,40.77652,102223600.0,1.0,0.100701,41.790647,39.616592,54.885091,-26.206853,2.047009,40.729026,39.732157,9.77,87.994386
0,79519,2018-01-02,AMGN,175.350006,177.820007,174.419998,148.734131,2301100.0,1.0,0.324969,150.30249,146.077596,52.066901,43.21754,1.005582,146.796396,147.634223,9.77,87.994386
0,79520,2018-01-02,AXP,99.730003,99.730003,98.220001,90.798882,2746700.0,1.0,0.838976,91.720988,89.499608,61.220407,54.839514,8.987192,89.476157,87.715672,9.77,87.994386
0,79521,2018-01-02,BA,295.75,296.98999,295.399994,282.886383,2978900.0,1.0,6.566992,290.952447,263.013742,69.766712,77.243008,43.332282,269.661096,258.772742,9.77,87.994386
0,79522,2018-01-02,CAT,158.300003,159.389999,156.029999,135.989395,5108400.0,1.0,4.423927,140.547637,118.203754,74.427824,124.902232,60.495428,126.169499,121.005159,9.77,87.994386


In [1]:
!python ./interpretable.py

  PANDAS_VERSION = LooseVersion(pd.__version__)
Stock Dimension: 1, State Space: 11
{'n_steps': 2048, 'ent_coef': 0.01, 'learning_rate': 0.0003, 'batch_size': 128}
Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
(755, 11) (755, 1)
  return F.mse_loss(input, target, reduction=self.reduction)
train loss: 1.82e+05 | test loss: 1.82e+05 | reg: 5.87e+01 : 100%|██| 50/50 [00:49<00:00,  1.01it/s]


In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import argparse

from finrl.meta.preprocessor.preprocessors import data_split
from finrl.config import INDICATORS
from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv
from finrl.agents.stablebaselines3.models import DRLAgent
from stable_baselines3 import PPO
from finrl.main import check_and_make_directories
from finrl.config import INDICATORS, TRAINED_MODEL_DIR
from finrl.config import INDICATORS
from finrl.plot import backtest_stats
import torch
from kan import KAN
import numpy as np

# Contestants are welcome to split the data in their own way for model tuning
TRADE_START_DATE = '2010-01-01'
TRADE_END_DATE = '2020-06-29'
FILE_PATH = 'datasets/train_data.csv'
INDICATORS = [ "macd", "boll_ub", "boll_lb", "rsi_30", "cci_30", "dx_30", "close_30_sma"]
# "money","stock","close"
HIDDEN_WIDTH=(3,)
# PPO configs
PPO_PARAMS = {
    "n_steps": 2048,
    "ent_coef": 0.01,
    "learning_rate": 0.0003,
    "batch_size": 128,
}

class InterpretablePolicyExtractor:
    lib = ['x', 'x^2', 'x^3', 'x^4', 'exp', 'log', 'sqrt', 'tanh', 'sin', 'abs']

    def __init__(self, obs_dim, act_dim, hidden_widths,device):
        self.device=device
        print([obs_dim, *hidden_widths, act_dim])
        self.policy = KAN(width=[obs_dim, *hidden_widths, act_dim],device=self.device)
        self.loss_fn = torch.nn.MSELoss() 

    def train_from_dataset(self, dataset, steps: int = 20):
        return self.policy.train(dataset, opt="LBFGS", steps=steps, loss_fn=self.loss_fn)

    def forward(self, observation):
        observation = torch.from_numpy(observation).float()
        action = self.policy(observation.unsqueeze(0))
        return action.squeeze(0).detach().numpy()

if __name__ == '__main__':
    # We will use unseen, post-deadline data for testing
    parser = argparse.ArgumentParser(description='Description of program')
    parser.add_argument('--start_date', default=TRADE_START_DATE, help='Trade start date (default: {})'.format(TRADE_START_DATE))
    parser.add_argument('--end_date', default=TRADE_END_DATE, help='Trade end date (default: {})'.format(TRADE_END_DATE))
    parser.add_argument('--data_file', default=FILE_PATH, help='Trade data file')

    args = parser.parse_known_args()[0]#parser.parse_args()
    TRADE_START_DATE = args.start_date
    TRADE_END_DATE = args.end_date
    device ='cpu' #torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(device)
    processed_full = pd.read_csv(args.data_file)
    trade = data_split(processed_full, TRADE_START_DATE, TRADE_END_DATE)
    
    stock_dimension = len(trade.tic.unique())
    state_space = 1 + 2*stock_dimension + len(INDICATORS)*stock_dimension
    print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

    buy_cost_list = sell_cost_list = [0.001] * stock_dimension
    num_stock_shares = [0] * stock_dimension

    # please do not change initial_amount
    env_kwargs = {
        "hmax": 100,
        "initial_amount": 1000000,
        "num_stock_shares": num_stock_shares,
        "buy_cost_pct": buy_cost_list,
        "sell_cost_pct": sell_cost_list,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": INDICATORS,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4
    }

    check_and_make_directories([TRAINED_MODEL_DIR])

    # Environment
    e_trade_gym = StockTradingEnv(df = trade, **env_kwargs)
    
    # PPO agent
    agent = DRLAgent(env = e_trade_gym)
    model_ppo = agent.get_model("ppo", model_kwargs = PPO_PARAMS)
    trained_ppo = PPO.load(TRAINED_MODEL_DIR + '/trained_ppo')

    # Backtesting
    e_trade_gym.reset()
    obs, info = e_trade_gym.reset()
    Obs,Act = [], []
    while True:
        act, _ = trained_ppo.predict(obs,deterministic=True)
        obs, reward, done, info, _=e_trade_gym.step(act)
        Obs.append(np.array(obs))
        Act.append(np.array(act))
        if done:
            break

  PANDAS_VERSION = LooseVersion(pd.__version__)


cpu
Stock Dimension: 29, State Space: 262
{'n_steps': 2048, 'ent_coef': 0.01, 'learning_rate': 0.0003, 'batch_size': 128}
Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


Exception: Can't get attribute '_make_function' on <module 'cloudpickle.cloudpickle' from '/home/freddy645645/.local/lib/python3.10/site-packages/cloudpickle/cloudpickle.py'>
Exception: Can't get attribute '_make_function' on <module 'cloudpickle.cloudpickle' from '/home/freddy645645/.local/lib/python3.10/site-packages/cloudpickle/cloudpickle.py'>


In [2]:
Obs=np.array(Obs)
Act=np.array(Act)

In [3]:

Obs[0,0],Obs[0,1:state_space:stock_dimension],Act[0]

(990671.2625025443,
 array([6.47314882e+00, 7.00000000e+00, 2.50649758e-04, 6.48336225e+00,
        6.45176358e+00, 1.00000000e+02, 6.66666667e+01, 1.00000000e+02,
        6.46756291e+00]),
 array([ 0.07297935,  0.1171992 , -0.15061091,  0.0762595 , -0.03399291,
        -0.29635954, -0.06371976,  0.12015542, -0.12430508,  0.03927933,
        -0.01609682, -0.18481097,  0.24463241,  0.22103421,  0.19086793,
         0.01203714, -0.08494841,  0.18690364,  0.07116038,  0.19396436,
         0.12846857, -0.02107504,  0.26639253,  0.11042807,  0.07798262,
         0.43434235, -0.02202073,  0.06234317,  0.210268  ], dtype=float32))

In [12]:
stocks=trade.tic.unique()
stocks_Obs=[np.concatenate((Obs[:,0].reshape(-1,1),Obs[:,i+1:state_space:stock_dimension]),axis=1) for i in range(stock_dimension)]
stocks_Act=[Act[:,i].reshape(-1,1) for i in range(stock_dimension)]
    

In [13]:
input_names=["money","close","holding",*INDICATORS]


In [14]:
stocks_Obs[0].shape

(2639, 10)

In [15]:
stocks_Obs=[np.concatenate(stocks_Obs,axis=0)]
stocks_Act=[np.concatenate(stocks_Act,axis=0)]

In [17]:
stocks=['all']
stocks_Obs[0].shape,stocks_Act[0].shape

((76531, 10), (76531, 1))

In [18]:
for stock,obs,act in zip(stocks,stocks_Obs,stocks_Act):
    obsTen=torch.tensor(obs).float().to(device)
    actTen=torch.tensor(act).float().to(device)
    dataset={'train_input': obsTen,
             'train_label': actTen,
             'test_input': obsTen,
             'test_label': actTen,
             }
    agent=InterpretablePolicyExtractor(obs_dim=obs.shape[1],act_dim=act.shape[1],hidden_widths=HIDDEN_WIDTH,device=device)
    agent.train_from_dataset(dataset,steps=50)
    #agent.policy.prune()
    #agent.policy.prune()
    agent.policy.plot(scale=10, beta=100, title=f'{stock} KAN')#in_vars=input_names,
    print(stock)
    plt.savefig(f"pics/{stock}-kan-policy.png")
    

[10, 3, 1]


train loss: 1.02e-01 | test loss: 1.02e-01 | reg: 1.12e+01 : 100%|██| 50/50 [03:25<00:00,  4.11s/it]


all


In [17]:
max(abs(stocks_Act[0]))

array([0.16984878], dtype=float32)