## Setup: Import Libraries and Configuration

In [2]:
!pip install torch numpy pykan==0.0.5 pandas matplotlib
!pip install git+https://github.com/AI4Finance-Foundation/FinRL.git

Collecting pykan==0.0.5
  Using cached pykan-0.0.5-py3-none-any.whl.metadata (13 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Using cached nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Using cached nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Using cached nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Using cached nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Col

In [1]:
# Cell 1: Imports and Setup
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
from finrl.meta.preprocessor.preprocessors import data_split, FeatureEngineer
from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv
from finrl.agents.stablebaselines3.models import DRLAgent
from stable_baselines3 import PPO
from finrl.config import TRAINED_MODEL_DIR, RESULTS_DIR, DATA_SAVE_DIR
from finrl.meta.data_processors.processor_yahoofinance import YahooFinanceProcessor as YahooDownloader
from finrl import config_tickers
from finrl.main import check_and_make_directories
from kan import KAN
from sklearn.cluster import KMeans
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score
from sklearn.model_selection import KFold
import itertools

# Ensure the FinRL output directories exist, plus a "pics" sub-folder of
# RESULTS_DIR that later receives the per-stock KAN policy figures.
check_and_make_directories([TRAINED_MODEL_DIR, RESULTS_DIR, DATA_SAVE_DIR])
os.makedirs(os.path.join(RESULTS_DIR, "pics"), exist_ok=True)

# Seed the global NumPy and PyTorch generators up front so that steps drawing
# from them give repeatable results after Restart Kernel -> Run All.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Define constants
# Technical-indicator columns requested from the feature engineer; the same
# list sizes the environment's state space and feeds the clustering cell.
INDICATORS = ["boll_ub", "boll_lb", "rsi_30", "dx_30", "close_30_sma"]
# Date window used to build the training set.
START_DATE = '2024-01-02'
END_DATE = '2024-04-30'
# Date window used to build the trading (backtest) set.
TRADE_START_DATE = '2024-05-01'
TRADE_END_DATE = '2024-05-15'
TICKER_LIST = config_tickers.DOW_30_TICKER
# Passed straight to the downloader; presumably hourly bars - confirm against
# the downloader's accepted interval strings.
TIME_INTERVAL = '1H'

print("Environment setup complete.")

ModuleNotFoundError: No module named 'finrl'

# Data Collection

In [None]:
# Cell 2: Data Collection
class DataFetcher:
    """Download price bars for a list of tickers and prepare them for modelling.

    The constructor only stores its arguments; no download happens until
    `fetch_data` is called.
    """

    def __init__(self, start_date, end_date, time_interval, ticker_list):
        """Store the download window, bar interval and tickers.

        Args:
            start_date: First date of the window, forwarded to the downloader.
            end_date: Last date of the window, forwarded to the downloader.
            time_interval: Bar interval string, forwarded to the downloader.
            ticker_list: Tickers to download.
        """
        self.start_date = start_date
        self.end_date = end_date
        self.time_interval = time_interval
        self.ticker_list = ticker_list

    def fetch_data(self):
        """Download the raw bars and expose their time as a `date` column.

        Returns:
            The downloaded frame with the `timestamp` column replaced by a
            parsed `date` column.
        """
        df_raw = YahooDownloader().download_data(
            self.ticker_list, self.start_date, self.end_date, time_interval=self.time_interval
        )
        timestamps = df_raw['timestamp']
        if pd.api.types.is_string_dtype(timestamps):
            # Keep only the first 19 characters before parsing; presumably this
            # drops a trailing UTC-offset suffix ("YYYY-MM-DD HH:MM:SS+hh:mm").
            timestamps = timestamps.str[:19]
        df_raw['date'] = pd.to_datetime(timestamps, utc=False)
        return df_raw.drop(columns=['timestamp'])

    def preprocess_data(self, df_raw, tech_indicator_list=None):
        """Add technical indicators and pad the frame to a full (date, tic) grid.

        Args:
            df_raw: Frame with at least `date` and `tic` columns, as returned
                by `fetch_data`.
            tech_indicator_list: Indicator names handed to `FeatureEngineer`.
                Defaults to the notebook-level `INDICATORS` list.

        Returns:
            A frame sorted by `date` then `tic` with one row for every
            combination of observed date and observed ticker; values missing
            for a combination are filled with 0.
        """
        if tech_indicator_list is None:
            tech_indicator_list = INDICATORS
        fe = FeatureEngineer(
            use_technical_indicator=True,
            tech_indicator_list=tech_indicator_list,
            use_turbulence=False,
            user_defined_feature=False,
        )
        processed = fe.preprocess_data(df_raw)
        # Build the date x ticker grid with MultiIndex.from_product so the
        # `date` column keeps its dtype. Going through `.tolist()` can turn
        # datetime64 values into plain integers, which then fail to merge.
        grid = pd.MultiIndex.from_product(
            [processed["date"].unique(), processed["tic"].unique()],
            names=["date", "tic"],
        ).to_frame(index=False)
        processed_full = grid.merge(processed, on=["date", "tic"], how="left")
        return processed_full.sort_values(['date', 'tic']).fillna(0)

# One fetcher per date window: the training window first, then the trading window.
train_fetcher = DataFetcher(
    start_date=START_DATE,
    end_date=END_DATE,
    time_interval=TIME_INTERVAL,
    ticker_list=TICKER_LIST,
)
trade_fetcher = DataFetcher(
    start_date=TRADE_START_DATE,
    end_date=TRADE_END_DATE,
    time_interval=TIME_INTERVAL,
    ticker_list=TICKER_LIST,
)

# Download and feature-engineer each window in turn (training, then trading).
train_raw = train_fetcher.fetch_data()
train_data_init = train_fetcher.preprocess_data(train_raw)
trade_raw = trade_fetcher.fetch_data()
trade_data_init = trade_fetcher.preprocess_data(trade_raw)

# Persist both processed frames as CSV files under DATA_SAVE_DIR.
train_csv_path = os.path.join(DATA_SAVE_DIR, 'train_data.csv')
trade_csv_path = os.path.join(DATA_SAVE_DIR, 'trade_data.csv')
train_data_init.to_csv(train_csv_path, index=False)
trade_data_init.to_csv(trade_csv_path, index=False)

In [None]:
# Reload the CSV files written by the data-collection cell. Each split reads
# its own file ('train_data.csv' / 'trade_data.csv'), so the notebook does not
# depend on a file it never produced.
train_data = data_split(pd.read_csv(os.path.join(DATA_SAVE_DIR, 'train_data.csv')), START_DATE, END_DATE)
trade_data = data_split(pd.read_csv(os.path.join(DATA_SAVE_DIR, 'trade_data.csv')), TRADE_START_DATE, TRADE_END_DATE)

# Fail here with a clear message rather than later inside the trading environment.
assert not train_data.empty, "training split is empty - check START_DATE/END_DATE against train_data.csv"
assert not trade_data.empty, "trading split is empty - check TRADE_START_DATE/TRADE_END_DATE against trade_data.csv"

print("Training Data Shape:", train_data.shape)
print("Trading Data Shape:", trade_data.shape)

# Train PPO Model

In [None]:
# Cell 3: Train PPO Model
from stable_baselines3.common.logger import configure

# Size the environment from the training data: one leading state entry, two
# entries per stock, and one entry per (indicator, stock) pair.
stock_dimension = len(train_data["tic"].unique())
state_space = 1 + stock_dimension * (2 + len(INDICATORS))

# Keyword arguments shared by every StockTradingEnv built in this notebook.
env_kwargs = dict(
    hmax=100,
    initial_amount=1000000,
    num_stock_shares=[0] * stock_dimension,
    buy_cost_pct=[0.001] * stock_dimension,
    sell_cost_pct=[0.001] * stock_dimension,
    state_space=state_space,
    stock_dim=stock_dimension,
    tech_indicator_list=INDICATORS,
    action_space=stock_dimension,
    reward_scaling=1e-4,
)

# Wrap the training data in a gym environment and get its Stable-Baselines3 view.
e_train_gym = StockTradingEnv(df=train_data, **env_kwargs)
env_train, _ = e_train_gym.get_sb_env()

# Build the PPO model through FinRL's agent helper.
agent = DRLAgent(env=env_train)
PPO_PARAMS = dict(n_steps=2048, ent_coef=0.01, learning_rate=0.0003, batch_size=128)
model_ppo = agent.get_model("ppo", model_kwargs=PPO_PARAMS)

# Send training logs to stdout, CSV and TensorBoard under RESULTS_DIR/ppo.
tmp_path = os.path.join(RESULTS_DIR, 'ppo')
new_logger_ppo = configure(tmp_path, ["stdout", "csv", "tensorboard"])
model_ppo.set_logger(new_logger_ppo)

# Train, then save the trained model so the backtest cell can reload it.
trained_ppo = agent.train_model(model=model_ppo, tb_log_name='ppo', total_timesteps=80000)
trained_ppo_path = os.path.join(TRAINED_MODEL_DIR, 'trained_ppo')
trained_ppo.save(trained_ppo_path)

print("PPO Model Trained and Saved.")

## Backtest PPO Model

In [None]:
# Cell 4: Backtest PPO Model
# Build the trading-window environment with the same settings used for training,
# reload the saved PPO model and run it through that environment.
e_trade_gym = StockTradingEnv(df=trade_data, **env_kwargs)
trained_ppo = PPO.load(os.path.join(TRAINED_MODEL_DIR, 'trained_ppo'))
df_result_ppo, df_actions_ppo = DRLAgent.DRL_prediction(model=trained_ppo, environment=e_trade_gym)

# Persist both frames returned by the prediction run.
df_result_ppo.to_csv(os.path.join(RESULTS_DIR, 'results_ppo.csv'), index=False)
df_actions_ppo.to_csv(os.path.join(RESULTS_DIR, 'actions_ppo.csv'), index=False)

# Draw on an explicit Axes: DataFrame.plot() without `ax` opens its own figure,
# which would leave the 15x5 figure blank and save a default-sized plot instead.
fig, ax = plt.subplots(figsize=(15, 5))
df_result_ppo.plot(ax=ax)
ax.set_title("PPO Backtest Results")
fig.savefig(os.path.join(RESULTS_DIR, 'plot_ppo.png'))
plt.show()

# Experiments

In this section, we use the trained PPO model to extract interpretable per-stock policies with KAN, and we cluster the trading data with KMeans.

## Experiment 1: Interpretable Policy Extraction with KAN

In [None]:
# Mounting Google Drive is only possible inside Colab. Skip it elsewhere so the
# notebook still runs top to bottom in other environments.
try:
    from google.colab import drive
except ImportError:
    drive = None
    print("google.colab not available; skipping Google Drive mount.")
else:
    drive.mount('/content/drive')

In [None]:
# Cell 5: Interpretable Policy Extraction with KAN
class InterpretablePolicyExtractor:
    """Wrap a KAN network that is fitted to (observation, action) pairs.

    Attributes:
        device: Device identifier handed to the KAN constructor.
        policy: The KAN instance.
        loss_fn: Mean-squared-error loss used when fitting.
    """

    def __init__(self, obs_dim, act_dim, hidden_widths, device):
        """Build the KAN with layer widths input -> hidden... -> output.

        Args:
            obs_dim: Width of the input layer.
            act_dim: Width of the output layer.
            hidden_widths: Iterable of hidden-layer widths, in order.
            device: Device identifier passed to KAN.
        """
        layer_widths = [obs_dim]
        layer_widths.extend(hidden_widths)
        layer_widths.append(act_dim)

        self.device = device
        self.policy = KAN(width=layer_widths, device=device)
        self.loss_fn = torch.nn.MSELoss()

    def train_from_dataset(self, dataset, steps=20):
        """Fit the KAN on `dataset` with the LBFGS optimiser.

        Args:
            dataset: Dataset object forwarded unchanged to `KAN.train`.
            steps: Number of optimisation steps.

        Returns:
            Whatever `KAN.train` returns.
        """
        training_result = self.policy.train(
            dataset,
            opt="LBFGS",
            steps=steps,
            loss_fn=self.loss_fn,
        )
        return training_result

# Roll the trained PPO policy through the trading environment and collect the
# (observation, action) pairs that the KAN surrogates are fitted to.
state, _ = e_trade_gym.reset()
Obs, Act = [], []
done = False
while not done:
    action, _ = trained_ppo.predict(state, deterministic=True)
    # Record the pair before stepping, so Act[t] is the action the policy chose
    # for Obs[t] and not for the state that results from it.
    Obs.append(np.array(state))
    Act.append(np.array(action))
    state, _reward, done, _, _ = e_trade_gym.step(action)

Obs = np.array(Obs)
Act = np.array(Act)
stocks = trade_data.tic.unique()
# Per-stock inputs: column 0 plus every stock_dimension-th column starting at
# i + 1, i.e. stock i's entry from each per-stock group of the state vector.
stocks_Obs = [np.concatenate((Obs[:, 0].reshape(-1, 1), Obs[:, i+1:state_space:stock_dimension]), axis=1) for i in range(stock_dimension)]
# Per-stock targets: the i-th action component as a column vector.
stocks_Act = [Act[:, i].reshape(-1, 1) for i in range(stock_dimension)]

device = 'cpu'
# Loop variables are named so they do not overwrite the rollout variables or
# the DRLAgent instance called `agent`.
for stock, stock_obs, stock_act in zip(stocks, stocks_Obs, stocks_Act):
    obsTen = torch.tensor(stock_obs).float().to(device)
    actTen = torch.tensor(stock_act).float().to(device)
    # The same tensors serve as train and test split: the goal is to reproduce
    # the recorded policy, not to measure generalisation.
    dataset = {'train_input': obsTen, 'train_label': actTen, 'test_input': obsTen, 'test_label': actTen}
    extractor = InterpretablePolicyExtractor(obs_dim=stock_obs.shape[1], act_dim=stock_act.shape[1], hidden_widths=(3,), device=device)
    extractor.train_from_dataset(dataset, steps=50)
    # NOTE(review): the return value of prune() is discarded. If pykan's prune()
    # returns the pruned network rather than modifying `policy` in place, the
    # plot below shows the unpruned model - confirm against pykan 0.0.5.
    extractor.policy.prune(threshold=0.0001)
    extractor.policy.plot(scale=10, beta=100, title=f'{stock} KAN')
    plt.savefig(os.path.join(RESULTS_DIR, "pics", f"{stock}-kan-policy.png"))
    plt.close()

print("KAN Policies Extracted and Plotted.")

## Experiment 2: KMeans Clustering of Trade Data

In [None]:
# Cell 6: Unsupervised Learning with KMeans
# Cluster the trading rows on their technical-indicator columns and store the
# resulting label on each row of trade_data.
features = trade_data[INDICATORS].to_numpy()
kmeans = KMeans(n_clusters=3, random_state=42)
kmeans.fit(features)
trade_data['cluster'] = kmeans.labels_

# Scatter close price against RSI, coloured by cluster label.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(
    trade_data['close'],
    trade_data['rsi_30'],
    c=trade_data['cluster'],
    cmap='viridis',
)
ax.set_title("KMeans Clustering on Trade Data")
ax.set_xlabel("Close Price")
ax.set_ylabel("RSI 30")
fig.savefig(os.path.join(RESULTS_DIR, 'kmeans_plot.png'))
plt.show()

print("KMeans Clustering Completed.")