In [5]:
import gym
from gym import spaces
import numpy as np
import pandas as pd

class StockTradingEnv(gym.Env):
    def __init__(self, df, initial_balance=10000, window_size=10):
        super(StockTradingEnv, self).__init__()

        self.df = df
        self.initial_balance = initial_balance
        self.window_size = window_size
        self.current_step = self.window_size  # Starting at step 'window_size' to have enough data
        self.balance = initial_balance
        self.shares_held = 0
        self.total_assets = initial_balance
        self.action_space = spaces.Discrete(3)  # Buy, Hold, Sell
        self.observation_space = spaces.Box(low=0, high=np.inf, shape=(self.window_size, len(df.columns)), dtype=np.float32)

    def reset(self):
        self.balance = self.initial_balance
        self.shares_held = 0
        self.total_assets = self.initial_balance
        self.current_step = self.window_size  # Start at the window_size
        return self._get_observation()

    def _get_observation(self):
        # Slice the dataframe to get a window of size 'window_size' (look-back period)
        obs = self.df.iloc[self.current_step - self.window_size:self.current_step].values
        return np.array(obs)  # Ensure it returns a numpy array of the correct shape

    def step(self, action):
        # Prevent stepping beyond the dataset
        if self.current_step >= len(self.df) - 1:
            done = True  # End the episode when we've reached the end of the data
        else:
            done = False

        # If we're not done, we can proceed with the regular action and reward logic
        if not done:
            current_price = self.df.iloc[self.current_step]['Close']
            reward = 0

            # Buy
            if action == 0:  # Buy
                if self.balance >= current_price:
                    self.shares_held += 1
                    self.balance -= current_price
                    reward = -current_price  # Negative because we spent money
            # Sell
            elif action == 1:  # Sell
                if self.shares_held > 0:
                    self.shares_held -= 1
                    self.balance += current_price
                    reward = current_price  # Positive reward for selling
            # Hold
            elif action == 2:  # Hold
                reward = 0  # No reward for holding

            # Update total assets
            self.total_assets = self.balance + self.shares_held * current_price

            # Move to the next step
            self.current_step += 1

            return self._get_observation(), reward, done, {}

        # If done (out of data), return the final observation with a reward of 0
        return self._get_observation(), 0, done, {}

    def render(self):
        print(f'Step: {self.current_step}')
        print(f'Balance: {self.balance}')
        print(f'Shares Held: {self.shares_held}')
        print(f'Total Assets: {self.total_assets}')



In [6]:
from backtesting import Strategy
from stable_baselines3 import DQN
import numpy as np

class DQNStrategy(Strategy):
    def init(self):
        # Load the pre-trained DQN model
        self.model = DQN.load("dqn_stock_trading_model")
        
        # Initialize environment (not directly used in this strategy anymore)
        self.env = self.create_env()  
        
        self.action_space = self.env.action_space
        self.observation_space = self.env.observation_space
        self.total_rewards = []

    def create_env(self):
        # Use historical data to initialize the Gym environment
        df = self.data.df  # Assuming df is in the format used in the StockTradingEnv
        env = StockTradingEnv(df)
        return env

    def next(self):
        # Observe the current state
        observation = self.env._get_observation()
        
        # Predict the next action using the DQN model
        action, _states = self.model.predict(observation, deterministic=True)
        
        # Take action in the environment
        _, reward, done, _ = self.env.step(action)
        
        self.total_rewards.append(reward)

        # Execute the action based on the agent's decision
        if action == 0:  # Buy
            self.buy()
        elif action == 1:  # Sell
            self.sell()

    def on_end(self):
        print("Total Rewards: ", np.sum(self.total_rewards))


In [7]:
from stable_baselines3 import DQN
import yfinance as yf

stock = yf.Ticker('AAPL')
df = stock.history(period='1y')
df = df[['Open', 'High', 'Low', 'Close', 'Volume']].dropna()


# Create the Gym environment
env = StockTradingEnv(df)  # Assuming 'df' is your historical stock data

# Train a DQN agent on this environment
model = DQN("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=10000)

# Save the trained model to a file
model.save("dqn_stock_trading_model")

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.




-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 242       |
|    ep_rew_mean      | -1.09e+03 |
|    exploration_rate | 0.0804    |
| time/               |           |
|    episodes         | 4         |
|    fps              | 4705      |
|    time_elapsed     | 0         |
|    total_timesteps  | 968       |
-----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 242      |
|    ep_rew_mean      | -1.5e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 8        |
|    fps              | 4703     |
|    time_elapsed     | 0        |
|    total_timesteps  | 1936     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 242      |
|    ep_rew_mean      | -1.5e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    epis

In [8]:
from backtesting import Backtest

# Assuming you have your historical stock data in 'df'
backtest = Backtest(df, DQNStrategy, cash=10000, commission=.002)
results = backtest.run()

backtest.plot()


In [9]:
results

Start                     2023-11-21 00:00...
End                       2024-11-20 00:00...
Duration                    365 days 00:00:00
Exposure Time [%]                   98.809524
Equity Final [$]                  7007.696011
Equity Peak [$]                  11441.766177
Return [%]                          -29.92304
Buy & Hold Return [%]               20.710175
Return (Ann.) [%]                   -29.92304
Volatility (Ann.) [%]                 26.4677
Sharpe Ratio                        -1.130549
Sortino Ratio                       -1.145587
Calmar Ratio                        -0.690931
Max. Drawdown [%]                  -43.308297
Avg. Drawdown [%]                   -7.610534
Max. Drawdown Duration      216 days 00:00:00
Avg. Drawdown Duration       39 days 00:00:00
# Trades                                   10
Win Rate [%]                              0.0
Best Trade [%]                     -20.912246
Worst Trade [%]                    -37.984407
Avg. Trade [%]                    