In [2]:
import pandas as pd
from backtesting import Backtest, Strategy
from gymnasium import spaces, Env
from stable_baselines3 import PPO
import numpy as np

In [3]:
class periodicStrategy(Strategy):
    """Strategy that is stepped from outside, one externally chosen action per bar.

    Action encoding understood by ``next``:
        ``None`` or ``0`` -> do nothing, ``1`` -> buy, ``2`` -> close the position.
    """

    # Number of most recent closing prices exposed through ``observation``.
    OBS_WINDOW = 20

    def init(self):
        """Print the starting equity; no indicators are precomputed."""
        print(f"Start with equity={self.equity:.2f}")

    def next(self, action: int | None = None):
        """Log the current bar and carry out ``action``.

        Args:
            action: ``1`` places a buy order, ``2`` closes the open position,
                anything else (including ``None`` and ``0``) holds.
        """
        print(f"Action={action} Equity={self.equity:.2f} Date={self.data.index[-1]}")
        if action == 1:
            self.buy()
        elif action == 2:
            self.position.close()

    def observation(self):
        """Return the min-max scaled recent closes as a ``(1, k)`` float32 array.

        ``k`` is ``OBS_WINDOW`` once enough bars are available and smaller
        before that. Values lie in ``[0, 1]``; a completely flat window maps
        to all zeros instead of the NaNs a 0/0 division would produce.
        """
        closes = np.asarray(self.data.Close[-self.OBS_WINDOW:], dtype=np.float64)
        lowest = closes.min()
        span = closes.max() - lowest
        if span > 0:
            scaled = (closes - lowest) / span
        else:
            # Every close in the window is identical: avoid dividing by zero.
            scaled = np.zeros_like(closes)
        # Leading axis of length 1 keeps the (1, k) layout callers check for.
        return scaled.astype(np.float32)[np.newaxis, :]

In [4]:
class CustomEnv(Env):
    """Gymnasium environment that advances a step-wise ``Backtest`` one bar per ``step``.

    Observation: the strategy's ``observation()`` output, declared as a
    ``(1, 20)`` float32 box.
    Actions (``Discrete(3)``): forwarded unchanged to ``bt.next(action=...)``;
    ``periodicStrategy`` treats 0 as hold, 1 as buy and 2 as close.
    Reward: +1 when equity grew since the previous step, -1 otherwise.
    """

    def __init__(self, bt: Backtest):
        """Store the backtest and declare the observation/action spaces.

        Args:
            bt: Backtest instance exposing ``initialize``, ``next``, ``plot``,
                ``_step_strategy``, ``_step_time`` and ``_data``.
        """
        # observation (1, 20) = (close price, 20 back days)
        self.observation_space = spaces.Box(low=-1, high=1, shape=(1, 20), dtype=np.float32)
        # 0 = hold, 1 = buy, 2 = close the open position
        self.action_space = spaces.Discrete(3)
        self.bt = bt
        # Equity at the end of the previous step; set by reset() and step().
        self.previous_equity = None

    def _observation(self):
        """Return the strategy's current observation as a float32 array."""
        raw = self.bt._step_strategy.observation()
        # A flat price window makes min-max scaling produce NaN; feed zeros instead.
        return np.nan_to_num(np.asarray(raw, dtype=np.float32), nan=0.0)

    def reward_calculation(self):
        """Return +1 if equity is above ``previous_equity``, otherwise -1."""
        if self.previous_equity < self.bt._step_strategy.equity:
            return +1
        return -1

    def check_done(self):
        """Report whether the data is exhausted, rendering the result when it is."""
        if self.bt._step_time + 2 > len(self.bt._data):
            self.render()
            return True
        return False

    def step(self, action):
        """Apply ``action`` to the backtest and return the resulting transition.

        Returns:
            ``(obs, reward, terminated, truncated, info)``. ``terminated`` is
            always ``False`` (the market itself never ends); ``truncated``
            becomes ``True`` once the data runs out.
        """
        # Act first so the observation and reward describe the outcome of this action.
        self.bt.next(action=action)
        obs = self._observation()
        reward = self.reward_calculation()
        # Remember the equity so the next reward measures the change over one step.
        self.previous_equity = self.bt._step_strategy.equity
        truncated = self.check_done()
        info = {}
        return obs, reward, False, truncated, info

    def reset_backtesting(self):
        """Re-initialise the backtest and advance it until a full observation exists.

        Raises:
            RuntimeError: If the observation never reaches the declared shape
                within ``len(self.bt._data)`` warm-up steps.
        """
        # backtesting, give first next because when initialize can return the whole dataset
        self.bt.initialize()
        self.bt.next()
        expected_shape = self.observation_space.shape
        # Bounded warm-up: too-short data raises instead of looping forever.
        for _ in range(len(self.bt._data)):
            if np.shape(self.bt._step_strategy.observation()) == expected_shape:
                return
            self.bt.next()
        raise RuntimeError(
            f"Observation never reached shape {expected_shape}; not enough data to warm up."
        )

    def reset(self, seed=None, options=None):
        """Start a new episode and return ``(observation, info)``.

        Args:
            seed: Optional seed forwarded to ``Env.reset`` for the env's RNG.
            options: Accepted for Gymnasium API compatibility; unused.
        """
        super().reset(seed=seed)
        self.reset_backtesting()
        # Baseline for the first reward is the equity after warm-up, not a constant.
        self.previous_equity = self.bt._step_strategy.equity
        return self._observation(), {}

    def render(self, mode='human'):
        """Finalise the backtest run and plot its results without opening a browser."""
        result = self.bt.next(done=True)
        self.bt.plot(results=result, open_browser=False)

    def close(self):
        """Nothing to release."""
        pass

In [5]:
import yfinance as yf
def get_stock_data(ticker, period="1y"):
    """Download OHLCV price history for ``ticker`` through yfinance.

    Args:
        ticker: Symbol passed to ``yf.Ticker``, e.g. ``'GOOG'``.
        period: Look-back window passed to ``Ticker.history``.

    Returns:
        DataFrame with the ``Open``, ``High``, ``Low``, ``Close`` and ``Volume``
        columns, with rows containing missing values dropped.

    Raises:
        ValueError: If the download yields no usable rows.
    """
    history = yf.Ticker(ticker).history(period=period)
    # Check before selecting columns: an empty download may not carry them.
    if history.empty:
        raise ValueError(f"No price data returned for ticker {ticker!r} (period={period!r})")
    ohlcv = history[['Open', 'High', 'Low', 'Close', 'Volume']].dropna()
    if ohlcv.empty:
        raise ValueError(f"All rows for ticker {ticker!r} contained missing values")
    return ohlcv


# Fetch one year of GOOG bars and dump the frame as a quick sanity check.
data = get_stock_data(ticker='GOOG', period="1y")
print(data)


                                 Open        High         Low       Close  \
Date                                                                        
2023-11-21 00:00:00-05:00  137.602069  138.624552  137.367644  138.280396   
2023-11-22 00:00:00-05:00  138.759227  140.754327  138.659466  139.676971   
2023-11-24 00:00:00-05:00  139.198136  139.334809  137.133216  137.881378   
2023-11-27 00:00:00-05:00  137.232987  139.287938  137.203047  137.711807   
2023-11-28 00:00:00-05:00  137.292831  138.320306  136.704264  138.280396   
...                               ...         ...         ...         ...   
2024-11-14 00:00:00-05:00  179.750000  180.445007  176.029999  177.350006   
2024-11-15 00:00:00-05:00  175.639999  175.880005  172.744995  173.889999   
2024-11-18 00:00:00-05:00  174.955002  176.910004  174.419998  176.800003   
2024-11-19 00:00:00-05:00  175.235001  180.169998  175.115997  179.580002   
2024-11-20 00:00:00-05:00  178.830002  179.089996  175.330002  177.330002   

In [6]:
# Wrap the downloaded bars in a backtest, then expose that backtest as an environment.
bt = Backtest(data, periodicStrategy, cash=10_000)
env = CustomEnv(bt=bt)
# env = VecNormalize(env)

In [7]:
# PPO agent with the MLP policy; TensorBoard logs are written under ./logs/.
model = PPO(
    policy="MlpPolicy",
    env=env,
    verbose=0,
    tensorboard_log="./logs/",
)

In [8]:
# Train the agent (the model itself is built in the previous cell).
# NOTE(review): total_timesteps=0 requests no training steps, so this call presumably
# only sets up the run (the logged lines all show Action=None) — raise it to train; confirm.
model.learn(total_timesteps=0)

Start with equity=10000.00
Action=None Equity=10000.00 Date=2023-11-22 00:00:00-05:00
Action=None Equity=10000.00 Date=2023-11-24 00:00:00-05:00
Action=None Equity=10000.00 Date=2023-11-27 00:00:00-05:00
Action=None Equity=10000.00 Date=2023-11-28 00:00:00-05:00
Action=None Equity=10000.00 Date=2023-11-29 00:00:00-05:00
Action=None Equity=10000.00 Date=2023-11-30 00:00:00-05:00
Action=None Equity=10000.00 Date=2023-12-01 00:00:00-05:00
Action=None Equity=10000.00 Date=2023-12-04 00:00:00-05:00
Action=None Equity=10000.00 Date=2023-12-05 00:00:00-05:00
Action=None Equity=10000.00 Date=2023-12-06 00:00:00-05:00
Action=None Equity=10000.00 Date=2023-12-07 00:00:00-05:00
Action=None Equity=10000.00 Date=2023-12-08 00:00:00-05:00
Action=None Equity=10000.00 Date=2023-12-11 00:00:00-05:00
Action=None Equity=10000.00 Date=2023-12-12 00:00:00-05:00
Action=None Equity=10000.00 Date=2023-12-13 00:00:00-05:00
Action=None Equity=10000.00 Date=2023-12-14 00:00:00-05:00
Action=None Equity=10000.00 D

<stable_baselines3.ppo.ppo.PPO at 0x1bfcf3adb40>

In [9]:
# Saving the model
# model.save("ppo_aapl")
# model = PPO.load("ppo_aapl", env=env) 