<a href="https://colab.research.google.com/github/efearase/RL_with_sentiment/blob/main/rl.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import gym
import numpy as np
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.env_util import make_vec_env

In [None]:
class StockTradingEnv(gym.Env):
    """Single-asset trading environment driven by a DataFrame of prices.

    Actions (``Discrete(3)``): 0 = hold, 1 = buy as many whole shares as the
    cash balance allows, 2 = sell every share held.

    Observation: a flat ``float32`` vector of length
    ``lookback_window_size + 2`` made of the ``Close`` prices of the
    ``lookback_window_size`` most recent rows (oldest first, the last one
    being the price trades execute at), followed by the number of shares
    held and the cash balance.

    Reward: the change in net worth (cash + shares valued at the current
    close) since the previous step.

    The environment follows the classic Gym API used by the rest of this
    file: ``reset()`` returns an observation and ``step()`` returns
    ``(obs, reward, done, info)``.

    Args:
        df: DataFrame with a ``Close`` column, ordered oldest to newest.
            Rows are addressed by position, so any index type works.
        initial_balance: Cash available at the start of every episode.
        lookback_window_size: Number of closing prices in each observation.

    Raises:
        ValueError: If ``lookback_window_size`` is smaller than 1, ``df``
            has no ``Close`` column, or ``df`` has too few rows to take a
            single step.
    """

    metadata = {'render.modes': ['human']}

    def __init__(self, df, initial_balance=10000, lookback_window_size=50):
        super().__init__()

        if lookback_window_size < 1:
            raise ValueError("lookback_window_size must be at least 1")
        if 'Close' not in df.columns:
            raise ValueError("df must contain a 'Close' column")
        if len(df) <= lookback_window_size:
            # One full window plus at least one further row is needed so
            # that the first step() has a price to trade at.
            raise ValueError(
                "df must contain more rows than lookback_window_size "
                f"({len(df)} <= {lookback_window_size})"
            )

        # Dataframe containing stock prices
        self.df = df
        self.lookback_window_size = lookback_window_size

        # Action space: 0 = Hold, 1 = Buy, 2 = Sell
        # `gym.spaces` is reached through the already-imported `gym` package;
        # the bare name `spaces` is not imported anywhere in this file.
        self.action_space = gym.spaces.Discrete(3)

        # Observation space: closing prices for the last N days
        # + owned shares + cash balance
        self.observation_space = gym.spaces.Box(
            low=0,
            high=np.inf,
            shape=(lookback_window_size + 2,),
            dtype=np.float32,
        )

        # Initial conditions
        self.initial_balance = initial_balance
        self.reset()

    def reset(self):
        """Start a new episode and return its first observation."""
        self.balance = self.initial_balance
        self.net_worth = self.initial_balance
        self.shares_held = 0
        self.current_step = 0

        return self._next_observation()

    def _current_price(self):
        """Return the close of the newest row in the observation window."""
        newest_row = self.current_step + self.lookback_window_size - 1
        return float(self.df['Close'].iloc[newest_row])

    def _next_observation(self):
        """Build the observation vector for the current step.

        Uses positional (``iloc``) slicing: label-based ``loc`` slices are
        inclusive of both endpoints and depend on the index labels, which
        would yield the wrong number of rows.
        """
        start = self.current_step
        stop = start + self.lookback_window_size
        prices = self.df['Close'].iloc[start:stop].to_numpy(dtype=np.float32)

        # Append additional data (shares held and balance)
        portfolio = np.array([self.shares_held, self.balance], dtype=np.float32)

        return np.concatenate((prices, portfolio))

    def step(self, action):
        """Advance one row, execute ``action`` and return the transition.

        The trade executes at the close of the newest row of the *new*
        window, i.e. the row that follows the window the agent saw when it
        chose ``action``; the agent therefore never observes a price before
        trading at it.

        Returns:
            Tuple ``(observation, reward, done, info)``. ``done`` is True
            once net worth is non-positive or the window has reached the
            last row of ``df``; call ``reset()`` before stepping again.
        """
        self.current_step += 1

        current_price = self._current_price()
        previous_net_worth = self.net_worth

        if action == 1:  # Buy
            # Buy as many whole shares as the balance allows. A non-positive
            # price would make the floor division meaningless, so skip it.
            if current_price > 0:
                shares_bought = self.balance // current_price
                self.shares_held += shares_bought
                self.balance -= shares_bought * current_price

        elif action == 2:  # Sell
            # Sell all shares
            self.balance += self.shares_held * current_price
            self.shares_held = 0

        # Update net worth
        self.net_worth = self.balance + self.shares_held * current_price

        # Reward the change in portfolio value rather than gross sale
        # proceeds, so that selling at a loss is penalised and profitable
        # holding is rewarded.
        reward = float(self.net_worth - previous_net_worth)

        final_step = len(self.df) - self.lookback_window_size
        done = bool(self.net_worth <= 0 or self.current_step >= final_step)

        return self._next_observation(), reward, done, {}

    def render(self, mode='human', close=False):
        """Print the step counter and the current portfolio state."""
        print(f'Step: {self.current_step}')
        print(f'Balance: {self.balance}')
        print(f'Shares held: {self.shares_held}')
        print(f'Net worth: {self.net_worth}')

In [None]:
# env = StockTradingEnv(df)

# env = make_vec_env(lambda: env, n_envs=1)

# model = PPO("MlpPolicy", env, verbose=1)
# model.learn(total_timesteps=20000)

# obs = env.reset()
# for i in range(1000):
#     action, _states = model.predict(obs, deterministic=True)
#     obs, rewards, dones, info = env.step(action)
#     env.render()  # Assuming your environment supports rendering