<a href="https://colab.research.google.com/github/gaurav-jo1/Reinforcement-Learning/blob/main/Finance.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import gymnasium as gym
import numpy as np
import pandas as pd
import yfinance as yf
from gym import spaces

class TradingEnv(gym.Env):
    """Single-asset trading environment driven by price history from yfinance.

    Observation:
        float32 vector ``[Close, Return, SMA_50, RSI]`` for the current row.
    Actions:
        0 = hold, 1 = buy one share (if cash allows), 2 = sell one share
        (if any are held).
    Reward:
        On an executed sell, the change in close price versus the previous
        row; 0.0 for every other step.

    Args:
        ticker: Symbol passed to ``yf.download``.
        period: History length passed to ``yf.download``.
        initial_cash: Cash balance at the start of every episode.

    Raises:
        ValueError: If no rows remain once the indicator warm-up rows
            (NaNs from the rolling windows) have been dropped.
    """

    # Column order of the observation vector.
    _FEATURES = ("Close", "Return", "SMA_50", "RSI")

    def __init__(self, ticker="AAPL", period="1y", initial_cash=10000):
        super().__init__()
        self.initial_cash = initial_cash

        # Fetch data
        data = yf.download(ticker, period=period)
        # yfinance can return MultiIndex columns (field, ticker) even for a
        # single ticker; flatten so data["Close"] is a Series of scalars.
        if isinstance(data.columns, pd.MultiIndex):
            data.columns = data.columns.get_level_values(0)

        data["Return"] = data["Close"].pct_change()
        data["SMA_50"] = data["Close"].rolling(50).mean()
        data["RSI"] = self._compute_rsi(data["Close"])
        data = data.dropna()
        if data.empty:
            raise ValueError(
                f"No usable rows for ticker={ticker!r}, period={period!r}: "
                "the 50-row SMA warm-up needs more history than was downloaded."
            )
        self.data = data

        # State: [price, return, SMA_50, RSI]
        # `gym` is gymnasium here, so the spaces must come from gymnasium too;
        # legacy `gym.spaces` objects are a different, incompatible type.
        self.observation_space = gym.spaces.Box(
            low=-np.inf, high=np.inf, shape=(len(self._FEATURES),), dtype=np.float32
        )
        self.action_space = gym.spaces.Discrete(3)  # 0: hold, 1: buy, 2: sell

        self.current_step = 0
        self.cash = self.initial_cash
        self.shares = 0
        self.max_steps = len(self.data) - 1

    def _compute_rsi(self, prices, period=14):
        """Return a 0-100 relative-strength index of ``prices``.

        Uses simple rolling means of gains and losses over ``period`` rows.
        Leading rows without a full window are NaN.
        """
        delta = prices.diff()
        gain = (delta.where(delta > 0, 0)).rolling(window=period).mean()
        loss = (-delta.where(delta < 0, 0)).rolling(window=period).mean()
        rs = gain / loss
        return 100 - (100 / (1 + rs))

    def reset(self, seed=None, options=None):
        """Start a new episode at the first row with the initial cash and no shares.

        Args:
            seed: Forwarded to ``gym.Env.reset`` to seed ``self.np_random``.
            options: Accepted for Gymnasium API compatibility; unused.

        Returns:
            ``(observation, info)`` where ``info`` is an empty dict.
        """
        super().reset(seed=seed)
        self.current_step = 0
        self.cash = self.initial_cash
        self.shares = 0
        return self._get_observation(), {}

    def _get_observation(self):
        """Return the float32 feature vector for the row at ``current_step``."""
        row = self.data.iloc[self.current_step]
        return np.array([row[name] for name in self._FEATURES], dtype=np.float32)

    def step(self, action):
        """Apply ``action`` at the current close price and advance one row.

        Returns:
            ``(observation, reward, terminated, truncated, info)``.
            ``truncated`` is always False. ``info["portfolio_value"]`` values
            the holdings at the price the action was executed at.
        """
        closes = self.data["Close"]
        current_price = float(closes.iloc[self.current_step])
        reward = 0.0

        if action == 1 and self.cash >= current_price:  # Buy
            self.shares += 1
            self.cash -= current_price
        elif action == 2 and self.shares > 0:  # Sell
            self.shares -= 1
            self.cash += current_price
            # Guard step 0: index -1 would wrap around to the LAST row.
            if self.current_step > 0:
                previous_price = float(closes.iloc[self.current_step - 1])
                reward = current_price - previous_price  # Profit

        self.current_step += 1
        done = self.current_step >= self.max_steps
        if done:
            # Terminal placeholder must match the declared space's shape/dtype.
            next_state = np.zeros(self.observation_space.shape, dtype=np.float32)
        else:
            next_state = self._get_observation()

        # Portfolio value as info
        info = {"portfolio_value": self.cash + self.shares * current_price}
        return next_state, reward, done, False, info

# Test it: take up to five random actions and print what the env returns.
env = TradingEnv("AAPL")
state, _ = env.reset()
for _ in range(5):
    action = env.action_space.sample()  # Random action
    next_state, reward, done, truncated, info = env.step(action)
    print(f"State: {next_state}, Reward: {reward}, Portfolio: {info['portfolio_value']}")
    # An episode that has ended must be reset before stepping again.
    if done or truncated:
        break