# Reinforcement Learning Trading Environment
This notebook sets up a custom trading environment for reinforcement learning using OpenAI Gym.
We will:
- Load preprocessed trading data.
- Define the trading environment (state, actions, rewards).
- Implement a Gym-compatible RL environment.

In [None]:
import numpy as np
import pandas as pd
import gym
from gym import spaces

# Read the preprocessed train/test splits and parse 'datadate' into datetimes
# in a single chained expression per split.
train_df = pd.read_csv('train_data.csv').assign(
    datadate=lambda frame: pd.to_datetime(frame['datadate'])
)
test_df = pd.read_csv('test_data.csv').assign(
    datadate=lambda frame: pd.to_datetime(frame['datadate'])
)

class TradingEnv(gym.Env):
    """Single-asset, all-in/all-out trading environment (classic Gym API).

    The agent walks through ``data`` one row at a time. At each step it may
    convert all cash into shares, convert all shares into cash, or do nothing.
    Trades execute at the ``'adjcp'`` value of the current row.

    Actions:
        ``BUY`` (0)  -- spend all cash on shares at the current price.
        ``SELL`` (1) -- liquidate all shares at the current price.
        ``HOLD`` (2) -- do nothing; any other value is also treated as hold.

    Observation:
        The values of every column from index 2 onward for the current row.

    Reward:
        Cumulative profit and loss: portfolio net worth, marked to market at
        the price of the row the environment has just advanced to, minus the
        initial cash balance.
    """

    # Action codes understood by ``step``.
    BUY = 0
    SELL = 1
    HOLD = 2

    def __init__(self, data, initial_cash=10000):
        """Create the environment.

        Args:
            data: DataFrame with an ``'adjcp'`` price column; columns from
                index 2 onward are used as the observation features.
            initial_cash: Cash balance at the start of every episode.

        Raises:
            ValueError: If ``data`` has no rows or lacks an ``'adjcp'`` column.
        """
        super().__init__()
        if len(data) == 0:
            raise ValueError("data must contain at least one row")
        if 'adjcp' not in data.columns:
            raise ValueError("data must contain an 'adjcp' price column")

        self.data = data
        self.initial_cash = initial_cash
        self.current_step = 0
        self.cash = initial_cash  # Cash balance
        self.holdings = 0  # Number of shares held
        self.action_space = spaces.Discrete(3)  # 0 = Buy, 1 = Sell, 2 = Hold
        # NOTE(review): bounds [0, 1] presume the feature columns were
        # normalised during preprocessing -- confirm against the data.
        self.observation_space = spaces.Box(
            low=0, high=1, shape=(len(data.columns) - 2,), dtype=np.float32)

    def reset(self):
        """Restart the episode and return the first observation."""
        self.current_step = 0
        self.cash = self.initial_cash
        self.holdings = 0
        return self._next_observation()

    def _price_at(self, step):
        """Return the ``'adjcp'`` price of row ``step`` as a float."""
        return float(self.data['adjcp'].iloc[step])

    def _next_observation(self):
        """Return the feature values (columns 2 onward) of the current row."""
        # NOTE(review): the array dtype follows the column dtypes and is not
        # cast to the float32 declared in observation_space -- confirm whether
        # a cast is wanted once the column types are known.
        return self.data.iloc[self.current_step, 2:].values

    def step(self, action):
        """Apply ``action`` at the current price and advance one row.

        Args:
            action: ``BUY``, ``SELL`` or ``HOLD``; unknown values act as hold.

        Returns:
            Tuple ``(observation, reward, done, info)`` where ``reward`` is
            net worth at the new row's price minus the initial cash.

        Raises:
            IndexError: If the episode has already finished; call ``reset``.
        """
        last_step = len(self.data) - 1
        # Fail before touching the portfolio instead of after mutating it.
        if self.current_step >= last_step:
            raise IndexError(
                "Episode has finished; call reset() before stepping again")

        price = self._price_at(self.current_step)
        if action == self.BUY:
            # A zero, negative or NaN price cannot be traded on; skip the buy
            # rather than ending up with inf/NaN holdings.
            if price > 0:
                self.holdings += self.cash / price
                self.cash = 0
        elif action == self.SELL:
            self.cash += self.holdings * price
            self.holdings = 0

        self.current_step += 1
        done = self.current_step >= last_step

        # Mark to market at the price of the row we moved to. Valuing at the
        # execution price would make the reward independent of the action just
        # taken, since trading at a price does not change net worth at it.
        net_worth = self.cash + self.holdings * self._price_at(self.current_step)
        reward = net_worth - self.initial_cash  # Net worth difference
        return self._next_observation(), reward, done, {}

    def render(self, mode='human'):
        """Print the current step, cash balance and share holdings."""
        print(f'Step: {self.current_step}, Cash: {self.cash}, Holdings: {self.holdings}')

# Build the environment on the training split, start an episode and show
# the first observation as a quick sanity check.
env = TradingEnv(data=train_df)
state = env.reset()
print("Sample state:", state)