In [None]:
import gym
from gym import spaces
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

from statsmodels.tsa.seasonal import seasonal_decompose
from sklearn.preprocessing import MinMaxScaler
import torch.functional as F

In [2]:
from gym.spaces import Discrete, Box
import gym
import torch
import torch.nn as nn
import matplotlib.pyplot as plt

In [3]:
df = pd.read_csv("smh_stocks_max.csv")
df = df.fillna(0)
df["volume"] = df["volume"].astype(float)
# print(df.head())
# print(df.info())
device = "cuda" if torch.cuda.is_available() else "cpu"

In [4]:
close = df["close"].to_numpy()
tsa = seasonal_decompose(close, model="additive", period=180)
df["trend"] = tsa.trend
df["seasonal"] = tsa.seasonal
df["residual"] = tsa.resid
df = df.fillna(0)

In [5]:
features = df[['volume', 'macd_hist', "macd", "signal_line", "%K", "%D", "ema_200",
       'rsi', "roc", 'bb_upper', 'bb_lower']]
targets = df[["trend","seasonal","residual"]]   

train_size = int(0.8 * len(features))
X_train_set = features.iloc[:train_size, :]
X_test_set = features.iloc[train_size:, :]

y_train_set = targets.iloc[:train_size, :]
y_test_set = targets.iloc[train_size:, :]

scaler = MinMaxScaler((0,1))

X_train_set = scaler.fit_transform(X_train_set.fillna(np.nan).to_numpy())
X_test_set = scaler.transform(X_test_set.fillna(np.nan).to_numpy())

y_train_set = scaler.fit_transform(y_train_set.fillna(np.nan).to_numpy())
y_test_set = scaler.transform(y_test_set.fillna(np.nan).to_numpy())

In [7]:
X_train_set.shape

(4918, 11)

In [None]:
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers):
        super(LSTM, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        out, _ = self.lstm(x)
        out = self.fc(out[:, -1, :]) 
        return out

In [141]:
class StockTradingEnv(gym.Env):
    def __init__(self, data, model, scaler, seq_length=20):
        """
            Initialise the Environment with the data, model, seq_length, current_step, action and
            observation spaces
        """

        super(StockTradingEnv, self).__init__()
        self.data = data
        self.model = model
        self.seq_length = seq_length
        self.current_step = seq_length
        self.scaler = scaler
        self.action_space = spaces.Discrete(2)
        self.observation_space = spaces.Box(low=0, high=1, shape=(seq_length,11), dtype=np.float32())

    def reset(self):
        """
            Resets the position or state to the start of the sequence that is before sequence length
            steps before the current step
        """

        self.current_step = self.seq_length
        return self.data[self.current_step - self.seq_length:self.current_step]
    
    def step(self, action):
        # Get the last 20 days of data for prediction
        input_data = self.data[self.current_step - self.seq_length:self.current_step].reshape(1, self.seq_length, 11)
        input_tensor = torch.FloatTensor(input_data).to(device)

        # Use the LSTM model to predict the next price
        with torch.no_grad():
            predicted_price = self.model(input_tensor).item()

        prev_price = self.data[self.current_step, 0]  # Assuming the price is the first feature
        self.current_step += 1

        if self.current_step >= len(self.data):
            done = True
            self.current_step = len(self.data) - 1
        else:
            done = False

        current_price = self.data[self.current_step, 0]  # Current price

        # Define a reward function based on the action taken
        reward = 0
        if action == 1:  # Buy
            reward = current_price - predicted_price
        else:  # Sell
            reward = predicted_price - current_price

        # Ensure the observation is in the correct shape
        obs = self.data[self.current_step - self.seq_length:self.current_step]
        
        return obs, reward, done, {}

    def render(self, mode='human'):
        pass  # Implement if you want to visualize the environment

    def close(self):
        pass  # Clean up if needed


In [163]:
import torch
import torch.nn as nn
from stable_baselines3.common.policies import ActorCriticPolicy
from stable_baselines3 import PPO
from gym import spaces
import gym
import numpy as np

class CustomLSTMPolicy(ActorCriticPolicy):
    def __init__(self, observation_space, action_space, lr_schedule, *args, **kwargs):
        super(CustomLSTMPolicy, self).__init__(observation_space, action_space, lr_schedule, *args, **kwargs)

        # Define the LSTM layer
        self.lstm = nn.LSTM(input_size=observation_space.shape[-1], hidden_size=256, num_layers=1, batch_first=True)

        # Actor and critic layers for action and value prediction
        self.actor = nn.Linear(256, action_space.n)
        self.critic = nn.Linear(256, 1)

    def forward(self, obs, lstm_hidden_state=None):
        """
        Process the observation with the LSTM, then pass through actor and critic heads.
        """
        print(f"Original observation shape: {obs.shape}")  # Diagnostic print
        
        # Ensure the input tensor is 3D: (batch_size, sequence_length, input_size)
        if obs.dim() == 4:
            obs = obs.squeeze(1)  # Squeeze the second dimension if it’s 1
        elif obs.dim() == 2:
            # Reshape (batch_size, input_size) to (batch_size, sequence_length=1, input_size)
            obs = obs.unsqueeze(1)
        
        print(f"Reshaped observation shape for LSTM: {obs.shape}")  # Diagnostic print

        # Pass through LSTM
        lstm_out, self.hidden_state = self.lstm(obs, lstm_hidden_state)
        lstm_out = lstm_out[:, -1, :]  # Use the last timestep's output

        # Compute actor and critic values
        action_logits = self.actor(lstm_out)
        value = self.critic(lstm_out)

        return action_logits, value



In [None]:
class StockPredictionEnv(gym.Env):
    def __init__(self, data, seq_length=20):
        super(StockPredictionEnv, self).__init__()
        self.data = data
        self.seq_length = seq_length
        self.current_step = seq_length

        # Define action and observation spaces
        self.action_space = spaces.Discrete(3)
        # Define the shape of the observation as (sequence_length, features)
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(seq_length, data.shape[1]), dtype=np.float32)

    def reset(self):
        self.current_step = self.seq_length
        return self.data[self.current_step - self.seq_length:self.current_step]  # Shape: (seq_length, features)

    def step(self, action):
        current_price = self.data[self.current_step, 0]  # Assumes first column is the price
        self.current_step += 1
        done = self.current_step >= len(self.data) - 1

        next_price = self.data[self.current_step, 0]
        obs = self.data[self.current_step - self.seq_length:self.current_step]  # Shape: (seq_length, features)

        # Define reward based on action: 1 for buy, 2 for sell
        reward = 0
        if action == 1:  # Buy
            reward = next_price - current_price
        elif action == 2:  # Sell
            reward = current_price - next_price

        # Initialize environment and model
env = StockPredictionEnv(data=X_train_set, seq_length=20)
model = PPO(CustomLSTMPolicy, env, verbose=1)

# Train the model
model.learn(total_timesteps=10000)
return obs, reward, done, {}


In [164]:
# Initialize environment and model
env = StockPredictionEnv(data=X_train_set, seq_length=20)
model = PPO(CustomLSTMPolicy, env, verbose=1)

# Train the model
model.learn(total_timesteps=10000)


Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Reset observation shape: (20, 11)
Original observation shape: torch.Size([1, 20, 11])
Reshaped observation shape for LSTM: torch.Size([1, 20, 11])


ValueError: not enough values to unpack (expected 3, got 2)

In [162]:
class StockPredictionEnv(gym.Env):
    def __init__(self, data, seq_length=20):
        super(StockPredictionEnv, self).__init__()
        self.data = data
        self.seq_length = seq_length
        self.current_step = seq_length

        # Define action and observation spaces
        self.action_space = spaces.Discrete(3)
        # Set observation space shape as (sequence_length, features)
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(seq_length, data.shape[1]), dtype=np.float32)

    def reset(self):
        self.current_step = self.seq_length
        obs = self.data[self.current_step - self.seq_length:self.current_step]
        print(f"Reset observation shape: {obs.shape}")  # Diagnostic print
        return obs  # Expecting (seq_length, features)

    def step(self, action):
        current_price = self.data[self.current_step, 0]
        self.current_step += 1
        done = self.current_step >= len(self.data) - 1

        next_price = self.data[self.current_step, 0]
        obs = self.data[self.current_step - self.seq_length:self.current_step]
        print(f"Step observation shape: {obs.shape}")  # Diagnostic print

        reward = 0
        if action == 1:
            reward = next_price - current_price
        elif action == 2:
            reward = current_price - next_price

        return obs, reward, done, {}


In [161]:
# Initialize environment and model
env = StockPredictionEnv(data=X_train_set, seq_length=20)
model = PPO(CustomLSTMPolicy, env, verbose=1)

# Train the model
model.learn(total_timesteps=10000)


Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


ValueError: LSTM: Expected input to be 2D or 3D, got 4D instead

In [159]:
# Initialize environment and model
env = StockPredictionEnv(data=X_train_set, seq_length=20)

# Use the custom LSTM policy
model = PPO(CustomLSTMPolicy, env, verbose=1)

# Train the model
model.learn(total_timesteps=10000)



Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


ValueError: LSTM: Expected input to be 2D or 3D, got 4D instead

In [111]:
X_train_set[40:60].shape

(20, 11)

In [75]:
def make_sequences(data, data_close, seq_length):
    sequences = []
    targets = []
    i=0
    for i in range(seq_length, len(data)-20):
        sequences.append(data[i:i+seq_length])
        targets.append(data_close[i+seq_length])

    return np.array(sequences), np.array(targets)

seq_length = 20
X, y = make_sequences(X_train_set, y_train_set, 20)

In [69]:
y_train_set.shape

(4918, 3)

In [125]:
model = LSTM(11,256,3).to(device)

In [139]:
from stable_baselines3 import PPO

# Initialize the environment with the LSTM model
env = StockTradingEnv(data=X_train_set, model=model, scaler=scaler)
env = gym.wrappers.TimeLimit(env, max_episode_steps=1000)
# Define the A2C model
a2c_model = PPO("MlpPolicy", env, verbose=1)

# Train the A2C model
a2c_model.learn(total_timesteps=10000)

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.




ValueError: not enough values to unpack (expected 5, got 4)

In [94]:
torch.zeros(3,20,11).shape

torch.Size([3, 20, 11])