In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import random
import numpy as np
from collections import deque
import yfinance as yf
import pandas as pd

# Check for GPU availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [5]:
# Define the DQN Network
class DQNetwork(nn.Module):
    def __init__(self, input_dim, action_dim):
        super(DQNetwork, self).__init__()
        self.fc1 = nn.Linear(input_dim, 64)
        self.fc2 = nn.Linear(64, 64)
        self.fc3 = nn.Linear(64, action_dim)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)

# Define a simple stock trading environment
class TradingEnv:
    def __init__(self, data):
        self.data = data
        self.current_step = 0
        self.initial_balance = 10000
        self.balance = self.initial_balance
        self.position = 0  # Shares held
        self.total_value = self.initial_balance  # Cash + held stocks

    def reset(self):
        self.current_step = 0
        self.balance = self.initial_balance
        self.position = 0
        self.total_value = self.initial_balance
        return self.get_state()

    def step(self, action):
        if self.current_step >= len(self.data) - 1:
            # End the episode if we reach the last step
            done = True
            return self.get_state(), 0, done, {}

        current_price = self.data['Close'][self.current_step]
        reward = 0
        done = False

        # Execute action
        if action == 0:  # Buy
            if self.balance >= current_price:
                self.position += 1
                self.balance -= current_price
                reward = 1  # Incentivize buying

        elif action == 1:  # Sell
            if self.position > 0:
                self.position -= 1
                self.balance += current_price
                reward = 1  # Incentivize selling

        # Move to the next step and calculate total value
        self.total_value = self.balance + self.position * current_price
        self.current_step += 1
        done = self.current_step >= len(self.data) - 1

        return self.get_state(), reward, done, {}


    def get_state(self):
        return np.array([
            self.balance,
            self.position,
            self.data['Open'][self.current_step],
            self.data['High'][self.current_step],
            self.data['Low'][self.current_step],
            self.data['Close'][self.current_step]
        ])


In [6]:
# Download stock data
def get_stock_data(ticker, period="1y"):
    stock = yf.Ticker(ticker)
    df = stock.history(period=period)
    df = df[['Open', 'High', 'Low', 'Close', 'Volume']].dropna()
    return df

# Train DQN
def train_dqn(env, model, target_model, episodes=100, gamma=0.99, epsilon=1.0, epsilon_min=0.01, epsilon_decay=0.995):
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    replay_buffer = deque(maxlen=2000)
    batch_size = 32

    for episode in range(episodes):
        state = torch.FloatTensor(env.reset()).to(device)
        done = False
        total_reward = 0

        while not done:
            # ε-greedy policy for action selection
            if random.random() < epsilon:
                action = random.choice([0, 1])  # Random choice for Buy or Sell
            else:
                with torch.no_grad():
                    action = torch.argmax(model(state)).item()

            # Take action in environment
            next_state, reward, done, _ = env.step(action)
            next_state = torch.FloatTensor(next_state).to(device)
            reward = torch.tensor(reward, device=device, dtype=torch.float)
            total_reward += reward.item()

            # Store experience in replay buffer
            replay_buffer.append((state, action, reward, next_state, done))
            state = next_state

            # Experience Replay Training
            if len(replay_buffer) >= batch_size:
                batch = random.sample(replay_buffer, batch_size)
                states, actions, rewards, next_states, dones = zip(*batch)

                states = torch.stack(states).to(device)
                actions = torch.LongTensor(actions).to(device)
                rewards = torch.stack(rewards).to(device)
                next_states = torch.stack(next_states).to(device)
                dones = torch.BoolTensor(dones).to(device)

                # Compute Q targets
                with torch.no_grad():
                    q_next = target_model(next_states).max(1)[0]
                    q_targets = rewards + gamma * q_next * (~dones)

                # Get current Q values
                q_values = model(states).gather(1, actions.view(-1, 1)).squeeze()

                # Compute loss and update model
                loss = nn.MSELoss()(q_values, q_targets)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

        # Decrease epsilon for less exploration over time
        epsilon = max(epsilon * epsilon_decay, epsilon_min)

        # Update target network periodically
        if episode % 10 == 0:
            target_model.load_state_dict(model.state_dict())

        print(f"Episode {episode + 1}/{episodes} - Total Reward: {total_reward}, Epsilon: {epsilon}")


In [7]:
# Fetch data and initialize environment
ticker = "RELIANCE.NS"  # Example ticker
data = get_stock_data(ticker)
env = TradingEnv(data)

# Initialize models and move to device
input_dim = 6  # Example input dimensions
action_dim = 2  # Buy or Sell actions
model = DQNetwork(input_dim=input_dim, action_dim=action_dim).to(device)
target_model = DQNetwork(input_dim=input_dim, action_dim=action_dim).to(device)
target_model.load_state_dict(model.state_dict())

# Train the DQN model
train_dqn(env, model, target_model, episodes=100)


Episode 1/100 - Total Reward: 215.0, Epsilon: 0.995
Episode 2/100 - Total Reward: 214.0, Epsilon: 0.990025
Episode 3/100 - Total Reward: 228.0, Epsilon: 0.985074875
Episode 4/100 - Total Reward: 198.0, Epsilon: 0.9801495006250001
Episode 5/100 - Total Reward: 231.0, Epsilon: 0.9752487531218751
Episode 6/100 - Total Reward: 214.0, Epsilon: 0.9703725093562657
Episode 7/100 - Total Reward: 231.0, Epsilon: 0.9655206468094844
Episode 8/100 - Total Reward: 222.0, Epsilon: 0.960693043575437
Episode 9/100 - Total Reward: 197.0, Epsilon: 0.9558895783575597
Episode 10/100 - Total Reward: 216.0, Epsilon: 0.9511101304657719
Episode 11/100 - Total Reward: 206.0, Epsilon: 0.946354579813443
Episode 12/100 - Total Reward: 220.0, Epsilon: 0.9416228069143757
Episode 13/100 - Total Reward: 207.0, Epsilon: 0.9369146928798039
Episode 14/100 - Total Reward: 196.0, Epsilon: 0.9322301194154049
Episode 15/100 - Total Reward: 211.0, Epsilon: 0.9275689688183278
Episode 16/100 - Total Reward: 223.0, Epsilon: 0.92

In [8]:
from backtesting import Backtest, Strategy
from backtesting.lib import SignalStrategy, crossover

# Save the trained model
def save_model(model, path="dqn_trading_model.pth"):
    torch.save(model.state_dict(), path)
    print(f"Model saved to {path}")

# Load the model
def load_model(path="dqn_trading_model.pth", input_dim=6, action_dim=2):
    model = DQNetwork(input_dim=input_dim, action_dim=action_dim)
    model.load_state_dict(torch.load(path, map_location=torch.device('cpu')))
    model.eval()
    return model

In [17]:
class DQNStrategy(Strategy):
    def init(self):
        # Load the trained model
        self.model = load_model()

    def next(self):
        # Prepare the state for prediction
        state = [
            self.equity,                # Current cash balance
            self.position.size,              # Number of shares held (position size)
            self.data.Open[-1],              # Current Open price
            self.data.High[-1],              # Current High price
            self.data.Low[-1],               # Current Low price
            self.data.Close[-1]              # Current Close price
        ]
        
        # Convert state to tensor and get action from the model
        state_tensor = torch.FloatTensor(state).unsqueeze(0)
        with torch.no_grad():
            action = torch.argmax(self.model(state_tensor)).item()

        # Take actions based on the model's output
        if action == 0 and not self.position:  # Buy if not holding any shares
            self.buy()
        elif action == 1 and self.position:   # Sell if holding shares
            self.sell()


In [None]:
# Fetch historical data
ticker = "RELIANCE.NS"  # Example ticker
data = get_stock_data(ticker)

# Instantiate and train the model as described before
env = TradingEnv(data)
input_dim = 6  # Number of features in state
action_dim = 2  # Number of actions (buy/sell)
model = DQNetwork(input_dim=input_dim, action_dim=action_dim).to(device)
target_model = DQNetwork(input_dim=input_dim, action_dim=action_dim).to(device)
target_model.load_state_dict(model.state_dict())

# Train the model
# train_dqn(env, model, target_model, episodes=100)

# Save the trained model
save_model(model)

# Load data into backtesting format
df = data[['Open', 'High', 'Low', 'Close', 'Volume']]

# Backtest using backtesting.py
bt = Backtest(df, DQNStrategy, cash=10000, commission=.002)
stats = bt.run()
print(stats)

# Visualize the results
bt.plot()

Model saved to dqn_trading_model.pth


  model.load_state_dict(torch.load(path, map_location=torch.device('cpu')))


Start                     2023-11-13 00:00...
End                       2024-11-13 00:00...
Duration                    366 days 00:00:00
Exposure Time [%]                   99.190283
Equity Final [$]                 10802.539137
Equity Peak [$]                  13361.098707
Return [%]                           8.025391
Buy & Hold Return [%]                8.924027
Return (Ann.) [%]                    8.194332
Volatility (Ann.) [%]                22.87423
Sharpe Ratio                         0.358234
Sortino Ratio                        0.560835
Calmar Ratio                         0.427918
Max. Drawdown [%]                   -19.14932
Avg. Drawdown [%]                   -3.137705
Max. Drawdown Duration      128 days 00:00:00
Avg. Drawdown Duration       20 days 00:00:00
# Trades                                    1
Win Rate [%]                            100.0
Best Trade [%]                        8.57463
Worst Trade [%]                       8.57463
Avg. Trade [%]                    

  formatter=DatetimeTickFormatter(days=['%d %b', '%a %d'],
  formatter=DatetimeTickFormatter(days=['%d %b', '%a %d'],
  fig = gridplot(
  fig = gridplot(


In [20]:
ticker = "ICICIBANK.NS"  # Example ticker

data = get_stock_data(ticker)
df = data[['Open', 'High', 'Low', 'Close', 'Volume']]

# Backtest using backtesting.py
bt = Backtest(df, DQNStrategy, cash=10000, commission=.002)
stats = bt.run()
print(stats)

# Visualize the results
bt.plot()

  model.load_state_dict(torch.load(path, map_location=torch.device('cpu')))
  formatter=DatetimeTickFormatter(days=['%d %b', '%a %d'],
  formatter=DatetimeTickFormatter(days=['%d %b', '%a %d'],
  fig = gridplot(
  fig = gridplot(


Start                     2023-11-13 00:00...
End                       2024-11-13 00:00...
Duration                    366 days 00:00:00
Exposure Time [%]                   99.190283
Equity Final [$]                 13377.526857
Equity Peak [$]                  14056.026612
Return [%]                          33.775269
Buy & Hold Return [%]                35.31947
Return (Ann.) [%]                   34.565598
Volatility (Ann.) [%]               27.162052
Sharpe Ratio                         1.272569
Sortino Ratio                        2.634852
Calmar Ratio                          4.20836
Max. Drawdown [%]                   -8.213555
Avg. Drawdown [%]                   -2.829085
Max. Drawdown Duration       66 days 00:00:00
Avg. Drawdown Duration       20 days 00:00:00
# Trades                                    1
Win Rate [%]                            100.0
Best Trade [%]                      36.206643
Worst Trade [%]                     36.206643
Avg. Trade [%]                    

In [21]:
bt.plot(plot_volume=False)

  formatter=DatetimeTickFormatter(days=['%d %b', '%a %d'],
  formatter=DatetimeTickFormatter(days=['%d %b', '%a %d'],
  fig = gridplot(
  fig = gridplot(
