## Load Datasets and Libraries

---------------------------------

In [26]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from collections import deque
import random

from sklearn.model_selection import train_test_split

In [27]:
# Colab-only: mount Google Drive so the CSV files under /content/drive can be read.
from google.colab import drive

# The dataset paths used by this notebook all live beneath this mount point.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [28]:
# Locations of the cleaned per-ticker CSV files (AMZN, GOOGL, MSFT) on the mounted Drive
dataset1_path, dataset2_path, dataset3_path = (
    r"/content/drive/MyDrive/FYP/AMZN_cleaned.csv",
    r"/content/drive/MyDrive/FYP/GOOGL_cleaned.csv",
    r"/content/drive/MyDrive/FYP/MSFT_cleaned.csv",
)

In [29]:
# Read each ticker's cleaned CSV into its own DataFrame (same order as the paths)
amzn_df, googl_df, msft_df = [
    pd.read_csv(csv_path)
    for csv_path in (dataset1_path, dataset2_path, dataset3_path)
]

In [30]:
# Preview the first rows of the AMZN table to check the loaded columns.
amzn_df.head()

Unnamed: 0,Open,High,Low,Close,SMA,EMA,MACD
0,14.15,14.21,13.3555,13.638,13.426175,13.527232,0.161967
1,13.552,13.797,13.185,13.275,13.44665,13.488427,0.129849
2,13.4465,13.4465,13.14,13.25,13.46295,13.451746,0.10121
3,13.139,13.234,12.9535,12.999,13.465025,13.382093,0.057597
4,13.1,13.4015,13.073,13.3445,13.4611,13.376309,0.050332


In [31]:
# Preview the first rows of the GOOGL table to check the loaded columns.
googl_df.head()

Unnamed: 0,Open,High,Low,Close,SMA,EMA,MACD
0,18.862362,19.042793,18.841593,18.864614,18.346121,18.517347,0.163696
1,18.781532,18.959459,18.775024,18.911161,18.386712,18.577934,0.182985
2,18.973974,19.434435,18.971472,19.409409,18.451689,18.705853,0.235759
3,19.21146,19.281031,18.975725,18.994493,18.478028,18.750259,0.241321
4,19.047297,19.297047,19.005756,19.162663,18.516804,18.813706,0.256344


In [32]:
# Preview the first rows of the MSFT table to check the loaded columns.
msft_df.head()

Unnamed: 0,Open,High,Low,Close,SMA,EMA,MACD
0,28.01,28.190001,27.76,27.85,27.226,27.569285,0.130848
1,27.790001,27.969999,27.4,27.450001,27.2175,27.550934,0.11164
2,27.67,28.049999,27.549999,27.93,27.2515,27.609251,0.133609
3,27.870001,28.02,27.42,27.440001,27.2865,27.583213,0.110211
4,27.620001,27.66,27.360001,27.5,27.327,27.570411,0.095409


## Model Construction and Training

----------------------------------------------------

### Define Trading Environment

In [33]:
class TradingEnv:
    """Single-asset, long-only trading environment driven by a price DataFrame.

    The agent walks through ``data`` one row at a time (positional order) and
    picks an action by index into ``action_space``: 0 = Sell, 1 = Hold,
    2 = Buy. At most one position is open at a time. Profit or loss is
    realised, and added to the portfolio value, only when an open position is
    sold; buying while already holding and selling while flat are no-ops.

    Args:
        data: DataFrame containing at least the columns in ``state_space``.
        initial_capital: Portfolio value at the start of every episode.
    """

    def __init__(self, data, initial_capital=100):
        self.data = data
        self.state_space = ['Open', 'High', 'Low', 'Close', 'SMA', 'EMA', 'MACD']
        self.action_space = ['Sell', 'Hold', 'Buy']
        self.initial_capital = initial_capital
        self.reset()

    def reset(self):
        """Rewind to the first row and clear any open position."""
        self.current_step = 0
        self.portfolio_value = self.initial_capital
        self.holding = False
        self.buy_price = 0.0

    def step(self, action):
        """Apply ``action`` at the current row and advance by one row.

        Args:
            action: Index into ``action_space`` (0 = Sell, 1 = Hold, 2 = Buy).

        Returns:
            ``(reward, done)``: ``reward`` is the realised price difference
            when a held position is sold and 0 otherwise; ``done`` is True
            once the environment sits on the last row of ``data``.
        """
        current_price = self.data['Close'].iloc[self.current_step]
        reward = 0

        if action == 2:  # Buy
            if not self.holding:
                self.buy_price = current_price
                self.holding = True

        elif action == 0:  # Sell
            if self.holding:
                reward = current_price - self.buy_price
                self.portfolio_value += reward
                self.holding = False

        # Clamp to the last row so that get_state() stays valid after the final
        # step, and so that a one-row frame (or a step taken after the episode
        # has finished) reports done=True instead of running off the end.
        last_step = len(self.data) - 1
        self.current_step = min(self.current_step + 1, last_step)
        done = self.current_step >= last_step

        return reward, done

    def get_state(self):
        """Return the ``state_space`` features of the current row as floats."""
        row = self.data.iloc[self.current_step]
        # Force a float array: if the frame carries any non-numeric column the
        # row Series is object-typed, which cannot be turned into a tensor.
        return row[self.state_space].to_numpy(dtype=float)

    def get_portfolio_value(self):
        """Return the initial capital plus all profit/loss realised so far."""
        return self.portfolio_value

### Construct DQN-LSTM Model

In [34]:
class DQN_LSTM(nn.Module):
    """Q-network: a single-layer LSTM followed by a linear read-out.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Size of the LSTM hidden state.
        output_dim: Number of actions (one Q-value per action).
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Map a ``(batch, seq_len, input_dim)`` tensor to ``(batch, output_dim)`` Q-values."""
        # nn.LSTM starts from zero hidden/cell states when none are passed, so
        # no explicit h_0/c_0 is needed. The previous code built them from a
        # notebook-global `hidden_dim`, which fails (or silently uses the wrong
        # size) whenever that global does not match this module.
        out, _ = self.lstm(x)
        # Only the output at the last time step feeds the Q-value head.
        return self.fc(out[:, -1, :])


### Train Model


In [35]:
def train_dqn_lstm(env, model, optimizer, criterion, num_episodes=5, batch_size=64, gamma=0.95, epsilon=1.0, epsilon_decay=0.995, epsilon_min=0.01):
    """Train ``model`` on ``env`` with epsilon-greedy Q-learning and experience replay.

    Args:
        env: Environment exposing ``reset()``, ``step(action) -> (reward, done)``,
            ``get_state()`` and ``action_space``.
        model: Network mapping a ``(1, 1, n_features)`` tensor to ``(1, n_actions)`` Q-values.
        optimizer: Optimizer over ``model``'s parameters.
        criterion: Loss between predicted and target Q-values.
        num_episodes: Number of full passes over the environment.
        batch_size: Number of stored transitions replayed after each step.
        gamma: Discount factor for the bootstrapped target.
        epsilon: Initial probability of taking a random action.
        epsilon_decay: Factor applied to ``epsilon`` after every episode.
        epsilon_min: Once ``epsilon`` is at or below this value it stops decaying.

    Returns:
        The same ``model`` instance, trained in place.
    """
    # Follow the model's own device rather than relying on a notebook-level
    # `device` variable that may not exist yet when this function is called.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    def observe():
        """Current environment state as a (1, n_features) float32 tensor."""
        features = np.array([env.get_state()], dtype=np.float32)
        return torch.tensor(features, device=device)

    memory = deque(maxlen=2000)

    for _ in range(num_episodes):
        env.reset()
        state = observe()

        while True:
            # Epsilon-greedy action selection
            if np.random.rand() <= epsilon:
                action = np.random.choice(len(env.action_space))
            else:
                with torch.no_grad():
                    action = torch.argmax(model(state.unsqueeze(0))).item()

            reward, done = env.step(action)
            next_state = observe()
            memory.append((state, action, reward, next_state, done))
            state = next_state

            if done:
                break

            if len(memory) > batch_size:
                for s, a, r, s_next, d in random.sample(memory, batch_size):
                    # The bootstrapped target is a constant: no graph needed.
                    target_value = float(r)
                    if not d:
                        with torch.no_grad():
                            best_next = torch.max(model(s_next.unsqueeze(0))).item()
                        target_value = float(r) + gamma * best_next

                    # One forward pass: the target equals the prediction except
                    # for the taken action, so only that entry drives the loss.
                    prediction = model(s.unsqueeze(0))
                    target = prediction.detach().clone()
                    target[0, a] = target_value

                    optimizer.zero_grad()
                    loss = criterion(prediction, target)
                    loss.backward()
                    optimizer.step()

        # NOTE(review): epsilon decays once per episode, so with the defaults
        # (5 episodes, decay 0.995) it only falls from 1.0 to about 0.975 and
        # the agent acts almost entirely at random. Confirm this is intended.
        if epsilon > epsilon_min:
            epsilon *= epsilon_decay

    return model

## Define Performance Metrics

----------------------------------------------------

In [36]:
def evaluate_dqn_lstm(env, model):
    """Run one greedy (argmax-Q) episode and track the portfolio value.

    Args:
        env: Environment exposing ``reset()``, ``step(action) -> (reward, done)``,
            ``get_state()``, ``get_portfolio_value()`` and ``initial_capital``.
        model: Network mapping a ``(1, 1, n_features)`` tensor to Q-values.

    Returns:
        A list that starts with ``env.initial_capital`` and then holds the
        portfolio value reported by the environment after every step.
    """
    # Follow the model's own device rather than relying on a notebook-level
    # `device` variable that may not exist yet when this function is called.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    def observe():
        """Current environment state as a (1, n_features) float32 tensor."""
        features = np.array([env.get_state()], dtype=np.float32)
        return torch.tensor(features, device=device)

    env.reset()
    state = observe()
    portfolio_values = [env.initial_capital]

    done = False
    while not done:
        with torch.no_grad():
            action = torch.argmax(model(state.unsqueeze(0))).item()

        _, done = env.step(action)
        portfolio_values.append(env.get_portfolio_value())
        state = observe()

    return portfolio_values

def calculate_performance_metrics(portfolio_values, risk_free_rate = 0.01):
    """Summarise a portfolio value series.

    Args:
        portfolio_values: Sequence of portfolio values, one per step.
        risk_free_rate: Annual risk-free rate; divided by 252 to get a per-step rate.

    Returns:
        ``(cumulative_return, sharpe_ratio, maximum_drawdown)`` where the
        cumulative return and maximum drawdown are percentages and the Sharpe
        ratio is annualised with sqrt(252) (NaN when the excess returns have
        zero standard deviation).
    """
    values = np.array(portfolio_values)

    # Cumulative return: total change relative to the first value, in percent
    start_value, end_value = values[0], values[-1]
    cumulative_return = ((end_value - start_value) / start_value) * 100

    # Sharpe ratio from step-over-step returns in excess of the per-step risk-free rate
    step_returns = np.diff(values) / values[:-1]
    excess = step_returns - risk_free_rate / 252
    excess_mean = excess.mean()
    excess_std = excess.std()
    # A zero spread would divide by zero, so report NaN in that case
    sharpe_ratio = np.nan if excess_std == 0 else excess_mean / excess_std * np.sqrt(252)

    # Maximum drawdown: largest relative drop from the running peak, in percent
    running_peak = np.maximum.accumulate(values)
    maximum_drawdown = ((running_peak - values) / running_peak).max() * 100

    return cumulative_return, sharpe_ratio, maximum_drawdown


## Evaluate Model

----------------------------------------------------

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
results = []

# Seed for every RNG that training touches (random actions, replay sampling,
# weight initialisation) so that a re-run reproduces the same table.
SEED = 42
# Share of each series, taken from the end, that is held out for evaluation.
TEST_FRACTION = 0.2

input_dim = len(['Open', 'High', 'Low', 'Close', 'SMA', 'EMA', 'MACD'])
hidden_dim = 64
output_dim = 3  # Sell, Hold, Buy (index order of TradingEnv.action_space)
learning_rate = 0.001

datasets = {
    'AMZN': amzn_df,
    'GOOGL': googl_df,
    'MSFT': msft_df
}

for name, data in datasets.items():
    # Re-seed per dataset so each ticker's run does not depend on loop order
    random.seed(SEED)
    np.random.seed(SEED)
    torch.manual_seed(SEED)

    # Drop rows with infinities or NaNs into a NEW frame; the loaded DataFrames
    # are left untouched so re-running this cell starts from the same input.
    clean_data = data.replace([np.inf, -np.inf], np.nan).dropna()

    # Chronological split: train on the earlier rows, evaluate on the later
    # ones, so the reported metrics are not measured on the training data.
    split_at = int(len(clean_data) * (1 - TEST_FRACTION))
    train_data = clean_data.iloc[:split_at]
    test_data = clean_data.iloc[split_at:]

    train_env = TradingEnv(train_data, initial_capital=100)
    test_env = TradingEnv(test_data, initial_capital=100)

    model = DQN_LSTM(input_dim, hidden_dim, output_dim).to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.MSELoss()

    # Train the model on the earlier part of the series
    model = train_dqn_lstm(train_env, model, optimizer, criterion)

    # Evaluate the model on the held-out tail
    portfolio_values = evaluate_dqn_lstm(test_env, model)
    total_profits = portfolio_values[-1] - test_env.initial_capital
    cr, sr, mdd = calculate_performance_metrics(portfolio_values)

    results.append({
        'Dataset': name,
        'Total Profits ($)': total_profits,
        'Cumulative Return (%)': cr,
        'Sharpe Ratio': sr,
        'Maximum Drawdown (%)': mdd
    })

results_df = pd.DataFrame(results)

# Display the results DataFrame
print(results_df)

## Results Analysis

----------------------------------------------------

In [None]:
# Show the per-dataset metrics table as rich notebook output.
results_df