## Load Libraries and Datasets

---------------------------------

In [48]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from collections import deque
import random

import skopt
from skopt import gp_minimize
from skopt.space import Real, Integer
from skopt.utils import use_named_args

from tabulate import tabulate

In [50]:
amzn_df = pd.read_csv('AMZN_cleaned.csv')
googl_df = pd.read_csv('GOOGL_cleaned.csv')
msft_df = pd.read_csv('MSFT_cleaned.csv')

In [51]:
amzn_df.head()

Unnamed: 0,Open,High,Low,Close,SMA,EMA,MACD,Signal Line
0,14.15,14.21,13.3555,13.638,13.426175,13.527232,0.161967,0.174111
1,13.552,13.797,13.185,13.275,13.44665,13.488427,0.129849,0.165259
2,13.4465,13.4465,13.14,13.25,13.46295,13.451746,0.10121,0.152449
3,13.139,13.234,12.9535,12.999,13.465025,13.382093,0.057597,0.133479
4,13.1,13.4015,13.073,13.3445,13.4611,13.376309,0.050332,0.116849


In [52]:
googl_df.head()

Unnamed: 0,Open,High,Low,Close,SMA,EMA,MACD,Signal Line
0,18.862362,19.042793,18.841593,18.864614,18.346121,18.517347,0.163696,0.077188
1,18.781532,18.959459,18.775024,18.911161,18.386712,18.577934,0.182985,0.098348
2,18.973974,19.434435,18.971472,19.409409,18.451689,18.705853,0.235759,0.12583
3,19.21146,19.281031,18.975725,18.994493,18.478028,18.750259,0.241321,0.148928
4,19.047297,19.297047,19.005756,19.162663,18.516804,18.813706,0.256344,0.170411


In [53]:
msft_df.head()

Unnamed: 0,Open,High,Low,Close,SMA,EMA,MACD,Signal Line
0,28.01,28.190001,27.76,27.85,27.226,27.569285,0.130848,0.008465
1,27.790001,27.969999,27.4,27.450001,27.2175,27.550934,0.11164,0.0291
2,27.67,28.049999,27.549999,27.93,27.2515,27.609251,0.133609,0.050002
3,27.870001,28.02,27.42,27.440001,27.2865,27.583213,0.110211,0.062044
4,27.620001,27.66,27.360001,27.5,27.327,27.570411,0.095409,0.068717


## Data Partitioning

----------------------------------------------------

### Sequential Train-Test Split (80:20)

In [56]:
def split_train_test(data, train_size=0.8):
    split_index = int(len(data) * train_size)
    train_data = data.iloc[:split_index]
    test_data = data.iloc[split_index:]
    return train_data, test_data

## Model Construction and Training

----------------------------------------------------

### Trading Environment

In [59]:
class TradingEnv:
    def __init__(self, data, initial_capital=100):
        self.data = data
        self.state_space = ['Open', 'High', 'Low', 'Close', 'SMA', 'EMA', 'MACD', 'Signal Line']
        self.action_space = ['Sell', 'Hold', 'Buy']
        self.initial_capital = initial_capital
        self.reset()

    def reset(self):
        self.current_step = 0
        self.portfolio_value = self.initial_capital
        self.holding = False
        self.buy_price = 0.0

    def step(self, action):
        current_price = self.data['Close'].iloc[self.current_step]
        reward = 0

        if action == 2:  # Buy
            if not self.holding:
                self.buy_price = current_price
                self.holding = True

        elif action == 0:  # Sell
            if self.holding:
                reward = current_price - self.buy_price
                self.portfolio_value += reward
                self.holding = False

        self.current_step += 1
        done = self.current_step == len(self.data) - 1

        return reward, done

    def get_state(self):
        return self.data.iloc[self.current_step][self.state_space].values

    def get_portfolio_value(self):
        return self.portfolio_value

### DQN-LSTM Model

In [61]:
class DQN_LSTM(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(DQN_LSTM, self).__init__()
        self.hidden_dim = hidden_dim 
        self.lstm = nn.LSTM(input_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        h_0 = torch.zeros(1, x.size(0), self.hidden_dim).to(x.device)
        c_0 = torch.zeros(1, x.size(0), self.hidden_dim).to(x.device)
        out, _ = self.lstm(x, (h_0, c_0))
        out = self.fc(out[:, -1, :])
        return out

### Training and Testing Function

In [64]:
def evaluate_dqn_lstm(env, model, optimizer, criterion, num_episodes=5, batch_size=64, gamma=0.95, 
                      epsilon=1.0, epsilon_decay=0.995, epsilon_min=0.01, training=True):
    memory = deque(maxlen=2000)
    best_portfolio_value = 0
    best_portfolio_values = []

    for episode in range(num_episodes):
        env.reset()
        state = torch.tensor(np.array([env.get_state()]), dtype=torch.float32).to(device)
        total_reward = 0
        portfolio_values = [env.get_portfolio_value()]

        while True:
            if np.random.rand() <= epsilon:
                action = np.random.choice(len(env.action_space))
            else:
                with torch.no_grad():
                    q_values = model(state.unsqueeze(0))
                    action = torch.argmax(q_values).item()

            reward, done = env.step(action)
            total_reward += reward
            next_state = torch.tensor([env.get_state()], dtype=torch.float32).to(device)
            memory.append((state, action, reward, next_state, done))
            state = next_state

            portfolio_values.append(env.get_portfolio_value())

            if done:
                final_portfolio_value = env.get_portfolio_value()
                if final_portfolio_value > best_portfolio_value:
                    best_portfolio_value = final_portfolio_value
                    best_portfolio_values = portfolio_values
                print(f"Episode {episode + 1}/{num_episodes}, Total Reward: {total_reward}, Portfolio Value: {final_portfolio_value}")
                break

            if training and len(memory) > batch_size:
                batch = random.sample(memory, batch_size)
                for s, a, r, s_next, d in batch:
                    q_update = r
                    if not d:
                        q_update = (r + gamma * torch.max(model(s_next.unsqueeze(0))).item())
                    q_values = model(s.unsqueeze(0))
                    q_values[0][a] = q_update
                    optimizer.zero_grad()
                    loss = criterion(model(s.unsqueeze(0)), q_values)
                    loss.backward()
                    optimizer.step()

        if training and epsilon > epsilon_min:
            epsilon *= epsilon_decay

    return model, best_portfolio_value, best_portfolio_values


### Performance Metrics Function

In [66]:
def calculate_performance_metrics(portfolio_values, risk_free_rate=0.01):
    portfolio_values = np.array(portfolio_values)

    # Cumulative Return
    initial_value = portfolio_values[0]
    final_value = portfolio_values[-1]
    cumulative_return = ((final_value - initial_value) / initial_value) * 100

    # Sharpe Ratio
    returns = np.diff(portfolio_values) / portfolio_values[:-1]
    excess_returns = returns - risk_free_rate / 252  # Convert annual risk-free rate to daily
    mean_excess_return = np.mean(excess_returns)
    std_excess_return = np.std(excess_returns)
    if std_excess_return != 0:
        sharpe_ratio = mean_excess_return / std_excess_return * np.sqrt(252)
    else:
        sharpe_ratio = np.nan  # Avoid division by zero

    # Maximum Drawdown
    peak_values = np.maximum.accumulate(portfolio_values)
    drawdowns = (peak_values - portfolio_values) / peak_values
    maximum_drawdown = drawdowns.max() * 100

    return cumulative_return, sharpe_ratio, maximum_drawdown


## Model Evaluation

----------------------------------------------------

In [94]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
results = []

input_dim = len(['Open', 'High', 'Low', 'Close', 'SMA', 'EMA', 'MACD', 'Signal Line'])
hidden_dim = 64
output_dim = 3  # Buy, Sell, Hold
learning_rate = 0.001

datasets = {
    'AMZN': amzn_df,
    'GOOGL': googl_df,
    'MSFT': msft_df
}

for name, data in datasets.items():
    # Clean the data to remove any infinities or NaNs
    data.replace([np.inf, -np.inf], np.nan, inplace=True)
    data.dropna(inplace=True)

    # Split data into training and testing sets using the function
    train_data, test_data = split_train_test(data)

    # Train the model on the training set
    train_env = TradingEnv(train_data, initial_capital=100)
    model = DQN_LSTM(input_dim, hidden_dim, output_dim).to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.MSELoss()
    model, _, _ = evaluate_dqn_lstm(train_env, model, optimizer, criterion, training=True)


    # Evaluate the model on the testing set
    test_env = TradingEnv(test_data, initial_capital=100)
    _, best_portfolio_value, best_portfolio_values = evaluate_dqn_lstm(test_env, model, optimizer, criterion, training=False)

    # Evaluate the model using the best portfolio values achieved
    final_portfolio_value = best_portfolio_value
    initial_capital = test_env.initial_capital

    total_profits = final_portfolio_value - initial_capital
    cr, sr, mdd = calculate_performance_metrics(best_portfolio_values)

    results.append({
        'Dataset': name,
        'Initial Capital': initial_capital,
        'Final Portfolio Value': final_portfolio_value,
        'Total Profits ($)': total_profits,
        'Cumulative Return (%)': cr,
        'Sharpe Ratio': sr,
        'Maximum Drawdown (%)': mdd
    })

results_df = pd.DataFrame(results)

# Display the results DataFrame
print(results_df)

Episode 1/5, Total Reward: 180.33197099999995, Portfolio Value: 280.33197099999984
Episode 2/5, Total Reward: 77.970026, Portfolio Value: 177.97002599999993
Episode 3/5, Total Reward: 81.85746899999995, Portfolio Value: 181.85746899999998
Episode 4/5, Total Reward: 21.579003999999983, Portfolio Value: 121.57900400000004
Episode 5/5, Total Reward: 112.41790699999996, Portfolio Value: 212.41790699999987
Episode 1/5, Total Reward: 27.608016000000063, Portfolio Value: 127.60801600000012
Episode 2/5, Total Reward: 29.466445999999905, Portfolio Value: 129.4664459999999
Episode 3/5, Total Reward: -13.527983999999961, Portfolio Value: 86.47201600000004
Episode 4/5, Total Reward: 48.45391100000003, Portfolio Value: 148.453911
Episode 5/5, Total Reward: -52.50999800000008, Portfolio Value: 47.49000199999992
Episode 1/5, Total Reward: 74.40878400000005, Portfolio Value: 174.40878400000014
Episode 2/5, Total Reward: 42.54997299999993, Portfolio Value: 142.54997300000002
Episode 3/5, Total Reward: 

## Hyperparameter Optimization

----------------------------------------------------

## Bayesian Optimization



In [97]:
# Bayesian Optimization
search_space = [
    Integer(32, 256, name='hidden_dim'),
    Real(1e-5, 1e-1, "log-uniform", name='learning_rate'),
    Real(0.8, 1.0, name='gamma'),
    Real(0.01, 1.0, name='epsilon_decay')
]

@use_named_args(search_space)
def objective(hidden_dim, learning_rate, gamma, epsilon_decay):
    hidden_dim = int(hidden_dim)  # Ensure hidden_dim is an integer
    model = DQN_LSTM(input_dim, hidden_dim, output_dim).to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.MSELoss()

    # Train the model on the training data
    model, _, _ = evaluate_dqn_lstm(train_env, model, optimizer, criterion, gamma=gamma, epsilon_decay=epsilon_decay, training=True)

    # Test the model on the testing data
    test_env = TradingEnv(test_data, initial_capital=100)
    _, best_portfolio_value, best_portfolio_values = evaluate_dqn_lstm(test_env, model, optimizer, criterion, gamma=gamma, epsilon_decay=epsilon_decay, training=False)

    # Calculate cumulative return
    cr, _, _ = calculate_performance_metrics(best_portfolio_values)
    return -cr

results_bo = []

for name, data in datasets.items():
    data.replace([np.inf, -np.inf], np.nan, inplace=True)
    data.dropna(inplace=True)

    train_data, test_data = split_train_test(data)
    train_env = TradingEnv(train_data, initial_capital=100)

    res = gp_minimize(objective, search_space, n_calls=10, random_state=0)
    best_params = {dim.name: val for dim, val in zip(search_space, res.x)}

    hidden_dim = int(best_params['hidden_dim'])
    learning_rate = best_params['learning_rate']
    gamma = best_params['gamma']
    epsilon_decay = best_params['epsilon_decay']

    model = DQN_LSTM(input_dim, hidden_dim, output_dim).to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.MSELoss()

    # Train the model on the training data
    model, _, _ = evaluate_dqn_lstm(train_env, model, optimizer, criterion, gamma=gamma, epsilon_decay=epsilon_decay, training=True)

    # Test the model on the testing data
    test_env = TradingEnv(test_data, initial_capital=100)
    _, best_portfolio_value, best_portfolio_values = evaluate_dqn_lstm(test_env, model, optimizer, criterion, gamma=gamma, epsilon_decay=epsilon_decay, training=False)

    final_portfolio_value = best_portfolio_value
    initial_capital = test_env.initial_capital

    total_profits = final_portfolio_value - initial_capital
    cr, sr, mdd = calculate_performance_metrics(best_portfolio_values)

    results_bo.append({
        'Dataset': name,
        'Initial Capital': initial_capital,
        'Final Portfolio Value': final_portfolio_value,
        'Total Profits ($)': total_profits,
        'Cumulative Return (%)': cr,
        'Sharpe Ratio': sr,
        'Maximum Drawdown (%)': mdd,
        'Best Params': best_params
    })

Episode 1/5, Total Reward: 49.139976000000146, Portfolio Value: 149.13997600000025
Episode 2/5, Total Reward: 46.017896000000164, Portfolio Value: 146.01789600000012
Episode 3/5, Total Reward: 64.43655100000001, Portfolio Value: 164.43655099999995
Episode 4/5, Total Reward: 6.569532000000159, Portfolio Value: 106.56953200000027
Episode 5/5, Total Reward: 52.51956899999992, Portfolio Value: 152.51956900000002
Episode 1/5, Total Reward: 0.8495100000001798, Portfolio Value: 100.84951000000018
Episode 2/5, Total Reward: -10.048981999999882, Portfolio Value: 89.95101800000012
Episode 3/5, Total Reward: -79.37297000000001, Portfolio Value: 20.62702999999992
Episode 4/5, Total Reward: 16.734978999999925, Portfolio Value: 116.73497899999992
Episode 5/5, Total Reward: -20.03351000000012, Portfolio Value: 79.96648999999988
Episode 1/5, Total Reward: 86.14263500000003, Portfolio Value: 186.14263500000004
Episode 2/5, Total Reward: 81.21907100000006, Portfolio Value: 181.21907100000013
Episode 3/5

In [98]:
print(results_bo)

[{'Dataset': 'AMZN', 'Initial Capital': 100, 'Final Portfolio Value': 134.05394299999995, 'Total Profits ($)': 34.05394299999995, 'Cumulative Return (%)': 34.05394299999995, 'Sharpe Ratio': 0.5470904595942767, 'Maximum Drawdown (%)': 25.82482732190047, 'Best Params': {'hidden_dim': 114, 'learning_rate': 0.06739390723749777, 'gamma': 0.8280701560825291, 'epsilon_decay': 0.8713863857748523}}, {'Dataset': 'GOOGL', 'Initial Capital': 100, 'Final Portfolio Value': 133.286588, 'Total Profits ($)': 33.286587999999995, 'Cumulative Return (%)': 33.286587999999995, 'Sharpe Ratio': 0.5532137797793807, 'Maximum Drawdown (%)': 25.931165183615647, 'Best Params': {'hidden_dim': 138, 'learning_rate': 0.01598243724741151, 'gamma': 0.904095495910241, 'epsilon_decay': 0.6820907348177708}}, {'Dataset': 'MSFT', 'Initial Capital': 100, 'Final Portfolio Value': 239.07992699999994, 'Total Profits ($)': 139.07992699999994, 'Cumulative Return (%)': 139.07992699999994, 'Sharpe Ratio': 1.0243577849257843, 'Maximu

## Results Analysis

----------------------------------------------------

In [100]:
# Extracting data for table display
table_data = []
for result in results_bo:
    row = [
        result['Dataset'],
        result['Initial Capital'],
        result['Final Portfolio Value'],
        result['Total Profits ($)'],
        result['Cumulative Return (%)'],
        result['Sharpe Ratio'],
        result['Maximum Drawdown (%)']
    ]
    table_data.append(row)

# Defining headers for the table
headers = [
    'Dataset',
    'Initial Capital',
    'Final Portfolio Value',
    'Total Profits ($)',
    'Cumulative Return (%)',
    'Sharpe Ratio',
    'Maximum Drawdown (%)'
]

In [101]:
# Display the combined DataFrame
print("Results without Bayesian Optimization: ")
display(results_df)

Results without Bayesian Optimization: 


Unnamed: 0,Dataset,Initial Capital,Final Portfolio Value,Total Profits ($),Cumulative Return (%),Sharpe Ratio,Maximum Drawdown (%)
0,AMZN,100,148.453911,48.453911,48.453911,0.865319,31.756736
1,GOOGL,100,128.749495,28.749495,28.749495,0.529728,20.416285
2,MSFT,100,270.989703,170.989703,170.989703,1.396928,20.278057


In [102]:
# Printing the table using tabulate
print("Results with Bayesian Optimization: ")
print(tabulate(table_data, headers=headers))

Results with Bayesian Optimization: 
Dataset      Initial Capital    Final Portfolio Value    Total Profits ($)    Cumulative Return (%)    Sharpe Ratio    Maximum Drawdown (%)
---------  -----------------  -----------------------  -------------------  -----------------------  --------------  ----------------------
AMZN                     100                  134.054              34.0539                  34.0539        0.54709                  25.8248
GOOGL                    100                  133.287              33.2866                  33.2866        0.553214                 25.9312
MSFT                     100                  239.08              139.08                   139.08          1.02436                  36.4538
