<a href="https://colab.research.google.com/github/UoB-DSMP-2023-24/dsmp-2024-group22/blob/main/DQNAnalysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
from google.colab import auth
auth.authenticate_user()

In [11]:
from google.cloud import storage
gcs = storage.Client()

In [12]:
import pandas as pd
import io

bucket_name = 'jpm-tapes'
directory_path = 'Tapes/'

bucket = gcs.get_bucket(bucket_name)

In [13]:
blobs = bucket.list_blobs(prefix=directory_path)

all_data_frames = []

for blob in blobs:
    if blob.name.endswith('.csv'):
        data = blob.download_as_bytes()
        data_io = io.BytesIO(data)
        df = pd.read_csv(data_io, names=['Time', 'Price', 'Quantity'])

        df['DateTime'] = pd.to_datetime(df['Time'], unit='s', origin=pd.Timestamp('2025-01-01'))
        df.set_index('DateTime', inplace=True)

        ohlc = df['Price'].resample('1T').ohlc()
        ohlc['Volume'] = df['Quantity'].resample('1T').sum()

        all_data_frames.append(ohlc)

final_data_frame = pd.concat(all_data_frames)

print(final_data_frame.head())

final_data_frame.columns = ['Open', 'High', 'Low', 'Close', 'Volume']


                     open  high  low  close  Volume
DateTime                                           
2025-01-01 00:00:00   267   270  252    261      68
2025-01-01 00:01:00   259   267  254    265     106
2025-01-01 00:02:00   261   269  250    266      89
2025-01-01 00:03:00   269   269  258    261      86
2025-01-01 00:04:00   261   270  258    269      78


In [14]:
!pip install tensorflow keras gym

import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM
from tensorflow.keras.optimizers import Adam
import random
from collections import deque




In [15]:
class TradingEnvironment:
    def __init__(self, data, initial_balance=10000):
        self.data = data
        self.initial_balance = initial_balance
        self.reset()

    def _get_state(self):
        frame = self.data.iloc[self.current_step]
        return np.array([frame['Open'], frame['High'], frame['Low'], frame['Close'], frame['Volume'], self.shares_held])

    def step(self, action):
        next_step = self.current_step + 1
        self.current_step = 0 if next_step >= len(self.data) else next_step
        frame = self.data.iloc[self.current_step]
        reward = 0

        if action == 1 and self.balance >= frame['Open']:  # Buy
            self.shares_held += 1
            self.balance -= frame['Open']
            reward = -1

        elif action == 2 and self.shares_held > 0:  # Sell
            self.shares_held -= 1
            self.balance += frame['Open']
            reward = 1

        next_state = self._get_state()
        return next_state, reward, self.current_step == len(self.data) - 1

    def reset(self):
        self.balance = self.initial_balance
        self.shares_held = 0
        self.current_step = 0
        return self._get_state()

In [16]:
model = Sequential()
model.add(Dense(64, activation='relu', input_dim=6))
model.add(Dense(32, activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(Dense(3, activation='linear'))
model.compile(loss='mse', optimizer=Adam(lr=0.001))



In [17]:
def train_dqn(episodes=10):
    env = TradingEnvironment(data=final_data_frame)
    discount_factor = 0.95

    for e in range(episodes):
        state = env.reset()
        state = np.reshape(state, [1, 6])
        total_profit = 0
        actions_count = {'hold': 0, 'buy': 0, 'sell': 0}

        for time in range(20):
            action = np.argmax(model.predict(state)[0])
            actions_count[['hold', 'buy', 'sell'][action]] += 1

            next_state, reward, done = env.step(action)
            total_profit += reward
            next_state = np.reshape(next_state, [1, 6])

            future_Q = model.predict(next_state)[0]
            max_future_Q = np.max(future_Q)

            target_Q = model.predict(state)
            target_Q[0][action] = reward + discount_factor * max_future_Q

            model.fit(state, target_Q, epochs=1, verbose=0)

            state = next_state
            if done:
                break

        print(f"Checkpoint at episode {e+1}, recent profit: {total_profit}, Actions Taken: {actions_count}")

train_dqn()


Checkpoint at episode 1, recent profit: 0, Actions Taken: {'hold': 0, 'buy': 2, 'sell': 18}
Checkpoint at episode 2, recent profit: 0, Actions Taken: {'hold': 0, 'buy': 1, 'sell': 19}
Checkpoint at episode 3, recent profit: 0, Actions Taken: {'hold': 0, 'buy': 1, 'sell': 19}
Checkpoint at episode 4, recent profit: 0, Actions Taken: {'hold': 0, 'buy': 0, 'sell': 20}
Checkpoint at episode 5, recent profit: 0, Actions Taken: {'hold': 0, 'buy': 1, 'sell': 19}
Checkpoint at episode 6, recent profit: 0, Actions Taken: {'hold': 10, 'buy': 0, 'sell': 10}
Checkpoint at episode 7, recent profit: 0, Actions Taken: {'hold': 0, 'buy': 0, 'sell': 20}
Checkpoint at episode 8, recent profit: 0, Actions Taken: {'hold': 12, 'buy': 0, 'sell': 8}
Checkpoint at episode 9, recent profit: 0, Actions Taken: {'hold': 20, 'buy': 0, 'sell': 0}
Checkpoint at episode 10, recent profit: 0, Actions Taken: {'hold': 20, 'buy': 0, 'sell': 0}
