<a href="https://colab.research.google.com/github/UoB-DSMP-2023-24/dsmp-2024-group22/blob/main/DQNAnalysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import auth
auth.authenticate_user()

In [2]:
from google.cloud import storage
gcs = storage.Client()

In [3]:
import pandas as pd
import io

bucket_name = 'jpm-tapes'
directory_path = 'Tapes/'

bucket = gcs.get_bucket(bucket_name)

In [4]:
blobs = bucket.list_blobs(prefix=directory_path)  # Lists all the blobs in the directory

all_data_frames = []  # List to store each processed DataFrame

for blob in blobs:
    if blob.name.endswith('.csv'):  # Check if the blob is a CSV file
        # Read the content of the file
        data = blob.download_as_bytes()
        data_io = io.BytesIO(data)
        df = pd.read_csv(data_io, names=['Time', 'Price', 'Quantity'])

        # Convert 'Time' to a datetime format (assuming it's in seconds from the start of the day)
        df['DateTime'] = pd.to_datetime(df['Time'], unit='s', origin=pd.Timestamp('2025-01-01'))
        df.set_index('DateTime', inplace=True)

        # Resample and calculate OHLCV
        ohlc = df['Price'].resample('1T').ohlc()
        ohlc['Volume'] = df['Quantity'].resample('1T').sum()

        # Append the processed DataFrame to the list
        all_data_frames.append(ohlc)

# Concatenate all DataFrames into a single DataFrame
final_data_frame = pd.concat(all_data_frames)

# Display the result
print(final_data_frame.head())
print()
print(final_data_frame.tail())
print()
print(len(final_data_frame))
# Renaming the column properly if necessary
final_data_frame.columns = ['Open', 'High', 'Low', 'Close', 'Volume']


                     open  high  low  close  Volume
DateTime                                           
2025-01-01 00:00:00   267   270  252    261      68
2025-01-01 00:01:00   259   267  254    265     106
2025-01-01 00:02:00   261   269  250    266      89
2025-01-01 00:03:00   269   269  258    261      86
2025-01-01 00:04:00   261   270  258    269      78

                     open  high  low  close  Volume
DateTime                                           
2025-01-01 08:25:00   108   112  101    105     108
2025-01-01 08:26:00   108   112  104    107     112
2025-01-01 08:27:00   112   113   98     98      98
2025-01-01 08:28:00    98   112   98    109     100
2025-01-01 08:29:00   107   112  103    107     112

63750


In [5]:
!pip install tensorflow keras gym

import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM
from tensorflow.keras.optimizers import Adam
import random
from collections import deque




In [6]:
class TradingEnvironment:
    def __init__(self, data, initial_balance=10000):
        self.data = data
        self.initial_balance = initial_balance
        self.reset()

    def _get_state(self):
        frame = self.data.iloc[self.current_step]
        return np.array([frame['Open'], frame['High'], frame['Low'], frame['Close'], frame['Volume'], self.shares_held])

    def step(self, action):
        next_step = self.current_step + 1
        self.current_step = 0 if next_step >= len(self.data) else next_step
        frame = self.data.iloc[self.current_step]
        reward = 0

        if action == 1 and self.balance >= frame['Open']:  # Buy
            self.shares_held += 1
            self.balance -= frame['Open']
            reward = -1

        elif action == 2 and self.shares_held > 0:  # Sell
            self.shares_held -= 1
            self.balance += frame['Open']
            reward = 1

        next_state = self._get_state()
        return next_state, reward, self.current_step == len(self.data) - 1

    def reset(self):
        self.balance = self.initial_balance
        self.shares_held = 0
        self.current_step = 0
        return self._get_state()

In [7]:
model = Sequential()
model.add(Dense(64, activation='relu', input_dim=6))
model.add(Dense(32, activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(Dense(3, activation='linear'))
model.compile(loss='mse', optimizer=Adam(lr=0.001))



In [8]:
def train_dqn(episodes=100):
    env = TradingEnvironment(data=final_data_frame)
    discount_factor = 0.95  # Discount factor for future rewards

    for e in range(episodes):
        state = env.reset()
        state = np.reshape(state, [1, 6])
        total_profit = 0  # Initialize profit tracking for the episode
        actions_count = {'hold': 0, 'buy': 0, 'sell': 0}  # Initialize action count

        for time in range(200):  # Limit the number of timesteps
            action = np.argmax(model.predict(state)[0])  # Choose the action with the highest Q-value prediction
            actions_count[['hold', 'buy', 'sell'][action]] += 1  # Update action count

            next_state, reward, done = env.step(action)
            total_profit += reward  # Accumulate profit
            next_state = np.reshape(next_state, [1, 6])

            # Predict future Q-values from next state
            future_Q = model.predict(next_state)[0]
            max_future_Q = np.max(future_Q)  # Max Q-value for the next state

            # Calculate the target Q-value
            target_Q = model.predict(state)  # Current predicted Q-values
            target_Q[0][action] = reward + discount_factor * max_future_Q  # Update Q-value for the action taken

            # Fit the model (train the network)
            model.fit(state, target_Q, epochs=1, verbose=0)

            state = next_state
            if done:
                break  # End the episode if done

        # Output a checkpoint at the end of every episode
        print(f"Checkpoint at episode {e+1}, recent profit: {total_profit}, Actions Taken: {actions_count}")

train_dqn()


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Checkpoint at episode 92, recent profit: 0, Actions Taken: {'hold': 0, 'buy': 0, 'sell': 200}
Checkpoint at episode 93, recent profit: 0, Actions Taken: {'hold': 0, 'buy': 0, 'sell': 200}
Checkpoint at episode 94, recent profit: 0, Actions Taken: {'hold': 0, 'buy': 0, 'sell': 200}
Checkpoint at episode 95, recent profit: 0, Actions Taken: {'hold': 0, 'buy': 0, 'sell': 200}
Checkpoint at episode 96, recent profit: 0, Actions Taken: {'hold': 0, 'buy': 0, 'sell': 200}
Checkpoint at episode 97, recent profit: 0, Actions Taken: {'hold': 0, 'buy': 0, 'sell': 200}
Checkpoint at episode 98, recent profit: 0, Actions Taken: {'hold': 0, 'buy': 0, 'sell': 200}
Checkpoint at episode 99, recent profit: 0, Actions Taken: {'hold': 0, 'buy': 0, 'sell': 200}
Checkpoint at episode 100, recent profit: 0, Actions Taken: {'hold': 0, 'buy': 0, 'sell': 200}
