In [3]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers
from collections import deque
import random
import numpy as np
import multiprocessing

# Load data
train_df = pd.read_excel('ftw1002023.xlsx')
test_df = pd.read_excel('ft100test.xlsx')

# Normalize data
scaler = MinMaxScaler()
train_data = scaler.fit_transform(train_df[['Close', 'Open', 'Low', 'High', 'Volume', 'Turnover - GBP', 'Flow']])
test_data = scaler.transform(test_df[['Close', 'Open', 'Low', 'High', 'Volume', 'Turnover - GBP', 'Flow']])

# Set TensorFlow to use all available CPU cores
num_cpus = multiprocessing.cpu_count()
tf.config.threading.set_intra_op_parallelism_threads(num_cpus)
tf.config.threading.set_inter_op_parallelism_threads(num_cpus)


In [4]:
# Define the Actor Network
def create_actor(state_size, action_size, hidden_size):
    model = models.Sequential([
        layers.Input(shape=(state_size,)),
        layers.Dense(hidden_size, activation='relu'),
        layers.Dense(hidden_size, activation='relu'),
        layers.Dense(action_size, activation='tanh')
    ])
    return model

# Define the Critic Network
def create_critic(state_size, action_size, hidden_size):
    state_input = layers.Input(shape=(state_size,))
    action_input = layers.Input(shape=(action_size,))
    concat = layers.Concatenate()([state_input, action_input])
    
    dense = layers.Dense(hidden_size, activation='relu')(concat)
    dense = layers.Dense(hidden_size, activation='relu')(dense)
    output = layers.Dense(1)(dense)
    
    model = models.Model(inputs=[state_input, action_input], outputs=output)
    return model

# Initialize parameters
state_size = train_data.shape[1]
action_size = 1  # Buy, Sell, Hold
hidden_size = 64
actor_lr = 1e-4
critic_lr = 1e-3
gamma = 0.99
tau = 0.001
buffer_size = 1000000
batch_size = 64

# Initialize actor and critic networks
actor = create_actor(state_size, action_size, hidden_size)
critic = create_critic(state_size, action_size, hidden_size)

# Initialize target networks
target_actor = create_actor(state_size, action_size, hidden_size)
target_critic = create_critic(state_size, action_size, hidden_size)
target_actor.set_weights(actor.get_weights())
target_critic.set_weights(critic.get_weights())

# Define optimizers
actor_optimizer = optimizers.Adam(learning_rate=actor_lr)
critic_optimizer = optimizers.Adam(learning_rate=critic_lr)


In [5]:
class ReplayBuffer:
    def __init__(self, buffer_size, batch_size):
        self.buffer_size = buffer_size
        self.batch_size = batch_size
        self.buffer = deque(maxlen=buffer_size)
    
    def add(self, state, action, reward, next_state, done):
        experience = (state, action, reward, next_state, done)
        self.buffer.append(experience)
    
    def sample(self):
        experiences = random.sample(self.buffer, k=self.batch_size)
        states, actions, rewards, next_states, dones = zip(*experiences)
        
        states = np.array(states)
        actions = np.array(actions)
        rewards = np.array(rewards).reshape(-1, 1)
        next_states = np.array(next_states)
        dones = np.array(dones).reshape(-1, 1)
        
        return states, actions, rewards, next_states, dones
    
    def __len__(self):
        return len(self.buffer)

# Initialize replay buffer
replay_buffer = ReplayBuffer(buffer_size, batch_size)


In [6]:
# Utility function to update target networks
def soft_update(local_model, target_model, tau):
    local_weights = np.array(local_model.get_weights())
    target_weights = np.array(target_model.get_weights())
    new_weights = tau * local_weights + (1.0 - tau) * target_weights
    target_model.set_weights(new_weights)


In [7]:
def train_ddpg_agent(train_data, num_episodes=100):
    for episode in range(num_episodes):
        state = train_data[0]  # Initial state
        state = np.reshape(state, [1, state_size])
        
        for t in range(1, len(train_data)):
            state_tensor = tf.convert_to_tensor(state, dtype=tf.float32)
            action = actor(state_tensor)
            action = action.numpy()
            action = np.clip(action, -1, 1)
            
            next_state = train_data[t]
            next_state = np.reshape(next_state, [1, state_size])
            
            reward = next_state[0, 0] - state[0, 0]
            done = t == (len(train_data) - 1)
            
            replay_buffer.add(state, action, reward, next_state, done)
            
            state = next_state
            
            if len(replay_buffer) > batch_size:
                experiences = replay_buffer.sample()
                states, actions, rewards, next_states, dones = experiences
                
                # Ensure states and next_states are 2D tensors with shape (batch_size, state_size)
                states = np.reshape(states, (batch_size, state_size))
                next_states = np.reshape(next_states, (batch_size, state_size))
                
                # Ensure actions are 2D tensors with shape (batch_size, action_size)
                actions = np.reshape(actions, (batch_size, action_size))
                
                # Convert to tensors
                states = tf.convert_to_tensor(states, dtype=tf.float32)
                next_states = tf.convert_to_tensor(next_states, dtype=tf.float32)
                actions = tf.convert_to_tensor(actions, dtype=tf.float32)
                rewards = tf.convert_to_tensor(rewards, dtype=tf.float32)
                dones = tf.convert_to_tensor(dones, dtype=tf.float32)
                
                next_actions = target_actor(next_states)
                
                # Compute the target Q values
                target_q_values = target_critic([next_states, next_actions])
                target_q_values = rewards + (gamma * target_q_values * (1 - dones))
                
                with tf.GradientTape() as tape:
                    expected_q_values = critic([states, actions])
                    critic_loss = tf.reduce_mean(tf.square(expected_q_values - target_q_values))
                critic_grads = tape.gradient(critic_loss, critic.trainable_variables)
                critic_optimizer.apply_gradients(zip(critic_grads, critic.trainable_variables))
                
                with tf.GradientTape() as tape:
                    predicted_actions = actor(states)
                    actor_loss = -tf.reduce_mean(critic([states, predicted_actions]))
                actor_grads = tape.gradient(actor_loss, actor.trainable_variables)
                actor_optimizer.apply_gradients(zip(actor_grads, actor.trainable_variables))
                
                soft_update(actor, target_actor, tau)
                soft_update(critic, target_critic, tau)
        
        if episode % 10 == 0:
            print(f'Episode {episode}/{num_episodes}, Actor Loss: {actor_loss.numpy()}, Critic Loss: {critic_loss.numpy()}')

# Train the DDPG agent on the training data
train_ddpg_agent(train_data, num_episodes=100)


  local_weights = np.array(local_model.get_weights())
  target_weights = np.array(target_model.get_weights())


Episode 0/100, Actor Loss: -0.12531980872154236, Critic Loss: 0.000376245123334229
Episode 10/100, Actor Loss: -0.12905961275100708, Critic Loss: 0.0007286440813913941
Episode 20/100, Actor Loss: -0.10035770386457443, Critic Loss: 0.0002812808961607516
Episode 30/100, Actor Loss: -0.08590079098939896, Critic Loss: 0.00015672999143134803
Episode 40/100, Actor Loss: -0.060856305062770844, Critic Loss: 0.00020405274699442089
Episode 50/100, Actor Loss: -0.05560988187789917, Critic Loss: 0.0001009659463306889
Episode 60/100, Actor Loss: -0.04375579208135605, Critic Loss: 0.00011749286932172254
Episode 70/100, Actor Loss: -0.02192344143986702, Critic Loss: 8.865556446835399e-05
Episode 80/100, Actor Loss: -0.018577896058559418, Critic Loss: 0.0001637219829717651
Episode 90/100, Actor Loss: -0.021822737529873848, Critic Loss: 0.0001445497910026461


In [8]:
# Backtest the DDPG agent
def backtest_ddpg_agent(test_data, actor):
    state = test_data[0]
    state = np.reshape(state, [1, state_size])
    total_reward = 0
    portfolio_value = 1  # Starting with an initial portfolio value of 1

    for t in range(1, len(test_data)):
        state_tensor = tf.convert_to_tensor(state, dtype=tf.float32)
        action = actor(state_tensor)
        action = action.numpy()
        action = np.clip(action, -1, 1)

        next_state = test_data[t]
        next_state = np.reshape(next_state, [1, state_size])

        # Simulate trading: Buy (1), Sell (-1), or Hold (0)
        reward = (next_state[0, 0] - state[0, 0]) * action[0, 0]  # Simplified reward: change in 'Close' price
        total_reward += reward
        portfolio_value *= (1 + reward)  # Update portfolio value

        state = next_state

    return total_reward, portfolio_value

total_reward, portfolio_value = backtest_ddpg_agent(test_data, actor)
print(f"Total Reward: {total_reward}, Final Portfolio Value: {portfolio_value}")

# Evaluate buy-and-hold strategy
def buy_and_hold(test_data):
    initial_price = test_data[0, 0]
    final_price = test_data[-1, 0]
    return final_price / initial_price

buy_and_hold_return = buy_and_hold(test_data)
print(f"Buy and Hold Return: {buy_and_hold_return}")

# Compare the two strategies
print(f"DDPG Strategy Final Portfolio Value: {portfolio_value}")
print(f"Buy and Hold Strategy Final Portfolio Value: {buy_and_hold_return}")


Total Reward: 0.1633237438053663, Final Portfolio Value: 1.1698804939428415
Buy and Hold Return: 0.8505684595451913
DDPG Strategy Final Portfolio Value: 1.1698804939428415
Buy and Hold Strategy Final Portfolio Value: 0.8505684595451913
