In [1]:
# Load Data
import pandas as pd

data_file_path = 'btcusd.csv'
df = pd.read_csv(data_file_path)

df.head()

Unnamed: 0,time,open,close,high,low,volume
0,1364774820000,93.25,93.3,93.3,93.25,93.3
1,1364774880000,100.0,100.0,100.0,100.0,93.3
2,1364774940000,93.3,93.3,93.3,93.3,33.676862
3,1364775060000,93.35,93.47,93.47,93.35,20.0
4,1364775120000,93.47,93.47,93.47,93.47,2.021627


In [2]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4527148 entries, 0 to 4527147
Data columns (total 6 columns):
 #   Column  Dtype  
---  ------  -----  
 0   time    int64  
 1   open    float64
 2   close   float64
 3   high    float64
 4   low     float64
 5   volume  float64
dtypes: float64(5), int64(1)
memory usage: 207.2 MB


In [3]:
# Convert timestamp to datetime

df['time']=pd.to_datetime(df['time'], unit = 'ms')
df.set_index('time', inplace=True)
df.head()

Unnamed: 0_level_0,open,close,high,low,volume
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2013-04-01 00:07:00,93.25,93.3,93.3,93.25,93.3
2013-04-01 00:08:00,100.0,100.0,100.0,100.0,93.3
2013-04-01 00:09:00,93.3,93.3,93.3,93.3,33.676862
2013-04-01 00:11:00,93.35,93.47,93.47,93.35,20.0
2013-04-01 00:12:00,93.47,93.47,93.47,93.47,2.021627


In [4]:
if df.isnull().sum().sum() > 0:
    df.fillna(method='ffill', inplace=True)  # Forward fill to handle missing values
    print("Missing values found and handled with forward fill.")
else:
    print("No missing values found.")

No missing values found.


In [5]:
import numpy as np
from sklearn.preprocessing import StandardScaler

# Feature Engineering
def compute_rsi(series, period):
    delta = series.diff()
    gain = (delta.where(delta > 0, 0)).rolling(window=period).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=period).mean()
    rs = gain / loss
    return 100 - (100 / (1 + rs))

df['MA10'] = df['close'].rolling(window=10).mean()
df['RSI'] = compute_rsi(df['close'], 14)

# Dropping rows with NaN values created by rolling functions
df.dropna(inplace=True)

# Normalization
scaler = StandardScaler()
scaled_features = scaler.fit_transform(df[['open', 'close', 'high', 'low', 'volume', 'MA10', 'RSI']])

# Create a new DataFrame with scaled features for further use
scaled_df = pd.DataFrame(scaled_features, columns=['open', 'close', 'high', 'low', 'volume', 'MA10', 'RSI'], index=df.index)

# Display the prepared data
print("Prepared DataFrame:")
print(scaled_df.head())

Prepared DataFrame:
                         open     close      high       low    volume  \
time                                                                    
2013-04-01 01:07:00 -0.877838 -0.877837 -0.877822 -0.877854 -0.167338   
2013-04-01 01:11:00 -0.877832 -0.877837 -0.877816 -0.877854 -0.189204   
2013-04-01 01:12:00 -0.877832 -0.877830 -0.877815 -0.877865 -0.075994   
2013-04-01 01:13:00 -0.877832 -0.877830 -0.877815 -0.877847 -0.245944   
2013-04-01 01:23:00 -0.877832 -0.877831 -0.877815 -0.877847  0.045250   

                         MA10       RSI  
time                                     
2013-04-01 01:07:00 -0.877843 -0.010024  
2013-04-01 01:11:00 -0.877844 -0.010024  
2013-04-01 01:12:00 -0.877841 -1.987678  
2013-04-01 01:13:00 -0.877838  0.275520  
2013-04-01 01:23:00 -0.877837  0.002714  


In [21]:
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.optimizers import Adam
from tqdm import tqdm


# Prepare the input features and the target parameters
# For this example, we'll use random target values for illustration purposes.
# In a real-world scenario, these target values should be based on historical data.
# We will assume a function `generate_ddm_parameters` to generate these values.
def generate_drift_rate(n_samples):
    np.random.seed(42)
    return np.random.uniform(-1, 1, n_samples).reshape(-1, 1)

def generate_ddm_params_ann(model, features):
    """
    Generate DDM parameters using the trained ANN model.
    
    Parameters:
    - model: The trained ANN model
    - features: The input features for the ANN
    
    Returns:
    - params: Generated DDM parameters (drift_rate, noise, upper_boundary, lower_boundary)
    """
    drift_rate = model.predict(features)
    noise = 0.1
    upper_boundary = 1.0
    lower_boundary = -1.0
    return drift_rate[0], noise, upper_boundary, lower_boundary

def ddm_next_state(current_state, drift_rate, noise, dt=1):
    """
    Computes the next state of the DDM given the current state and parameters.
    
    Parameters:
    - current_state: The current state of the decision variable
    - drift_rate: The drift rate parameter
    - noise: The noise parameter
    - dt: Time step (default is 1)
    
    Returns:
    - next_state: The next state of the decision variable
    """
    delta_state = drift_rate * dt + noise * np.random.normal()
    next_state = current_state + delta_state
    return next_state

def ddm_decision(state, upper_boundary, lower_boundary):
    """
    Makes a decision based on the current state and boundaries.
    
    Parameters:
    - state: The current state of the decision variable
    - upper_boundary: The upper boundary for making a buy decision
    - lower_boundary: The lower boundary for making a sell decision
    
    Returns:
    - decision: 'buy', 'sell', or 'hold'
    """
    if state >= upper_boundary:
        return 'buy'
    elif state <= lower_boundary:
        return 'sell'
    else:
        return 'hold'

# TensorFlow dataset
def create_dataset(data, batch_size):
    dataset = tf.data.Dataset.from_tensor_slices(data)
    dataset = dataset.shuffle(buffer_size=1024).batch(batch_size).prefetch(buffer_size=tf.data.AUTOTUNE)
    return dataset

dataset = create_dataset(scaled_df.values, batch_size=32)

# Define the ANN architecture
model = Sequential()
model.add(Input(shape=(scaled_df.shape[1],)))
model.add(Dense(6, activation='relu'))
model.add(Dense(4, activation='relu'))
model.add(Dense(1, activation='linear')) # Only predicting drift rate

# Compile the model with a dummy loss (we'll be manually calculating the return)
model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')

# Custom training loop with mixed precision and tf.function
initial_balance = 10000
epochs = 2
batch_size = 32
optimizer = Adam(learning_rate=0.001)

@tf.function
def train_step(X_batch):
    with tf.GradientTape() as tape:
        predictions = model(X_batch, training=True)
        total_return = tf.constant(0.0, dtype=tf.float32)
        
        num_predictions = tf.shape(predictions)[0]

        def body_fn(i, total_return):
            drift_rate = predictions[i]
            noise = 0.1
            upper_boundary = 1.0
            lower_boundary = -1.0
            state = tf.constant([0.0], dtype=tf.float32)
            position = tf.constant(0, dtype=tf.int32)
            entry_price = tf.constant(0.0, dtype=tf.float32)
            balance = tf.constant(initial_balance, dtype=tf.float32)

            num_batch = tf.shape(X_batch)[0]

            def inner_body_fn(j, state, balance, position, entry_price):
                row = X_batch[j]
                state = ddm_next_state(state[0], drift_rate, noise)
                state = tf.reshape(state, [1])  # Ensure state is always [1] shape
                decision = tf.cond(state[0] >= upper_boundary, lambda: tf.constant('buy'), 
                                   lambda: tf.cond(state[0] <= lower_boundary, lambda: tf.constant('sell'), 
                                                   lambda: tf.constant('hold')))

                if tf.equal(decision, 'buy') and tf.equal(position, 0):
                    position = tf.constant(1, dtype=tf.int32)
                    entry_price = tf.cast(row[1], dtype=tf.float32)  # Assuming 'close' is the second column
                elif tf.equal(decision, 'sell') and tf.equal(position, 1):
                    balance += (tf.cast(row[1], dtype=tf.float32) - entry_price) * (balance / entry_price)
                    position = tf.constant(0, dtype=tf.int32)

                return j + 1, state, balance, position, entry_price

            j = tf.constant(0)
            _, state, balance, position, entry_price = tf.while_loop(
                lambda j, state, balance, position, entry_price: tf.less(j, num_batch),
                inner_body_fn, 
                loop_vars=[j, state, balance, position, entry_price],
                shape_invariants=[j.get_shape(), tf.TensorShape([1]), balance.get_shape(), position.get_shape(), entry_price.get_shape()]
            )

            if tf.equal(position, 1):
                balance += (tf.cast(X_batch[-1][1], dtype=tf.float32) - entry_price) * (balance / entry_price)

            total_return += balance - initial_balance
            return i + 1, total_return

        i = tf.constant(0)
        _, total_return = tf.while_loop(
            lambda i, total_return: tf.less(i, num_predictions),
            body_fn,
            loop_vars=[i, total_return],
            shape_invariants=[i.get_shape(), total_return.get_shape()]
        )

        loss = -total_return
        loss = tf.convert_to_tensor(loss, dtype=tf.float32)

    grads = tape.gradient(loss, model.trainable_weights)
    optimizer.apply_gradients(zip(grads, model.trainable_weights))
    return loss

for epoch in range(epochs):
    epoch_loss = tf.constant(0.0, dtype=tf.float32)
    epoch_return = tf.constant(0.0, dtype=tf.float32)
    with tqdm(total=len(scaled_df), desc=f"Epoch {epoch + 1}/{epochs}", unit="batch") as pbar:
        for X_batch in dataset:
            loss = train_step(X_batch)
            epoch_loss += loss
            pbar.update(batch_size)

    print(f"Epoch {epoch + 1}/{epochs}, Loss: {epoch_loss.numpy()}, Total Return: {epoch_return}")


Epoch 1/2: 4526400batch [1:29:51, 425.23batch/s]                            2024-06-01 20:12:52.118794: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
Epoch 1/2: 4526400batch [1:29:51, 839.54batch/s]


Epoch 1/2, Loss: 598651776.0, Total Return: 0.0


Epoch 2/2: 100%|█████████▉| 4526304/4526373 [1:36:47<00:00, 850.34batch/s]  2024-06-01 21:49:39.571010: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
Epoch 2/2: 4526400batch [1:36:47, 779.41batch/s]                          

Epoch 2/2, Loss: 1593033984.0, Total Return: 0.0





In [24]:
from datetime import timedelta

# Function to backtest the trading strategy
def backtest_strategy(model, data, initial_balance=10000):
    balance = initial_balance
    position = 0  # 0 for no position, 1 for holding a position
    history = []
    state = 0.0  # Initial state for DDM
    
    for index, row in data.iterrows():
        features = row[['open', 'close', 'high', 'low', 'volume', 'MA10', 'RSI']].values.reshape(1, -1)
        drift_rate, noise, upper_boundary, lower_boundary = generate_ddm_params_ann(model, features)
        state = ddm_next_state(state, drift_rate, noise)
        decision = ddm_decision(state, upper_boundary, lower_boundary)
        
        if decision == 'buy' and position == 0:
            position = 1
            entry_price = row['close']
            history.append((index, balance, 'buy'))
        elif decision == 'sell' and position == 1:
            balance += (row['close'] - entry_price) * (balance / entry_price)
            position = 0
            history.append((index, balance, 'sell'))
        else:
            history.append((index, balance, 'hold'))
    
    if position == 1:  # Sell remaining position at the end of the period
        balance += (row['close'] - entry_price) * (balance / entry_price)
    
    return balance, history

# Function to backtest buy and hold strategy
def buy_and_hold_strategy(data, initial_balance=10000):
    entry_price = data.iloc[0]['close']
    final_price = data.iloc[-1]['close']
    final_balance = initial_balance * (final_price / entry_price)
    history = [(index, initial_balance * (row['close'] / entry_price)) for index, row in data.iterrows()]
    return final_balance, history

# Filter the data to include only the last three years from the last entry date
last_date = df.index[-1]
time_limit = last_date - timedelta(days=1)
filtered_df = df[df.index >= time_limit]

# Backtest the strategy using the filtered data
final_balance_strategy, history_strategy = backtest_strategy(model, filtered_df)

# Backtest the buy and hold strategy using filtered data
final_balance_bh, history_bh = buy_and_hold_strategy(filtered_df)

# Display backtest history
history_df_strategy = pd.DataFrame(history_strategy, columns=['Date', 'Balance', 'Decision'])
history_df_bh = pd.DataFrame(history_bh, columns=['Date', 'Balance'])

print("Strategy History:")
print(history_df_strategy.head())

print("Buy and Hold History:")
print(history_df_bh.head())

print(f"Final Balance: {final_balance_strategy}")
print(f"Final Balance (Buy and Hold): {final_balance_bh}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 145ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 119ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 182ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 100ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 89ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 118ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 108ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 76ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 115ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 90ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 153ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 103ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m