In [1]:
# Load Data
import pandas as pd

# Plain relative path with no backslash separator, so it resolves the same way on
# Windows, Linux and macOS (a '.\\' prefix is only a path separator on Windows).
data_file_path = 'btcusd.csv'
df = pd.read_csv(data_file_path)

# Preview the first rows to sanity-check the parse
df.head()

Unnamed: 0,time,open,close,high,low,volume
0,1364774820000,93.25,93.3,93.3,93.25,93.3
1,1364774880000,100.0,100.0,100.0,100.0,93.3
2,1364774940000,93.3,93.3,93.3,93.3,33.676862
3,1364775060000,93.35,93.47,93.47,93.35,20.0
4,1364775120000,93.47,93.47,93.47,93.47,2.021627


In [2]:
# Summarise the loaded frame: index range, column dtypes and memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4527148 entries, 0 to 4527147
Data columns (total 6 columns):
 #   Column  Dtype  
---  ------  -----  
 0   time    int64  
 1   open    float64
 2   close   float64
 3   high    float64
 4   low     float64
 5   volume  float64
dtypes: float64(5), int64(1)
memory usage: 207.2 MB


In [3]:
# Convert timestamp to datetime

# Guarded so the cell can safely be re-run: once 'time' has become the index it is
# no longer a column, and an unguarded conversion/set_index would raise KeyError.
if 'time' in df.columns:
    # 'time' is interpreted as epoch milliseconds
    df['time'] = pd.to_datetime(df['time'], unit='ms')
    df = df.set_index('time')
df.head()

Unnamed: 0_level_0,open,close,high,low,volume
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2013-04-01 00:07:00,93.25,93.3,93.3,93.25,93.3
2013-04-01 00:08:00,100.0,100.0,100.0,100.0,93.3
2013-04-01 00:09:00,93.3,93.3,93.3,93.3,33.676862
2013-04-01 00:11:00,93.35,93.47,93.47,93.35,20.0
2013-04-01 00:12:00,93.47,93.47,93.47,93.47,2.021627


In [4]:
# Forward-fill gaps when any cell is missing.
# DataFrame.ffill is used instead of fillna(method='ffill'): the `method`
# argument of fillna is deprecated in pandas >= 2.1.
if df.isnull().values.any():
    df.ffill(inplace=True)  # Forward fill to handle missing values
    print("Missing values found and handled with forward fill.")
else:
    print("No missing values found.")

No missing values found.


In [5]:
import numpy as np
from sklearn.preprocessing import StandardScaler

# Feature Engineering
def compute_rsi(series, period):
    """
    Relative Strength Index based on simple rolling means of gains and losses.

    Parameters:
    - series: Price series the index is computed from
    - period: Rolling window length

    Returns:
    - A series of RSI values. Positions without a full window are NaN, and so
      are windows with neither gains nor losses (0 / 0).
    """
    change = series.diff()

    # Split each step into its upward and downward part (the other part is 0)
    upward = change.where(change > 0, 0)
    downward = -change.where(change < 0, 0)

    avg_gain = upward.rolling(window=period).mean()
    avg_loss = downward.rolling(window=period).mean()

    relative_strength = avg_gain / avg_loss
    return 100 - (100 / (1 + relative_strength))

# Technical indicators derived from the close price
df['MA10'] = df['close'].rolling(window=10).mean()
df['RSI'] = compute_rsi(df['close'], 14)

# Discard every row containing NaN (e.g. the warm-up rows of the rolling windows)
df.dropna(inplace=True)

# Normalization
# NOTE(review): the scaler is fitted on all rows, before any train/test split.
feature_columns = ['open', 'close', 'high', 'low', 'volume', 'MA10', 'RSI']
scaler = StandardScaler()
scaled_features = scaler.fit_transform(df[feature_columns])

# Wrap the scaled array in a DataFrame that keeps df's index and column names
scaled_df = pd.DataFrame(scaled_features, columns=feature_columns, index=df.index)

# Display the prepared data
print("Prepared DataFrame:")
print(scaled_df.head())

Prepared DataFrame:
                         open     close      high       low    volume  \
time                                                                    
2013-04-01 01:07:00 -0.877838 -0.877837 -0.877822 -0.877854 -0.167338   
2013-04-01 01:11:00 -0.877832 -0.877837 -0.877816 -0.877854 -0.189204   
2013-04-01 01:12:00 -0.877832 -0.877830 -0.877815 -0.877865 -0.075994   
2013-04-01 01:13:00 -0.877832 -0.877830 -0.877815 -0.877847 -0.245944   
2013-04-01 01:23:00 -0.877832 -0.877831 -0.877815 -0.877847  0.045250   

                         MA10       RSI  
time                                     
2013-04-01 01:07:00 -0.877843 -0.010024  
2013-04-01 01:11:00 -0.877844 -0.010024  
2013-04-01 01:12:00 -0.877841 -1.987678  
2013-04-01 01:13:00 -0.877838  0.275520  
2013-04-01 01:23:00 -0.877837  0.002714  


In [8]:
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Prepare the input features and the target parameters
# For this example, we'll use random target values for illustration purposes.
# In a real-world scenario, these target values should be derived from historical data.
# The helper `generate_drift_rate` below produces these placeholder values.
def generate_drift_rate(n_samples, seed=42):
    """
    Generate placeholder drift-rate targets drawn uniformly from [-1, 1).

    Parameters:
    - n_samples: Number of target values to generate
    - seed: Seed of the private random generator (default is 42)

    Returns:
    - A NumPy array of shape (n_samples, 1)
    """
    # A private RandomState produces the same values as np.random.seed(seed)
    # followed by np.random.uniform, but leaves the global NumPy RNG state
    # untouched, so other random draws are not silently reset by this call.
    rng = np.random.RandomState(seed)
    return rng.uniform(-1, 1, n_samples).reshape(-1, 1)

# Generate target DDM parameters for illustration (one random target per feature row)
n_samples = scaled_df.shape[0]
drift_rates = generate_drift_rate(n_samples)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(scaled_df, drift_rates, test_size=0.2, random_state=42)

# Define the ANN architecture.
# The input shape is declared with an explicit Input layer: passing `input_dim`
# to a Dense layer of a Sequential model is deprecated in Keras 3 and emits a
# UserWarning.
model = Sequential()
model.add(tf.keras.Input(shape=(X_train.shape[1],)))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='linear'))  # Only predicting drift rate

# Compile the model
model.compile(loss='mse', optimizer='adam')

# Train the model (20% of the training rows are held out for validation)
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

# Evaluate the model on the held-out test set
loss = model.evaluate(X_test, y_test)
print(f'Model Loss: {loss}')

# Make predictions
predictions = model.predict(X_test)
print("Sample Predictions:")
print(predictions[:5])

# Display the first few predictions along with actual values for comparison
comparison_df = pd.DataFrame({
    'Actual Drift Rate': y_test[:, 0], 
    'Predicted Drift Rate': predictions[:, 0]
})

print(comparison_df.head())

Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m90528/90528[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 737us/step - loss: 0.1320 - val_loss: 0.1301
Epoch 2/2
[1m90528/90528[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 766us/step - loss: 0.1303 - val_loss: 0.1302
[1m28290/28290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 538us/step - loss: 0.1303
Model Loss: 0.1302645355463028
[1m28290/28290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 509us/step
Sample Predictions:
[[ 0.01382258  0.23986262  0.994179   -0.99952185]
 [ 0.01382258  0.23986262  0.994179   -0.99952185]
 [ 0.01382258  0.23986262  0.994179   -0.99952185]
 [ 0.01382258  0.23986262  0.994179   -0.99952185]
 [ 0.01382258  0.23986262  0.994179   -0.99952185]]
   Actual Drift Rate  Predicted Drift Rate  Actual Noise  Predicted Noise  \
0          -0.309330              0.013823      0.103221         0.239863   
1          -0.198169              0.013823      0.453866         0.239863   
2          -0.626399              0.01382

In [None]:
from datetime import timedelta

def generate_ddm_params_ann(model, features):
    """
    Build the DDM parameter tuple for one step from the ANN's prediction.

    Only the drift rate comes from the model; noise and both boundaries are
    fixed constants.

    Parameters:
    - model: The trained ANN model (anything exposing `predict`)
    - features: The input features passed to `model.predict`

    Returns:
    - (drift_rate, noise, upper_boundary, lower_boundary), where drift_rate is
      the first row of the model's prediction
    """
    predicted = model.predict(features)

    # Constant diffusion noise and symmetric decision boundaries
    fixed_noise, upper, lower = 0.1, 1.0, -1.0

    return predicted[0], fixed_noise, upper, lower

def ddm_next_state(current_state, drift_rate, noise, dt=1):
    """
    Computes the next state of the DDM given the current state and parameters.

    One Euler-Maruyama step of the diffusion process: the drift contribution
    scales with dt and the noise contribution with sqrt(dt).

    Parameters:
    - current_state: The current state of the decision variable
    - drift_rate: The drift rate parameter
    - noise: The noise parameter (standard deviation per unit time)
    - dt: Time step (default is 1)

    Returns:
    - next_state: The next state of the decision variable
    """
    # The random increment is scaled by sqrt(dt) so its variance grows linearly
    # with the step size. With the default dt=1 the factor is exactly 1, so the
    # result is unchanged for existing callers.
    delta_state = drift_rate * dt + noise * (dt ** 0.5) * np.random.normal()
    next_state = current_state + delta_state
    return next_state

def ddm_decision(state, upper_boundary, lower_boundary):
    """
    Map the decision variable onto a trading action.

    Parameters:
    - state: The current state of the decision variable
    - upper_boundary: Threshold at or above which the action is 'buy'
    - lower_boundary: Threshold at or below which the action is 'sell'

    Returns:
    - decision: 'buy', 'sell', or 'hold' (state strictly between the boundaries)
    """
    # The upper boundary is checked first, as a guard clause
    if state >= upper_boundary:
        return 'buy'
    if state <= lower_boundary:
        return 'sell'
    return 'hold'

# Function to backtest the trading strategy
def backtest_strategy(model, data, initial_balance=10000, features=None):
    """
    Backtest the DDM-driven strategy over `data`, one row at a time.

    Parameters:
    - model: The trained ANN model used to predict the drift rate
    - data: DataFrame whose 'close' column holds the raw prices used for trade
      accounting; when `features` is None it must also hold the model input columns
    - initial_balance: Starting balance (default is 10000)
    - features: Optional DataFrame of model inputs, row-aligned with `data`.
      Pass the scaled features here when the model was trained on scaled data,
      so prices stay raw while the model sees inputs on its training scale.

    Returns:
    - balance: Final balance, after closing any open position at the last close
    - history: List of (index, balance, action) tuples, one per row of `data`

    Raises:
    - ValueError: If `features` does not have the same number of rows as `data`
    """
    feature_columns = ['open', 'close', 'high', 'low', 'volume', 'MA10', 'RSI']
    if features is None:
        features = data  # backward-compatible default: inputs taken from `data`
    if len(features) != len(data):
        raise ValueError("features must have the same number of rows as data")

    # Extract the arrays once instead of rebuilding a Series for every row
    feature_matrix = features[feature_columns].to_numpy()
    close_prices = data['close'].to_numpy()

    balance = initial_balance
    position = 0  # 0 for no position, 1 for holding a position
    entry_price = None  # set when a position is opened
    history = []
    state = 0.0  # Initial state for DDM

    for i, index in enumerate(data.index):
        drift_rate, noise, upper_boundary, lower_boundary = generate_ddm_params_ann(
            model, feature_matrix[i].reshape(1, -1)
        )
        # The prediction arrives as an array; keep the DDM state a plain scalar
        drift_rate = float(np.ravel(drift_rate)[0])
        state = ddm_next_state(state, drift_rate, noise)
        decision = ddm_decision(state, upper_boundary, lower_boundary)
        price = close_prices[i]

        if decision == 'buy' and position == 0:
            position = 1
            entry_price = price
            history.append((index, balance, 'buy'))
        elif decision == 'sell' and position == 1:
            # Whole balance was invested at entry_price; realise the gain/loss
            balance += (price - entry_price) * (balance / entry_price)
            position = 0
            history.append((index, balance, 'sell'))
        else:
            history.append((index, balance, 'hold'))

    if position == 1:  # Sell remaining position at the end of the period
        balance += (close_prices[-1] - entry_price) * (balance / entry_price)

    return balance, history

# Filter the data to include only the last 10 days before the last entry date
last_date = df.index[-1]
time_limit = last_date - timedelta(days=10)
filtered_df = df[df.index >= time_limit]
# scaled_df was built with df's index, so the same cut-off selects the matching
# scaled rows. The model was trained on scaled_df and must be fed scaled inputs.
filtered_features = scaled_df[scaled_df.index >= time_limit]

# Backtest the strategy: raw prices for accounting, scaled features for the model
final_balance, history = backtest_strategy(model, filtered_df, features=filtered_features)
print(f"Final Balance: {final_balance}")

# Display the first few entries of the backtest history
history_df = pd.DataFrame(history, columns=['Date', 'Balance', 'Decision'])
print(history_df.head())