In [8]:
import yfinance as yf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from gpflow.kernels import Matern32, Kernel
from gpflow.models import GPR
from gpflow import set_trainable
from sklearn.preprocessing import StandardScaler
import vectorbt as vbt
from datetime import datetime, timedelta
import ruptures as rpt
from scipy.signal import find_peaks
from sklearn.model_selection import ParameterGrid

class ChangePointKernel(Kernel):
    def __init__(self, base_kernel, changepoints):
        super().__init__()
        self.base_kernel = base_kernel
        self.changepoints = changepoints

    def get_region(self, X):
        cp = tf.constant(self.changepoints, dtype=X.dtype)
        regions = tf.searchsorted(cp, X[:, 0], side='right')
        return regions

    def K(self, X, X2=None):
        if X2 is None:
            X2 = X
        regions_X = self.get_region(X)
        regions_X2 = self.get_region(X2)
        regions_equal = tf.cast(tf.equal(tf.expand_dims(regions_X, 1), tf.expand_dims(regions_X2, 0)), X.dtype)
        base_cov = self.base_kernel.K(X, X2)
        return base_cov * regions_equal

    def K_diag(self, X):
        return self.base_kernel.K_diag(X)

def fetch_and_process_data(
    ticker="BTC-USD",
    start_date=(datetime.now() - timedelta(days=365)).strftime('%Y-%m-%d'),
    end_date=datetime.now().strftime('%Y-%m-%d')
):
    data = yf.download(ticker, start=start_date, end=end_date)
    if data.empty:
        raise ValueError(f"No data found for {ticker} between {start_date} and {end_date}")
    if "Adj Close" in data.columns:
        price_series = data["Adj Close"].copy()
    else:
        price_series = data["Close"].copy()
    if isinstance(price_series, pd.DataFrame):
        price_series = price_series.squeeze()
    price_series.name = ticker
    returns = price_series.pct_change().dropna()
    scaler = StandardScaler()
    std_returns = scaler.fit_transform(returns.values.reshape(-1, 1))
    valid_index = returns.index
    return price_series.loc[valid_index], pd.Series(
        std_returns.flatten(), index=valid_index, name="Standardized Returns"
    )

def detect_changepoints(returns, lookback_window=21):
    algo = rpt.Pelt(model="rbf").fit(returns.values)
    result = algo.predict(pen=lookback_window)
    return result

def generate_signals(returns, changepoints, train_ratio=0.8, epochs=50, lookback_window=30, dropout_rate=0.2, lstm_hidden_units=64, learning_rate=0.001, batch_size=32):
    # Prepare features using time index
    X = np.arange(len(returns), dtype=np.float64).reshape(-1, 1)
    y = returns.values.reshape(-1, 1)
    # Gaussian Process Regression with changepoint kernel for trend extraction
    base_kernel = Matern32()
    kernel = ChangePointKernel(base_kernel, changepoints)
    gpr = GPR(data=(X, y), kernel=kernel)
    set_trainable(gpr.likelihood.variance, False)
    trend = gpr.predict_f(X)[0].numpy().flatten()
    # Combine returns and trend to form features
    features = np.hstack([returns.values.reshape(-1, 1), trend.reshape(-1, 1)])
    split_index = int(len(features) * train_ratio)
    train_features = features[:split_index]
    train_labels = returns.values[:split_index]
    train_features = train_features.reshape((train_features.shape[0], 1, train_features.shape[1]))
    # Build LSTM model
    model = Sequential([
        LSTM(lstm_hidden_units, return_sequences=True, input_shape=(train_features.shape[1], train_features.shape[2])),
        Dropout(dropout_rate),
        LSTM(int(lstm_hidden_units/2)),
        Dense(1, activation="tanh")
    ])
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate), loss="mse")
    model.fit(train_features, train_labels, epochs=epochs, batch_size=batch_size, verbose=0)
    all_features = features.reshape((features.shape[0], 1, features.shape[1]))
    predicted_signals = model.predict(all_features, verbose=0).flatten()
    positions = np.clip(predicted_signals, -1, 1)
    return positions

def backtest_strategy(price_series, positions):
    if isinstance(price_series, pd.DataFrame):
        price_series = price_series.squeeze()
    positions = pd.Series(positions, index=price_series.index, name=price_series.name)
    entries = positions > 0
    exits = positions < 0
    pf = vbt.Portfolio.from_signals(
        close=price_series, 
        entries=entries, 
        exits=exits, 
        size=np.abs(positions), 
        freq="1D"
    )
    return pf

def evaluate_performance(portfolio):
    print("\nPortfolio Metrics:")
    print(f"Total Return: {float(portfolio.total_return()):.2%}")
    print(f"Sharpe Ratio: {float(portfolio.sharpe_ratio()):.2f}")
    print(f"Max Drawdown: {float(portfolio.max_drawdown()):.2%}")
    fig_value = portfolio.plot_value(
        title=f'Portfolio Value Over Time - {portfolio.close.name}'
    )
    fig_value.show()
    fig_drawdowns = portfolio.plot_drawdowns(
        title=f'Portfolio Drawdowns - {portfolio.close.name}'
    )
    fig_drawdowns.show()

def main():
    ticker = "BTC-USD"
    price_series, returns = fetch_and_process_data(
        ticker=ticker,
        start_date="2020-01-01",
        end_date="2020-12-31"
    )
    # Reduced hyperparameter grid for faster testing
    LBW_range = [10, 21, 63, 252]  # CPD lookback window values
    dropout_rate_range = [0.2, 0.4]
    hidden_units_range = [40, 80]
    batch_size_range = [64, 128]
    learning_rate_range = [1e-3, 1e-2]
    param_grid = {
        'lookback_window': LBW_range,
        'dropout_rate': dropout_rate_range,
        'lstm_hidden_units': hidden_units_range,
        'batch_size': batch_size_range,
        'learning_rate': learning_rate_range
    }
    best_sharpe_ratio = -np.inf
    best_params = None
    best_portfolio = None
    for params in ParameterGrid(param_grid):
        changepoints = detect_changepoints(returns, lookback_window=params['lookback_window'])
        positions = generate_signals(
            returns, changepoints,
            train_ratio=0.8,
            epochs=50,
            lookback_window=params['lookback_window'],
            dropout_rate=params['dropout_rate'],
            lstm_hidden_units=params['lstm_hidden_units'],
            learning_rate=params['learning_rate'],
            batch_size=params['batch_size']
        )
        portfolio = backtest_strategy(price_series, positions)
        sharpe_ratio = float(portfolio.sharpe_ratio())
        if sharpe_ratio > best_sharpe_ratio:
            best_sharpe_ratio = sharpe_ratio
            best_params = params
            best_portfolio = portfolio
    print(f"Best Parameters: {best_params}")
    evaluate_performance(best_portfolio)
    print(best_portfolio.stats())

if __name__ == "__main__":
    main()

[*********************100%***********************]  1 of 1 completed

Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



Best Parameters: {'batch_size': 128, 'dropout_rate': 0.4, 'learning_rate': 0.001, 'lookback_window': 10, 'lstm_hidden_units': 40}

Portfolio Metrics:
Total Return: 158.61%
Sharpe Ratio: 2.39
Max Drawdown: -25.46%


Start                               2020-01-02 00:00:00
End                                 2020-12-30 00:00:00
Period                                364 days 00:00:00
Start Value                                       100.0
End Value                                    258.605705
Total Return [%]                             158.605705
Benchmark Return [%]                         312.870605
Max Gross Exposure [%]                            100.0
Total Fees Paid                                     0.0
Max Drawdown [%]                              25.459303
Max Drawdown Duration                  95 days 00:00:00
Total Trades                                         93
Total Closed Trades                                  92
Total Open Trades                                     1
Open Trade PnL                                15.746671
Win Rate [%]                                  45.652174
Best Trade [%]                                24.854946
Worst Trade [%]                               -7

In [9]:
import yfinance as yf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from gpflow.kernels import Matern32, Kernel
from gpflow.models import GPR
from gpflow import set_trainable
from sklearn.preprocessing import StandardScaler
import vectorbt as vbt
from datetime import datetime, timedelta
import ruptures as rpt

class ChangePointKernel(Kernel):
    def __init__(self, base_kernel, changepoints):
        super().__init__()
        self.base_kernel = base_kernel
        # List of indices where a regime change is detected.
        self.changepoints = changepoints

    def get_region(self, X):
        cp = tf.constant(self.changepoints, dtype=X.dtype)
        regions = tf.searchsorted(cp, X[:, 0], side='right')
        return regions

    def K(self, X, X2=None):
        if X2 is None:
            X2 = X
        regions_X = self.get_region(X)
        regions_X2 = self.get_region(X2)
        regions_equal = tf.cast(tf.equal(tf.expand_dims(regions_X, 1), tf.expand_dims(regions_X2, 0)), X.dtype)
        base_cov = self.base_kernel.K(X, X2)
        return base_cov * regions_equal

    def K_diag(self, X):
        return self.base_kernel.K_diag(X)

def fetch_and_process_data(
    ticker="BTC-USD",
    start_date=(datetime.now() - timedelta(days=365)).strftime('%Y-%m-%d'),
    end_date=datetime.now().strftime('%Y-%m-%d')
):
    data = yf.download(ticker, start=start_date, end=end_date)
    if data.empty:
        raise ValueError(f"No data found for {ticker} between {start_date} and {end_date}")
    if "Adj Close" in data.columns:
        price_series = data["Adj Close"].copy()
    else:
        price_series = data["Close"].copy()
    if isinstance(price_series, pd.DataFrame):
        price_series = price_series.squeeze()
    price_series.name = ticker
    returns = price_series.pct_change().dropna()
    scaler = StandardScaler()
    std_returns = scaler.fit_transform(returns.values.reshape(-1, 1))
    valid_index = returns.index
    return price_series.loc[valid_index], pd.Series(
        std_returns.flatten(), index=valid_index, name="Standardized Returns"
    )

def detect_changepoints(returns, lookback_window):
    # Using ruptures with an RBF model and a penalty equal to lookback_window.
    algo = rpt.Pelt(model="rbf").fit(returns.values)
    result = algo.predict(pen=lookback_window)
    return result

def generate_signals(returns, changepoints, train_ratio=0.8, epochs=50, lookback_window=30,
                     dropout_rate=0.2, lstm_hidden_units=40, learning_rate=0.001, batch_size=128):
    # Prepare features using time index
    X = np.arange(len(returns), dtype=np.float64).reshape(-1, 1)
    y = returns.values.reshape(-1, 1)
    # Gaussian Process Regression with a ChangePointKernel for trend extraction
    base_kernel = Matern32()
    kernel = ChangePointKernel(base_kernel, changepoints)
    gpr = GPR(data=(X, y), kernel=kernel)
    set_trainable(gpr.likelihood.variance, False)
    trend = gpr.predict_f(X)[0].numpy().flatten()
    # Combine returns and extracted trend to form features
    features = np.hstack([returns.values.reshape(-1, 1), trend.reshape(-1, 1)])
    split_index = int(len(features) * train_ratio)
    train_features = features[:split_index]
    train_labels = returns.values[:split_index]
    train_features = train_features.reshape((train_features.shape[0], 1, train_features.shape[1]))
    # Build LSTM model
    model = Sequential([
        LSTM(lstm_hidden_units, return_sequences=True, input_shape=(train_features.shape[1], train_features.shape[2])),
        Dropout(dropout_rate),
        LSTM(int(lstm_hidden_units / 2)),
        Dense(1, activation="tanh")
    ])
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate), loss="mse")
    model.fit(train_features, train_labels, epochs=epochs, batch_size=batch_size, verbose=0)
    all_features = features.reshape((features.shape[0], 1, features.shape[1]))
    predicted_signals = model.predict(all_features, verbose=0).flatten()
    positions = np.clip(predicted_signals, -1, 1)
    return positions

def backtest_strategy(price_series, positions):
    if isinstance(price_series, pd.DataFrame):
        price_series = price_series.squeeze()
    positions = pd.Series(positions, index=price_series.index, name=price_series.name)
    entries = positions > 0
    exits = positions < 0
    pf = vbt.Portfolio.from_signals(
        close=price_series, 
        entries=entries, 
        exits=exits, 
        size=np.abs(positions), 
        freq="1D"
    )
    return pf

def evaluate_performance(portfolio):
    print("\nPortfolio Metrics:")
    print(f"Total Return: {float(portfolio.total_return()):.2%}")
    print(f"Sharpe Ratio: {float(portfolio.sharpe_ratio()):.2f}")
    print(f"Max Drawdown: {float(portfolio.max_drawdown()):.2%}")
    fig_value = portfolio.plot_value(title=f'Portfolio Value Over Time - {portfolio.close.name}')
    fig_value.show()
    fig_drawdowns = portfolio.plot_drawdowns(title=f'Portfolio Drawdowns - {portfolio.close.name}')
    fig_drawdowns.show()

def main():
    ticker = "BTC-USD"
    price_series, returns = fetch_and_process_data(
        ticker=ticker, 
        start_date="2024-01-01", 
        end_date="2025-01-01"
    )
    # Fixed hyperparameters
    params = {
        'batch_size': 128,
        'dropout_rate': 0.2,
        'learning_rate': 0.001,
        'lookback_window': 21,
        'lstm_hidden_units': 40
    }
    changepoints = detect_changepoints(returns, lookback_window=params['lookback_window'])
    positions = generate_signals(
        returns, changepoints,
        train_ratio=0.8,
        epochs=50,
        lookback_window=params['lookback_window'],
        dropout_rate=params['dropout_rate'],
        lstm_hidden_units=params['lstm_hidden_units'],
        learning_rate=params['learning_rate'],
        batch_size=params['batch_size']
    )
    portfolio = backtest_strategy(price_series, positions)
    evaluate_performance(portfolio)
    print(portfolio.stats())

if __name__ == "__main__":
    main()

[*********************100%***********************]  1 of 1 completed

Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.




Portfolio Metrics:
Total Return: 55.18%
Sharpe Ratio: 1.44
Max Drawdown: -21.80%


Start                               2024-01-02 00:00:00
End                                 2024-12-31 00:00:00
Period                                365 days 00:00:00
Start Value                                       100.0
End Value                                    155.179572
Total Return [%]                              55.179572
Benchmark Return [%]                         107.814556
Max Gross Exposure [%]                            100.0
Total Fees Paid                                     0.0
Max Drawdown [%]                              21.804052
Max Drawdown Duration                 211 days 00:00:00
Total Trades                                         93
Total Closed Trades                                  92
Total Open Trades                                     1
Open Trade PnL                                      0.0
Win Rate [%]                                  35.869565
Best Trade [%]                                26.811372
Worst Trade [%]                               -6