In [2]:
import yfinance as yf
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from gpflow.kernels import Matern32, Kernel
from gpflow.models import GPR
from gpflow import set_trainable
from sklearn.preprocessing import StandardScaler
import vectorbt as vbt
from datetime import datetime, timedelta
import ruptures as rpt
import warnings

warnings.filterwarnings("ignore", category=FutureWarning)

# Define the custom ChangePointKernel for Gaussian Process Regression
class ChangePointKernel(Kernel):
    def __init__(self, base_kernel, changepoints):
        super().__init__()
        self.base_kernel = base_kernel
        # changepoints: list of indices where a regime change is detected.
        self.changepoints = changepoints

    def get_region(self, X):
        cp = tf.constant(self.changepoints, dtype=X.dtype)
        regions = tf.searchsorted(cp, X[:, 0], side='right')
        return regions

    def K(self, X, X2=None):
        if X2 is None:
            X2 = X
        regions_X = self.get_region(X)
        regions_X2 = self.get_region(X2)
        regions_equal = tf.cast(tf.equal(tf.expand_dims(regions_X, 1), tf.expand_dims(regions_X2, 0)), X.dtype)
        base_cov = self.base_kernel.K(X, X2)
        return base_cov * regions_equal

    def K_diag(self, X):
        return self.base_kernel.K_diag(X)

def fetch_and_process_data(ticker, start_date, end_date):
    data = yf.download(ticker, start=start_date, end=end_date)
    if data.empty:
        raise ValueError(f"No data found for {ticker} between {start_date} and {end_date}")
    # Prefer "Adj Close" if available, otherwise "Close"
    if "Adj Close" in data.columns:
        price_series = data["Adj Close"].copy()
    else:
        price_series = data["Close"].copy()
    if isinstance(price_series, pd.DataFrame):
        price_series = price_series.squeeze()
    price_series.name = ticker
    returns = price_series.pct_change().dropna()
    scaler = StandardScaler()
    std_returns = scaler.fit_transform(returns.values.reshape(-1, 1))
    valid_index = returns.index
    return price_series.loc[valid_index], pd.Series(
        std_returns.flatten(), index=valid_index, name="Standardized Returns"
    )

def detect_changepoints(returns, lookback_window):
    # Use ruptures to detect changepoints using an RBF model.
    algo = rpt.Pelt(model="rbf").fit(returns.values)
    # Use the lookback_window as penalty; this is a simplified proxy for CPD LBW.
    result = algo.predict(pen=lookback_window)
    return result

def generate_signals(returns, changepoints, train_ratio, epochs, 
                     dropout_rate, lstm_hidden_units, learning_rate, batch_size):
    # Use time index as a feature.
    X = np.arange(len(returns), dtype=np.float64).reshape(-1, 1)
    y = returns.values.reshape(-1, 1)
    # Use Gaussian Process Regression with the ChangePointKernel for trend extraction.
    base_kernel = Matern32()
    kernel = ChangePointKernel(base_kernel, changepoints)
    gpr = GPR(data=(X, y), kernel=kernel)
    set_trainable(gpr.likelihood.variance, False)
    trend = gpr.predict_f(X)[0].numpy().flatten()
    # Combine returns and trend to form a feature set.
    features = np.hstack([returns.values.reshape(-1, 1), trend.reshape(-1, 1)])
    split_index = int(len(features) * train_ratio)
    train_features = features[:split_index]
    train_labels = returns.values[:split_index]
    # Reshape features for LSTM input.
    train_features = train_features.reshape((train_features.shape[0], 1, train_features.shape[1]))
    # Build the LSTM model.
    model = Sequential([
        LSTM(lstm_hidden_units, return_sequences=True,
             input_shape=(train_features.shape[1], train_features.shape[2])),
        Dropout(dropout_rate),
        LSTM(int(lstm_hidden_units / 2)),
        Dense(1, activation="tanh")
    ])
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate), loss="mse")
    model.fit(train_features, train_labels, epochs=epochs, batch_size=batch_size, verbose=0)
    # Predict signals for all features.
    all_features = features.reshape((features.shape[0], 1, features.shape[1]))
    predicted_signals = model.predict(all_features, verbose=0).flatten()
    positions = np.clip(predicted_signals, -1, 1)
    return positions

def backtest_strategy(price_series, positions):
    if isinstance(price_series, pd.DataFrame):
        price_series = price_series.squeeze()
    positions = pd.Series(positions, index=price_series.index, name=price_series.name)
    entries = positions > 0
    exits = positions < 0
    pf = vbt.Portfolio.from_signals(
        close=price_series,
        entries=entries,
        exits=exits,
        size=np.abs(positions),
        freq="1D",
        fees=0.001,
    )
    return pf

def compute_trade_statistics(pf, price_series):
    # Extract trade records from the portfolio
    trades = pf.trades.records
    if trades.empty:
        win_rate = np.nan
        avg_win = np.nan
        avg_loss = np.nan
    else:
        # Assuming 'pnl' column exists representing profit (can be negative).
        wins = trades[trades.pnl > 0]
        losses = trades[trades.pnl < 0]
        win_rate = len(wins) / len(trades) if len(trades) > 0 else np.nan
        avg_win = wins.pnl.mean() if not wins.empty else np.nan
        avg_loss = losses.pnl.mean() if not losses.empty else np.nan
    return win_rate, avg_win, avg_loss

def main():
    # Fixed hyperparameters.
    params = {
        'batch_size': 128,
        'dropout_rate': 0.2,
        'learning_rate': 0.001,
        'lookback_window': 21,
        'lstm_hidden_units': 40,
        'epochs': 50,
        'train_ratio': 0.8
    }
    
    # List of top 10 cryptocurrencies (excluding stablecoins).
    top_cryptos = [
        "BTC-USD",  # Bitcoin
        # "ETH-USD",  # Ethereum
        # "BNB-USD",  # Binance Coin
        # "XRP-USD",  # XRP
        # "ADA-USD",  # Cardano
        # "SOL-USD",  # Solana
        # "DOT-USD",  # Polkadot
        # "DOGE-USD", # Dogecoin
        # "AVAX-USD", # Avalanche
        # "MATIC-USD" # Polygon (MATIC)
    ]

    # Define test periods.
    periods = []
    for year in range(2019, 2025):
        period_label = str(year)
        start_date = f"{year}-01-01"
        end_date = f"{year}-12-31"
        periods.append((period_label, start_date, end_date))
    # YTD 2025 using current date as provided in session (2025-02-24).
    periods.append(("2025_YTD", "2025-01-01", "2025-02-24"))

    results = []

    # Loop over each cryptocurrency and each period.
    for crypto in top_cryptos:
        for period_label, start_date, end_date in periods:
            print(f"Processing {crypto} for period {period_label} ({start_date} to {end_date})")
            try:
                price_series, returns = fetch_and_process_data(crypto, start_date, end_date)
                if len(returns) < params['lookback_window']:
                    print(f"Not enough data for {crypto} in period {period_label}. Skipping.")
                    continue
                
                changepoints = detect_changepoints(returns, lookback_window=params['lookback_window'])
                positions = generate_signals(
                    returns, changepoints,
                    train_ratio=params['train_ratio'],
                    epochs=params['epochs'],
                    dropout_rate=params['dropout_rate'],
                    lstm_hidden_units=params['lstm_hidden_units'],
                    learning_rate=params['learning_rate'],
                    batch_size=params['batch_size']
                )
                portfolio = backtest_strategy(price_series, positions)
                
                result = {
					"crypto": crypto,
					**{key: str(value) if pd.isna(value) else value for key, value in portfolio.stats().items()}
				}
                # result = {
                #     "crypto": crypto,
                #     "period": period_label,
                #     "start_date": start_date,
                #     "end_date": end_date,
                #     "total_return": total_return,
                #     "sharpe_ratio": sharpe_ratio,
                #     "max_drawdown": max_drawdown,
                #     "benchmark_return": benchmark_return,
                #     "win_rate": win_rate,
                #     "avg_win": avg_win,
                #     "avg_loss": avg_loss,
                #     "calmar_ratio": calmar_ratio,
                #     "omega_ratio": omega_ratio,
                #     "sortino_ratio": sortino_ratio,
                #     "portfolio_stats_file": stats_csv
                # }
                
                results.append(result)
                
                print(result)
                
                # stats_dict = portfolio.stats().apply(str).to_dict()
                # print(stats_dict)
            
            except Exception as e:
                print(f"Error processing {crypto} for period {period_label}: {e}")

    # Save all summary results into a DataFrame.
    results_df = pd.DataFrame(results)
    print("\nBacktest Results:")
    print(results_df)
    # Optionally, save the DataFrame to a CSV file.
    results_df.to_csv("crypto_backtest_results_incfees.csv", index=False)

if __name__ == "__main__":
    main()

[*********************100%***********************]  1 of 1 completed

Processing BTC-USD for period 2019 (2019-01-01 to 2019-12-31)



  super().__init__(**kwargs)




[*********************100%***********************]  1 of 1 completed

{'crypto': 'BTC-USD', 'Start': Timestamp('2019-01-02 00:00:00'), 'End': Timestamp('2019-12-30 00:00:00'), 'Period': Timedelta('363 days 00:00:00'), 'Start Value': 100.0, 'End Value': 175.52785668828636, 'Total Return [%]': 75.52785668828635, 'Benchmark Return [%]': 84.94136249508948, 'Max Gross Exposure [%]': 100.0, 'Total Fees Paid': 23.222059210832565, 'Max Drawdown [%]': 22.717277993578048, 'Max Drawdown Duration': Timedelta('89 days 00:00:00'), 'Total Trades': 82, 'Total Closed Trades': 82, 'Total Open Trades': 0, 'Open Trade PnL': 0.0, 'Win Rate [%]': 34.146341463414636, 'Best Trade [%]': 20.367978133306725, 'Worst Trade [%]': -13.19696298289473, 'Avg Winning Trade [%]': 6.2322912184005705, 'Avg Losing Trade [%]': -1.86391594405084, 'Avg Winning Trade Duration': Timedelta('3 days 06:51:25.714285714'), 'Avg Losing Trade Duration': Timedelta('1 days 05:46:40'), 'Profit Factor': 1.4989521833794592, 'Expectancy': 0.9210714230278823, 'Sharpe Ratio': 1.416776434534917, 'Calmar Ratio': 3


  super().__init__(**kwargs)
[*********************100%***********************]  1 of 1 completed

{'crypto': 'BTC-USD', 'Start': Timestamp('2020-01-02 00:00:00'), 'End': Timestamp('2020-12-30 00:00:00'), 'Period': Timedelta('364 days 00:00:00'), 'Start Value': 100.0, 'End Value': 213.08093586869765, 'Total Return [%]': 113.08093586869765, 'Benchmark Return [%]': 312.8706048121783, 'Max Gross Exposure [%]': 100.0, 'Total Fees Paid': 22.036726666283727, 'Max Drawdown [%]': 28.25724088492054, 'Max Drawdown Duration': Timedelta('181 days 00:00:00'), 'Total Trades': 93, 'Total Closed Trades': 92, 'Total Open Trades': 1, 'Open Trade PnL': 12.774531489290553, 'Win Rate [%]': 36.95652173913043, 'Best Trade [%]': 24.630091398863584, 'Worst Trade [%]': -7.199908760487301, 'Avg Winning Trade [%]': 5.385717477983282, 'Avg Losing Trade [%]': -1.756697583687524, 'Avg Winning Trade Duration': Timedelta('2 days 21:10:35.294117647'), 'Avg Losing Trade Duration': Timedelta('1 days 03:18:37.241379310'), 'Profit Factor': 1.8433249773143905, 'Expectancy': 1.09028700412399, 'Sharpe Ratio': 1.93808792560


  super().__init__(**kwargs)
[*********************100%***********************]  1 of 1 completed

{'crypto': 'BTC-USD', 'Start': Timestamp('2021-01-02 00:00:00'), 'End': Timestamp('2021-12-30 00:00:00'), 'Period': Timedelta('363 days 00:00:00'), 'Start Value': 100.0, 'End Value': 105.21668857458666, 'Total Return [%]': 5.216688574586655, 'Benchmark Return [%]': 46.847611255066646, 'Max Gross Exposure [%]': 100.0, 'Total Fees Paid': 19.652562309919848, 'Max Drawdown [%]': 48.57577575336976, 'Max Drawdown Duration': Timedelta('272 days 00:00:00'), 'Total Trades': 90, 'Total Closed Trades': 89, 'Total Open Trades': 1, 'Open Trade PnL': -0.105216688574572, 'Win Rate [%]': 32.58426966292135, 'Best Trade [%]': 29.403803146768624, 'Worst Trade [%]': -13.50842375136319, 'Avg Winning Trade [%]': 6.790657297278667, 'Avg Losing Trade [%]': -2.918133437359128, 'Avg Winning Trade Duration': Timedelta('3 days 04:57:55.862068965'), 'Avg Losing Trade Duration': Timedelta('1 days 10:00:00'), 'Profit Factor': 1.0271741262144842, 'Expectancy': 0.059796688350125304, 'Sharpe Ratio': 0.35908548974388527


  super().__init__(**kwargs)
[*********************100%***********************]  1 of 1 completed

{'crypto': 'BTC-USD', 'Start': Timestamp('2022-01-02 00:00:00'), 'End': Timestamp('2022-12-30 00:00:00'), 'Period': Timedelta('363 days 00:00:00'), 'Start Value': 100.0, 'End Value': 67.36337844235334, 'Total Return [%]': -32.63662155764666, 'Benchmark Return [%]': -64.9329195727921, 'Max Gross Exposure [%]': 100.0, 'Total Fees Paid': 13.83584916210092, 'Max Drawdown [%]': 44.29783158761285, 'Max Drawdown Duration': Timedelta('318 days 00:00:00'), 'Total Trades': 81, 'Total Closed Trades': 80, 'Total Open Trades': 1, 'Open Trade PnL': -0.22883021857623476, 'Win Rate [%]': 35.0, 'Best Trade [%]': 17.036360875135575, 'Worst Trade [%]': -9.89748724052532, 'Avg Winning Trade [%]': 3.5915862991709324, 'Avg Losing Trade [%]': -2.470445873468575, 'Avg Winning Trade Duration': Timedelta('3 days 21:25:42.857142857'), 'Avg Losing Trade Duration': Timedelta('1 days 14:46:09.230769230'), 'Profit Factor': 0.7203991183320881, 'Expectancy': -0.40509739173838, 'Sharpe Ratio': -0.7679927811371414, 'Cal


  super().__init__(**kwargs)
[*********************100%***********************]  1 of 1 completed

{'crypto': 'BTC-USD', 'Start': Timestamp('2023-01-02 00:00:00'), 'End': Timestamp('2023-12-30 00:00:00'), 'Period': Timedelta('363 days 00:00:00'), 'Start Value': 100.0, 'End Value': 176.38703054414393, 'Total Return [%]': 76.38703054414393, 'Benchmark Return [%]': 152.61093777667676, 'Max Gross Exposure [%]': 100.0, 'Total Fees Paid': 26.006634630333274, 'Max Drawdown [%]': 18.455055789424833, 'Max Drawdown Duration': Timedelta('219 days 00:00:00'), 'Total Trades': 91, 'Total Closed Trades': 91, 'Total Open Trades': 0, 'Open Trade PnL': 0.0, 'Win Rate [%]': 34.065934065934066, 'Best Trade [%]': 21.951030527276448, 'Worst Trade [%]': -5.3757395984347065, 'Avg Winning Trade [%]': 4.8562589437187915, 'Avg Losing Trade [%]': -1.3706460351455836, 'Avg Winning Trade Duration': Timedelta('2 days 19:21:17.419354838'), 'Avg Losing Trade Duration': Timedelta('1 days 01:12:00'), 'Profit Factor': 1.6294304182670982, 'Expectancy': 0.8394179180675161, 'Sharpe Ratio': 1.8444720032424051, 'Calmar Rat


  super().__init__(**kwargs)
[*********************100%***********************]  1 of 1 completed

{'crypto': 'BTC-USD', 'Start': Timestamp('2024-01-02 00:00:00'), 'End': Timestamp('2024-12-30 00:00:00'), 'Period': Timedelta('364 days 00:00:00'), 'Start Value': 100.0, 'End Value': 139.02045316889385, 'Total Return [%]': 39.02045316889385, 'Benchmark Return [%]': 106.06627370703912, 'Max Gross Exposure [%]': 100.0, 'Total Fees Paid': 22.63975839849399, 'Max Drawdown [%]': 27.67153191858016, 'Max Drawdown Duration': Timedelta('216 days 00:00:00'), 'Total Trades': 91, 'Total Closed Trades': 91, 'Total Open Trades': 0, 'Open Trade PnL': 0.0, 'Win Rate [%]': 34.065934065934066, 'Best Trade [%]': 26.58456016789322, 'Worst Trade [%]': -6.2260737637703905, 'Avg Winning Trade [%]': 4.859658093911704, 'Avg Losing Trade [%]': -1.7988588828491823, 'Avg Winning Trade Duration': Timedelta('3 days 06:11:36.774193548'), 'Avg Losing Trade Duration': Timedelta('1 days 04:00:00'), 'Profit Factor': 1.2807633031653607, 'Expectancy': 0.42879618866916225, 'Sharpe Ratio': 1.1006611864718239, 'Calmar Ratio'


  super().__init__(**kwargs)


{'crypto': 'BTC-USD', 'Start': Timestamp('2025-01-02 00:00:00'), 'End': Timestamp('2025-02-23 00:00:00'), 'Period': Timedelta('53 days 00:00:00'), 'Start Value': 100.0, 'End Value': 92.16954613634513, 'Total Return [%]': -7.830453863654867, 'Benchmark Return [%]': -0.6326482560202185, 'Max Gross Exposure [%]': 100.0, 'Total Fees Paid': 2.3523303354861387, 'Max Drawdown [%]': 12.923549909127644, 'Max Drawdown Duration': Timedelta('39 days 00:00:00'), 'Total Trades': 12, 'Total Closed Trades': 12, 'Total Open Trades': 0, 'Open Trade PnL': 0.0, 'Win Rate [%]': 25.0, 'Best Trade [%]': 5.132967452155988, 'Worst Trade [%]': -3.6811432127257717, 'Avg Winning Trade [%]': 2.316055466770851, 'Avg Losing Trade [%]': -1.6434910181625155, 'Avg Winning Trade Duration': Timedelta('3 days 16:00:00'), 'Avg Losing Trade Duration': Timedelta('2 days 10:40:00'), 'Profit Factor': 0.4691419464567889, 'Expectancy': -0.6525378219712371, 'Sharpe Ratio': -1.636802360675206, 'Calmar Ratio': -3.324757580953013, '