In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sys

from sklearn.linear_model import LinearRegression
import statsmodels.api as sm
sys.path.append('/Users/markwindsor/Desktop/atlas_trade')
from src.utils.technical_indicators import *
import warnings
warnings.filterwarnings("ignore")
plt.style.use("seaborn-v0_8")

class StatArbCryptoBacktest():

    def __init__(self, data_raw, symbol_one, symbol_two, trading_costs):
        self.symbol_one = symbol_one
        self.symbol_two = symbol_two
        self.data_raw = data
        self.trading_costs = trading_costs

        self.data = self.set_data(data_raw)


        self.results = None
        
    def __repr__(self):
        return "Statistical Arbitrage (XRPUSDT & THETAUSDT))"
    
    def set_data(self, data):
        data['xrp_returns'] = np.log(data['xrp_close'] / data['xrp_close'].shift(1))
        data['theta_returns'] = np.log(data['theta_close'] / data['theta_close'].shift(1))
        data.set_index('time', inplace=True)
        return data.dropna()  # Remove NaN values caused by shift
    
    def compute_hedge_ratio(self, y, x):
        df = pd.DataFrame({
            'x': x,
            'y': y
        })
        df.dropna(inplace=True)
        x_aligned = df['x'].values.reshape(-1, 1)
        y_aligned = df['y'].values
        model = LinearRegression().fit(x_aligned, y_aligned)
        return model.coef_[0]
    

    def compute_spread(self, y, x, hedge_ratio):
        return y - hedge_ratio * x

    def prepare_data(self):
        
        # Create a copy of dataOne
        working_data = self.data.copy()

        hedge_ratios = []

        for i in range(len(working_data) - 335):
            y = working_data['xrp_close'].iloc[i:i+336]
            x = working_data['theta_close'].iloc[i:i+336]
            hedge_ratios.append(self.compute_hedge_ratio(y, x))

        # Padding the hedge_ratios list with NaNs for the initial windows
        hedge_ratios = [np.NaN]*335 + hedge_ratios
        working_data['hedge_ratio'] = hedge_ratios

        # Calculate the spread
        working_data['spread'] = self.compute_spread(working_data['xrp_close'], working_data['theta_close'], working_data['hedge_ratio'])

        # Calculate the rolling mean and standard deviation of the spread
        working_data['mean_spread'] = working_data['spread'].rolling(
            window=336).mean()
        working_data['std_spread'] = working_data['spread'].rolling(
            window=336).std()

        # Calculate z-score
        working_data['zscore'] = (
            working_data['spread'] - working_data['mean_spread']) / working_data['std_spread']

        # Determine trading signals based on z-score
        working_data['trading_signal'] = np.where(
            working_data['zscore'] < -1.8, 1,  # Buy condition
            np.where(working_data['zscore'] > 1.8, -1,  # Sell condition
                        np.where((working_data['zscore'] > 0) & (working_data['zscore'].shift(1) <= 0), 0,  # Crossed above zero line
                                np.where((working_data['zscore'] < 0) & (working_data['zscore'].shift(1) >= 0), 0,  # Crossed below zero line
                                        np.NAN))))  # Default value

        # Fill NaN values with previous signal
        working_data['trading_signal'].fillna(
            method='ffill', inplace=True)

        self.results = working_data.copy()

    def run_backtest(self):
        data = self.results.copy()

        # Calculate strategy returns
        # If going long on the spread: returns of symbol_one minus returns of symbol_two
        data['strategy'] = np.where(data['trading_signal'].shift(1) == 1,
                                    data["xrp_returns"] - data['theta_returns'],
                                    # If going short on the spread: returns of symbol_two minus returns of symbol_one
                                    np.where(data['trading_signal'].shift(1) == -1,
                                            data['theta_returns'] - data["xrp_returns"],
                                            0))

        # Subtract trading costs for every trade
        data["trades"] = data.trading_signal.diff().fillna(0).abs()
        data.strategy -= data.trades * self.trading_costs

        self.results = data

    def calculate_sharpe_ratio(self):
        # Calculate excess returns
        hourly_rf = (1 + 0.03) ** (1/(365 * 24)) - 1
        excess_returns = self.results['strategy'] - hourly_rf
        # Calculate Sharpe Ratio
        sharpe_ratio = excess_returns.mean() / excess_returns.std()
        return sharpe_ratio


    def test_strategy(self):
        self.prepare_data()
        self.run_backtest()
        
        data = self.results.copy()
        data["cum_xrp_returns"] = data["xrp_returns"].cumsum().apply(np.exp)
        data["cum_theta_returns"] = data["theta_returns"].cumsum().apply(np.exp)
        data["cum_strategy"] = data["strategy"].cumsum().apply(np.exp)
        self.results = data

        self.results.to_csv('statArbBacktest.csv', index=True)
        self.print_performance()

    def plot_results(self):
        if self.results is None:
            print("Run test_strategy() first.")
            return

        title = "XRP vs THETA | Trading Costs = {}".format(self.trading_costs)
        self.results[["cum_xrp_returns", "cum_theta_returns", "cum_strategy"]].plot(title=title, figsize=(12, 8))

    def print_performance(self):
        if self.results is None:
            print("No results to display.")
            return

        strategy_performance = round(self.calculate_multiple(self.results['strategy']), 6)
        sharpe_ratio = round(self.calculate_sharpe_ratio(), 4)
        print(100 * "=")
        print("Statistical Arbitrage | INSTRUMENT = XRPUSDT | THETAUSDT")
        print(100 * "-")
        print("PERFORMANCE MEASURES:")
        print("\n")
        print("Multiple (Strategy):         {}".format(strategy_performance))
        print("Sharpe Ratio (Strategy):     {}".format(sharpe_ratio))
        print(38 * "-")
        print(100 * "=")

    def calculate_multiple(self, series):
        return np.exp(series.sum())

data_path = '/Users/markwindsor/Desktop/atlas_trade/src/strategies/statistical_arbitrage_crypto/backtest_data_1h.csv'
data = pd.read_csv(data_path)


symbol_one = "XRPUSDT"
symbol_two = "THETAUSDT"

tc = -0.0005

tester = StatArbCryptoBacktest(data, symbol_one, symbol_two, tc)
tester.test_strategy()

Statistical Arbitrage | INSTRUMENT = XRPUSDT | THETAUSDT
----------------------------------------------------------------------------------------------------
PERFORMANCE MEASURES:


Multiple (Strategy):         1.12509
Sharpe Ratio (Strategy):     0.005
--------------------------------------


In [7]:

class StatArbCryptoBacktest:
    def __init__(self, data, symbol_one, symbol_two, transaction_cost):
        self.data = data
        self.symbol_one = symbol_one
        self.symbol_two = symbol_two
        self.tc = transaction_cost
        self.results = None

    @staticmethod
    def compute_hedge_ratio(y, x):
        y, x = y.dropna(), x.dropna()
        x = sm.add_constant(x)
        model = sm.OLS(y, x).fit()
        return model.params[1]

    def set_signals(self):
        df = self.data.copy()
        df['hedge_ratio'] = df.rolling(window=336).apply(lambda window: self.compute_hedge_ratio(window[self.symbol_two], window[self.symbol_one]), raw=False)
        df['spread'] = df[self.symbol_one] - df['hedge_ratio'] * df[self.symbol_two]
        
        df['mean_spread'] = df['spread'].rolling(window=336).mean()
        df['std_spread'] = df['spread'].rolling(window=336).std()
        df['zscore'] = (df['spread'] - df['mean_spread']) / df['std_spread']

        df['trading_signal'] = 0
        df.loc[df['zscore'] < -1.8, 'trading_signal'] = 1
        df.loc[df['zscore'] > 1.8, 'trading_signal'] = -1
        df['trading_signal'] = df['trading_signal'].replace(to_replace=0, method='ffill')

        # Go neutral (0) when the zscore crosses the zero line
        df.loc[(df['zscore'] * df['zscore'].shift(1) < 0), 'trading_signal'] = 0
        
        self.results = df.copy()

    def backtest(self):
        if self.results is None:
            self.set_signals()
        
        # Strategy Returns without transaction cost
        self.results['strategy_returns'] = self.results['trading_signal'].shift(1) * self.results[self.symbol_one].pct_change()
        self.results['strategy_returns'] -= self.tc * abs(self.results['trading_signal'].diff())
        
        # Cumulative Returns
        self.results['cumulative_market_returns'] = (1 + self.results[self.symbol_one].pct_change()).cumprod()
        self.results['cumulative_strategy_returns'] = (1 + self.results['strategy_returns']).cumprod()
        
        # Set initial capital to 1 for both market and strategy cumulative returns
        self.results['cumulative_market_returns'].iloc[0] = 1
        self.results['cumulative_strategy_returns'].iloc[0] = 1

    def analyze_performance(self):
        # Assuming you want some basic performance metrics
        total_return = self.results['cumulative_strategy_returns'].last('1D') - 1
        annualized_return = total_return / len(self.results) * 252  # 252 trading days in a year
        annualized_vol = self.results['strategy_returns'].std() * (252**0.5)
        sharpe_ratio = annualized_return / annualized_vol

        print(f"Total Return: {total_return:.2%}")
        print(f"Annualized Return: {annualized_return:.2%}")
        print(f"Annualized Volatility: {annualized_vol:.2%}")
        print(f"Sharpe Ratio: {sharpe_ratio:.2f}")

        # Plot cumulative returns
        self.results[['cumulative_market_returns', 'cumulative_strategy_returns']].plot(figsize=(12, 6))


df = pd.read_csv(data_path)
print(df.head())


df.set_index('time', inplace=True)

# Check the columns exist in the DataFrame to rule out issues related to them
assert 'xrp_close' in df.columns
assert 'theta_close' in df.columns


tester = StatArbCryptoBacktest(df, 'xrp_close', 'theta_close', 0.0005)
tester.backtest()
tester.analyze_performance()


                  time   xrp_volume  theta_volume  xrp_close  theta_close
0  2023-05-30 16:30:00  107004259.2      789975.6       0.52         0.87
1  2023-05-30 17:30:00   63777449.9      482664.7       0.52         0.87
2  2023-05-30 18:30:00  353628002.4      862576.5       0.52         0.87
3  2023-05-30 19:30:00   88802622.4      431916.1       0.52         0.87
4  2023-05-30 20:30:00   61217870.5      721893.4       0.52         0.87


KeyError: 'theta_close'

In [None]:
tester.plot_results()