# In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sys

from sklearn.linear_model import LinearRegression
sys.path.append('/Users/markwindsor/Desktop/atlas_trade')
from src.utils.technical_indicators import *
import warnings
warnings.filterwarnings("ignore")
plt.style.use("seaborn-v0_8")

class StatArbCryptoBacktest():
    """Vectorised backtest of a z-score mean-reversion trade on the XRP/THETA spread.

    The input frame must provide the columns ``time``, ``xrp_close`` and
    ``theta_close``. The spread ``xrp_close - hedge_ratio * theta_close`` is
    standardised with a rolling mean and standard deviation; a position is
    opened when the z-score leaves the ``+/- entry_threshold`` band and closed
    when the z-score crosses zero.

    Args:
        data_raw: DataFrame with ``time``, ``xrp_close`` and ``theta_close``.
            It is not modified.
        symbol_one: Label of the first leg (used in ``repr`` only).
        symbol_two: Label of the second leg (used in ``repr`` only).
        trading_costs: Proportional cost charged per unit change of the
            trading signal. Only the magnitude is used, so ``0.0005`` and
            ``-0.0005`` are equivalent.
        window: Number of rows in the rolling window used for the z-score.
        entry_threshold: Absolute z-score at which a position is opened.
    """

    def __init__(self, data_raw, symbol_one, symbol_two, trading_costs,
                 window=336, entry_threshold=1.8):
        self.symbol_one = symbol_one
        self.symbol_two = symbol_two
        # Keep a reference to the caller's frame (previously this picked up a
        # module-level global named `data` instead of the argument).
        self.data_raw = data_raw
        self.trading_costs = trading_costs
        self.window = window
        self.entry_threshold = entry_threshold

        self.data = self.set_data(data_raw)

        # Populated by prepare_data() / run_backtest() / test_strategy().
        self.results = None

    def __repr__(self):
        return f"Statistical Arbitrage ({self.symbol_one} & {self.symbol_two})"

    def set_data(self, data):
        """Return a time-indexed copy of *data* with log-return columns added.

        Works on a copy so the caller's frame keeps its ``time`` column and
        can be reused for another backtest instance.
        """
        frame = data.copy()
        frame['xrp_returns'] = np.log(
            frame['xrp_close'] / frame['xrp_close'].shift(1))
        frame['theta_returns'] = np.log(
            frame['theta_close'] / frame['theta_close'].shift(1))
        frame = frame.set_index('time')
        # The first row has no previous close, so its returns are NaN.
        return frame.dropna()

    def compute_hedge_ratio(self, y, x):
        """Return the OLS slope of *y* regressed on *x* (with intercept)."""
        # Build one frame so both series are aligned on their shared index,
        # then drop rows where either side is missing.
        aligned = pd.DataFrame({'x': x, 'y': y}).dropna()

        regressor = aligned['x'].values.reshape(-1, 1)
        target = aligned['y'].values

        model = LinearRegression().fit(regressor, target)
        return model.coef_[0]

    def compute_spread(self, y, x, hedge_ratio):
        """Return the spread ``y - hedge_ratio * x``."""
        return y - hedge_ratio * x

    def prepare_data(self):
        """Compute spread, rolling z-score and trading signal into ``self.results``."""
        frame = self.data.copy()

        # NOTE(review): the hedge ratio is fitted on the whole sample, so every
        # signal uses prices from later rows (look-ahead). Confirm whether a
        # rolling or out-of-sample fit is intended.
        hedge_ratio = self.compute_hedge_ratio(
            frame['xrp_close'], frame['theta_close'])

        frame['spread'] = self.compute_spread(
            frame['xrp_close'], frame['theta_close'], hedge_ratio)

        rolling_spread = frame['spread'].rolling(window=self.window)
        frame['mean_spread'] = rolling_spread.mean()
        frame['std_spread'] = rolling_spread.std()

        frame['zscore'] = (
            frame['spread'] - frame['mean_spread']) / frame['std_spread']

        zscore = frame['zscore']
        previous = zscore.shift(1)
        crossed_above_zero = (zscore > 0) & (previous <= 0)
        crossed_below_zero = (zscore < 0) & (previous >= 0)

        # First matching condition wins: entries take precedence over exits.
        # Rows that match nothing stay NaN and inherit the previous signal.
        signal = np.select(
            [
                (zscore < -self.entry_threshold).to_numpy(),  # long the spread
                (zscore > self.entry_threshold).to_numpy(),   # short the spread
                (crossed_above_zero | crossed_below_zero).to_numpy(),  # exit
            ],
            [1, -1, 0],
            default=np.nan,
        )
        frame['trading_signal'] = pd.Series(signal, index=frame.index).ffill()

        self.results = frame.copy()

    def run_backtest(self):
        """Add ``strategy`` (net log returns) and ``trades`` to ``self.results``."""
        data = self.results.copy()

        # The position held during a row is the signal from the previous row.
        held = data['trading_signal'].shift(1)
        long_spread = data['xrp_returns'] - data['theta_returns']
        short_spread = data['theta_returns'] - data['xrp_returns']
        data['strategy'] = np.where(
            held == 1, long_spread,
            np.where(held == -1, short_spread, 0))

        # Size of each position change; the leading NaN diff counts as no trade.
        data['trades'] = data['trading_signal'].diff().fillna(0).abs()

        # Costs always reduce returns, whichever sign the caller used.
        cost_rate = abs(self.trading_costs)
        data['strategy'] = data['strategy'] - data['trades'] * cost_rate

        self.results = data

    def calculate_sharpe_ratio(self):
        """Return the per-row (non-annualised) Sharpe ratio of ``strategy``.

        The risk-free rate is 3% per year converted to an hourly rate.
        """
        hourly_rf = (1 + 0.03) ** (1 / (365 * 24)) - 1
        excess_returns = self.results['strategy'] - hourly_rf
        return excess_returns.mean() / excess_returns.std()

    def test_strategy(self):
        """Run the full pipeline, write the results CSV and print a summary."""
        self.prepare_data()
        self.run_backtest()

        data = self.results.copy()
        # Log returns accumulate additively; exp() turns them into multiples.
        data["cum_xrp_returns"] = np.exp(data["xrp_returns"].cumsum())
        data["cum_theta_returns"] = np.exp(data["theta_returns"].cumsum())
        data["cum_strategy"] = np.exp(data["strategy"].cumsum())
        self.results = data

        # NOTE(review): index=False drops the time index from the CSV; confirm
        # whether timestamps should be written as well.
        self.results.to_csv('statArbBacktest.csv', index=False)
        self.print_performance()

    def plot_results(self):
        """Plot cumulative returns of both legs and the strategy."""
        if self.results is None:
            print("Run test_strategy() first.")
            return

        title = "XRP vs THETA | Trading Costs = {}".format(self.trading_costs)
        columns = ["cum_xrp_returns", "cum_theta_returns", "cum_strategy"]
        self.results[columns].plot(title=title, figsize=(12, 8))

    def print_performance(self):
        """Print the strategy multiple and Sharpe ratio."""
        if self.results is None:
            print("No results to display.")
            return

        strategy_performance = round(
            self.calculate_multiple(self.results['strategy']), 6)
        sharpe_ratio = round(self.calculate_sharpe_ratio(), 4)
        print(100 * "=")
        print("Statistical Arbitrage | INSTRUMENT = XRPUSDT | THETAUSDT")
        print(100 * "-")
        print("PERFORMANCE MEASURES:")
        print("\n")
        print("Multiple (Strategy):         {}".format(strategy_performance))
        print("Sharpe Ratio (Strategy):     {}".format(sharpe_ratio))
        print(38 * "-")
        print(100 * "=")

    def calculate_multiple(self, series):
        """Return the growth multiple implied by a series of log returns."""
        return np.exp(series.sum())

# Price history for both legs; the backtest class reads the columns
# `time`, `xrp_close` and `theta_close` from this file.
data_path = '/Users/markwindsor/Desktop/atlas_trade/src/strategies/statistical_arbitrage_crypto/backtest_data_1h.csv'
data = pd.read_csv(data_path)


symbol_one = "XRPUSDT"
symbol_two = "THETAUSDT"

# Proportional cost per unit change in position, expressed as a positive
# fraction: run_backtest() subtracts `trades * trading_costs` from the
# strategy returns.
tc = 0.0005

tester = StatArbCryptoBacktest(data, symbol_one, symbol_two, tc)
tester.test_strategy()

# Recorded cell output: KeyError: 'returns'