Simple stat-arb example

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime
import optuna
optuna.logging.set_verbosity(optuna.logging.WARNING)
import yfinance as yf

Reading data from Yahoo Finance:

In [None]:
def read_stock_data(ticker, start = '2010-01-01', end = '2024-08-01'):
    """
    Downloads daily prices from Yahoo Finance and converts them to log returns

    :ticker: Yahoo Finance ticker symbol, e.g. 'V'
    :start: start date of the request, string in format "%Y-%m-%d"
    :end: end date of the request, string in format "%Y-%m-%d"
    :return: dataframe with a single 'log_ret' column of close-to-close log returns
    """
    data = yf.download(ticker, start = start, end = end)

    close = data['Close']
    # Depending on the yfinance version the columns can be a (field, ticker)
    # MultiIndex; 'Close' then selects a one-column dataframe instead of a series
    if isinstance(close, pd.DataFrame):
        close = close.iloc[:, 0]

    # The first difference is undefined, so the first row is dropped
    log_ret = np.log(close).diff().dropna()

    return log_ret.to_frame('log_ret')

In [None]:
# Log returns of the two legs of the pair (Visa and Mastercard tickers)
data1, data2 = [read_stock_data(ticker) for ticker in ('V', 'MA')]

Functions for calculating standard statistics on the daily equity curve:

In [None]:
# Annualization factor applied to daily statistics
days_in_year = 252


def Return(rets):
    """
    Annualized mean return

    :rets: daily returns of the strategy
    :return: mean daily return multiplied by days_in_year
    """
    mean_daily_ret = np.mean(rets)
    return mean_daily_ret*days_in_year


def Volatility(rets):
    """
    Annualized volatility

    :rets: daily returns of the strategy
    :return: standard deviation of the daily returns multiplied by sqrt(days_in_year)
    """
    daily_std = np.std(rets)
    annualization = np.sqrt(days_in_year)
    return daily_std*annualization


def SharpeRatio(rets):
    """
    Annualized Sharpe ratio: annual return divided by annual volatility

    :rets: daily returns of the strategy
    :return: Sharpe ratio, or NaN when the volatility is not strictly positive
    """
    annual_vol = Volatility(rets)
    # Zero (or NaN) volatility leaves the ratio undefined
    if not (annual_vol > 0):
        return float('NaN')
    return Return(rets)/annual_vol

def statistics_calc(rets, bh, name = '_', plot = False):
    """
    Calculates the annual Sharpe ratio, return and volatility and optionally plots the cumulative returns

    :rets: daily returns of the strategy
    :bh: daily returns of the buy & hold benchmark, only used on the chart
    :name: index label of the resulting statistics row
    :plot: if True, cumulative returns are plotted and the statistics are printed
    :return: one-row dataframe with columns 'Sharpe ratio', 'Annual return', 'Volatility'
    """
    columns = ['Sharpe ratio', 'Annual return', 'Volatility']
    values = [SharpeRatio(rets), Return(rets), Volatility(rets)]

    if plot:
        # Both curves are drawn on the current figure
        for series, label in ((rets, 'strategy'), (bh, 'buy & hold')):
            plt.plot(series.cumsum(), label = label)
        plt.xlabel('t')
        plt.legend()

        print('Sharpe ratio = %0.2f'%values[0])
        print('Annual Return = %0.2f'%values[1])
        print('Annual Std = %0.2f'%values[2])

    return  pd.DataFrame([values], columns = columns, index = [name])

In [None]:
def strategy_backtest(data, params, plot = False, in_sample_end = '', slippage = 0.0005, plot_position = True):
    """
    Strategy backtest calculation

    The spread is the cumulative sum of the log returns. Its deviation from an
    exponential moving average, divided by the exponentially weighted mean
    absolute deviation, gives a z-score. The target position is the smoothed,
    negated z-score scaled by 5, lagged by one bar, truncated to an integer and
    capped at the position limit.

    :data: dataframe with a 'log_ret' column of log returns and a datetime index
    :params: list of strategy parameters in the order
             [period, open_treshold, close_treshold multiplier, period2, pos_limit];
             the two thresholds do not affect the position, they are only used
             in the name of the statistics row
    :plot: if True then the equity curve is plotted
    :in_sample_end: string in format "%Y-%m-%d" with timestamp of in_sample_end. Only used on charts
    :slippage: slippage per unit of position change
    :plot_position: if True then the spread and position charts are plotted (only when plot is True)
    :return: statistics dataframe, daily equity returns and total turnover
    """

    # Strategy parameters that we will optimize
    period = params[0]
    open_treshold = params[1]
    close_treshold = params[1]*params[2]
    period2 = params[3]
    pos_limit = params[4]

    # Calculation of target position:
    features = pd.DataFrame(index = data.index)
    features['spread'] = data['log_ret'].cumsum()

    features['spread_mean'] = features['spread'].ewm(period).mean()
    deviation = features['spread'] - features['spread_mean']
    features['spread_std'] = np.abs(deviation).ewm(period).mean()
    # 0/0 on the first bar gives NaN, which is treated as "no signal"
    features['z_score'] = (deviation/features['spread_std']).fillna(0)

    raw_position = 5*(-features['z_score']).ewm(period2).mean()

    # The position is lagged by one bar so that it only uses past information,
    # then truncated to whole units and capped at the maximum position limit
    features['position'] = raw_position.shift(1).fillna(0).astype(int).clip(-pos_limit, pos_limit)

    # Absolute position changes drive both the slippage cost and the turnover
    trades = features['position'].diff().abs()
    turnover = trades.sum()

    # We calculate the equity curve and convert it to a daily timeframe to calculate basic statistics.
    # min_count = 1 followed by dropna removes calendar days without observations (weekends, holidays):
    # keeping them as zero returns would bias statistics that are annualized by trading days
    pnl = (data['log_ret']*features['position'] - slippage*trades).fillna(0)
    eq = pnl.resample('1D').sum(min_count = 1).dropna()
    bh = data['log_ret'].fillna(0).resample('1D').sum(min_count = 1).dropna()

    # We calculate statistics and save the result
    stats = statistics_calc(eq, bh, name = "{0}_{1}_{2}".format(period, open_treshold, close_treshold), plot = plot)

    # The in-sample boundary is drawn on the equity chart produced by statistics_calc,
    # so it must not depend on plot_position
    if plot and in_sample_end != '':
        plt.axvline(x = datetime.datetime.strptime(in_sample_end, "%Y-%m-%d").date(), color = 'red')

    # Draw a graph of position changes, if necessary
    if plot and plot_position:
        period_to_plot = 500

        # Spread with its moving average and deviation band
        upper_band = features['spread_mean'] + features['spread_std']
        lower_band = features['spread_mean'] - features['spread_std']
        plt.figure(figsize = (10, 4))
        plt.plot(features['spread'][-period_to_plot:], label = 'Spread value')
        plt.plot(features['spread_mean'][-period_to_plot:], color = 'tab:orange')
        plt.plot(upper_band[-period_to_plot:], color = 'tab:orange', label = 'Moving average and Std range')
        plt.plot(lower_band[-period_to_plot:], color = 'tab:orange')
        plt.legend()

        # Smoothed z-score together with the resulting position
        plt.figure(figsize = (10, 4))
        plt.plot(features['z_score'].ewm(period2).mean()[-period_to_plot:], label = 'z-score')
        plt.plot(features['position'][-period_to_plot:], label = 'position')
        plt.xlabel('t')
        plt.legend()

    return stats, eq, turnover

In [None]:
def opt_backtest(train, params):
    """
    Runs the backtest on the training data and returns its Sharpe ratio

    :train: dataframe with log returns used for optimization
    :params: list of strategy parameters passed to strategy_backtest
    :return: value of the 'Sharpe ratio' column of the backtest statistics
    """
    backtest_result = strategy_backtest(train, params)
    stats_table = backtest_result[0]
    return stats_table['Sharpe ratio'].iloc[0]

In [None]:
def objective(trial):
    """
    Optuna objective: in-sample Sharpe ratio of the strategy for the sampled parameters

    :trial: optuna trial used to sample the strategy parameters and the hedge ratio
    :return: in-sample Sharpe ratio, or 0 when it is NaN
    """
    # Strategy parameters, sampled in the order in which they are passed to the backtest
    strategy_params = [
        trial.suggest_int("period", 5, 5000, log = True),
        trial.suggest_float("open_treshold", 1.0, 3.0),
        trial.suggest_float("close_treshold", 0.0005, 0.8),
        trial.suggest_int("period2", 5, 2000, log = True),
        trial.suggest_int("pos_limit", 1, 5),
    ]

    # Hedge ratio between the two legs of the spread
    beta = trial.suggest_float("beta", 0.02, 5.0, log = True)
    spread_rets = (data1-beta*data2).dropna()

    # Only the in-sample window is used for optimization
    window_start, window_end = '2016-01-01', '2023-01-01'
    train = spread_rets[window_start:window_end]

    sharpe = opt_backtest(train, strategy_params)

    # A NaN Sharpe ratio is replaced by a neutral value
    return 0 if np.isnan(sharpe) else sharpe

In [None]:
# Search for the parameters that maximize the in-sample Sharpe ratio
study = optuna.create_study(direction = 'maximize')
study.optimize(objective, n_trials = 500, show_progress_bar = True)

# The backtest reads its parameters by position, so the best values are looked up
# by name instead of relying on the key order of study.best_params
param_names = ['period', 'open_treshold', 'close_treshold', 'period2', 'pos_limit', 'beta']
params = [study.best_params[param_name] for param_name in param_names]

# Spread returns for the best hedge ratio
data = (data1-study.best_params['beta']*data2).dropna()
in_sample_start = '2016-01-01'
in_sample_end = '2023-01-01'

train = data[in_sample_start:in_sample_end]
# The evaluation window covers both the in-sample and the out-of-sample period;
# in_sample_end is passed to the backtest to mark the boundary on the chart
test = data[in_sample_start:]
stats, eq, _ = strategy_backtest(test, params, True, in_sample_end)


Additional exercises and research topics:

1. Develop and test your own statistical arbitrage strategy (you can use any combination of instruments, including combinations that are constructed dynamically).