In [None]:
import yfinance as yf
from src import meanReversionTest
import importlib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Re-execute the already-imported meanReversionTest module so that edits made
# to its source are picked up without restarting the notebook kernel.
importlib.reload(meanReversionTest)

Download the specified price data using yfinance.

In [None]:
# Fetch daily price history for the candidate pair.  auto_adjust=False is
# passed so that the downloaded frame exposes a separate 'Adj Close' field,
# which is the only field the rest of the notebook uses.
tickers = ['AAPL', 'MSFT']
data = yf.download(
    tickers,
    start='2015-01-01',
    end='2024-01-01',
    auto_adjust=False,
)

# Keep the adjusted closes under their own name and preview the first rows.
adjCloseData = data['Adj Close']
print(adjCloseData.head())

Using simple visualization techniques, check whether the selected tickers might be cointegrated.

In the price series plot, we are looking for the two price series to track each other, i.e. to rise and fall together over time.

In the scatter plot, we are looking for a relatively linear relationship.

In [None]:
# Draw the two diagnostic plots in order: the price series over time first,
# then the scatter of one series against the other.
for drawPlot in (meanReversionTest.plotPriceSeries, meanReversionTest.plotScatterSeries):
    drawPlot(adjCloseData)

Now, we run the CADF (cointegrated augmented Dickey-Fuller) test.

First, we fit a linear model to the scatter plot of the two price series.
* This gives us the residuals, along with some other information.
* The residuals are the signed differences between the actual prices of the dependent price series and the prices predicted by our linear model from the independent price series.
* Here, we just choose the price series in the first column of adjCloseData to be the independent variable.

We can plot the residuals to see if, visually, they look like they might be mean reverting.

Finally, we run ADF on the residuals to test if the residuals actually are mean reverting.

In [None]:
# Step 1: fit the linear model.  The helper hands back the price frame
# (the next cell reads a 'Residuals' column from it) together with the
# fitted slope and intercept.
fitOutput = meanReversionTest.fitLinearModel(adjCloseData)
adjCloseData, beta, intercept = fitOutput

# Step 2: eyeball the residuals for mean-reverting behaviour.
meanReversionTest.plotResiduals(adjCloseData)

# Step 3: run the ADF test on the residuals and keep its three outputs.
cadfOutput = meanReversionTest.doCadf(adjCloseData)
testStatistic, pValue, criticalValues = cadfOutput

If we determine, based on the test statistic and the p-value, that the two price series are cointegrated, we can now specify a linear mean-reverting trading strategy and backtest it.

First, we calculate the z-score of the spread (the residuals of the linear fit).

In [None]:
# The spread is the residual series of the linear fit, as a plain NumPy array.
spread = adjCloseData['Residuals'].to_numpy()

# Standardise the spread.  NOTE: the mean and (population) standard deviation
# are taken over the whole sample, so every z-score uses full-period statistics.
spreadMean = np.mean(spread)
spreadStd = np.std(spread)
zscore = (spread - spreadMean) / spreadStd

Then, we backtest.

In [None]:
# Backtest of the linear mean-reversion strategy on the residual spread.
#
# Reads from earlier cells: adjCloseData (prices plus 'Residuals'), beta,
# spread and zscore.  Produces for later cells: startingCapital, dailyPnL,
# dailyReturns, cumulativePnL and allPositions.
startingCapital = 10000  # fixed notional used to turn dollar PnL into returns (for Sharpe)
headers = list(adjCloseData.columns)
# Assumes the first two columns of adjCloseData are the two price series.
stock1, stock2 = headers[0], headers[1]
currentPosition = dict()
allPositions = []
dailyPnL = []
dailyReturns = []
stdSignal = 1.5   # open a trade when the z-score moves beyond +/- this level
exitSignal = 0.5  # close the trade once |z-score| falls back below this level

# Pull the two price columns out once instead of doing pandas lookups per day.
prices1 = adjCloseData[stock1].to_numpy()
prices2 = adjCloseData[stock2].to_numpy()

for t, z in enumerate(zscore):
    # Daily PnL is booked FIRST, against the position carried into day t.
    # The t-1 -> t price move belongs to whatever was held over that interval:
    #   * a trade opened on today's signal earns nothing today (no look-ahead),
    #   * a trade closed today still earns today's move,
    # so the daily PnL of a trade sums exactly to its 'Total Profit'.
    # No position can be held at t == 0, so t - 1 never wraps to the last row.
    if currentPosition:
        dPnL = (
            currentPosition[stock1] * (prices1[t] - prices1[t - 1])
            + currentPosition[stock2] * (prices2[t] - prices2[t - 1])
        )
    else:
        dPnL = 0
    dailyPnL.append(dPnL)
    dailyReturns.append(dPnL / startingCapital)

    # Position sizing: one share of stock2 against beta shares of stock1.
    if not currentPosition:
        if z > stdSignal:
            # long stock1, short stock2
            currentPosition = {stock1: beta, stock2: -1, 'Enter Time': t, 'Enter Spread': spread[t]}
        elif z < -stdSignal:
            # short stock1, long stock2
            currentPosition = {stock1: -beta, stock2: 1, 'Enter Time': t, 'Enter Spread': spread[t]}
    elif abs(z) < exitSignal:
        # exit positions: realise the profit of each leg since entry
        entryTime = currentPosition['Enter Time']
        profitStock1 = currentPosition[stock1] * (prices1[t] - prices1[entryTime])
        profitStock2 = currentPosition[stock2] * (prices2[t] - prices2[entryTime])

        currentPosition['Exit Time'] = t
        currentPosition['profitStock1'] = profitStock1
        currentPosition['profitStock2'] = profitStock2
        currentPosition['Total Profit'] = profitStock1 + profitStock2
        currentPosition['Exit Spread'] = spread[t]
        currentPosition['Spread Difference'] = currentPosition['Exit Spread'] - currentPosition['Enter Spread']

        allPositions.append(currentPosition)
        currentPosition = dict()

# Closed trades only; a trade still open on the last day is not listed here,
# although its daily PnL is included in dailyPnL.
for position in allPositions:
    print(position)

# plot daily pnl vs time
plt.plot(dailyPnL)
plt.title('Daily PnL')
plt.xlabel('Time')
plt.ylabel('PnL ($)')
plt.show()

# plot cumulative pnl vs time
cumulativePnL = np.cumsum(dailyPnL)
plt.plot(cumulativePnL)
plt.title('Cumulative PnL')
plt.xlabel('Time')
plt.ylabel('Cumulative PnL ($)')
plt.show()

# plot daily returns vs time (with respect to a fixed $10,000 exposure each time)
# dailyReturns holds fractions; scale by 100 so the values match the '%' axis label.
plt.plot(np.asarray(dailyReturns) * 100)
plt.title('Daily Returns')
plt.xlabel('Time')
plt.ylabel('Daily Returns (%)')
plt.show()

Evaluate performance using:
* Equity curve (cumulative PnL)
* Sharpe ratio
* Max Drawdown

Calculation of Sharpe Ratio

Sharpe ratio tells us how much return we are getting per unit of risk we are taking. A greater value means more returns per unit risk.

For mean reversion trading strategies, we expect them to be relatively risky, as what is happening on a day-to-day basis is volatile. Most days, we don't hold a position at all, and thus have no returns. Then, when we do hold a position, it may take a while for the spread to return back to the mean, which increases risk. Even worse, the spread may increase temporarily before returning back to the mean.

In [None]:
# calculation of Sharpe (using daily returns)
# The ratio is mean / sample standard deviation of the daily returns (no
# risk-free rate is subtracted), annualised with sqrt(252).  Days with no
# position contribute a return of 0 and are included in both statistics.
returnsArray = np.asarray(dailyReturns, dtype=float)
sampleCount = returnsArray.size

meanDailyReturns = returnsArray.mean() if sampleCount else float('nan')
# The sample standard deviation (ddof=1) needs at least two observations.
stdDailyReturns = returnsArray.std(ddof=1) if sampleCount > 1 else float('nan')

# A flat return series (e.g. no trades at all) has zero variance; report NaN
# explicitly instead of falling through to a 0/0 division and its warning.
if stdDailyReturns > 0:
    dailySharpeRatio = meanDailyReturns / stdDailyReturns
else:
    dailySharpeRatio = float('nan')
annualSharpeRatio = dailySharpeRatio * np.sqrt(252)

print(f'Daily Sharpe Ratio: {dailySharpeRatio}')
print(f'Annual Sharpe Ratio: {annualSharpeRatio}')

# Share of days with a strictly positive return, printed as a percentage so
# the value matches its "Percent" label.
positiveShare = (returnsArray > 0).mean() if sampleCount else float('nan')
print(f"Percent positive days: {positiveShare:.2%}")

Calculating Max Drawdown, using cumulative PnL and initial capital

Max drawdown measures the greatest peak-to-trough drop in portfolio value over the backtest period. It tells us the largest loss that we would have had to sit through before (hopefully) recovering; note that it is the worst drop observed historically, not a bound on future losses. Here, the portfolio value is taken to be the initial capital plus the cumulative PnL.

In [None]:
# Max drawdown of the equity curve, in dollars (zero or negative).
# Portfolio value = initial capital + cumulative PnL.
portfolioValues = startingCapital + cumulativePnL

# Highest portfolio value seen up to and including each day.  Starting from
# the first value (not from 0) keeps the peak correct for any capital level.
runningMax = np.maximum.accumulate(portfolioValues)

# Drawdown = how far the portfolio sits below its running peak.  Both terms
# must be portfolio values; subtracting the peak from the raw cumulative PnL
# would shift every drawdown by -startingCapital.
drawdowns = portfolioValues - runningMax

# An empty backtest has no drawdown; avoid np.min raising on an empty array.
maxDrawDown = np.min(drawdowns) if drawdowns.size else 0.0
print(maxDrawDown)