# Import Libraries
Import necessary libraries such as NumPy, Pandas, yFinance, and Statsmodels.

In [1]:
# Import necessary libraries
import numpy as np  # For numerical computations
import pandas as pd  # For data manipulation
import yfinance as yf  # For downloading financial data
import statsmodels.api as sm  # For statistical modeling

# Define Parameters
Define key parameters including asset tickers, lookback window, standard deviation multiples, and whether to use cointegration.

In [2]:
# Define key parameters
asset1 = 'AAPL'  # Ticker for the first asset
asset2 = 'MSFT'  # Ticker for the second asset
lookback_window = 252  # Lookback window for rolling calculations
std_dev_multiples = np.arange(1.5, 3.1, 0.5)  # Range of standard deviation multiples for optimization
use_cointegration = True  # Flag to determine whether to use cointegration in the strategy

# Download Data
Use yFinance to download historical price data for the selected assets.

In [3]:
# Download historical price data for the selected assets
data1 = yf.download(asset1, start='2022-01-01', end='2024-01-01')  # Download data for asset1
data2 = yf.download(asset2, start='2022-01-01', end='2024-01-01')  # Download data for asset2

# Display the first few rows of the downloaded data for verification
data1.head(), data2.head()

YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


(Price            Close        High         Low        Open     Volume
 Ticker            AAPL        AAPL        AAPL        AAPL       AAPL
 Date                                                                 
 2022-01-03  178.879929  179.734977  174.653889  174.771820  104487900
 2022-01-04  176.609650  179.793936  176.039622  179.489269   99310400
 2022-01-05  171.911835  177.071549  171.636651  176.521181   94537600
 2022-01-06  169.042084  172.285336  168.688274  169.730043   96904000
 2022-01-07  169.209167  171.145290  168.088773  169.916787   86709100,
 Price            Close        High         Low        Open    Volume
 Ticker            MSFT        MSFT        MSFT        MSFT      MSFT
 Date                                                                
 2022-01-03  325.634796  328.796299  320.800127  326.218464  28865100
 2022-01-04  320.051086  326.072535  317.239767  325.712586  32674300
 2022-01-05  307.765045  317.191191  307.375943  316.986888  40054300
 2022-01-06

# Calculate Spread and Z-Score
Calculate the spread between the two assets, compute rolling mean and standard deviation, and derive the Z-score.

In [6]:
# Calculate the spread between the two assets
if use_cointegration:
    # Perform linear regression to find the hedge ratio
    model = sm.OLS(data1['Close'], sm.add_constant(data2['Close'])).fit()
    hedge_ratio = model.params[1]  # Extract the hedge ratio
    spread = data1['Close'] - hedge_ratio * data2['Close']  # Calculate the spread using the hedge ratio
else:
    spread = data1['Close'] - data2['Close']  # Calculate the spread without cointegration

# Compute rolling mean and standard deviation of the spread
spread_mean = spread.rolling(window=lookback_window).mean()  # Rolling mean
spread_std = spread.rolling(window=lookback_window).std()  # Rolling standard deviation

# Derive the Z-score
z_score = (spread - spread_mean) / spread_std  # Z-score calculation

# Display the first few rows of the calculated Z-score for verification
z_score.head()

  hedge_ratio = model.params[1]  # Extract the hedge ratio


Ticker,AAPL,MSFT
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-01-03,,
2022-01-04,,
2022-01-05,,
2022-01-06,,
2022-01-07,,


# Backtest Pairs Trading Strategy
Define a function to backtest the pairs trading strategy and calculate the Sharpe ratio and cumulative returns.

In [7]:
# Define a function to backtest the pairs trading strategy
def backtest_pairs_trading(z_score, std_dev_threshold, data1, data2):
    """
    Backtest the pairs trading strategy based on Z-score thresholds.

    Parameters:
    z_score (pd.Series): Z-score of the spread.
    std_dev_threshold (float): Standard deviation threshold for entering/exiting trades.
    data1 (pd.DataFrame): Historical price data for the first asset.
    data2 (pd.DataFrame): Historical price data for the second asset.

    Returns:
    tuple: Sharpe ratio and final cumulative return of the strategy.
    """
    # Initialize positions DataFrame
    positions = pd.DataFrame(index=z_score.index)
    
    # Define trading signals based on Z-score thresholds
    positions['asset1'] = np.select(
        [z_score < -std_dev_threshold, z_score > std_dev_threshold, abs(z_score) < 0.5],  # Include an exit condition
        [1, -1, 0],  # Long, short, or exit
        default=0
    )
    positions['asset2'] = np.select(
        [z_score < -std_dev_threshold, z_score > std_dev_threshold, abs(z_score) < 0.5],
        [-1, 1, 0],  # Opposite positions for the second asset
        default=0
    )

    # Calculate daily returns for both assets
    asset1_returns = data1['Close'].pct_change()
    asset2_returns = data2['Close'].pct_change()
    
    # Calculate portfolio returns
    portfolio_returns = positions['asset1'].shift(1) * asset1_returns + positions['asset2'].shift(1) * asset2_returns
    
    # Calculate cumulative returns
    cumulative_returns = (1 + portfolio_returns).cumprod()
    
    # Calculate Sharpe ratio
    sharpe_ratio = portfolio_returns.mean() / portfolio_returns.std() * np.sqrt(252)
    
    return sharpe_ratio, cumulative_returns.iloc[-1]  # Return Sharpe ratio and final cumulative return

# Example usage of the function
sharpe_ratio, final_return = backtest_pairs_trading(z_score, 2.0, data1, data2)
sharpe_ratio, final_return  # Display the results for verification

ValueError: Expected a 1D array, got an array with shape (501, 2)

# Optimize Standard Deviation Band
Iterate over different standard deviation multiples to find the optimal band that maximizes the Sharpe ratio.

In [8]:
# Optimize Standard Deviation Band
# Iterate over different standard deviation multiples to find the optimal band that maximizes the Sharpe ratio

# Initialize a list to store results
results = []

# Loop through each standard deviation multiple
for multiple in std_dev_multiples:
    # Backtest the strategy for the current standard deviation multiple
    sharpe, final_return = backtest_pairs_trading(z_score, multiple, data1, data2)
    # Append the results to the list
    results.append({'std_dev_multiple': multiple, 'sharpe_ratio': sharpe, 'final_return': final_return})

# Convert the results into a DataFrame for analysis
results_df = pd.DataFrame(results)

# Identify the optimal standard deviation multiple that maximizes the Sharpe ratio
optimal_band = results_df.loc[results_df['sharpe_ratio'].idxmax()]

# Display the optimization results
print("Optimization Results:")
print(results_df)
print("\nOptimal Band (std_dev_multiple):", optimal_band['std_dev_multiple'])
print("Optimal Sharpe Ratio:", optimal_band['sharpe_ratio'])

ValueError: Expected a 1D array, got an array with shape (501, 2)

# Display Results
Display the optimization results and print the optimal standard deviation band and corresponding Sharpe ratio.

In [9]:
# Display the optimization results in a clear format
from IPython.display import display

# Display the results DataFrame
print("Optimization Results:")
display(results_df)

# Print the optimal standard deviation band and corresponding Sharpe ratio
print("\nOptimal Band (std_dev_multiple):", optimal_band['std_dev_multiple'])
print("Optimal Sharpe Ratio:", optimal_band['sharpe_ratio'])

Optimization Results:


NameError: name 'results_df' is not defined