In [53]:
#https://www.quantstart.com/articles/Backtesting-An-Intraday-Mean-Reversion-Pairs-Strategy-Between-SPY-And-IWM/
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import cointegration_mult as mult
import statsmodels.api as sm

In [4]:
path_data_cart = 'datasets/data_cart.csv'

In [28]:
data = pd.read_csv(path_data_cart, index_col=0)
data = data[::-1]
data = data[:250]

In [42]:
y_symbol = 'TAEE11'
x_symbol = 'USIM5'
pairs = data[[y_symbol, x_symbol]].copy()

In [30]:
pair = data[[y_symbol, x_symbol]].copy()

pair['residue'] = 0
pair['residue'] = mult.residue(data[y_symbol], data[x_symbol], 250)
pair['input_desv_up'] = 2*pair['residue'].std()
pair['input_desv_donw'] = -2*pair['residue'].std()

In [31]:
pair

Unnamed: 0_level_0,TAEE11,USIM5,residue,input_desv_up,input_desv_donw
Data,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
02/07/2020,28.41,7.35,-1.354964,1.309207,-1.309207
01/07/2020,28.49,7.34,-1.254932,1.309207,-1.309207
30/06/2020,28.10,7.27,-1.571034,1.309207,-1.309207
29/06/2020,28.67,7.15,-0.882250,1.309207,-1.309207
26/06/2020,28.07,7.10,-1.426308,1.309207,-1.309207
...,...,...,...,...,...
08/07/2019,28.34,9.65,-0.781184,1.309207,-1.309207
05/07/2019,28.03,9.68,-1.107061,1.309207,-1.309207
04/07/2019,27.78,9.45,-1.139525,1.309207,-1.309207
03/07/2019,27.67,9.07,-0.897328,1.309207,-1.309207


In [33]:
for day in range(0, len(pair)):
    day

In [60]:
def calculate_spread_zscore(pairs, y_symbol, x_symbol, lookback=100):
    """Creates a hedge ratio between the two symbols by calculating
    a rolling linear regression with a defined lookback period. This
    is then used to create a z-score of the 'spread' between the two
    symbols based on a linear combination of the two."""
    
    # Use the pandas Ordinary Least Squares method to fit a rolling
    # linear regression between the two closing price time series
    print("Fitting the rolling Linear Regression...")
    """model = pd.ols(y=pairs[y_symbol], 
                   x=pairs[x_symbol],
                   window=lookback)"""
    
    x=sm.add_constant(pairs[x_symbol])
    model = sm.OLS(pairs[y_symbol], x).fit()
    
    # Construct the hedge ratio and eliminate the first 
    # lookback-length empty/NaN period
    pairs['hedge_ratio'] = model.params[x_symbol]
    pairs = pairs.dropna()

    # Create the spread and then a z-score of the spread
    print("Creating the spread/zscore columns...")
    pairs['spread'] = pairs[y_symbol] - pairs['hedge_ratio']*pairs[x_symbol]
    pairs['zscore'] = (pairs['spread'] - np.mean(pairs['spread']))/np.std(pairs['spread'])
    return pairs

In [65]:
spread_zscore = calculate_spread_zscore(pairs, y_symbol, x_symbol, 100)
spread_zscore

Fitting the rolling Linear Regression...
Creating the spread/zscore columns...


Unnamed: 0_level_0,TAEE11,USIM5,hedge_ratio,spread,zscore
Data,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
02/07/2020,28.41,7.35,0.660098,23.558280,-0.036760
01/07/2020,28.49,7.34,0.660098,23.644881,0.054951
30/06/2020,28.10,7.27,0.660098,23.301087,-0.309129
29/06/2020,28.67,7.15,0.660098,23.950299,0.378391
26/06/2020,28.07,7.10,0.660098,23.383304,-0.222061
...,...,...,...,...,...
08/07/2019,28.34,9.65,0.660098,21.970054,-1.718702
05/07/2019,28.03,9.68,0.660098,21.640251,-2.067966
04/07/2019,27.78,9.45,0.660098,21.542074,-2.171937
03/07/2019,27.67,9.07,0.660098,21.682911,-2.022789


In [67]:
def create_long_short_market_signals(pairs, symbols, z_entry_threshold=2.0, z_exit_threshold=1.0):
    """Create the entry/exit signals based on the exceeding of 
    z_enter_threshold for entering a position and falling below
    z_exit_threshold for exiting a position."""

    # Calculate when to be long, short and when to exit
    pairs['longs'] = (pairs['zscore'] <= -z_entry_threshold)*1.0
    pairs['shorts'] = (pairs['zscore'] >= z_entry_threshold)*1.0
    pairs['exits'] = (np.abs(pairs['zscore']) <= z_exit_threshold)*1.0

    # These signals are needed because we need to propagate a
    # position forward, i.e. we need to stay long if the zscore
    # threshold is less than z_entry_threshold by still greater
    # than z_exit_threshold, and vice versa for shorts.
    pairs['long_market'] = 0.0
    pairs['short_market'] = 0.0

    # These variables track whether to be long or short while
    # iterating through the bars
    long_market = 0
    short_market = 0

    # Calculates when to actually be "in" the market, i.e. to have a
    # long or short position, as well as when not to be.
    # Since this is using iterrows to loop over a dataframe, it will
    # be significantly less efficient than a vectorised operation,
    # i.e. slow!
    print ("Calculating when to be in the market (long and short)...")
    for i, b in enumerate(pairs.iterrows()):
        # Calculate longs
        if b[1]['longs'] == 1.0:
            long_market = 1            
        # Calculate shorts
        if b[1]['shorts'] == 1.0:
            short_market = 1
        # Calculate exists
        if b[1]['exits'] == 1.0:
            long_market = 0
            short_market = 0
        # This directly assigns a 1 or 0 to the long_market/short_market
        # columns, such that the strategy knows when to actually stay in!
        pairs.ix[i]['long_market'] = long_market
        pairs.ix[i]['short_market'] = short_market
    return pairs

In [69]:
long_short_market_signals = create_long_short_market_signals(spread_zscore, '')

Calculating when to be in the market (long and short)...


.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated


In [73]:
def create_portfolio_returns(pairs, y_symbol, x_symbol):
    """Creates a portfolio pandas DataFrame which keeps track of
    the account equity and ultimately generates an equity curve.
    This can be used to generate drawdown and risk/reward ratios."""
    
    # Convenience variables for symbols
    sym1 = y_symbol
    sym2 = x_symbol

    # Construct the portfolio object with positions information
    # Note that minuses to keep track of shorts!
    print ("Constructing a portfolio...")
    portfolio = pd.DataFrame(index=pairs.index)
    portfolio['positions'] = pairs['long_market'] - pairs['short_market']
    portfolio[sym1] = -1.0 * pairs[y_symbol] * portfolio['positions']
    portfolio[sym2] = pairs[x_symbol] * portfolio['positions']
    portfolio['total'] = portfolio[sym1] + portfolio[sym2]

    # Construct a percentage returns stream and eliminate all 
    # of the NaN and -inf/+inf cells
    print ("Constructing the equity curve...")
    portfolio['returns'] = portfolio['total'].pct_change()
    portfolio['returns'].fillna(0.0, inplace=True)
    portfolio['returns'].replace([np.inf, -np.inf], 0.0, inplace=True)
    portfolio['returns'].replace(-1.0, 0.0, inplace=True)

    # Calculate the full equity curve
    portfolio['returns'] = (portfolio['returns'] + 1.0).cumprod()
    return portfolio

In [74]:
create_portfolio_returns(long_short_market_signals, y_symbol, x_symbol)

Constructing a portfolio...
Constructing the equity curve...


Unnamed: 0_level_0,positions,TAEE11,USIM5,total,returns
Data,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
02/07/2020,0.0,-0.00,0.00,0.00,1.000000
01/07/2020,0.0,-0.00,0.00,0.00,1.000000
30/06/2020,0.0,-0.00,0.00,0.00,1.000000
29/06/2020,0.0,-0.00,0.00,0.00,1.000000
26/06/2020,0.0,-0.00,0.00,0.00,1.000000
...,...,...,...,...,...
08/07/2019,0.0,-0.00,0.00,0.00,1.040702
05/07/2019,1.0,-28.03,9.68,-18.35,1.040702
04/07/2019,1.0,-27.78,9.45,-18.33,1.039568
03/07/2019,1.0,-27.67,9.07,-18.60,1.054881
