In [1]:
#https://medium.com/@constandinou.antonio/quant-post-3-1-a-guided-path-into-mean-reversion-8b33b3c279e4
#https://medium.com/@bart.chr/pairs-trading-for-algorithmic-trading-breakdown-d8b709f59372

import pandas as pd
import statsmodels.api as sm
import numpy as np
import statsmodels.tsa.stattools as ts

In [73]:
df = pd.read_csv('datasets/data.csv')
data = df[['AALR3', 'ABCB4']]
data.columns = ['y', 'x']
x = sm.add_constant(data['x'])
y = data['y']

df = df[df.columns.difference(['Data'])]

In [63]:
# Fit and summarize OLS model
mod = sm.OLS(y, x)
results = mod.fit()
print(results.params)
#print(results.summary())

const   -6.936373
x        1.308303
dtype: float64


In [26]:
halflife = round(-np.log(2))/results.params[1]
halflife

-0.7643490674975995

In [5]:
print('half life',-np.log(2)/results.params[1])

half life -0.5298064010995843


In [6]:
print(results.f_test(np.identity(2)))

<F test: F=array([[9701.85022323]]), p=7.864479046236036e-106, df_denom=90, df_num=2>


In [7]:
#https://groups.google.com/forum/#!topic/pystatsmodels/HbUIeSuXqZs
class DickeyFullerTest:
    def __init__(self):
        self.df = None
        self.crit = None
        self.slope = None
        self.halflife = None
        self.lookback = None
        self.coint = None
        
    def adf(self, y, x):

        model = sm.GLS(y,sm.add_constant(x)).fit()
        coint = model.resid

        print(model.params)

        adfstat, pvalue, critvalues, res = ts.adfuller(coint, store=True, regresults=True)

        self.df = model.params[1]/model.bse[1]
        self.crit = res.critvalues
        self.slope = model.params[1]
        self.halflife = -np.log(2)/model.params[1]
        self.lookback = int(round(-np.log(2)/model.params[1]))
        self.coint = coint
        
        print('df: ', self.df)
        #print('crit: ', self.crit)
        print('slope: ', self.slope)
        print('halflife: ', self.halflife)
        print('lookback: ', self.lookback)

In [36]:
test = DickeyFullerTest()
test.adf(y, data['x'])

const   -6.936373
x        1.308303
dtype: float64
df:  45.48603661109386
slope:  1.3083027670510512
halflife:  -0.5298064010995843
lookback:  -1


In [56]:
#https://medium.com/@bart.chr/pairs-trading-for-algorithmic-trading-breakdown-d8b709f59372
#https://github.com/aconstandinou/mean-reversion
class ADF(object):
    """
    Augmented Dickey–Fuller (ADF) unit root test
    """

    def __init__(self):
        self.p_value = None
        self.five_perc_stat = None
        self.perc_stat = None
        self.p_min = .0
        self.p_max = .05
        self.look_back = 63

    def apply_adf(self, time_series):
        model = ts.adfuller(time_series, 1)
        self.p_value = model[1]
        self.five_perc_stat = model[4]['5%']
        self.perc_stat = model[0]

    def use_P(self):
        return (self.p_value > self.p_min) and (self.p_value < self.p_max)
    
    def use_critical(self):
        return abs(self.perc_stat) > abs(self.five_perc_stat)

class Half_Life(object):
    """
    Half Life test from the Ornstein-Uhlenbeck process 
    """

    def __init__(self):
        self.hl_min = 1.0
        self.hl_max = 42.0
        self.look_back = 43
        self.half_life = None

    def apply_half_life(self, time_series):
        lag = np.roll(time_series, 1)
        lag[0] = 0
        ret = time_series - lag
        ret[0] = 0

        # adds intercept terms to X variable for regression
        lag2 = sm.add_constant(lag)

        model = sm.OLS(ret, lag2)
        res = model.fit()
        
        #self.half_life = round(-np.log(2) / res.params[1], 0)
        self.half_life = -np.log(2) / res.params[1]

    def use(self):
        return (self.half_life < self.hl_max) and (self.half_life > self.hl_min)

# a value > 0.5 indicates a trending time series. The greater the value above 0.5 the more trending it is.
# a value = 0.5 indicates a random walk.
# a value < 0.5 indicates a mean reverting time series. The closer the value gets to 0 the more mean reverting it is
class Hurst():
    """
    If Hurst Exponent is under the 0.5 value of a random walk, then the series is mean reverting
    """

    def __init__(self):
        self.h_min = 0.0
        self.h_max = 0.4
        self.look_back = 126
        self.lag_max = 100
        self.h_value = None
    
    def apply_hurst(self, time_series):
        lags = range(2, self.lag_max)

        tau = [np.sqrt(np.std(np.subtract(time_series[lag:], time_series[:-lag]))) for lag in lags]

        #poly = np.polyfit(np.log10(lags), np.log10(tau), 1)
        poly = np.polyfit(np.log(lags), np.log(tau), 1)

        self.h_value = poly[0]*2.0 

    def use(self):
        return (self.h_value < self.h_max) and (self.h_value > self.h_min)

In [57]:
life = Half_Life()
life.apply_half_life(results.resid)
print(life.use())
print(life.half_life)

True
3.4749378345843525


In [58]:
adf = ADF()
adf.apply_adf(results.resid)
print(adf.use_P())
print(adf.use_critical())

True
True


In [None]:
hurst = Hurst()
hurst.apply_hurst(results.resid)

In [None]:
'cointegrated_pairs.csv'
for col1 in df.columns:
    for col2 in df.columns:
        x = sm.add_constant(df[col2])
        res = sm.OLS(df[col1], x).fit()
        
        life.apply_half_life(res.resid)
        print(life.use(), life.half_life)

In [90]:
x = sm.add_constant(df['ABCB4'])
res = sm.OLS(df['AALR3'], x).fit()

life.apply_half_life(res.resid)
print(life.use(), life.half_life)

True 3.4749378345843525


In [100]:
def half_life(time_series):
    lag = np.roll(time_series, 1)
    lag[0] = 0
    ret = time_series - lag
    ret[0] = 0

    # adds intercept terms to X variable for regression
    lag2 = sm.add_constant(lag)

    model = sm.OLS(ret, lag2)
    res = model.fit()

    halflife = int(round(-np.log(2) / res.params[1],0))#-np.log(2) / res.params[1]
    if halflife <= 0:
        halflife = 1
    return halflife

In [110]:
half_life(res.resid)

3