In [1]:
#https://medium.com/@constandinou.antonio/quant-post-3-1-a-guided-path-into-mean-reversion-8b33b3c279e4
#https://medium.com/@bart.chr/pairs-trading-for-algorithmic-trading-breakdown-d8b709f59372

import pandas as pd
import statsmodels.api as sm
import numpy as np
import statsmodels.tsa.stattools as ts

In [25]:
data = pd.read_csv('datasets/data_cart.csv')

In [43]:
y = data['ABEV3']
x = data['BRFS3']

In [44]:
# Fit and summarize OLS model
x = sm.add_constant(x)
model = sm.OLS(y, x).fit()
print(model.params)

const    6.619922
BRFS3    0.338720
dtype: float64


In [45]:
result = ts.adfuller(model.resid, 1)
result

(-3.2508874512224795,
 0.017221712205789025,
 0,
 299,
 {'1%': -3.4524113009049935,
  '5%': -2.8712554127251764,
  '10%': -2.571946570731871},
 266.5368110529266)

In [47]:
adf = result[0]
p_value = result[1]
one_perc = result[4]['1%']
five_perc = result[4]['5%']
ten_perc = result[4]['10%']

one_perc, five_perc, ten_perc
if (abs(adf) > abs(one_perc)):
    print('99%')
if (abs(adf) > abs(five_perc)):
    print('95%')
if (abs(adf) > abs(ten_perc)):
    print('90%')
if (p_value < .05):
    print('Is estationary p-value')

95%
90%
Is estationary p-value


In [9]:
#https://medium.com/@bart.chr/pairs-trading-for-algorithmic-trading-breakdown-d8b709f59372
#https://github.com/aconstandinou/mean-reversion
class ADF(object):
    """
    Augmented Dickey–Fuller (ADF) unit root test
    """

    def __init__(self):
        self.p_value = None
        self.five_perc_stat = None
        self.perc_stat = None
        self.p_min = .0
        self.p_max = .05
        self.look_back = 63

    def apply_adf(self, time_series):
        model = ts.adfuller(time_series, 1)
        self.p_value = model[1]
        self.five_perc_stat = model[4]['5%']
        self.perc_stat = model[0]

    def use_P(self):
        return (self.p_value > self.p_min) and (self.p_value < self.p_max)
    
    def use_critical(self):
        return abs(self.perc_stat) > abs(self.five_perc_stat)

class Half_Life(object):
    """
    Half Life test from the Ornstein-Uhlenbeck process 
    """

    def __init__(self):
        self.hl_min = 1.0
        self.hl_max = 42.0
        self.look_back = 43
        self.half_life = None

    def apply_half_life(self, time_series):
        lag = np.roll(time_series, 1)
        lag[0] = 0
        ret = time_series - lag
        ret[0] = 0

        # adds intercept terms to X variable for regression
        lag2 = sm.add_constant(lag)
        model = sm.OLS(ret, lag2)
        res = model.fit()
        
        #self.half_life = round(-np.log(2) / res.params[1], 0)
        self.half_life = -np.log(2) / res.params[1]

    def use(self):
        return (self.half_life < self.hl_max) and (self.half_life > self.hl_min)

def half_life(spread):
    spread_lag = spread.shift(1)
    spread_lag.iloc[0] = spread_lag.iloc[1]

    spread_ret = spread - spread_lag
    spread_ret.iloc[0] = spread_ret.iloc[1]

    spread_lag2 = sm.add_constant(spread_lag)
    model = sm.OLS(spread_ret,spread_lag2)
    res = model.fit()
    
    halflife = int(round(-np.log(2) / res.params[1],0))

    if halflife <= 0:
        halflife = 1
    return halflife 

# a value > 0.5 indicates a trending time series. The greater the value above 0.5 the more trending it is.
# a value = 0.5 indicates a random walk.
# a value < 0.5 indicates a mean reverting time series. The closer the value gets to 0 the more mean reverting it is
class Hurst():
    """
    If Hurst Exponent is under the 0.5 value of a random walk, then the series is mean reverting
    """

    def __init__(self):
        self.h_min = 0.0
        self.h_max = 0.4
        self.look_back = 126
        #https://robotwealth.com/demystifying-the-hurst-exponent-part-1/
        self.lag_max = 20#era 100
        self.h_value = None
    
    def apply_hurst(self, time_series):
        lags = range(2, self.lag_max)

        tau = [np.sqrt(np.std(np.subtract(time_series[lag:], time_series[:-lag]))) for lag in lags]

        #poly = np.polyfit(np.log10(lags), np.log10(tau), 1)
        poly = np.polyfit(np.log(lags), np.log(tau), 1)

        self.h_value = poly[0]*2.0 

    def use(self):
        return (self.h_value < self.h_max) and (self.h_value > self.h_min)
    
def hurst_ernie_chan(p):
    lags = range(2, 20)
    variancetau = []; tau = []
    for lag in lags: 
        #  Write the different lags into a vector to compute a set of tau or lags
        tau.append(lag)

        # Compute the log returns on all days, then compute the variance on the difference in log returns
        # call this pp or the price difference
        pp = np.subtract(p[lag:], p[:-lag])
        variancetau.append(np.var(pp))

    # we now have a set of tau or lags and a corresponding set of variances.
    # plot the log of those variance against the log of tau and get the slope
    m = np.polyfit(np.log10(tau),np.log10(variancetau),1)

    hurst = m[0] / 2

    return hurst

def model_ols(y, x):
    x = sm.add_constant(x)
    model = sm.OLS(y, x).fit()
    return model

In [17]:
df = pd.read_csv('datasets-b3/AALR3.csv')
life = Half_Life()
time_series = df['Fechamento']
life.apply_half_life(time_series)
print(life.use())
print(life.half_life)

False
103.1098685218757


In [27]:
lag = np.roll(time_series, 1)
lag[0] = 0
ret = time_series - lag
ret[0] = 0
lag2 = sm.add_constant(lag)
model = sm.OLS(ret, lag2).fit()
-np.log(2) / res.params[1]

103.1098685218757

In [8]:
life = Half_Life()
life.apply_half_life(model.resid)
print(life.use())
print(life.half_life)

NameError: name 'model' is not defined

In [6]:
adf = ADF()
adf.check(model.resid)
print(adf.use_P())
print(adf.use_critical())
print(adf.p_value)

AttributeError: 'ADF' object has no attribute 'check'

In [7]:
hurst = Hurst()
hurst.apply_hurst(model.resid.as_matrix())
print(hurst.use())
print(hurst.h_value)

NameError: name 'model' is not defined