In [25]:
import pandas as pd
import numpy as np
import yfinance as yf
import matplotlib.pyplot as plt
from hurst import compute_Hc, random_walk
from statsmodels.regression.linear_model import OLS
import statsmodels.api as sm

In [8]:
df = yf.download(
    tickers = 'AAPL MSFT AMD USDCAD=x',
    period="max",
    start="2022-01-01", 
#    end="2022-03-28",
    interval="1d",
    group_by = 'ticker'
)

[*********************100%***********************]  4 of 4 completed


In [9]:
close = df.iloc[:, df.columns.get_level_values(1)=='Close']
close.columns = ['AAPL', 'MSFT', 'AMD', 'USDCAD']
close.dropna(inplace=True)
df = close.copy()
del close
df

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  close.dropna(inplace=True)


Unnamed: 0_level_0,AAPL,MSFT,AMD,USDCAD
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-01-03,150.240005,1.26588,334.750000,182.009995
2022-01-04,144.419998,1.27513,329.010010,179.699997
2022-01-05,136.149994,1.27044,316.380005,174.919998
2022-01-06,136.229996,1.27601,313.880005,172.000000
2022-01-07,132.000000,1.27302,314.040009,172.169998
...,...,...,...,...
2023-05-23,108.120003,1.35029,315.260010,171.559998
2023-05-24,108.269997,1.34994,313.850006,171.839996
2023-05-25,120.349998,1.35885,325.920013,172.990005
2023-05-26,127.029999,1.36432,332.890015,175.429993


In [12]:
for j in ['AAPL', 'MSFT', 'AMD', 'USDCAD']:
    
    series = np.log(df[j])
    
    H, c, data = compute_Hc(series, kind='price', simplified=True)

    print('Exponente Hurst de ' + j + ': ' + str(H))

Exponente Hurst de AAPL: 0.5402931069282246
Exponente Hurst de MSFT: 0.4989987893009356
Exponente Hurst de AMD: 0.48085919098575797
Exponente Hurst de USDCAD: 0.41559722988321796


In [19]:
usdcad = df['USDCAD']
usdcad = pd.DataFrame(usdcad)
usdcad

Unnamed: 0_level_0,USDCAD
Date,Unnamed: 1_level_1
2022-01-03,182.009995
2022-01-04,179.699997
2022-01-05,174.919998
2022-01-06,172.000000
2022-01-07,172.169998
...,...
2023-05-23,171.559998
2023-05-24,171.839996
2023-05-25,172.990005
2023-05-26,175.429993


In [23]:
usdcad['lag'] = usdcad['USDCAD'].shift(1)
usdcad['dif'] = usdcad['USDCAD'] - usdcad['lag']
usdcad.dropna(inplace=True)
usdcad

Unnamed: 0_level_0,USDCAD,lag,dif
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2022-01-04,179.699997,182.009995,-2.309998
2022-01-05,174.919998,179.699997,-4.779999
2022-01-06,172.000000,174.919998,-2.919998
2022-01-07,172.169998,172.000000,0.169998
2022-01-10,172.190002,172.169998,0.020004
...,...,...,...
2023-05-23,171.559998,174.199997,-2.639999
2023-05-24,171.839996,171.559998,0.279999
2023-05-25,172.990005,171.839996,1.150009
2023-05-26,175.429993,172.990005,2.439987


In [24]:
x = usdcad['lag']
y = usdcad['dif']

In [26]:
x = sm.add_constant(x)
model = sm.OLS(y,x)
results = model.fit()
results.params

const    4.689167
lag     -0.030322
dtype: float64

In [40]:
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                    dif   R-squared:                       0.016
Model:                            OLS   Adj. R-squared:                  0.013
Method:                 Least Squares   F-statistic:                     5.761
Date:                Tue, 30 May 2023   Prob (F-statistic):             0.0169
Time:                        19:32:54   Log-Likelihood:                -895.86
No. Observations:                 352   AIC:                             1796.
Df Residuals:                     350   BIC:                             1803.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          4.6892      1.966      2.385      0.0

In [31]:
halflife = -np.log(2)/results.params.iloc[-1]
print('Tiempo medio de la reversión a la media: ' + str(round(halflife, 1)) + ' days')

Tiempo medio de la reversión a la media: 22.9 days
