Calculates the autocorrelation of the specified lag, according to the formula *



$$X_{t}=\varphi_0 +\sum _{{i=1}}^{k}\varphi_{i}X_{{t-i}}+\varepsilon_{t}$$

In [1]:
from statsmodels.tsa.ar_model import AutoReg
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

In [2]:
x = pd.read_csv('./data/daily-min-temperatures.csv', header=0, index_col=0)
print(x.head())

            Temp
Date            
1981-01-01  20.7
1981-01-02  17.9
1981-01-03  18.8
1981-01-04  14.6
1981-01-05  15.8


In [3]:
if isinstance(x, pd.Series):
    print('pd.Series')
    x = x.values

In [4]:
lag = 30 

# Slice the relevant subseries based on the lag
y1 = x[: (len(x) - lag)]
y2 = x[lag:]

In [5]:
y1.shape, y2.shape

((3620, 1), (3620, 1))

Calculates the autocorrelation of the specified lag, according to the formula: 

$$\frac{1}{(n-l)\sigma^{2}} \sum_{t=1}^{n-l}(X_{t}-\mu )(X_{t+l}-\mu)$$

In [6]:
# Subtract the mean of the whole series x
x_mean = np.mean(x)

In [7]:
# The result is sometimes referred to as "covariation"
sum_product = np.sum((y1 - x_mean) * (y2 - x_mean))

In [8]:
sum_product

Temp    58411.331796
dtype: float64

In [9]:
# Return the normalized unbiased covariance
v = np.var(x)
print(v)

Temp    16.575313
dtype: float64


In [10]:
if np.isclose(v, 0):
    autocorrelation= np.NaN
else:
    autocorrelation= sum_product / ((len(x) - lag) * v)

In [11]:
autocorrelation

Temp    0.973479
dtype: float64

### References
* Wikipedia : https://en.wikipedia.org/wiki/Autocorrelation#Estimation