In [0]:
import numpy as np
import matplotlib.pyplot as plt


**Definition - Autocovariance**

The autocovariance function is defined as the second moment product

\\(cov(x{_s},x{_t})=E[(x{_s}-\mu{_s})(x{_t}-\mu{_t})]=E[x{_s}x{_t}]-E[x{_s}]E[x{_t}]\\)

This measures the linear dependence between two points on the same series observed at time s and t respectively.

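For a series whose values at different times are uncorrelated (such as white noise), if \\(s = t\\) then \\(E[x{_s}x{_t}]=E[x{_s}^2]\\), otherwise \\(E[x{_s}x{_t}]=E[x{_s}]E[x{_t}]\\).

Hence, for such a series the autocovariance is 0 whenever \\(s\\) is not equal to \\(t\\).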

If \\(s=t\\) then \\(E[x{_s}x{_t}]=E[x{_s}^2]=\sigma{_x}^2+\mu{_x}^2 \\) and the covariance will be \\(cov(x{_s},x{_s})=\sigma{_x}^2+\mu{_x}^2-\mu{_x}^2=\sigma{_x}^2\\)

Otherwise if \\(s\\) not equal \\(t\\) the covariance will be \\(cov(x{_s},x{_t})=E[x{_s}]E[x{_t}]-E[x{_s}]E[x{_t}]=0\\)


**Definition - Sample Autocovariance function**

The sample autocovariance function is defined as 

$$\hat \gamma (h) = \frac{1}{N}\sum_{t=1}^{N-h}(x_\{t+h}-\overline x)(x_\{t}-\overline x)$$

Notice how the normalization term is \\(\frac{1}{N}\\) and not \\(\frac{1}{N-h}\\). Taking a look at the expected value of the sample autocovariance function

$$E[\hat \gamma (h)]=\frac{1}{N} \sum_{i=1}^{N-\left|h\right|} \gamma(h)=\frac{1}{N}(N-\left|h\right|)\gamma(h)=(1-\frac{\left|h\right|}{N})\gamma(h)$$

It can be seen that this is a biased estimator. The reason for dividing by \\(N\\) rather than \\(N-h\\) is that it ensures that the estimated autocovariance function, and hence the covariance matrices built from it, are positive semi-definite. However, as \\(N\\) increases the bias goes to zero, so for a sufficiently large sample size this does not have any practical effect.


In [0]:
## Here the sample autocovariance is defined
def autocovariance(signal, lag):
    """Return the sample autocovariance of a 1-D series at the given lag.

    Implements
        gamma_hat(h) = 1/N * sum_{t=1}^{N-h} (x_{t+h} - x_bar) * (x_t - x_bar)
    The sum is divided by the full series length N, not by the number of
    terms N-h, which matches the definition given in this notebook.

    Parameters
    ----------
    signal : array_like
        One-dimensional series of observations.
    lag : int
        Lag h. The sample autocovariance is symmetric in the lag, so a
        negative lag is treated like its absolute value.

    Returns
    -------
    float
        The estimate gamma_hat(h). NaN for an empty series, and 0.0 when
        |lag| is at least the series length (the sum has no terms).
    """
    signal = np.asarray(signal)
    n = len(signal)
    if n == 0:
        # No observations: the estimate is undefined
        return np.nan

    h = abs(lag)
    if h >= n:
        # No overlapping pairs are left, so the sum is empty
        return 0.0

    deviations = signal - signal.mean()
    # Pair every x_{t+h} with x_t and normalise by N rather than N-h
    return np.sum(deviations[h:] * deviations[:n - h]) / n


**Example - Autocovariance of white noise**

Let \\(w_t\\) be a time series consisting of white noise with mean \\(\mu{_w}\\) and \\(var(w)=\sigma^2{_w}\\). The white noise will have an autocovariance \\(\gamma{_w}(s,t)=cov(w{_s},w{_t})\\) with value \\(\sigma^2{_w}\\) when \\(s=t\\) and \\(0\\) otherwise.

In [0]:

## Here a simulation of the autocovariance is made with different means and a standard deviation of 3
## to show that the autocovariance of white noise is 0 when s != t and sigma^2 when s=t.

## num_samples holds the sample sizes to be simulated: 100, 200, ..., 99900.
## The range starts at 1 because a sample size of 0 gives an empty series, for which no estimate exists.
num_samples = [100*i for i in range(1, 1000)]

## Simulate the autocovariance with different number of samples
def simulate_autocovariance(mu, sigma, num_samples):
    """Estimate the lag-0 and lag-1 autocovariance of white noise for several sample sizes.

    Parameters
    ----------
    mu : float
        Mean of the simulated white noise.
    sigma : float
        Standard deviation of the simulated white noise.
    num_samples : sequence of int
        Sample sizes; one independent white noise series is drawn per entry.

    Returns
    -------
    covs : numpy.ndarray
        Sample autocovariance at lag 0, one value per entry of num_samples.
    covs_lagged : numpy.ndarray
        Sample autocovariance at lag 1, one value per entry of num_samples.
    """
    covs = np.zeros(len(num_samples))
    covs_lagged = np.zeros(len(num_samples))
    for n, samples in enumerate(num_samples):
        # Use the mean and standard deviation passed in by the caller
        w = np.random.normal(mu, sigma, samples)

        # Lag 0 corresponds to s = t, lag 1 to t = s + 1
        covs[n] = autocovariance(w, 0)
        covs_lagged[n] = autocovariance(w, 1)

    return covs, covs_lagged

## Run the simulation for white noise with mean 2 and with mean 0, both with standard deviation 3
cov_w_mean_2, cov_w_mean_2_lagged = simulate_autocovariance(2, 3, num_samples)
cov_w_mean_0, cov_w_mean_0_lagged = simulate_autocovariance(0, 3, num_samples)

## First figure: the lag-0 estimates as a function of the number of samples
plt.figure()
for series, series_label in (
    (cov_w_mean_2, "Autocovariance s equal t (mean = 2)"),
    (cov_w_mean_0, "Autocovariance s equal t (mean = 0)"),
):
    plt.plot(num_samples, series, label=series_label)
plt.legend()
plt.title("Autocovariance for s=t")
plt.xlabel("Number of samples")
plt.ylabel("Autocovariance")

## Second figure: the lag-1 estimates as a function of the number of samples
plt.figure()
for series, series_label in (
    (cov_w_mean_2_lagged, "Autocovariance s not equal t (mean = 2)"),
    (cov_w_mean_0_lagged, "Autocovariance s not equal t (mean = 0)"),
):
    plt.plot(num_samples, series, label=series_label)
plt.legend()
plt.title("Autocovariance for t=s+1")
plt.xlabel("Number of samples")
plt.ylabel("Autocovariance")


**Example - Autocovariance of a moving average**

A new signal \\(v_{s}\\) is constructed by applying a two point average to a white noise series $$v{_s}=\frac{1}{2}(w{_s}+w_\{s-1})$$

Then the autocovariance function becomes \\(\gamma{_v}(s,t)=cov(\frac{1}{2}(w{_s}+w_\{s-1}), \frac{1}{2}(w{_t}+w_\{t-1}))\\).

When \\(t=s\\) then $$\gamma{_v}(s,s)=cov(\frac{1}{2}(w{_s}+w_\{s-1}), \frac{1}{2}(w{_s}+w_\{s-1}))=\frac{1}{4}(cov(w_s, w_s)+cov(w_{s-1}, w_s)+cov(w_s, w_{s-1})+cov(w_\{s-1}, w_\{s-1}))=\frac{1}{2}\sigma{_w}^2$$

These simplifications can be made since the random variables \\(U=\sum_j a_{j}X_{j}\\) and \\(V=\sum_k b_{k}Y_{k}\\) are linear combinations of the random variables \\(X_{j}\\) and \\(Y_{k}\\) and will have \\(cov(U,V)=\sum_j \sum_k a{_j}b{_k}cov(X{_j},Y{_k})\\).

When \\(t=s-1\\) then $$\gamma{_v}(s,s-1)=cov(\frac{1}{2}(w{_s}+w_\{s-1}), \frac{1}{2}(w_\{s-1}+w_\{s-2}))=\frac{1}{4}\sigma{_w}^2$$


In [0]:
mu, sigma = 0, 3 # mean = 0 and standard deviation = 3
## Use a sample size large enough to show that the autocovariance converges to the theoretical value
num_samples = 100000

# Create the white noise signal
w = np.random.normal(mu, sigma, num_samples)

# Create the two point moving average v_s = (w_s + w_{s-1}) / 2 in one vectorized step.
# Only indices that have a real predecessor are kept, so the series has num_samples - 1
# points and does not start with an artificial 0.
wf = 0.5 * (w[1:] + w[:-1])

# Compare the sample autocovariance at lags 0, 1 and 2 with the theoretical values
print(f"Autocovariance of signal wf with lag = 0: Autocovariance = {autocovariance(wf, 0):.3f}, Theoretical autocovariance = {1/2*sigma**2}")
print(f"Autocovariance of signal wf with lag = 1: Autocovariance = {autocovariance(wf, 1):.3f}, Theoretical autocovariance = {1/4*sigma**2}")
print(f"Autocovariance of signal wf with lag = 2: Autocovariance = {autocovariance(wf, 2):.3f}, Theoretical autocovariance = {0}")



**Example - Autocovariance of a Random Walk**

The random walk is defined as $$x_{t}=\delta t+\sum_\{j=1}^tw_\{j}$$

Here \\(\delta\\) is the drift in time. For this example this is set to 0.

The autocovariance is then 

$$\gamma(s,t)=cov(x_{s},x_{t})=cov(\sum_\{j=1}^sw_\{j}, \sum_\{k=1}^tw_\{k})$$

Rewriting using the definition of autocovariance, and using that the white noise has zero mean, this becomes:

$$cov(\sum_\{j=1}^sw_\{j}, \sum_\{k=1}^tw_\{k})=E[\sum_\{j=1}^sw_\{j}\sum_\{k=1}^tw_\{k}]=E[\sum_\{j=1}^s \sum_\{k=1}^t w_\{j}w_\{k}]=\sum_\{j=1}^s \sum_\{k=1}^t E[w_\{j}w_\{k}]$$

To simplify this, consider the case where \\(s=2\\) and \\(t=3\\)

$$\gamma(2,3)=E[w_{1}w_{1} + w_{1}w_{2} + w_{1}w_{3} + w_{2}w_{1} + w_{2}w_{2} + w_{2}w_{3}]$$

Since \\(E[w_{j}w_{k}]=0\\) when \\(j\\) is not equal to \\(k\\) this is reduced to:

$$\gamma(2,3)=E[w_{1}w_{1} + w_{2}w_{2}]=2\sigma_{w}^2$$

This can be done for different values of s and t to convince us that: 

$$cov(\sum_\{j=1}^sw_\{j}, \sum_\{k=1}^tw_\{k})=\min(s,t) \sigma_{w}^2$$

In [0]:
mu, sigma = 0, 1 # mean = 0 and standard deviation = 1
## Length of each random walk, i.e. the time index t at which the variance is evaluated
num_samples = 100

## Number of independent random walks that are simulated
num_tests = 1000
rwalks = []

for t in range(num_tests):
    ## Create random samples
    w = np.random.normal(mu, sigma, num_samples)
    ## Create the random walk by performing a cumulative sum
    rw = np.cumsum(w)
    rwalks.append(rw)

## Theoretical variance at the last time index: gamma(t, t) = min(t, t) * sigma^2 with t = num_samples
cov_theoretical = (num_samples)*(sigma**2)

## The empirical value is only calculated with a lag of 0, which is the variance at the last time index.
## It is taken across the independent walks, because the random walk is non-stationary and
## computing the sample autocovariance along a single walk will not yield the expected result.
cov_empirical = np.var([rw[-1] for rw in rwalks])
print(f"Empirical value: {cov_empirical:.3f}, theoretical value: {cov_theoretical}, error_margin: {(np.abs(cov_theoretical-cov_empirical)/cov_theoretical)*100:.3f}%")

## Plot all simulated walks in one figure
plt.figure()
for rw in rwalks:
    plt.plot(rw)
plt.title("Random walks")
plt.xlabel("Time")
plt.ylabel("Value")

**Definition - Autocorrelation**

The autocorrelation is defined as $$\rho(s,t)=\frac{\gamma(s,t)}{ \sqrt{\mathstrut \gamma(s,s)\gamma(t,t)}}$$

This yields a number between -1 and 1 of how well a series \\(x_{t}\\) can be linearly predicted at time \\(t\\) using only \\(x_s\\).

If \\(x_{t}\\) can be perfectly predicted from \\(x_{s}\\) through the linear relationship \\(x_{t} = \alpha + \beta x_{s} \\) then the autocorrelation will be \\(+1\\) when \\(\beta > 0\\) and \\(-1\\) when \\(\beta < 0\\).

**Definition - Sample Autocorrelation function**

The sample autocorrelation function is defined as

$$\hat \rho (h) = \frac{\hat \gamma (h)}{\hat \gamma (0)}$$


In [0]:
def autocorrelation(s, lag):
    """Return the sample autocorrelation of s at the given lag.

    This is the sample autocovariance at `lag` divided by the sample
    autocovariance at lag 0.
    """
    lagged_covariance = autocovariance(s, lag)
    zero_lag_covariance = autocovariance(s, 0)
    return lagged_covariance / zero_lag_covariance

**Property - Large sample distribution of the Autocorrelation Function**

A white noise process \\(x_{t}\\) is strictly stationary and will have a sample autocorrelation function \\(\hat \rho_{x}(h)\\) that is approximately normally distributed with zero mean for \\(h \geq 1\\) when n is large. The standard deviation is given by

$$\sigma_\{\rho_{x}(h)}=\frac{1}{\sqrt{\mathstrut n}}$$

This property allows us to assess if a peak in the autocorrelation function is significant. If the process is indeed white noise, approximately 95% of the sample autocorrelations should lie within the interval \\(\pm \frac{2}{\sqrt{n}}\\); peaks outside that interval are considered significant.

In [0]:
## Parameters for the experiment below: the spread of the sample autocorrelation is estimated for
## white noise and for a correlated signal, and compared with the theoretical value 1/sqrt(n)
## Sample sizes 100, 200, ..., 1900
num_samples = list(range(100, 2000, 100))

## Number of realizations per sample size, and the lag at which the ACF is evaluated
tests, lag = 100, 2
mu, sigma = 0, 1

def gen_white_noise_signal(num, mu=0, sigma=1):
    """Draw `num` independent samples of Gaussian white noise.

    Parameters
    ----------
    num : int
        Number of samples to generate.
    mu : float, optional
        Mean of the noise. Defaults to 0.
    sigma : float, optional
        Standard deviation of the noise. Defaults to 1.

    Returns
    -------
    numpy.ndarray
        Array of length `num`.
    """
    return np.random.normal(mu, sigma, num)

def gen_correlated_signal(num):
    """Return `num` samples of white noise smoothed with a 4-point moving average."""
    window = 4
    # A 'valid' convolution returns len(input) - window + 1 samples, so
    # window - 1 extra noise samples are drawn to end up with exactly `num`.
    noise = gen_white_noise_signal(num + window - 1)
    box = np.ones(window)
    return np.convolve(noise, box, 'valid') / window

def calculate_acf_variance(x_gen, lag, num_samples, tests):
    """Estimate the mean and standard deviation of the sample ACF for several sample sizes.

    For every sample size in `num_samples`, `tests` series are generated by
    calling `x_gen(size)`, and the sample autocorrelation at `lag` is
    computed for each of them.

    Returns
    -------
    (list, list)
        The mean and the standard deviation of the ACF values, one entry
        per sample size.
    """
    means, stds = [], []

    for size in num_samples:
        # Generate all realizations first, then evaluate the ACF of each one
        realizations = [x_gen(size) for _ in range(tests)]
        acf_values = [autocorrelation(series, lag) for series in realizations]

        means.append(np.mean(acf_values))
        stds.append(np.std(acf_values))

    return means, stds

## Theoretical large sample standard deviation 1/sqrt(n) of the white noise ACF,
## and the 2/sqrt(n) bound, for every sample size
white_noise_variance_theoretical = [1/np.sqrt(num) for num in num_samples]
white_noise_variance_confidence = [2/np.sqrt(num) for num in num_samples]
white_noise_mean_empirical, white_noise_std_empirical = calculate_acf_variance(gen_white_noise_signal, lag, num_samples, tests)
correlated_signal_mean, correlated_signal_std = calculate_acf_variance(gen_correlated_signal, lag, num_samples, tests)

## Figure 1: empirical standard deviation of the white noise ACF against the theoretical curves
plt.figure()
plt.plot(num_samples, white_noise_std_empirical, label="Empirical standard deviation")
plt.plot(num_samples, white_noise_variance_theoretical, label="Theoretical standard deviation")
plt.plot(num_samples, white_noise_variance_confidence, label="95% confidence standard deviation")
plt.title(f"Standard deviation of estimated ACF lag={lag} of white noise")
plt.legend()
plt.xlabel("Number of samples")
plt.ylabel("Standard deviation of ACF")

## Figure 2: mean ACF of the correlated signal against the 2/sqrt(n) white noise bound.
## The y-axis shows ACF values here, not a standard deviation.
plt.figure()
plt.plot(num_samples, correlated_signal_mean, label=f"Mean ACF lag={lag} of correlated signal")
plt.plot(num_samples, white_noise_variance_confidence, label="95% confidence standard deviation ACF of white noise")
plt.title(f"Correlated signal ACF lag={lag} vs 95% confidence")
plt.legend()
plt.xlabel("Number of samples")
plt.ylabel("ACF value")

**Definition - Cross Covariance function**

The cross covariance function between two series is given as

$$\gamma_\{xy}(h)=cov(x_\{t+h}, y_{t})=E[(x_\{t+h}-\mu_{x})(y_\{t}-\mu_{y})]$$

**Definition - Jointly stationary series**

Two series are jointly stationary if they both are stationary and the cross covariance function is only a function of lag \\(h\\).

**Definition - Cross correlation function**

For two jointly stationary series the cross correlation function is defined as 

$$\rho_\{xy}(h)=\frac{\gamma_\{xy}(h)}{\sqrt{\mathstrut \gamma_\{x}(0) \gamma_\{y}(0)}}$$

**Definition - Sample cross covariance function**

Given a realization of a random process the cross covariance can be estimated with the sample cross covariance function

$$\hat \gamma_\{xy}(h)=\frac{1}{n} \sum_\{t=1}^\{n-h}(x_\{t+h}-\overline x)(y_\{t}-\overline y)$$

In [0]:
## Here the sample crosscovariance is defined
def crosscovariance(x, y, lag):
    """Return the sample cross covariance between two equally long 1-D series.

    Implements
        gamma_hat_xy(h) = 1/n * sum_{t=1}^{n-h} (x_{t+h} - x_bar) * (y_t - y_bar)
    so x leads y by `lag` samples and the sum is divided by the full length
    n, which matches the definition given in this notebook.

    Parameters
    ----------
    x, y : array_like
        One-dimensional series of the same length.
    lag : int
        Lag h. A negative lag is evaluated through the identity
        gamma_xy(-h) = gamma_yx(h).

    Returns
    -------
    float
        The estimate gamma_hat_xy(h). NaN for empty series, and 0.0 when
        |lag| is at least the series length (the sum has no terms).

    Raises
    ------
    ValueError
        If x and y do not have the same length.
    """
    x = np.asarray(x)
    y = np.asarray(y)
    n = len(x)
    if len(y) != n:
        raise ValueError("x and y must have the same length")

    if lag < 0:
        # cov(x_{t-h}, y_t) equals cov(y_{s+h}, x_s): swap the roles of the series
        return crosscovariance(y, x, -lag)
    if n == 0:
        # No observations: the estimate is undefined
        return np.nan
    if lag >= n:
        # No overlapping pairs are left, so the sum is empty
        return 0.0

    # Pair every x_{t+h} with y_t and normalise by n rather than n-h
    return np.sum((x[lag:] - x.mean()) * (y[:n - lag] - y.mean())) / n


**Definition - Sample cross correlation function**

For two jointly stationary series the sample cross correlation function is defined as

$$\hat \rho_\{xy}(h)=\frac{\hat \gamma_\{xy}(h)}{\sqrt{\mathstrut \hat  \gamma_\{x}(0) \hat  \gamma_\{y}(0)}}$$

In [0]:
## Sample cross correlation
def cross_correlation(x, y, lag):
    """Return the sample cross correlation of x and y at the given lag.

    The sample cross covariance at `lag` is divided by the square root of
    the product of the two lag-0 sample autocovariances.
    """
    covariance_xy = crosscovariance(x, y, lag)
    scale = np.sqrt(autocovariance(x, 0)*autocovariance(y, 0))
    return covariance_xy / scale

**Property - Large sample distribution of cross correlation under independence**

The large sample distribution of \\(\hat \rho_\{xy}(h)\\) is normal with zero mean and $$\sigma_\{\hat \rho_\{xy}}=\frac{1}{\sqrt n}$$

This holds if the two series are independent and at least one of them is white noise.

In [0]:
## Parameters for showing that the large sample distribution of the cross correlation converges
## towards the theoretical value; a correlated and an uncorrelated signal are generated below
## and their cross correlation is calculated

## Sample sizes 100, 200, ..., 1900
num_samples = list(range(100, 2000, 100))

## Realizations per sample size, noise parameters, and the lag at which the CCF is evaluated
tests = 100
mu, sigma, lag = 0, 1, 2

def calculate_ccf_variance(x_gen, y_gen, lag, num_samples, tests):
    """Estimate the mean and standard deviation of the sample CCF for several sample sizes.

    For every sample size in `num_samples`, `tests` x-series are generated
    with `x_gen(size)`, then `tests` y-series with `y_gen(size)`, and the
    sample cross correlation at `lag` is computed for each (x, y) pair.

    Returns
    -------
    (list, list)
        The mean and the standard deviation of the CCF values, one entry
        per sample size.
    """
    means, stds = [], []

    for size in num_samples:
        # All x realizations are drawn before the y realizations
        xs = [x_gen(size) for _ in range(tests)]
        ys = [y_gen(size) for _ in range(tests)]
        ccf_values = [cross_correlation(x_series, y_series, lag) for x_series, y_series in zip(xs, ys)]

        means.append(np.mean(ccf_values))
        stds.append(np.std(ccf_values))

    return means, stds

## Theoretical large sample standard deviation 1/sqrt(n) of the cross correlation,
## and the 2/sqrt(n) bound, for every sample size
ccf_variance_theoretical = [1/np.sqrt(num) for num in num_samples]
ccf_confidence = [2/np.sqrt(num) for num in num_samples]
ccf_mean, ccf_std = calculate_ccf_variance(gen_white_noise_signal, gen_correlated_signal, lag, num_samples, tests)

## Empirical standard deviation of the cross correlation against the theoretical curves.
## The plotted quantity is the cross correlation (CCF), so title and axis are labelled accordingly.
plt.figure()
plt.plot(num_samples, ccf_std, label="Empirical standard deviation")
plt.plot(num_samples, ccf_variance_theoretical, label="Theoretical standard deviation")
plt.plot(num_samples, ccf_confidence, label="95% confidence standard deviation")
plt.title(f"Cross correlation for white noise and a correlated signal at lag={lag}")
plt.legend()
plt.xlabel("Number of samples")
plt.ylabel("Standard deviation of CCF")



**Example - A simulated time series**

Let \\(x_{t}\\) be a white noise process and 

$$y_{t}=5+x_{t}-0.7x_\{t-1}$$

The sample autocorrelation for the series \\(y_{t}\\) can be calculated with the sample autocorrelation function as shown before. The theoretical autocorrelation function for \\(h=1\\) can be calculated as

$$\rho_{y}(1)=\frac{\gamma_{y}(1)}{\gamma_{y}(0)}=\frac{cov(y_\{t+1}, y_t)}{\sigma_{y}^2}$$

The expected value of \\(y_{t}\\) is \\(E[y_{t}]=\mu_y=5\\) and the variance is \\(var(y_{t})=\sigma_{x}^2(1+0.7^2)\\), since \\(x_{t}\\) and \\(x_\{t-1}\\) are uncorrelated and their variances therefore add.

$$cov(y_\{t+1}, y_t) = E[(5+x_\{t+1}-0.7x_\{t}-\mu_y)(5+x_\{t}-0.7x_\{t-1}-\mu_y)]= E[x_\{t+1}x_{t}-0.7x_\{t+1}x_\{t-1}-0.7x_{t}^2+0.7^2x_{t}x_\{t-1}]=-0.7E[x_{t}^2]=-0.7\sigma_{x}^2$$

$$\rho_{y}(1)=\frac{-0.7\sigma_{x}^2}{\sigma_{x}^2(1+0.7^2)}=\frac{-0.7}{1+0.7^2}$$

In [0]:
## To convince ourselves that this is indeed correct, the series is simulated and the sample autocorrelation is found
## Sample sizes 100, 200, ..., 9900. A size of 0 is skipped because an empty series has no sample autocorrelation.
num_samples = [100*i for i in range(1, 100)]
lag = 1
mu_x, sigma_x = 0, 1
acf_lag_empiricals = []

for num in num_samples:
    # Draw one extra noise sample so that every y_t, including the first one, has a real predecessor x_{t-1}
    x = np.random.normal(mu_x, sigma_x, num + 1)
    # y_t = 5 + x_t - 0.7 x_{t-1}, computed for all t at once
    y = 5 + x[1:] - 0.7*x[:-1]

    acf_lag_empiricals.append(autocorrelation(y, lag=lag))

## Theoretical lag-1 autocorrelation: -0.7 / (1 + 0.7^2)
acf_lag_theroretical = -0.7/(1+0.7**2)

print(f"ACF Lag={lag}, N={num_samples[-1]}. Empirical={acf_lag_empiricals[-1]:.3f}, Theoretical={acf_lag_theroretical:.3f}")

## Plot the empirical estimate for every sample size against the constant theoretical value
plt.figure()
plt.plot(num_samples, acf_lag_empiricals, label="Empirical")
plt.plot(num_samples, [acf_lag_theroretical for num in num_samples], label="Theoretical")
plt.xlabel("Samples")
plt.ylabel("Value")
plt.title(f"Empirical vs theoretical value for ACF Lag={lag}")
plt.legend()