Module 4:

## Lecture 1 - Time Series Analysis: Autocorrelation and Stationarity


In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy
from statsmodels.tsa.stattools import adfuller
import sympy


  from pandas import Int64Index as NumericIndex


### 7. Examples of Stationarity and Non-Stationarity

#### Stationarity and non-stationarity


In [2]:
# Fix the seed so that Restart & Run All reproduces the same series (and
# therefore the same stationarity verdicts below) on every run
SEED = 42
N_OBS = 100
rng = np.random.default_rng(SEED)

# Generate a random time series made of N_OBS observations, with
# a mean of 0 and a standard deviation of 1
ts = rng.normal(0, 1, N_OBS)

# We generate an associated time index of integers 0, 1, ..., N_OBS - 1
time_index = np.arange(0, N_OBS)

# In the lecture, the base time series is referred to as Zt
Zt = ts


def is_stationary(ts, alpha=0.05):
    """
    Check whether a time series looks stationary according to the
    Augmented Dickey-Fuller (ADF) test.

    Parameters
    ----------
    ts : array-like
        Observations of the time series, passed as-is to `adfuller`.
    alpha : float, optional
        Significance level the ADF p-value is compared against
        (default 0.05).

    Returns
    -------
    bool
        True if the p-value is below `alpha` (the null hypothesis is
        rejected, so the series is treated as stationary). False otherwise,
        including when the test could not be computed for `ts`.
    """

    # Run the ADF test. Input the test cannot handle is reported as "not
    # stationary" instead of raising. Only ordinary errors are caught, so
    # KeyboardInterrupt / SystemExit still propagate and the kernel can be
    # interrupted.
    try:
        result = adfuller(ts)
    except Exception:
        return False

    # The p-value is the second element of the result
    p_value = result[1]

    # Coerce to a plain bool so callers never receive a numpy boolean
    return bool(p_value < alpha)


# Build the series derived from Zt whose stationarity is checked below
# Yt: cosine component with a period of 7 time steps, added to Zt
Yt = np.cos(2 * np.pi * time_index / 7) + Zt
# Xt: cosine applied element-wise to Zt
Xt = np.cos(Zt)
# Wt: Zt scaled element-wise by the time index
Wt = time_index * Zt
# NOTE(review): np.sum without an axis reduces each of the next three
# expressions to a single number, so Vt, Ut and Qt are scalars rather than
# time series. Their "Non-stationary" verdict below presumably comes from
# is_stationary's error fallback, not from an actual test result.
# TODO: confirm the intended series definitions against the lecture.
Vt = np.sum(np.cos(Zt))
# Ut only uses the first 10 observations, weighted by 0.5**t
Ut = np.sum(np.cos(Zt[0:10]) * (0.5**time_index[0:10]))
# Qt uses every observation, weighted by 0.5**t
Qt = np.sum(np.cos(Zt) * (0.5**time_index))

# Create a dictionary of all the time series, keyed by the lecture's names
time_series = {
    "Zt": Zt,
    "Yt": Yt,
    "Xt": Xt,
    "Wt": Wt,
    "Vt": Vt,
    "Ut": Ut,
    "Qt": Qt
}

# Check the stationarity of each time series and print one verdict per line
for key, value in time_series.items():
    print(
        f"{key}:",
        "Stationary" if is_stationary(value) else "Non-stationary"
    )


Zt: Stationary
Yt: Stationary
Xt: Stationary
Wt: Stationary
Vt: Non-stationary
Ut: Non-stationary
Qt: Non-stationary


Depending on the random draw, Yt and Wt may test as stationary or non-stationary. Since we should consider the worst case, I will assume that both are non-stationary. Then there are the sums (Vt, Ut and Qt). I expected them to be non-stationary because, as computed here, each one is a single value rather than a series, so the ADF test has nothing to work with and the check reports "Non-stationary". According to the lecture, however, Ut is stationary, while all the other sums are non-stationary. I'm still not 100% sure why this is, but the lecture answers can be used to explain it.

### 9. Differencing

#### Differencing Time Series


In [3]:
# Symbolic coefficients a and b of the quadratic trend a + b*t**2
a, b = sympy.symbols("a b")

# Symbols for the first five terms of the stationary series Wt
W1, W2, W3, W4, W5 = sympy.symbols("W1 W2 W3 W4 W5")

# X_t = a + b*t**2 + W_t for the time steps t = 1, ..., 5
X1, X2, X3, X4, X5 = [
    a + b * (t**2) + w
    for t, w in enumerate((W1, W2, W3, W4, W5), start=1)
]

# First differences at steps 4 and 5
deltaX4 = X4 - X3
deltaX5 = X5 - X4

# Second difference at step 5: the difference of the two first differences
double_deltaX5 = deltaX5 - deltaX4

# Show the simplified first and second differences at step 5
print("Delta 5:", sympy.simplify(deltaX5))
print("Double delta 5:", sympy.simplify(double_deltaX5))


Delta 5: -W4 + W5 + 9*b
Double delta 5: W3 - 2*W4 + W5 + 2*b


#### Differencing Time Series: Removing Stochastic Dependence


In [4]:
# Symbols for the terms W1..W5 that drive the series. They are declared in
# this cell so it does not depend on an earlier cell having defined them.
W1, W2, W3, W4, W5 = sympy.symbols("W1 W2 W3 W4 W5")

# Build X_t = 2*X_{t-1} - X_{t-2} + W_t. The first two terms are what the
# same recurrence gives when the values before X1 are taken to be zero.
X1 = W1
X2 = 2*X1 + W2
X3 = 2*X2 - X1 + W3
X4 = 2*X3 - X2 + W4
X5 = 2*X4 - X3 + W5

# Get the first differences
deltaX5 = X5 - X4
deltaX4 = X4 - X3
deltaX3 = X3 - X2
deltaX2 = X2 - X1

# Print the first differences: each one is still a running sum of the W terms
for step, delta in ((5, deltaX5), (4, deltaX4), (3, deltaX3), (2, deltaX2)):
    print(f"Delta {step}:", delta.simplify())

# Get the second differences
double_deltaX5 = deltaX5 - deltaX4
double_deltaX4 = deltaX4 - deltaX3
double_deltaX3 = deltaX3 - deltaX2

# Print the second differences: only the single term W_t is left
for step, double_delta in (
    (5, double_deltaX5),
    (4, double_deltaX4),
    (3, double_deltaX3),
):
    print(f"Double delta {step}:", double_delta.simplify())


Delta 5: W1 + W2 + W3 + W4 + W5
Delta 4: W1 + W2 + W3 + W4
Delta 3: W1 + W2 + W3
Delta 2: W1 + W2
Double delta 5: W5
Double delta 4: W4
Double delta 3: W3


### 11. Autocorrelation as a Diagnostic Tool

#### Seasonal Variation and ACF


In [5]:
# Linear ramp of the nine integers from -4 to 4 (sample mean of zero)
ts = np.arange(-4, 5)


def autocovariance(ts, h, demean=False):
    """
    Estimate the autocovariance of a time series at lag `h`.

    Computes (1 / n) * sum_i ts[i] * ts[i + |h|] over every index pair that
    lies inside the series, where n = len(ts).

    Parameters
    ----------
    ts : array-like
        Observations of the time series.
    h : int
        Lag (time shift). The sign is ignored because the autocovariance
        is symmetric in the lag.
    demean : bool, optional
        If True, subtract the sample mean before multiplying. The default
        (False) uses the raw values, which is only an autocovariance
        estimate when the series already has zero mean.

    Returns
    -------
    float
        The estimate; 0.0 when |h| is at least the length of the series.

    Raises
    ------
    ValueError
        If `ts` is empty.
    """

    n = len(ts)
    if n == 0:
        raise ValueError("autocovariance requires a non-empty time series")

    # gamma(-h) == gamma(h). Taking |h| also stops a negative lag from
    # wrapping around the series through negative indexing.
    lag = abs(h)

    # No pair of observations is `lag` steps apart
    if lag >= n:
        return 0.0

    values = np.asarray(ts)
    if demean:
        values = values - values.mean(axis=0)

    # Multiply each observation by the one `lag` steps later, add the
    # products up and average over the full length of the series
    gamma = np.sum(values[:n - lag] * values[lag:], axis=0)
    return gamma / n


# Report the estimator for the lags 0 through 6
for lag in range(7):
    print(f"Gamma ({lag}):", autocovariance(ts, lag))


Gamma (0): 6.666666666666667
Gamma (1): 4.444444444444445
Gamma (2): 2.3333333333333335
Gamma (3): 0.4444444444444444
Gamma (4): -1.1111111111111112
Gamma (5): -2.2222222222222223
Gamma (6): -2.7777777777777777


#### Seasonal Variation and ACF (Part 2)


In [6]:
# Seasonal series: the pattern -1, 0, 1, 0 repeated twice
ts = np.tile([-1, 0, 1, 0], 2)

# Report the autocovariance function estimator for the lags 0 through 5
for lag in range(6):
    print(f"Gamma ({lag}):", autocovariance(ts, lag))


Gamma (0): 0.5
Gamma (1): 0.0
Gamma (2): -0.375
Gamma (3): 0.0
Gamma (4): 0.25
Gamma (5): 0.0
