# Assignment 2 - MATH60837A
William Bourque - 11359215

Frederic Pelletier - 11359258

## installing dependencies (only needed once)

In [None]:
%pip install numpy
%pip install matplotlib
%pip install seaborn
%pip install pandas
%pip install statsmodels

## importing dependencies

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from statsmodels.tsa.stattools import acf
from statsmodels.stats.diagnostic import acorr_ljungbox
from statsmodels.tsa.arima.model import ARIMA
from scipy.stats import chi2
import warnings
warnings.filterwarnings("ignore")

## Loading the data

In [None]:
# Read the raw file and index the observations by their parsed date.
df = pd.read_csv('./data.csv')
df = df.assign(DATE=pd.to_datetime(df['DATE'])).set_index('DATE')
# Log of the QCAR628BIS series, then its first difference.
df = df.assign(log_HP=np.log(df['QCAR628BIS']))
# diff() leaves a NaN in the first row, so incomplete rows are dropped.
df = df.assign(diff_HP=df['log_HP'].diff()).dropna()
# Series that is modelled in the rest of the notebook.
yt = df['diff_HP']
df.head()

# Part 1. Model Selection

### 1.a) Estimate all 4 models by maximum likelihood, report the estimation results, and verify if the stationarity conditions and invertibility conditions are satisfied

In [None]:
def test_stationarity(model: ARIMA):
    p,_,q = model.specification.order
    r = max(p,q)
    phi_matrix = np.zeros((r,r))
    phis = model.arparams
    phi_matrix[0,:] = np.pad(model.arparams, (0, r - len(phis)), mode='constant')
    np.fill_diagonal(phi_matrix[1:p,:p],1)
    eigenvalues = np.linalg.eigvals(phi_matrix)
    print(phi_matrix)
    return np.all(np.abs(eigenvalues) < 1)

def test_invertibility(model:ARIMA):
    p,_,q = model.specification.order
    r = max(p,q)
    theta_matrix = np.zeros((r,r))
    thetas = model.maparams
    theta_matrix[0,:] = np.pad(thetas, (0,r - len(thetas)), mode='constant')
    np.fill_diagonal(theta_matrix[1:q,:q],1)
    eigenvalues = np.linalg.eigvals(theta_matrix)
    print(theta_matrix)
    return np.all(np.abs(eigenvalues) < 1)

In [None]:
# Stationarity and invertibility are checked explicitly below, so the
# estimation is not constrained to enforce either of them.
unconstrained = {'enforce_stationarity': False, 'enforce_invertibility': False}
ar4 = ARIMA(yt, order=(4, 0, 0), **unconstrained).fit()
arma41 = ARIMA(yt, order=(4, 0, 1), **unconstrained).fit()
arma42 = ARIMA(yt, order=(4, 0, 2), **unconstrained).fit()

# For each fitted model: run both checks, print the estimation summary,
# then report the outcome of the checks.
fitted_models = [('ar(4)', ar4), ('arma(4,1)', arma41), ('arma(4,2)', arma42)]
for name, model in fitted_models:
    stationary = test_stationarity(model)
    invertible = test_invertibility(model)
    print(model.summary())
    stationarity_label = "stationary" if stationary else "not stationary"
    invertibility_label = "invertible" if invertible else "not invertible"
    print(f'{name} is {stationarity_label}')
    print(f'{name} is {invertibility_label}')
    

### Interpretation
All models are both stationary and invertible, since the eigenvalues of their respective phi and theta matrices all have moduli less than 1. However, in every model some parameters
do not have a p-value below 0.05, meaning there is no strong statistical evidence that those parameters differ from 0.

### 1.b) Select a model among the 3 using the BIC criterion. Perform likelihood ratio tests (using a 5% significance level) to discriminate between that model and possible alternatives among the remaining 2. Evaluate the white noise hypothesis for the residuals to justify the selection of a final model.

In [None]:
# Report the BIC criterion of each fitted model (lower is better).
for label, fitted in (('AR(4)', ar4), ('ARMA(4,1)', arma41), ('ARMA(4,2)', arma42)):
    print(f'BIC for {label}: {fitted.bic}')


In [None]:
# We first define a function to help us perform the test
def likelihood_ratio_test(l1,l0,k, alpha):
    """Run a likelihood ratio test and say whether the null is rejected.

    Parameters
    ----------
    l1 : log-likelihood of the unrestricted (alternative) model.
    l0 : log-likelihood of the restricted (null) model.
    k : degrees of freedom of the chi-square reference distribution.
    alpha : significance level of the test.

    Returns
    -------
    True when the statistic ``2 * (l1 - l0)`` exceeds the chi-square
    critical value at level ``alpha``; the statistic, the critical value
    and the p-value are printed along the way.
    """
    critical_value = chi2.ppf(1 - alpha, k)
    statistic = 2*(l1 - l0)
    print(f'{statistic=}, {critical_value=}')
    # Upper-tail probability P(chi2 > statistic).
    p_value = 1 - chi2.cdf(statistic, k)
    print('p-value:', p_value)
    reject_null = statistic > critical_value
    return reject_null

In [None]:
# Sequential likelihood ratio tests at the 5% level. Each test keeps the
# smaller model unless the larger one fits significantly better.
model_from_first_test = None
model_from_second_test = None

# Test 1: AR(4) (null) against ARMA(4,1); one extra MA parameter -> 1 df.
can_reject_null = likelihood_ratio_test(arma41.llf, ar4.llf, 1, 0.05)
if can_reject_null:
    print('we can reject the null hypothesis, meaning we pick the ARMA(4,1)')
    model_from_first_test = arma41
else:
    print('we cannot reject the null hypothesis, meaning we pick the AR(4)')
    model_from_first_test = ar4

# Test 2: winner of test 1 (null) against ARMA(4,2). The number of
# restrictions depends on the null: 1 against ARMA(4,1), 2 against AR(4).
if model_from_first_test is arma41:
    null_name, n_restrictions = 'ARMA(4,1)', 1
else:
    null_name, n_restrictions = 'AR(4)', 2

can_reject_null = likelihood_ratio_test(
    arma42.llf, model_from_first_test.llf, n_restrictions, 0.05
)
if can_reject_null:
    print('we can reject the null hypothesis, meaning we pick the ARMA(4,2)')
    model_from_second_test = arma42
else:
    # Keep whichever model survived test 1, not unconditionally ARMA(4,1).
    print(f'we cannot reject the null hypothesis, meaning we pick the {null_name}')
    model_from_second_test = model_from_first_test

selected_model = model_from_second_test

In [None]:
# We perform the Ljung-Box test to check that the residuals of the selected
# model behave like white noise.
residuals = selected_model.resid
lags = range(1, 19)  # Lags 1 to 18

# Perform the Ljung-Box Q-test
# NOTE(review): model_df is left at its default, so the p-values are not
# adjusted for the estimated ARMA parameters -- confirm this is intended.
lbq_result = acorr_ljungbox(residuals, lags=lags, return_df=True)

p_values = lbq_result['lb_pvalue']
test_statistics = lbq_result['lb_stat']
h1 = (p_values < 0.05).astype(int)  # Binary decision rule (1 = reject null hypothesis)

# Print the results
print("Decision Rule (h1):", h1.values)
print("P-Values:", p_values.values)
print("Test Statistics:", test_statistics.values)
# Residual autocorrelations; printed so the values are actually shown
# (a bare expression in the middle of a cell is discarded).
print("Residual ACF:", acf(residuals, nlags=20))

# Reset first so a value left over from a previous run of the cell cannot be
# reused when the diagnostic fails.
chosen_model = None
if not h1.any():
    chosen_model = selected_model
else:
    print("White noise hypothesis rejected for at least one lag: "
          "no model is retained (chosen_model is None)")

### Interpretation 
Using the BIC criterion, the selected model is the ARMA(4,1), because its value of -898.96 was the smallest among the three models. To verify the robustness of this result, we perform likelihood ratio tests. The first test uses the AR(4) as the null hypothesis against the ARMA(4,1). The p-value is smaller than the chosen alpha, meaning we reject the null and select the ARMA(4,1). The second test uses the model selected by the first test, the ARMA(4,1), as the null against the ARMA(4,2). The p-value is greater than the chosen alpha, meaning we cannot reject the null, and we once again select the ARMA(4,1). To verify the white noise hypothesis for the residuals, we perform the Ljung-Box test. The test is conclusive: we cannot reject the null for any of the first 18 lags, which suggests that the residuals are white noise and that the model can be useful.

# Part 2. Dynamic Response and forecasting

### 2.a) For the selected model, evaluate the dynamic response for a horizon of 10 periods following a positive shock of size sigma = 1.15 occurring at the first period of the horizon, and explain what happens.

In [None]:
# Show the estimated parameters of the retained model.
print(chosen_model.params)

# Response over 10 steps to a single impulse of 1.15.
response = chosen_model.impulse_responses(10, [1.15])
plt.plot(response, label='response')
plt.title('Dynamic response following a shock of 1.15')
plt.xlabel('Periods')
plt.ylabel('Response value')
plt.legend()

### 2.b) For the selected model, evaluate the dynamic response for a horizon of 10 periods following a positive shock of size sigma = 1.15 occurring in each of 3 consecutive periods of the horizon (t, t+1, t+2). Plot the impulse response function, showing the dynamic response for a horizon of 20 periods following these shocks, and explain what happens.

In [None]:
# Response of the selected model to a single impulse of 1.15 over 20 steps.
irf = np.asarray(chosen_model.impulse_responses(steps=20, impulse=[1.15]))

# Accumulator for the combined effect of the consecutive shocks.
total_irf = np.zeros_like(irf)

# The model is linear, so the response to shocks at t, t+1 and t+2 is the sum
# of the single-shock response delayed by 0, 1 and 2 periods. A plain slice is
# used for the delay: np.roll would wrap the tail of the response around to
# the start and contaminate the first periods.
n_shocks = 3
for delay in range(n_shocks):
    total_irf[delay:] += irf[:len(irf) - delay]

# Convert to DataFrame for visualization
irf_df = pd.DataFrame(total_irf, columns=['y'])

# Plot impulse responses
plt.plot(irf_df['y'], label="Response of y to 3 Consecutive Shocks")
plt.axhline(y=0, linestyle="--", color="black", linewidth=1)
plt.xlabel("Time")
plt.ylabel("Response")
plt.title("Impulse Response of ARMA(4,1) with 3 Consecutive Shocks of 1.15")
plt.legend()
plt.show()