## installing dependencies (only needed once)

In [None]:
%pip install numpy
%pip install matplotlib
%pip install seaborn
%pip install pandas
%pip install statsmodels

## importing dependencies

In [17]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from statsmodels.tsa.stattools import acf
from statsmodels.stats.diagnostic import acorr_ljungbox
from statsmodels.tsa.arima.model import ARIMA
from scipy.stats import chi2

## Loading the data

In [None]:
# Read the raw CSV, convert the DATE column to timestamps and promote it to
# the index so the series can be handled as a time series further down.
raw = pd.read_csv("./data.csv")
df = raw.assign(DATE=pd.to_datetime(raw['DATE'])).set_index('DATE')
del raw  # only `df` is meant to be visible to the following cells
print(df.head())

Time Series Econometrics Assignment 1

Part 1 - Preliminary Analysis

1. a) Plot the log of house prices through time

In [None]:
# Natural logarithm of the house-price column, stored next to the raw data.
df['log_HP'] = np.log(df['QCAR628BIS'])

# Line plot of the log series against the DATE index.
sns.lineplot(x='DATE', y='log_HP', data=df)
plt.title("log(HP_t)")
plt.show()

Interpretation:
log(HP_t) - log(HP_{t-1}) approximates the percentage gain (if positive) or loss (if negative) in house prices from time t-1 to time t.

1. b) Apply the transformation to stationarize z_t and plot the results (deterministic trend)

In [None]:
# Regress log_HP on a constant and a linear time trend by OLS; the residuals
# of that regression are the deviations from the deterministic trend.
T = len(df)  # number of observations
trend = np.arange(1, T + 1)  # time index 1, 2, ..., T

# Design matrix: a column of ones (intercept) followed by the trend column
X = np.column_stack([np.ones(T), trend])

# Dependent variable as a standalone NumPy array
Y = df['log_HP'].to_numpy(copy=True)

# Least-squares coefficients [intercept, slope]
B = np.linalg.lstsq(X, Y, rcond=None)[0]

# Detrended series = observed values minus fitted trend line
df['detrend_HP'] = Y - X @ B

print("Beta coefficients (B):", B)
print(df.head())

# Plot the deviations from the deterministic trend
sns.lineplot(x='DATE', y='detrend_HP', data=df)
plt.title('Determinist Trend Deviation')
plt.show()

1. c) Apply the transformation to stationarize z_t and plot the results (stochastic trend)

In [None]:
# Stochastic-trend transformation: first difference of the log series,
# log(HP_t) - log(HP_{t-1}), i.e. the approximate per-period growth rate.
# `.diff()` is used rather than `.pct_change()`: the percent change of the
# *log* series is (log HP_t - log HP_{t-1}) / log HP_{t-1}, which is not the
# log-difference. The first observation is NaN by construction.
df['diff_HP'] = df['log_HP'].diff()

print(df.head())

# Plot of the log variation under the stochastic-trend assumption
sns.lineplot(data=df, x='DATE', y='diff_HP')
plt.title("Difference Series")
plt.show()

1. d) Analyze the autocorrelation functions

In [None]:
# Sample autocorrelations (lags 0..20) of both stationarized series.
# The first element of diff_HP is skipped because differencing leaves it NaN.
acf_vector_deter = acf(df['detrend_HP'], nlags=20)  # deterministic-trend residuals
print(acf_vector_deter)
acf_vector_stocha = acf(df['diff_HP'].iloc[1:], nlags=20)  # differenced series
print(acf_vector_stocha)

# One figure per series: deterministic first, then stochastic.
for acf_values in (acf_vector_deter, acf_vector_stocha):
    plt.plot(acf_values)
    plt.show()

Interpretation: The stochastic-trend hypothesis is better suited to estimating a time series model: its autocorrelation goes to 0 much faster, which indicates that the series could be white noise. The white noise hypothesis is rejected in the case of the deterministic trend.

1. e) Perform the Ljung-Box test on the stochastic trend series

In [None]:
# Ljung-Box Q-test on the differenced series for every lag from 1 to 18.
lags = range(1, 19)

# The leading NaN produced by differencing is dropped before testing.
lbq_result = acorr_ljungbox(
    df['diff_HP'].iloc[1:],
    lags=lags,
    return_df=True,
)

test_statistics = lbq_result['lb_stat']
p_values = lbq_result['lb_pvalue']
# 1 where the null hypothesis is rejected at the 5% level, 0 otherwise
h1 = p_values.lt(0.05).astype(int)

# Report decision, p-value and Q statistic for each lag
print("Decision Rule (h1):", h1.values)
print("P-Values:", p_values.values)
print("Test Statistics:", test_statistics.values)

Interpretation: We do not reject the null hypothesis of no autocorrelation between the observations of the stochastic trend series. We can conclude that the series is stationary and consistent with white noise.

## Question 2

### 2.a) Estimate all 4 models by maximum likelihood, report the estimation results, and verify if the stationarity conditions are satisfied

In [8]:
# Series used for all AR estimations below: the differenced log prices,
# re-indexed to a quarter-start ('QS') frequency, with missing values removed.
yt = (
    df['diff_HP']
    .asfreq('QS')
    .dropna()
)

### Model 1

In [None]:
# Model 1: a plain AR(1) with a constant, estimated with statsmodels' ARIMA.
# enforce_stationarity=False leaves phi1 unconstrained, so stationarity has
# to be checked explicitly after estimation.
model1 = ARIMA(yt, order=(1,0,0), enforce_stationarity=False)
fitted_model1 = model1.fit()

phi1 = fitted_model1.arparams[0]
# NOTE(review): statsmodels' ARIMA appears to report `const` as the mean of
# the process (regression with ARMA errors), not the AR intercept -- confirm
# which of the two `delta` is meant to denote in the assignment.
delta = fitted_model1.params['const']
variance = fitted_model1.params['sigma2']


print(f'fitter parameters: (phi1, {phi1}), (delta, {delta}), (sigma,{np.sqrt(variance)})')
# An AR(1) is stationary iff |phi1| < 1; the else branch therefore also
# covers the unit-root case |phi1| == 1, hence ">=" in its message.
if abs(phi1) < 1:
    print(f'Since |{phi1}| < 1, the model is stationary')
else:
    print(f'Since |{phi1}| >= 1, the model is not stationary')
print(fitted_model1.summary())


### Interpretation
Given that the phi1 parameter is less than 1 in absolute value, the model is stationary. Furthermore, the p-values obtained from the t-tests on the parameters are less than 0.05, meaning we can reject the null hypothesis that the parameters are equal to 0. There is therefore statistically significant evidence that the parameters contribute meaningfully to the model.

### Model 2

In [None]:
# Model 2: AR(2) with a constant, again through statsmodels' ARIMA and
# without forcing the AR coefficients into the stationary region.
model2 = ARIMA(yt, order=(2, 0, 0), enforce_stationarity=False)
fitted_model2 = model2.fit()

phi1, phi2 = fitted_model2.arparams[0], fitted_model2.arparams[1]
delta = fitted_model2.params['const']
variance = fitted_model2.params['sigma2']

print(f'fitter parameters: (phi1, {phi1}), (phi2, {phi2}) (delta, {delta}), (sigma,{np.sqrt(variance)})')
print(fitted_model2.summary())

# Companion matrix of the AR(2): AR coefficients on the first row, a shifted
# identity below. Its eigenvalues are printed with their moduli so the
# stationarity condition can be read off.
phi_matrix = np.vstack(([phi1, phi2], np.eye(1, 2)))
eigenvalues, _ = np.linalg.eig(phi_matrix)
print(f'The eigenvalues are {eigenvalues}')
for eigenvalue in eigenvalues:
    print(f'module of {eigenvalue} is {np.abs(eigenvalue)}')


### Interpretation
Given that the moduli of the eigenvalues are all less than 1, we can conclude that the process is stationary. Furthermore, since the p-values of the t-tests on the parameters are less than 0.01, there is strong statistical evidence against the null hypothesis that they are equal to 0, meaning the parameters contribute to the model.

### Model 3

In [None]:
# Model 3: unrestricted AR(3) with a constant, estimated via statsmodels'
# ARIMA with no stationarity constraint imposed on the coefficients.
model3 = ARIMA(yt, order=(3, 0, 0), enforce_stationarity=False)
fitted_model3 = model3.fit()

phi1, phi2, phi3 = (fitted_model3.arparams[lag] for lag in range(3))
delta = fitted_model3.params['const']
variance = fitted_model3.params['sigma2']

print(f'fitter parameters: (phi1, {phi1}), (phi2, {phi2}), (phi3, {phi3}) (delta, {delta}), (sigma,{np.sqrt(variance)})')
print(fitted_model3.summary())

# 3x3 companion matrix: AR coefficients on the first row, shifted identity
# below. The moduli of its eigenvalues are printed for the stationarity check.
phi_matrix = np.vstack(([phi1, phi2, phi3], np.eye(2, 3)))
eigenvalues, _ = np.linalg.eig(phi_matrix)
print(f'The eigenvalues are {eigenvalues}')
for eigenvalue in eigenvalues:
    print(f'module of {eigenvalue} is {np.abs(eigenvalue)}')



### Interpretation
Given that the moduli of the eigenvalues are all less than 1, we can conclude that the process is stationary. Furthermore, since the p-values of the t-tests on the parameters are less than 0.05, there is strong statistical evidence against the null hypothesis that they are equal to 0, meaning the parameters contribute meaningfully to the model.

### Model 4

In [None]:
# Model 4: restricted AR(3) in which the second-lag coefficient is held at 0.
# The restriction is applied through fix_params, so the fit happens inside
# the context manager.
model4 = ARIMA(yt, order=(3, 0, 0), enforce_stationarity=False)
with model4.fix_params({'ar.L2': 0}):
    fitted_model4 = model4.fit()

phi1, phi2, phi3 = (fitted_model4.arparams[lag] for lag in range(3))
delta = fitted_model4.params['const']
variance = fitted_model4.params['sigma2']

print(f'fitter parameters: (phi1, {phi1}), (phi2, {phi2}), (phi3, {phi3}) (delta, {delta}), (sigma,{np.sqrt(variance)})')
print(fitted_model4.summary())

# 3x3 companion matrix: AR coefficients on the first row, shifted identity
# below. The moduli of its eigenvalues are printed for the stationarity check.
phi_matrix = np.vstack(([phi1, phi2, phi3], np.eye(2, 3)))
eigenvalues, _ = np.linalg.eig(phi_matrix)
print(f'The eigenvalues are {eigenvalues}')
for eigenvalue in eigenvalues:
    print(f'module of {eigenvalue} is {np.abs(eigenvalue)}')

### Interpretation
Given that the moduli of the eigenvalues are all less than 1, we can conclude that the process is stationary. However, the p-value for the constant parameter is > 0.05, which means there is only weak statistical evidence that this parameter contributes meaningfully to the model. The other parameters seem to contribute meaningfully.

### 2.b) Perform 2 likelihood ratio tests to justify the selection of models. The first test should discriminate between model (1) and (2), the second one should discriminate between model (3) and (4). Finally, choose one of the two remaining models using the BIC criterion

In [39]:
# we first define a function to help us perform the test
def likelihood_ratio_test(l1,l0,k, alpha):
    """Run a likelihood-ratio test of a restricted model against a larger one.

    The statistic 2 * (l1 - l0) is compared with the (1 - alpha) quantile of
    a chi-squared distribution with k degrees of freedom. The statistic, the
    critical value and the p-value are printed.

    Parameters
    ----------
    l1 : float
        Log-likelihood of the unrestricted (larger) model.
    l0 : float
        Log-likelihood of the restricted model (the null hypothesis).
    k : int
        Degrees of freedom, i.e. the number of restrictions being tested.
    alpha : float
        Significance level of the test (e.g. 0.05).

    Returns
    -------
    bool
        True when the statistic exceeds the critical value, i.e. the
        restricted model is rejected at level alpha.
    """
    statistic = 2*(l1 - l0)
    critical_value = chi2.ppf(1 - alpha, k)
    print(f'{statistic=}, {critical_value=}')
    # Survival function = P(chi2 > statistic). Unlike 1 - cdf, it does not
    # cancel to exactly 0.0 when the statistic is far in the right tail.
    print('p-value:', chi2.sf(statistic, k))
    return statistic > critical_value


In [None]:

# Two likelihood-ratio tests at the 5% level, each with one restriction,
# followed by a BIC comparison of the two surviving models.
model_from_first_test = model_from_second_test = None

# Test 1: model 1 (restricted) against model 2 (unrestricted).
can_reject_null = likelihood_ratio_test(fitted_model2.llf, fitted_model1.llf, 1, 0.05)
if not can_reject_null:
    print('we cannot reject the null hypothesis, meaning we pick the model 1')
    model_from_first_test = fitted_model1
else:
    print('we can reject the null hypothesis, meaning we pick the model 2')
    model_from_first_test = fitted_model2

# Test 2: model 4 (restricted) against model 3 (unrestricted).
can_reject_null = likelihood_ratio_test(fitted_model3.llf, fitted_model4.llf, 1, 0.05)
if not can_reject_null:
    print('we cannot reject the null hypothesis, meaning we pick the model 4')
    model_from_second_test = fitted_model4
else:
    print('we can reject the null hypothesis, meaning we pick the model 3')
    model_from_second_test = fitted_model3

print(f'BIC values of selected models:{model_from_first_test.bic,model_from_second_test.bic}')
# Keep the first-test winner only when its BIC is strictly smaller.
if model_from_first_test.bic < model_from_second_test.bic:
    chosen_model = model_from_first_test
else:
    chosen_model = model_from_second_test

### Interpretation 
Since the BIC value is smaller for model 1, we pick it over model 3. This makes sense since model 1 has fewer parameters.

### 2.c) Evaluate the white noise hypothesis for the residual of the chosen model. What can you conclude?

In [None]:
# we perform Ljung-Box on the residuals
residuals = chosen_model.resid
lags = range(1, 19)  # Creat a vector for lags 1 to 18

# Perform the Ljung-Box Q-test
lbq_result = acorr_ljungbox(residuals, lags=lags, return_df=True)

p_values = lbq_result['lb_pvalue']
test_statistics = lbq_result['lb_stat']
h1 = (p_values < 0.05).astype(int)  # Binary decision rule (1 = reject null hypothesis)

# Print the results
print("Decision Rule (h1):", h1.values)
print("P-Values:", p_values.values)
print("Test Statistics:", test_statistics.values)
acf(residuals,nlags=20)


### Interpretation
We can reject the null hypothesis for all the lags except lag 1. This indicates that the residuals are autocorrelated and are therefore not white noise. Our model is therefore a poor fit for the data that we have.