In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from statsmodels.tsa.stattools import acf
from statsmodels.stats.diagnostic import acorr_ljungbox

# Load the house-price series and index it by observation date.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# only run where this exact file exists.
csv_path = "/Users/williambourque/Downloads/QCAR628BIS.csv"
df = pd.read_csv(csv_path)

# Parse the DATE column into datetimes, then promote it to the index.
df = df.assign(DATE=pd.to_datetime(df['DATE'])).set_index('DATE')
print(df.head())

Time Series Econometrics Assignment 1

Part 1 - Preliminary Analysis

1. a) Plot the log of house prices through time

In [None]:
# Take the natural log of the price index and store it alongside the raw series.
log_prices = np.log(df['QCAR628BIS'])
df['log_HP'] = log_prices

# Line plot of the log series against the DATE index.
ax = sns.lineplot(data=df, x='DATE', y='log_HP')
ax.set_title("log(HP_t)")
plt.show()

Interpretation:
log(HP_t) - log(HP_{t-1}) approximates the percentage gain (if positive) or loss (if negative) in house prices from time t-1 to time t.

1. b) Apply the transformation to stationarize z_t and plot the results (deterministic trend)

In [None]:
# Deterministic-trend detrending: regress log_HP on a constant and a linear
# time index by least squares, and keep the residuals as the detrended series.
T = len(df)                      # number of observations
trend = np.arange(1, T + 1)      # time index 1, 2, ..., T

# Design matrix: first column is the intercept, second column is the trend.
X = np.ones((T, 2))
X[:, 1] = trend

# Regressand as a plain NumPy vector.
Y = df['log_HP'].to_numpy(copy=True)

# Least-squares coefficients (intercept, slope); the other lstsq outputs are unused.
B, *_ = np.linalg.lstsq(X, Y, rcond=None)

# Residuals = observed log price minus fitted trend line.
fitted = X @ B
df['detrend_HP'] = Y - fitted

print("Beta coefficients (B):", B)
print(df.head())

# Plot the deviation of log_HP from its fitted linear trend.
ax = sns.lineplot(data=df, x='DATE', y='detrend_HP')
ax.set_title('Determinist Trend Deviation')
plt.show()

1. c) Apply the transformation to stationarize z_t and plot the results (stochastic trend)

In [None]:
# Stochastic-trend transformation: first difference of the log series,
#   diff_HP_t = log(HP_t) - log(HP_{t-1}).
# Series.diff() is used rather than pct_change(): pct_change() on a series
# that is already in logs would divide the difference by log(HP_{t-1}),
# which is not the log difference.
# The first row has no predecessor, so its diff_HP value is NaN.
df['diff_HP'] = df['log_HP'].diff()

print(df.head())

# Plot of the log variation assuming a stochastic trend
sns.lineplot(data=df, x='DATE', y='diff_HP')
plt.title("Difference Series")
plt.show()

1. d) Analyze the autocorrelation functions

In [None]:
# Sample autocorrelations (lags 0..20) of the two candidate stationary series.
acf_vector_deter = acf(df['detrend_HP'], nlags=20)  # deterministic-trend residuals
print(acf_vector_deter)
# The first observation of diff_HP is NaN (no previous period), so skip it.
acf_vector_stocha = acf(df['diff_HP'].iloc[1:], nlags=20)  # differenced series
print(acf_vector_stocha)

# Each figure is titled and labelled so the two ACFs can be told apart.
plt.plot(acf_vector_deter)
plt.title("ACF - deterministic trend deviation")
plt.xlabel("Lag")
plt.ylabel("Autocorrelation")
plt.show()

plt.plot(acf_vector_stocha)
plt.title("ACF - difference series")
plt.xlabel("Lag")
plt.ylabel("Autocorrelation")
plt.show()

Interpretation: The stochastic-trend hypothesis is better suited to estimating a time series model: the autocorrelation goes to 0 much faster, which indicates that the series could be white noise. The white noise hypothesis is rejected in the case of the deterministic trend.

1. e) Perform the Ljung-Box test on the stochastic trend series

In [None]:
# Ljung-Box Q-test on the differenced series, evaluated at lags 1 to 18.
lags = range(1, 19)

# Skip the first observation of diff_HP, which is NaN after the transformation.
lbq_result = acorr_ljungbox(df['diff_HP'].iloc[1:], lags=lags, return_df=True)

# Pull the statistic and p-value columns out of the result frame.
test_statistics = lbq_result['lb_stat']
p_values = lbq_result['lb_pvalue']

# Binary decision at the 5% level: 1 = reject the null hypothesis, 0 = do not reject.
h1 = p_values.lt(0.05).astype(int)

# Report the decision, p-value and statistic for each lag.
for label, column in (
    ("Decision Rule (h1):", h1),
    ("P-Values:", p_values),
    ("Test Statistics:", test_statistics),
):
    print(label, column.values)

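Interpretation: We reject the hypothesis that there is correlation between the observations of the stochastic trend series. We can conclude that the series is stationary. (Recall that the null hypothesis of the Ljung-Box test is the absence of autocorrelation up to the given lag, so h1 = 1 means that this null is rejected at the 5% level.)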