## Auto Regression for X(Egg) and Y(Chicken)

In [1]:
import numpy as np

data = {
    "year": [1930, 1931, 1932, 1933, 1934, 1935],
    "chicken": [468491, 449743, 436815, 444523, 433937, 389958],
    "egg": [3581, 3532, 3327, 3255, 3156, 3081],
}

# Lagged dataset
Y = np.array(data["chicken"][1:])
Y_lag = np.array(data["chicken"][:-1])
X_lag = np.array(data["egg"][:-1])

# Design matrix
X_with_egg = np.column_stack((np.ones(len(Y_lag)), Y_lag, X_lag))

# Coefficients, OLS formula accoding to the question
beta_with_egg = np.linalg.inv(X_with_egg.T @ X_with_egg) @ (X_with_egg.T @ Y)

residuals_with_egg = Y - X_with_egg @ beta_with_egg

# Calculate ||e||_2^2 (RSS or squared residual norm)
norm_e_squared = np.sum(residuals_with_egg ** 2)

print("Estimated coefficients (β):", beta_with_egg)
print("||e||_2^2 (RSS) with eggs:", norm_e_squared)


Estimated coefficients (β): [ 1.20111885e+05 -6.90430740e-02  1.01396054e+02]
||e||_2^2 (RSS) with eggs: 1023104023.6108687


##Auto Regression without Y

In [2]:
import numpy as np

data = {
    "year": [1930, 1931, 1932, 1933, 1934, 1935],
    "chicken": [468491, 449743, 436815, 444523, 433937, 389958],
    "egg": [3581, 3532, 3327, 3255, 3156, 3081],
}
Y = np.array(data["chicken"][1:])
Y_lag = np.array(data["chicken"][:-1])
#Design matrix without eggs
X_without_egg = np.column_stack((np.ones(len(Y_lag)), Y_lag))

# Coefficients using OLS
beta_without_egg = np.linalg.inv(X_without_egg.T @ X_without_egg) @ (X_without_egg.T @ Y)

residuals_without_egg = Y - X_without_egg @ beta_without_egg

# Calculate ||e||_2^2 (RSS2 or squared residual norm)
RSS2 = np.sum(residuals_without_egg ** 2)

print("RSS2 (Auto-regression without eggs):", RSS2)
print("Coefficients (β0, β1):", beta_without_egg)


RSS2 (Auto-regression without eggs): 1385892406.5564857
Coefficients (β0, β1): [-5.17190470e+04  1.08061854e+00]


#Statistical Test

In [3]:
import numpy as np
import scipy.stats as stats

data = {
    "year": [1930, 1931, 1932, 1933, 1934, 1935],
    "chicken": [468491, 449743, 436815, 444523, 433937, 389958],
    "egg": [3581, 3532, 3327, 3255, 3156, 3081],
}

Y = np.array(data["chicken"][1:])
Y_lag = np.array(data["chicken"][:-1])
X_lag = np.array(data["egg"][:-1])

# Design matrix with eggs
X_with_egg = np.column_stack((np.ones(len(Y_lag)), Y_lag, X_lag))

# Design matrix without eggs
X_without_egg = np.column_stack((np.ones(len(Y_lag)), Y_lag))

# Compute coefficients using OLS
beta_with_egg = np.linalg.inv(X_with_egg.T @ X_with_egg) @ (X_with_egg.T @ Y)
beta_without_egg = np.linalg.inv(X_without_egg.T @ X_without_egg) @ (X_without_egg.T @ Y)

#RSS
residuals_with_egg = Y - X_with_egg @ beta_with_egg
residuals_without_egg = Y - X_without_egg @ beta_without_egg

RSS_with_egg = np.sum(residuals_with_egg ** 2)
RSS_without_egg = np.sum(residuals_without_egg ** 2)

# Compute the F-statistic
n = len(Y)
d = 1
F_stat = ((RSS_without_egg - RSS_with_egg) / d) / (RSS_with_egg / (n - 2 * d))

# Critical value for F-distribution with alpha = 0.05
alpha = 0.05
critical_value = stats.f.ppf(1 - alpha, d, n - 2 * d)

print("F-statistic:", F_stat)
print("Critical F-value at alpha = 0.05:", critical_value)

# Interpret of the result
if F_stat > critical_value:
    print("Reject the null hypothesis, we can say egg causes chicken.")
else:
    print("Fail to reject the null hypothesis, we have no evidence that egg causes chicken.")


F-statistic: 1.0637873800903004
Critical F-value at alpha = 0.05: 10.127964486013928
Fail to reject the null hypothesis, we have no evidence that egg causes chicken.


##Causal Test for direction from Chicken to Eggs

In [4]:
import numpy as np
import scipy.stats as stats

#Data
data = {
    "year": [1930, 1931, 1932, 1933, 1934, 1935],
    "chicken": [468491, 449743, 436815, 444523, 433937, 389958],
    "egg": [3581, 3532, 3327, 3255, 3156, 3081],
}

# Create lagged datasets for Granger causality test
Y = np.array(data["egg"][1:])
Y_lag = np.array(data["egg"][:-1])
X_lag = np.array(data["chicken"][:-1])

# Design matrix with chicken
X_with_chicken = np.column_stack((np.ones(len(Y_lag)), Y_lag, X_lag))

# Design matrix without chicken
X_without_chicken = np.column_stack((np.ones(len(Y_lag)), Y_lag))

# Compute coefficients using OLS
beta_with_chicken = np.linalg.inv(X_with_chicken.T @ X_with_chicken) @ (X_with_chicken.T @ Y)
beta_without_chicken = np.linalg.inv(X_without_chicken.T @ X_without_chicken) @ (X_without_chicken.T @ Y)

# Calculate residuals and RSS
residuals_with_chicken = Y - X_with_chicken @ beta_with_chicken
residuals_without_chicken = Y - X_without_chicken @ beta_without_chicken

RSS_with_chicken = np.sum(residuals_with_chicken ** 2)
RSS_without_chicken = np.sum(residuals_without_chicken ** 2)

# Compute F-statistic
n = len(Y)
d = 1
F_stat = ((RSS_without_chicken - RSS_with_chicken) / d) / (RSS_with_chicken / (n - 2 * d))

# Critical value for F-distribution with alpha = 0.05
alpha = 0.05
critical_value = stats.f.ppf(1 - alpha, d, n - 2 * d)

print("F-statistic:", F_stat)
print("Critical F-value at alpha = 0.05:", critical_value)

if F_stat > critical_value:
    print("Reject the null hypothesis: Chicken causes eggs.")
else:
    print("Fail to reject the null hypothesis: No evidence that chicken causes eggs.")


F-statistic: 1.8530306411369108
Critical F-value at alpha = 0.05: 10.127964486013928
Fail to reject the null hypothesis: No evidence that chicken causes eggs.
