In [1]:
import numpy as np
from tqdm import tqdm

# 1. Simulate data

$$y_t = u'_t \phi + x'_t \beta + \epsilon_t, \epsilon_t \stackrel{\text{i.i.d}}{\sim} \mathcal{N}(0, \sigma^{2}) $$
In our case, as the dimension of U is l = 0, we have 
$$y_t = x'_t \beta + \epsilon_t, \epsilon_t \stackrel{\text{i.i.d}}{\sim} \mathcal{N}(0, \sigma^{2}) $$

In [2]:
l = 0
k = 100
T = 200

In [3]:
a, b, A, B = 1, 1, 1, 1

### Draw initial q

In [4]:
def draw_initial_q(a=a, b=b):
    return np.random.beta(a, b)

### Draw initial $R²$

In [5]:
def draw_initial_R2(A=A, B=B):
    return np.random.beta(A, B)

### Draw initial $\sigma²$

In [6]:
def draw_inital_sigma_squared(X, beta, Ry, T=T):
    return ((1/Ry) - 1) * (1/T) * sum([(beta.T @ x)**2 for x in X])

### Draw initial $\beta$

In [7]:
def draw_initial_beta(s, k=k):
    beta = np.random.normal(size=k)
    indices_null = np.random.choice(range(k), size=k-s, replace=False)
    beta[indices_null] = 0
    z = np.ones(k)
    z[indices_null] = 0
    return beta, z

### Simulate X

In [8]:
from scipy.linalg import toeplitz

In [9]:
rho = 0.75

In [10]:
def draw_X(rho=rho, k=k, T=T):
    correlation_matrix = toeplitz(rho ** np.arange(0, k))
    return np.random.multivariate_normal(np.zeros(k), correlation_matrix, size=T)

### Simulate Y

In [11]:
def draw_Y(X, beta, sigma_squared):
    return X @ beta + np.random.normal(0, sigma_squared, size=(X @ beta).shape[0])

In [12]:
def draw_dataset(Ry, s, rho=rho, k=k, T=T):
    X = draw_X()
    beta, z = draw_initial_beta(s, k=k)
    sigma_squared = draw_inital_sigma_squared(X, beta, Ry, T=T)
    Y = draw_Y(X, beta, sigma_squared)
    return Y, X, beta, sigma_squared, z

# 2. Draw from posteriors

In [13]:
def inverse_matrix(A):
    try:
        return np.linalg.solve(A, np.eye(A.shape[0]))
    except:
        return A

In [14]:
def draw_tildes(Y, X, beta_tilde, gamma, z):
    X_tilde = X[:, z != 0]
    #beta_tilde = beta[z != 0]
    W_tilde = X_tilde.T @ X_tilde + (1/gamma**2) * np.eye(int(np.sum(z)))
    Y_tilde = Y 
    beta_tilde_hat = inverse_matrix(W_tilde) @ X_tilde.T @ Y_tilde
    return X_tilde, beta_tilde, W_tilde, Y_tilde, beta_tilde_hat

### (I). Draw from $R²$

In [15]:
def draw_r2(X, z, beta, sigma_squared, k=k, a=a, b=b, A=A, B=B):
    s_z = np.sum(z)
    v_x_bar = (1/k) * sum([np.var(x) for x in X])
    q_grid = np.concatenate((np.arange(0, 0.1, 0.001), np.arange(0.1, 0.9, 0.01), np.arange(0.9, 1.001, 0.001)))
    r2_grid = q_grid[1:] # we remove the first term of the grid which is 0 so as not to divide by 0
    posterior = np.zeros((len(q_grid), len(r2_grid)))
    
    beta_term = (beta.T @ np.diag(z) @ beta)
    
    for i in range(len(q_grid)):
        q = q_grid[i]
        q_term = q ** (s_z + s_z/2 + a - 1) * (1-q) ** (k - s_z + b - 1)
        
        for j in range(len(r2_grid)):
            r2 = r2_grid[j]
            
            exp_term = np.exp((-1/(2*sigma_squared)) * ((k * v_x_bar * q * (1 - r2)) / r2) * beta_term)
            r2_term = r2 ** (A - 1 - s_z/2) * (1-r2) ** (s_z/2 + B - 1)
            posterior[i, j] = exp_term * q_term * r2_term
    
    # normalize the posterior
    posterior = posterior.flatten() / np.sum(posterior)
    
    x = np.argmax(np.random.multinomial(1, posterior))
    i, j = np.unravel_index(x, np.zeros((len(q_grid), len(r2_grid))).shape)
    q = q_grid[i]
    r2 = r2_grid[j]
    return r2, q

### (III). Draw from z

In [16]:
import scipy.special as sp

In [17]:
clip_down = lambda x: np.clip(x, a_min=1e-150, a_max=None)

def draw_zi(Y, X, beta, gamma, z, i, q, k=k, T=T):
    
    # Exclude the i-th element from z and X
    z_minus_i = np.delete(z, i)
    X_minus_i = np.delete(X, i, axis=1)
    beta_minus_i = np.delete(beta, i)
    X_tilde_minus_i, beta_tilde_minus_i, W_tilde_minus_i, Y_tilde, beta_tilde_hat_minus_i = draw_tildes(Y, X_minus_i , beta_minus_i , gamma, z_minus_i)
    
    s_z_minus_i = np.sum(z_minus_i)
    
    q_term = q ** s_z_minus_i * (1-q) ** (k-s_z_minus_i)
    gamma_term = (1/(gamma**2)) ** (s_z_minus_i/2) * np.linalg.norm(W_tilde_minus_i) ** (-1/2)
    fraction_term = clip_down(((Y_tilde.T @ Y_tilde - beta_tilde_hat_minus_i.T @ W_tilde_minus_i @ beta_tilde_hat_minus_i)/2) ** (-T/2))
    posterior = q_term * gamma_term * fraction_term * sp.gamma(T/2)
    return posterior

In [18]:
def draw_z(Y, X, beta_tilde, gamma, z, q, k=k, T=T):
    return np.random.binomial(1, q, size=k)

### (IV). Draw from $\sigma²$

In [19]:
import scipy.stats as stats

In [20]:
def draw_sigma(Y, X, beta_tilde, gamma, z):
    X_tilde, beta_tilde, W_tilde, Y_tilde, beta_tilde_hat = draw_tildes(Y, X, beta_tilde, gamma, z)
    variance = (1/2) * (Y_tilde.T @ Y_tilde - beta_tilde_hat.T @ W_tilde @ beta_tilde_hat)
    return stats.invgamma.rvs(T/2, variance)

### (V). Draw from $\tilde{\beta}$

In [21]:
def draw_beta_tilde(Y, X, beta_tilde, gamma, z, sigma_squared):
    X_tilde, beta_tilde, W_tilde, Y_tilde, beta_tilde_hat = draw_tildes(Y, X, beta_tilde, gamma, z)
    mean = inverse_matrix(W_tilde) @ X_tilde.T @ Y_tilde
    variance = sigma_squared * inverse_matrix(W_tilde)
    return np.random.multivariate_normal(mean, variance, size=X_tilde.shape[1])

# 3. Gibbs Sampler

In [22]:
n_iter = 110_000

In [23]:
def gibbs_sampler(Ry, s, n_iter=n_iter, k=k, T=T, a=a, b=b, A=A, B=B, display_=False):
    
    ### initialize dataset ###
    gamma = np.random.uniform()
    q = draw_initial_q(a=a, b=b)
    r2 = draw_initial_R2(A=A, B=B)
    Y, X, beta, sigma_squared, z = draw_dataset(Ry, s, rho=rho, k=k, T=T)
    beta_tilde = beta[z != 0]
    
    parameters = []
    parameters.append([beta_tilde, sigma_squared, z, r2, q])
    
    range_ = tqdm(range(n_iter)) if display_ else range(n_iter)
    for i in range_:
        r2, q = draw_r2(X, z, beta, sigma_squared, k=k, a=a, b=b, A=A, B=B)
        z = draw_z(Y, X, beta_tilde, gamma, z, q, k=k, T=T)
        sigma_squared = draw_sigma(Y, X, beta_tilde, gamma, z)
        beta_tilde = draw_beta_tilde(Y, X, beta_tilde, gamma, z, sigma_squared)
        parameters.append([beta_tilde, sigma_squared, z, r2, q])
        
    return parameters

In [None]:
params = gibbs_sampler(0.25, 5, n_iter=n_iter, k=k, T=T, a=a, b=b, A=A, B=B, display_=True)

  1%|          | 1142/110000 [34:00<70:31:28,  2.33s/it]