In [1]:
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
from numba import njit, prange
import time



# Save results

In [2]:
import os
import pickle 

# Pickle file (relative to the working directory) read by load_dictionary and
# written by save_dictionary.
FILE_PATH = "dict_gibbs_simulations.pkl"

def save_dictionary(dict_):
    """Pickle ``dict_`` to ``FILE_PATH``, replacing any previous file atomically.

    The data is first written to a sibling temporary file and only then moved
    over ``FILE_PATH``. If the dump is interrupted (e.g. a KeyboardInterrupt
    during a long simulation run), the previously saved file is left intact
    instead of being truncated.

    Parameters
    ----------
    dict_ : object
        Any picklable object; in this notebook, the dictionary of results.
    """
    tmp_path = FILE_PATH + ".tmp"
    with open(tmp_path, 'wb') as f:
        pickle.dump(dict_, f)
    # os.replace overwrites the destination in a single step.
    os.replace(tmp_path, FILE_PATH)

def load_dictionary():
    """Read back and return the object pickled at ``FILE_PATH``.

    Raises whatever ``open`` raises when the file is missing.
    """
    # NOTE: unpickling can execute arbitrary code; only load files produced
    # by this notebook.
    with open(FILE_PATH, 'rb') as stream:
        return pickle.load(stream)

# 1. Simulate data

$$y_t = u'_t \phi + x'_t \beta + \epsilon_t, \epsilon_t \stackrel{\text{i.i.d}}{\sim} \mathcal{N}(0, \sigma^{2}) $$
In our case $u_t$ has dimension $l = 0$, so the model reduces to
$$y_t = x'_t \beta + \epsilon_t, \epsilon_t \stackrel{\text{i.i.d}}{\sim} \mathcal{N}(0, \sigma^{2}) $$

In [3]:
# Problem dimensions, used as default arguments by the functions below.
l = 0    # dimension of u_t (no such regressors in this setting)
k = 100  # number of predictors, i.e. columns of X and length of beta
T = 200  # number of observations, i.e. rows of X

In [4]:
# Beta hyper-parameters: (a, b) are used for q and (A, B) for R^2,
# both in the initial draws and inside draw_r2.
a, b, A, B = 1, 1, 1, 1

### Draw initial q

In [5]:
def draw_initial_q(a=a, b=b):
    """Return one draw of q from a Beta(a, b) distribution."""
    initial_q = np.random.beta(a, b)
    return initial_q

### Draw initial $R^2$

In [6]:
def draw_initial_R2(A=A, B=B):
    """Return one draw of R^2 from a Beta(A, B) distribution."""
    initial_r2 = np.random.beta(A, B)
    return initial_r2

### Compute initial $\gamma^2$

In [7]:
@njit
def compute_gamma2(X, r2, q, v_x_bar, k=k):
    """Return gamma^2 = r2 / (q * k * v_x_bar * (1 - r2)).

    ``X`` is accepted for signature compatibility but is not read.
    """
    return r2 / (q * k * v_x_bar * (1-r2))

### Draw initial $\sigma^2$

In [8]:
def draw_inital_sigma_squared(X, beta, Ry, T=T):
    """Return (1/Ry - 1) * (1/T) * sum over rows x of X of (beta' x)^2.

    Despite the name, nothing is sampled here: the value is a deterministic
    function of ``X``, ``beta``, ``Ry`` and ``T``.
    """
    # Squared fitted value of every row of X, accumulated with Python's sum.
    squared_fits = [(beta.T @ row)**2 for row in X]
    noise_ratio = (1/Ry) - 1
    return noise_ratio * (1/T) * sum(squared_fits)

### Draw initial $\beta$

In [9]:
def draw_initial_beta(s, k=k):
    """Draw a length-k coefficient vector with exactly ``s`` non-zero entries.

    Returns
    -------
    beta : ndarray of shape (k,)
        Standard-normal draws with ``k - s`` randomly chosen entries set to 0.
    z : ndarray of shape (k,)
        1.0 where ``beta`` was kept and 0.0 where it was zeroed.
    """
    coefficients = np.random.normal(size=k)
    # Positions forced to zero, chosen without replacement.
    dropped = np.random.choice(range(k), size=k-s, replace=False)
    inclusion = np.ones(k)
    for vector in (coefficients, inclusion):
        vector[dropped] = 0
    return coefficients, inclusion

### Simulate X

In [10]:
from scipy.linalg import toeplitz

In [11]:
# Base of the Toeplitz correlation matrix built in draw_X: entry (i, j) is rho ** |i - j|.
rho = 0.75

In [12]:
def draw_X(rho=rho, k=k, T=T):
    """Simulate a (T, k) matrix of correlated, standardised predictors.

    Rows are i.i.d. draws from a zero-mean multivariate normal whose
    covariance is the Toeplitz matrix with entries ``rho ** |i - j|``.
    Each predictor (column) is then centred and scaled to unit standard
    deviation over the T observations.

    The previous implementation standardised each row, i.e. one observation
    across the k predictors, which does not give unit-variance predictors.
    """
    correlation_matrix = toeplitz(rho ** np.arange(0, k))
    X = np.random.multivariate_normal(np.zeros(k), correlation_matrix, size=T)
    # axis=0: statistics are taken over observations, one per predictor.
    return (X - X.mean(axis=0)) / X.std(axis=0)

### Simulate Y

In [13]:
def draw_Y(X, beta, sigma_squared):
    """Simulate Y = X @ beta + eps with eps ~ N(0, sigma_squared).

    Parameters
    ----------
    X : ndarray
        Design matrix, one row per observation.
    beta : ndarray
        Coefficient vector.
    sigma_squared : float
        Noise *variance*.

    Returns
    -------
    ndarray
        ``X @ beta`` plus one independent noise draw per observation.
    """
    mean = X @ beta
    # np.random.normal takes the standard deviation as its scale, so the
    # variance must be square-rooted first.
    noise = np.random.normal(0, np.sqrt(sigma_squared), size=mean.shape[0])
    return mean + noise

In [14]:
def draw_dataset(Ry, s, rho=rho, k=k, T=T):
    """Simulate one dataset.

    Parameters
    ----------
    Ry : float
        Passed to ``draw_inital_sigma_squared`` to set the noise variance.
    s : int
        Number of non-zero coefficients in ``beta``.
    rho, k, T :
        Forwarded to ``draw_X``; ``k`` and ``T`` are also forwarded to the
        beta and sigma^2 helpers, so all pieces share the same dimensions.

    Returns
    -------
    tuple
        ``(Y, X, beta, sigma_squared, z)``.
    """
    # Forward rho/k/T explicitly: calling draw_X() with no arguments would
    # silently fall back to the notebook-level defaults.
    X = draw_X(rho=rho, k=k, T=T)
    beta, z = draw_initial_beta(s, k=k)
    sigma_squared = draw_inital_sigma_squared(X, beta, Ry, T=T)
    Y = draw_Y(X, beta, sigma_squared)
    return Y, X, beta, sigma_squared, z

# 2. Draw from posteriors

In [15]:
@njit(nogil=True)
def inverse_matrix(A):
    """Return the inverse of the square matrix ``A`` via ``np.linalg.inv``."""
    inverted = np.linalg.inv(A)
    return inverted

In [16]:
@njit(nogil=True)
def draw_tildes(Y, X, beta, gamma2, z):
    """Build the quantities restricted to the predictors with ``z != 0``.

    Returns, in order: the selected columns of ``X``, the selected entries of
    ``beta``, ``W = X~' X~ + I / gamma2``, ``Y`` unchanged, ``W^{-1} X~' Y``
    and ``W^{-1}``.
    """
    Y_tilde = Y
    beta_tilde = beta[z != 0]
    X_tilde = X[:, z != 0]
    # The identity block is sized by sum(z), the number of selected predictors.
    n_active = int(np.sum(z))
    W_tilde = X_tilde.T @ X_tilde + (1/gamma2) * np.eye(n_active)
    inverse_W_tilde = inverse_matrix(W_tilde)
    beta_tilde_hat = inverse_W_tilde @ X_tilde.T @ Y_tilde
    return X_tilde, beta_tilde, W_tilde, Y_tilde, beta_tilde_hat, inverse_W_tilde

### (I). Draw from $(R^2, q)$

In [17]:
@njit
def draw_r2(X, z, beta, sigma_squared, v_x_bar, q_grid):
    """Jointly sample (r2, q) from a discretised posterior on a 2-D grid.

    The same grid ``q_grid`` is used for both q and r2. Unnormalised weights
    are evaluated at every (q, r2) pair, normalised, and one cell is drawn
    with a single multinomial trial.

    Reads the notebook-level globals ``a``, ``b``, ``A``, ``B`` and ``k``.
    ``X`` is not used in the body.

    Returns
    -------
    (sampled_r2, sampled_q)
        Note the order: r2 first, then q.
    """
    s_z = np.sum(z)
    r2_grid = q_grid
    # posterior[i, j] is the weight of (q_grid[i], r2_grid[j]).
    posterior = np.zeros((len(q_grid), len(r2_grid)))

    # Quadratic form beta' diag(z) beta.
    beta_term = np.dot(np.dot(beta.T.astype(np.float64), np.diag(z).astype(np.float64)), beta.astype(np.float64))

    # Factor that depends on q only (a vector over q_grid).
    q_term = q_grid ** (s_z + s_z/2 + a - 1) * (1 - q_grid) ** (k - s_z + b - 1)

    for j, r2_loop in enumerate(r2_grid):
        # exp_term is a vector over q_grid for this fixed r2 value.
        exp_term = np.exp((-1/(2*sigma_squared)) * ((k * v_x_bar * q_grid * (1 - r2_loop)) / r2_loop) * beta_term)
        r2_term = r2_loop ** (A - 1 - s_z/2) * (1 - r2_loop) ** (s_z/2 + B - 1)
        posterior[:, j] = exp_term * q_term * r2_term

    # normalize the posterior
    # NOTE(review): if every weight underflows to 0 the division below yields NaNs — confirm this cannot happen for the grids used.
    posterior = posterior.flatten() / posterior.sum()
    # One multinomial trial; argmax recovers the flat index of the chosen cell.
    x = np.argmax(np.random.multinomial(1, posterior))
    # Row-major flattening: x = i * len(r2_grid) + j. Dividing by len(q_grid)
    # is equivalent here only because both grids have the same length.
    i, j = x // len(q_grid), x % len(r2_grid)
    sampled_q = q_grid[i]
    sampled_r2 = r2_grid[j]
    return sampled_r2, sampled_q

### (III). Draw from z

In [18]:
import scipy.special as sp

In [19]:
@njit
def norm_1(matrix):
    """Return the largest column sum of absolute values of ``matrix``."""
    column_sums = np.sum(np.abs(matrix), axis=0)
    return column_sums.max()

@njit
def bernouilli(q):
    """Return 1 with probability ``q`` and 0 otherwise (one uniform draw)."""
    u = np.random.uniform(0, 1)
    if u < q:
        return 1
    return 0

In [20]:
@njit(nogil=True)
def draw_z(Y, X, beta, gamma2, z, q, k=k, T=T):
    """Draw a new inclusion vector with at least one non-zero entry.

    Each entry is set to 1 independently with probability ``q``; the whole
    vector is redrawn until it contains at least one 1, so downstream code
    never sees an empty model. The result has the shape of ``z`` and dtype
    int16.

    Only ``z.shape`` and ``q`` are read. ``Y``, ``X``, ``beta``, ``gamma2``,
    ``k`` and ``T`` are kept for signature compatibility.

    NOTE(review): the earlier version also computed
    ``1 / (1 + (1-q)/q * sqrt(gamma2))`` but never used it, so that dead
    local has been removed without changing the draws. As written, the data
    do not influence z; confirm whether a data-dependent conditional was
    intended here.
    """
    while True:
        u = np.random.uniform(0, 1, z.shape)
        sampled_z = (u < q).astype(np.int16)
        if np.count_nonzero(sampled_z) > 0:
            return sampled_z

### (IV). Draw from $\sigma^2$

In [21]:
import scipy.stats as stats

In [22]:
@njit
def var_sigma_squared(Y, X, beta, gamma2, z):
    """Return 0.5 * (Y~' Y~ - beta_hat' W~ beta_hat) using ``draw_tildes``.

    ``draw_sigma`` uses this value as the scale of its inverse-gamma draw.
    """
    _X_t, _b_t, W_tilde, Y_tilde, beta_tilde_hat, _W_inv = draw_tildes(Y, X, beta, gamma2, z)
    total = Y_tilde.T @ Y_tilde
    explained = beta_tilde_hat.T @ W_tilde @ beta_tilde_hat
    return (1/2) * (total - explained)

def draw_sigma(Y, X, beta, gamma2, z):
    """Draw sigma^2 from an inverse-gamma distribution.

    The shape is ``T / 2`` (``T`` is the notebook-level global) and the scale
    is the value returned by ``var_sigma_squared``.
    """
    scale = var_sigma_squared(Y, X, beta, gamma2, z)
    shape = T/2
    return stats.invgamma.rvs(shape, scale=scale)

### (V). Draw from $\tilde{\beta}$

In [23]:
def reconstruct_beta(beta_tilde, z, k=k):
    """Expand ``beta_tilde`` to a length-k vector, with zeros where ``z == 0``.

    The entries of ``beta_tilde`` fill, in order, the positions where ``z`` is
    non-zero.
    """
    full_beta = np.zeros(k)
    active_positions = np.flatnonzero(z)
    full_beta[active_positions] = beta_tilde
    return full_beta

In [24]:
@njit
def mean_var_beta_tilde(Y, X, beta, gamma2, z, sigma_squared):
    """Return the mean and covariance used to draw the selected coefficients.

    The mean is ``beta_tilde_hat`` from ``draw_tildes`` and the covariance is
    ``sigma_squared`` times the inverse of ``W_tilde``.
    """
    _X_t, _b_t, _W_t, _Y_t, beta_tilde_hat, inverse_W_tilde = draw_tildes(Y, X, beta, gamma2, z)
    return beta_tilde_hat, sigma_squared * inverse_W_tilde

def draw_beta_tilde(Y, X, beta, gamma2, z, sigma_squared):
    """Draw the selected coefficients from a multivariate normal.

    The mean and covariance come from ``mean_var_beta_tilde``.
    """
    location, covariance = mean_var_beta_tilde(Y, X, beta, gamma2, z, sigma_squared)
    sample = np.random.multivariate_normal(location, covariance)
    return sample

# 3. Gibbs Sampler

In [25]:
import multiprocessing
from multiprocessing import Pool

In [26]:
# Sampler settings passed to gibbs_sampler by the simulation loop.
n_iter = 10_000   # sweeps per chain
burn_in = 1000    # initial sweeps that are not recorded
# Number of CPUs reported by the OS (printed for information).
nb_cpu_cores = multiprocessing.cpu_count()
print(f"Nb CPU cores: {nb_cpu_cores}")

Nb CPU cores: 12


In [27]:
def gibbs_sampler(Ry, s, n_iter=n_iter, burn_in=10_000, k=k, T=T, a=a, b=b, A=A, B=B, display_=False):
    """Simulate one dataset and run a Gibbs chain on it.

    Parameters
    ----------
    Ry, s :
        Forwarded to ``draw_dataset`` (noise level and number of non-zero
        coefficients).
    n_iter : int
        Number of sweeps.
    burn_in : int
        Sweeps with index below this value are not recorded. The default
        (10_000) equals the default ``n_iter``, so nothing is recorded unless
        one of them is overridden.
    k, T, a, b, A, B :
        Dimensions and prior hyper-parameters. NOTE: ``draw_r2`` and
        ``draw_sigma`` read the notebook-level ``k``, ``a``, ``b``, ``A``,
        ``B`` and ``T`` directly, so non-default values passed here are only
        partially honoured.
    display_ : bool
        Show a tqdm progress bar when True.

    Returns
    -------
    parameters : dict
        Lists of recorded ``sigma_squared``, ``r2`` and ``q`` draws.
    beta : ndarray
        Coefficient vector after the last sweep.
    """
    ### initialize dataset ###
    # Starting state; (r2, q) and gamma2 are refreshed at the top of each sweep.
    q = draw_initial_q(a=a, b=b)
    r2 = draw_initial_R2(A=A, B=B)
    Y, X, beta, sigma_squared, z = draw_dataset(Ry, s, rho=rho, k=k, T=T)
    # Average variance of the k predictors: variance of each column of X,
    # averaged over columns. (Iterating over X would visit the T rows.)
    v_x_bar = np.mean(np.var(X, axis=0))
    gamma2 = compute_gamma2(X, r2, q, v_x_bar, k=k)
    q_grid = np.concatenate((np.arange(0.001, 0.1, 0.001), np.arange(0.1, 0.9, 0.01), np.arange(0.9, 1, 0.001)))

    parameters = {'sigma_squared': [], 'r2': [], 'q': []}

    range_ = tqdm(range(n_iter)) if display_ else range(n_iter)
    for i in range_:
        r2, q = draw_r2(X, z, beta, sigma_squared, v_x_bar, q_grid)
        # gamma2 is a deterministic function of (r2, q); update it right away
        # so the remaining draws of this sweep condition on the current
        # values, not those of the previous sweep.
        gamma2 = compute_gamma2(X, r2, q, v_x_bar, k=k)
        z = draw_z(Y, X, beta, gamma2, z, q, k=k, T=T)
        sigma_squared = draw_sigma(Y, X, beta, gamma2, z)
        beta_tilde = draw_beta_tilde(Y, X, beta, gamma2, z, sigma_squared)

        # reconstruct beta from drawn beta_tilde and z
        beta = reconstruct_beta(beta_tilde, z, k=k)

        if i >= burn_in:
            parameters['sigma_squared'].append(sigma_squared)
            parameters['r2'].append(r2)
            parameters['q'].append(q)

    return parameters, beta

### Run Gibbs sampler for each dataset

In [28]:
# Every (Ry, s) setting to simulate, with Ry varying slowest.
Ry_values = [0.02, 0.25, 0.5]
s_values = [5, 10, 100]
all_values = [(Ry, s) for Ry in Ry_values for s in s_values]
print(all_values)

[(0.02, 5), (0.02, 10), (0.02, 100), (0.25, 5), (0.25, 10), (0.25, 100), (0.5, 5), (0.5, 10), (0.5, 100)]


In [29]:
# Resume from previously saved simulations when the pickle exists; on a fresh
# run start with an empty list for every (Ry, s) setting so the notebook can
# be executed top to bottom without a pre-existing results file.
if os.path.exists(FILE_PATH):
    dict_results = load_dictionary()
else:
    dict_results = {f"{Ry}, {s}": [] for Ry, s in all_values}

In [None]:
# Target number of stored chains per (Ry, s) setting.
N_SIMULATIONS_PER_SETTING = 100

for Ry, s in all_values:
    # setdefault: a setting missing from the loaded dictionary starts empty
    # instead of raising KeyError.
    runs = dict_results.setdefault(f"{Ry}, {s}", [])
    while len(runs) < N_SIMULATIONS_PER_SETTING:
        print(f"len[{Ry}, {s}] -> {len(runs)}")
        parameters, final_beta = gibbs_sampler(Ry, s, n_iter=n_iter, burn_in=burn_in, k=k, T=T, a=a, b=b, A=A, B=B, display_=True)
        runs.append([parameters, final_beta])
        # Checkpoint after every chain: each one takes minutes, and saving
        # only once per setting loses all of them if the cell is interrupted.
        save_dictionary(dict_results)

len[0.02, 5] -> 26


100%|████████████████████████████████████████████████████████████████████████████| 10000/10000 [02:54<00:00, 57.32it/s]


len[0.02, 5] -> 27


100%|████████████████████████████████████████████████████████████████████████████| 10000/10000 [02:26<00:00, 68.26it/s]


len[0.02, 5] -> 28


100%|████████████████████████████████████████████████████████████████████████████| 10000/10000 [02:43<00:00, 61.14it/s]


len[0.02, 5] -> 29


100%|████████████████████████████████████████████████████████████████████████████| 10000/10000 [03:19<00:00, 50.10it/s]


len[0.02, 5] -> 30


100%|████████████████████████████████████████████████████████████████████████████| 10000/10000 [03:06<00:00, 53.63it/s]


len[0.02, 5] -> 31


100%|████████████████████████████████████████████████████████████████████████████| 10000/10000 [02:43<00:00, 60.98it/s]


len[0.02, 5] -> 32


100%|████████████████████████████████████████████████████████████████████████████| 10000/10000 [02:41<00:00, 61.76it/s]


len[0.02, 5] -> 33


100%|████████████████████████████████████████████████████████████████████████████| 10000/10000 [02:12<00:00, 75.47it/s]


len[0.02, 5] -> 34


100%|████████████████████████████████████████████████████████████████████████████| 10000/10000 [02:36<00:00, 63.82it/s]


len[0.02, 5] -> 35


100%|████████████████████████████████████████████████████████████████████████████| 10000/10000 [02:36<00:00, 63.97it/s]


len[0.02, 5] -> 36


100%|████████████████████████████████████████████████████████████████████████████| 10000/10000 [02:02<00:00, 81.52it/s]


len[0.02, 5] -> 37


100%|████████████████████████████████████████████████████████████████████████████| 10000/10000 [02:41<00:00, 61.82it/s]


len[0.02, 5] -> 38


100%|████████████████████████████████████████████████████████████████████████████| 10000/10000 [02:32<00:00, 65.54it/s]


len[0.02, 5] -> 39


100%|████████████████████████████████████████████████████████████████████████████| 10000/10000 [02:46<00:00, 60.16it/s]


len[0.02, 5] -> 40


100%|████████████████████████████████████████████████████████████████████████████| 10000/10000 [03:05<00:00, 53.84it/s]


len[0.02, 5] -> 41


100%|████████████████████████████████████████████████████████████████████████████| 10000/10000 [02:57<00:00, 56.21it/s]


len[0.02, 5] -> 42


 34%|██████████████████████████                                                   | 3382/10000 [00:49<01:37, 67.87it/s]
ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "c:\users\trist\appdata\local\programs\python\python39\lib\site-packages\IPython\core\interactiveshell.py", line 3437, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-30-6ddaccbbaa6f>", line 4, in <module>
    parameters, final_beta = gibbs_sampler(Ry, s, n_iter=n_iter, burn_in=burn_in, k=k, T=T, a=a, b=b, A=A, B=B, display_=True)
  File "<ipython-input-27-169fe43c0d8e>", line 21, in gibbs_sampler
    beta_tilde = draw_beta_tilde(Y, X, beta, gamma2, z, sigma_squared)
  File "<ipython-input-24-2cf45cf7bdf9>", line 13, in draw_beta_tilde
    return np.random.multivariate_normal(mean, variance)
  File "numpy\\random\\mtrand.pyx", line 4235, in numpy.random.mtrand.RandomState.multivariate_normal
  File "c:\users\trist\appdata\local\programs\python\python39\lib\site-packages\numpy\linalg\linalg.py", line 1681, in svd
    u, s, vh = gufunc(a, signature=signature, extobj=extobj)
KeyboardInterrupt

During handli

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "c:\users\trist\appdata\local\programs\python\python39\lib\site-packages\IPython\core\interactiveshell.py", line 3437, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-30-6ddaccbbaa6f>", line 4, in <module>
    parameters, final_beta = gibbs_sampler(Ry, s, n_iter=n_iter, burn_in=burn_in, k=k, T=T, a=a, b=b, A=A, B=B, display_=True)
  File "<ipython-input-27-169fe43c0d8e>", line 21, in gibbs_sampler
    beta_tilde = draw_beta_tilde(Y, X, beta, gamma2, z, sigma_squared)
  File "<ipython-input-24-2cf45cf7bdf9>", line 13, in draw_beta_tilde
    return np.random.multivariate_normal(mean, variance)
  File "numpy\\random\\mtrand.pyx", line 4235, in numpy.random.mtrand.RandomState.multivariate_normal
  File "c:\users\trist\appdata\local\programs\python\python39\lib\site-packages\numpy\linalg\linalg.py", line 1681, in svd
    u, s, vh = gufunc(a, signature=signature, extobj=extobj)
KeyboardInterrupt

During handli