In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde
import statsmodels.api as sm
from statsmodels.graphics.tsaplots import plot_acf
from scipy.stats import expon, norm, pareto

# Exercise 2: inversion method - generate Exp($\lambda$)

Cumulative Distribution Function (CDF): $F(x) = 1 - e^{-\lambda x}$

Quantile Function (solving for x): $F^{-1}(u) = \frac{-1}{\lambda} \ln(1 - u)$

In [None]:
# Inversion sampling for the exponential distribution
n = 100        # sample size
lambda_ = 3    # rate parameter of Exp(lambda)

# Uniform draws on [0, 1) that get pushed through the quantile function
U = np.random.rand(n)

# Inverse-CDF transform: F^{-1}(u) = -ln(1 - u) / lambda
X = -np.log(1 - U) / lambda_

# Gaussian KDE with Scott's bandwidth, evaluated on a grid from 0 up to
# the largest simulated value
kde = gaussian_kde(X, bw_method='scott')
x_vals = np.linspace(0, max(X), 1000)
y_vals = kde(x_vals)

# Normalised histogram of the draws with the KDE curve drawn on top
fig, ax = plt.subplots(figsize=(8, 6))
ax.hist(X, bins=20, density=True, alpha=0.7, edgecolor='black', label='Histogram')
ax.plot(x_vals, y_vals, color='red', lw=2, label='Kernel Density Estimation')
ax.set_title('Histogram and Kernel Density Estimation for X')
ax.set_xlabel('x')
ax.set_ylabel('Density')
ax.legend()
plt.show()


# Exercise 3: Rejection Sampling

In [None]:
# Target function f(x1, x2, x3)
def f(x1, x2, x3):
    """Evaluate (1 - cos(x1) * cos(x2) * cos(x3)) / (8 * pi^3).

    The arguments may be scalars or NumPy arrays; the arithmetic is
    element-wise, so array inputs are broadcast against each other.
    """
    cos_product = np.cos(x1) * np.cos(x2) * np.cos(x3)
    normaliser = 8 * np.pi**3
    return (1 - cos_product) / normaliser

# Number of accepted draws every sampler should deliver
sample_n = 10000

### part a):

In [None]:
# Envelope constant: the largest value of 2*pi*f(x, 0, 0), reached at cos(x) = -1
k_a = 1 / (2 * np.pi**2)

X_sample = []   # accepted draws, in order of acceptance
attempts = 0    # number of proposals generated so far

# One proposal per iteration until enough draws have been accepted
while len(X_sample) < sample_n:
    attempts += 1
    V = np.random.uniform(0, 2 * np.pi)           # proposal from U(0, 2*pi)
    accept_prob = 2*np.pi*f(V, 0, 0) / k_a        # equals (1 - cos V) / 2
    if np.random.uniform(0, 1) < accept_prob:
        X_sample.append(V)

successes = len(X_sample)

# Summary
print(f"For a sample size of n={successes}, {attempts} attempts were necessary.")
print(f"Acceptance ratio: {successes / attempts:.4f}")

In [None]:
# Vectorised rejection sampling: draw a whole batch of proposals at once.
# Each proposal V is accepted with probability (1 - cos V) / 2, i.e. about
# half of them on average, so 2.1 * sample_n proposals are usually enough
# but that is not guaranteed.
n_proposals = int(sample_n * 2.1)
V_sample_matrix = np.random.uniform(0, 2 * np.pi, n_proposals)
U = np.random.uniform(0, 1, n_proposals)
mask = 2*np.pi*f(V_sample_matrix, 0, 0) / k_a >= U
accepted = V_sample_matrix[mask]

# Top up with further batches if the first one fell short, so that
# X_sample_matrix always holds exactly sample_n draws instead of being
# silently truncated.
top_up_size = max(n_proposals, 1)
while accepted.size < sample_n:
    V_extra = np.random.uniform(0, 2 * np.pi, top_up_size)
    U_extra = np.random.uniform(0, 1, top_up_size)
    keep = 2*np.pi*f(V_extra, 0, 0) / k_a >= U_extra
    accepted = np.concatenate([accepted, V_extra[keep]])

X_sample_matrix = accepted[:sample_n]

In [None]:
# Compare the kernel density estimates of the two samplers in one figure
fig, ax = plt.subplots(figsize=(10, 6))

# Both curves share one grid: from 0 up to the largest loop-based draw
x_vals = np.linspace(0, max(X_sample), 1000)

kde_loop = gaussian_kde(X_sample)
ax.plot(x_vals, kde_loop(x_vals), lw=2, label="Loop-based")

kde_matrix = gaussian_kde(X_sample_matrix)
y_vals = kde_matrix(x_vals)
ax.plot(x_vals, y_vals, color='red', lw=2, label="Matrix-based")

ax.set_title("Kernel Density Estimation (Part a)")
ax.set_xlabel("x")
ax.set_ylabel("Density")
ax.legend()
plt.show()

### Part b):

In [None]:
# Envelope constant: the largest value of 4*pi^2 * f(x1, x2, 0)
k_b = 1 / np.pi

# Draw all proposals at once, uniformly on the square [0, 2*pi]^2,
# together with one uniform acceptance variable per proposal
n_proposals_2d = int(sample_n * 5)
V_sample_2d = np.random.uniform(0, 2 * np.pi, (n_proposals_2d, 2))
U = np.random.uniform(0, 1, n_proposals_2d)

# Keep a proposal when its acceptance probability is at least U,
# then cut the accepted rows down to the requested sample size
v1 = V_sample_2d[:, 0]
v2 = V_sample_2d[:, 1]
mask = 4 * np.pi**2 * f(v1, v2, 0) / k_b >= U
X_sample_2d = V_sample_2d[mask][:sample_n]

In [None]:
# Scatter plot of the accepted 2D draws on the square [0, 2*pi]^2
fig, ax = plt.subplots(figsize=(6, 6))
ax.scatter(X_sample_2d[:, 0], X_sample_2d[:, 1], alpha=0.5, s=10, marker='.')
ax.set_title("Scatterplot of Samples (Part b)")
ax.set_xlabel("x1")
ax.set_ylabel("x2")
ax.set_xlim([0, 2 * np.pi])
ax.set_ylim([0, 2 * np.pi])
# Equal scaling on both axes so the square is not distorted
ax.set_aspect('equal', adjustable='box')
plt.show()

# Exercise 4: Gibbs Sampling

### part b):

In [None]:
# Parameters shared by both conditional normal distributions
mu = 0       # mean used for both coordinates
s = 1        # standard deviation used for both coordinates
corr = 0.7   # correlation between the two coordinates


def _draw_conditional(x_other):
    """Draw one coordinate from its normal conditional given the other one.

    The conditional mean is mu + corr * (x_other - mu) and the conditional
    standard deviation is sqrt(s^2 * (1 - corr^2)). The globals are read at
    call time.
    """
    cond_mean = mu + corr * (x_other - mu)
    cond_std = np.sqrt(s**2 * (1 - corr**2))
    return np.random.normal(cond_mean, cond_std)


def f2_given_1(x_1):
    """Sample the second coordinate conditional on the first being x_1."""
    return _draw_conditional(x_1)


def f1_given_2(x_2):
    """Sample the first coordinate conditional on the second being x_2."""
    return _draw_conditional(x_2)

# Build the Gibbs chain: one row per iteration, columns are (x1, x2)
length = 1100
chain = np.zeros((length, 2))

# Deliberately start far away from the bulk of the target so that a
# burn-in phase is visible
chain[0, :] = [10, 10]

for i in range(1, length):
    x1_new = f1_given_2(chain[i - 1, 1])   # x1 from the previous x2
    x2_new = f2_given_1(x1_new)            # x2 from the freshly drawn x1
    chain[i, 0] = x1_new
    chain[i, 1] = x2_new

### part c): Plotting

i) Sequential plot of the two coordinates

In [None]:
# Trace plots: one panel per coordinate of the chain, stacked vertically
fig, axes = plt.subplots(2, 1, figsize=(10, 6))

panel_titles = ('First Coordinate', 'Second Coordinate')
for ax, coordinate, panel_title in zip(axes, chain.T, panel_titles):
    ax.plot(coordinate, '.-')
    ax.set_title(panel_title)
    ax.set_xlabel('Iteration')
    ax.set_ylabel('Realization')

plt.tight_layout()
plt.show()

ii) Remove the burn-in phase by discarding the first 100 samples of the chain, then compare the distributions of $X_1$ and $X_2$ to the standard normal distribution using histograms, kernel density estimates, and Q-Q plots.

In [None]:
# Drop the first 100 rows as burn-in.
# NOTE: this overwrites `chain`, so re-running the cell trims another 100 rows.
chain = chain[100:, :]

# Common evaluation grid and the N(0, 1) reference curve for both panels
x_vals = np.linspace(-4, 4, 1000)
gaussian_density = norm.pdf(x_vals, loc=0, scale=1)

# One panel per coordinate: histogram, KDE and the standard normal density
fig_density, axes = plt.subplots(1, 2, figsize=(12, 4))
for i, ax in enumerate(axes):
    ith_data = chain[:, i]

    ax.hist(ith_data, bins=20, density=True, alpha=0.7, edgecolor='black', label='Histogram')

    # Kernel density estimate with Scott's rule for the bandwidth
    kde = gaussian_kde(ith_data, bw_method='scott')
    y_vals = kde(x_vals)
    ax.plot(x_vals, y_vals, color='orange', lw=2, label='Kernel Density Estimation')

    # Reference: standard normal density
    ax.plot(x_vals, gaussian_density,  linestyle='--', color='red', label='N(0, 1)')

    ax.set_title(f'Histogram and Kernel Density Estimation for $X_{i + 1}$')
    ax.set_xlabel('x')
    ax.set_ylabel('Density')
    ax.set_xlim(-4, 4)
    ax.legend(loc='upper right', fontsize='small')

plt.tight_layout()
plt.show()

In [None]:
# Q-Q plots of each coordinate, with a 45-degree reference line
fig_density, axes = plt.subplots(1, 2, figsize=(10, 4))
for i, ax in enumerate(axes):
    ith_data = chain[:, i]
    sm.qqplot(ith_data, line='45', ax=ax)
    ax.set_title(f'Q-Q plot for $X_{i + 1}$')

plt.tight_layout()
plt.show()

iii) Autocorrelation function

In [None]:
# Autocorrelation of each coordinate up to lag 60, one panel per coordinate
fig_acf, axes_acf = plt.subplots(2, 1, figsize=(8, 6))
for i, ax in enumerate(axes_acf):
    plot_acf(chain[:, i], lags=60, ax=ax)
    ax.set_title(f'Autocorrelation Function of Coordinate {i + 1}')

plt.tight_layout()
plt.show()

### part d) Compute covariance matrix

In [None]:
# Sample covariance of the chain; rowvar=False treats each column as a variable
cov_matrix = np.cov(chain, rowvar=False)
print("Covariance Matrix:", cov_matrix, sep="\n")

# Exercise 5: Metropolis-Hastings algorithm

In [None]:
# Proposal kernel: Gaussian centred at the previous state
sigma = 1   # standard deviation of the proposal

def draw_transition(x_prev):
    """Draw one proposal from g(. | x_prev) = N(x_prev, sigma^2)."""
    proposal = norm.rvs(loc=x_prev, scale=sigma)
    return proposal

# Unnormalised target: the Exp(1) density restricted to x > 2
def target_density(x):
    """Return the Exp(1) density at x where x > 2, and 0 elsewhere.

    The result is only proportional to a density (it is not renormalised).
    Works element-wise on NumPy arrays as well as on scalars.
    """
    in_support = x > 2
    return in_support * expon.pdf(x, scale=1)

# Total chain length, burn-in phase included
length = 21000
chain = np.zeros(length)        # generated states
acceptance = np.zeros(length)   # 1 where the proposal was accepted, else 0

# Arbitrary starting value
chain[0] = 50

# Track the current state and its target value so the target only has to
# be evaluated once per proposal
current = chain[0]
p_current = target_density(current)

for index in range(1, length):
    proposal = draw_transition(current)
    p_proposal = target_density(proposal)
    # The proposal kernel is symmetric, so the acceptance probability
    # reduces to the ratio p(proposal) / p(current)
    if np.random.rand() < p_proposal / p_current:
        current, p_current = proposal, p_proposal
        acceptance[index] = 1
    # On rejection `current` is unchanged, so the previous value is repeated
    chain[index] = current

### part i): Identify a burn-in period graphically

In [None]:
# Trace plot of the full chain, used to pick a burn-in period by eye
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(chain, '.-')
ax.set_title('Generated Realizations')
ax.set_xlabel('Iteration')
ax.set_ylabel('Realization')
# Render the figure explicitly so the cell does not end by echoing the
# Text(...) object returned by the last labelling call
plt.show()

In [None]:
# Number of initial iterations treated as burn-in
burn_in = 1000

# Share of accepted proposals among the iterations after the burn-in
accepted_after_burn_in = np.sum(acceptance[burn_in:])
acceptance_ratio = accepted_after_burn_in / (length - burn_in)
print(f"Acceptance Ratio: {acceptance_ratio}")

# Drop the burn-in phase.
# NOTE: this overwrites `chain`, so re-running the cell trims it again.
chain = chain[burn_in:]

### part ii): plot the autocorrelation of the samples:

In [None]:
# Autocorrelation of the chain up to lag 150.
# plot_acf opens a figure of its own unless it is handed an Axes, which
# would leave an empty 18x6 figure behind and ignore the requested size,
# so the Axes is created here and passed in explicitly.
fig, ax = plt.subplots(figsize=(18, 6))
plot_acf(chain, lags=150, ax=ax)
ax.set_title('Autocorrelation Function')
plt.show()

### iii) Kernel density estimation

In [None]:
fig, ax = plt.subplots()

# Kernel density estimate of the chain, evaluated on [0, 6]
density = gaussian_kde(chain)
x_vals = np.linspace(0, 6, 1000)
y_vals = density(x_vals)
ax.plot(x_vals, y_vals, label='Kernel Density')
ax.set_xlim(0, 6)

# True density: the unnormalised target divided by P(X > 2) for X ~ Exp(1)
constant = 1 - expon.cdf(2, scale=1)
theoretical_density = target_density(x_vals) / constant
ax.plot(x_vals, theoretical_density, 'k--', label='True')

ax.set_title('Kernel Density Estimation')
ax.legend(loc='upper right')
plt.show()