In [1]:
import numpy as np
import os
from scipy.special import expit
from scipy.stats import norm

In [9]:
# 寻找最优旋转
def find_optimal_rotation_matrix(true_theta, pred_theta):
    """Return the orthogonal matrix aligning ``pred_theta`` with ``true_theta``.

    ``pred_theta`` is column-centred on a local copy (the caller's array is
    left untouched; ``true_theta`` is used as given). With the SVD
    ``centred_pred.T @ true_theta = U S Vt`` the function returns
    ``Q = Vt.T @ U.T``, so callers align an estimate with ``pred_theta @ Q.T``.

    The result is orthogonal but no determinant constraint is applied, so it
    may contain a reflection as well as a rotation.

    Raises:
        AssertionError: if the two matrices do not have the same shape.
    """
    centred_pred = pred_theta - pred_theta.mean(axis=0)
    assert true_theta.shape == centred_pred.shape, "Matrices must have the same shape"

    cross_product = centred_pred.T @ true_theta
    left_vectors, _singular_values, right_vectors_t = np.linalg.svd(cross_product)
    return right_vectors_t.T @ left_vectors.T

# 对数似然函数的一阶导数
def log_likelihood_first_derivative(A_ij, pi_ij):
    """Return ``A_ij - expit(pi_ij)``.

    This is the derivative, with respect to the logit ``pi_ij``, of a
    Bernoulli log-likelihood with observation ``A_ij``. Scalars and arrays
    are both accepted (the operation is element-wise).
    """
    fitted_probability = expit(pi_ij)
    residual = A_ij - fitted_probability
    return residual

# 对数似然函数的二阶导数
def log_likelihood_second_derivative(M):
    """Return ``expit(M) * (expit(M) - 1)`` element-wise.

    This is the second derivative of the Bernoulli log-likelihood with respect
    to the logit ``M``; it is never positive.
    """
    probability = expit(M)
    return (probability - 1) * probability

# 生成M矩阵
def generate_M_matrix(alphas, thetas):
    """Build the matrix with entries ``alpha_i + alpha_j + <theta_i, theta_j>``.

    Args:
        alphas: per-node effects; flattened before use.
        thetas: two-dimensional array with one row per node.

    Returns:
        The square matrix ``M`` with ``M[i, j] = alphas[i] + alphas[j] +
        thetas[i] @ thetas[j]``.
    """
    n_nodes, _ = thetas.shape
    unit = np.ones(n_nodes)

    alpha_by_column = np.outer(unit, alphas)  # entry [i, j] is alphas[j]
    alpha_by_row = np.outer(alphas, unit)     # entry [i, j] is alphas[i]
    inner_products = thetas @ thetas.T

    return alpha_by_column + alpha_by_row + inner_products

# 计算sigma_hat
def compute_sigma_hat(i, M, Z, r=None):
    """Curvature ("bread") term of the sandwich estimator for node ``i``.

    Computes ``(1 / n) * sum_{j != i} l''(M[i, j]) * h_j h_j^T`` where
    ``h_j = (Z[j], 1)``, ``l''`` is ``log_likelihood_second_derivative`` and
    ``n = M.shape[0]``. The divisor is ``n`` although only the ``j != i`` terms
    are summed.

    Args:
        i: index of the node whose matrix is computed.
        M: square matrix of logits; only row ``i`` is read.
        Z: latent positions, one row per node.
        r: optional latent dimension. It is inferred from ``Z`` when omitted
            (previously it defaulted to 2 regardless of ``Z``); when given it
            must agree with ``Z``.

    Returns:
        Array of shape ``(r + 1, r + 1)``.

    Raises:
        ValueError: if ``r`` is given and does not match the width of ``Z``.
    """
    M = np.asarray(M)
    Z = np.asarray(Z)
    if Z.ndim == 1:
        # A single latent coordinate per node: treat as one column.
        Z = Z[:, None]

    n = M.shape[0]
    dim = Z.shape[1] + 1
    if r is not None and r + 1 != dim:
        raise ValueError(f"r={r} does not match Z with {dim - 1} latent column(s)")

    # Rows are h_j = (Z[j], 1); drop j == i, which is excluded from the sum.
    keep = np.arange(n) != i
    H = np.hstack((Z[:n], np.ones((n, 1))))[keep]
    weights = np.asarray(log_likelihood_second_derivative(M[i, :n]))[keep]

    # sum_j w_j * h_j h_j^T written as one matrix product.
    return (H * weights[:, None]).T @ H / n

# Compute \hat{\Omega}_i (omega_hat) for the specified node i
def compute_omega_hat(i, M, Z, A, r=None):
    """Score-variance ("meat") term of the sandwich estimator for node ``i``.

    Computes ``(1 / n) * sum_{j != i} l'(A[i, j], M[i, j])**2 * h_j h_j^T``
    where ``h_j = (Z[j], 1)``, ``l'`` is ``log_likelihood_first_derivative``
    and ``n = M.shape[0]``. The divisor is ``n`` although only the ``j != i``
    terms are summed.

    Args:
        i: index of the node whose matrix is computed.
        M: square matrix of logits; only row ``i`` is read.
        Z: latent positions, one row per node.
        A: observed adjacency matrix; only row ``i`` is read.
        r: optional latent dimension. It is inferred from ``Z`` when omitted
            (previously it defaulted to 2 regardless of ``Z``); when given it
            must agree with ``Z``.

    Returns:
        Array of shape ``(r + 1, r + 1)``.

    Raises:
        ValueError: if ``r`` is given and does not match the width of ``Z``.
    """
    M = np.asarray(M)
    A = np.asarray(A)
    Z = np.asarray(Z)
    if Z.ndim == 1:
        # A single latent coordinate per node: treat as one column.
        Z = Z[:, None]

    n = M.shape[0]
    dim = Z.shape[1] + 1
    if r is not None and r + 1 != dim:
        raise ValueError(f"r={r} does not match Z with {dim - 1} latent column(s)")

    # Rows are h_j = (Z[j], 1); drop j == i, which is excluded from the sum.
    keep = np.arange(n) != i
    H = np.hstack((Z[:n], np.ones((n, 1))))[keep]
    scores = np.asarray(log_likelihood_first_derivative(A[i, :n], M[i, :n]))[keep]
    squared_scores = scores ** 2

    # sum_j s_j^2 * h_j h_j^T written as one matrix product.
    return (H * squared_scores[:, None]).T @ H / n

#### 计算样本协方差矩阵

In [10]:
# Empirical covariance of node 0's estimate (theta_0, alpha_0) across all
# simulation replications stored for this network size.
number = 500
base_folder = r'/home/user/CYH/Code_For_MDS/Project/para_result/parameter estimation/inner-product/Binomial/Simulation10000_2_1.0relative_1e-08'

# The true positions are read from replication 1 and reused for every run below.
true_theta = np.load(base_folder + f'/n_{number}/1/true_theta.npy')
r = 2
H = []
path_ = base_folder + f'/n_{number}/'
# Replications are expected to live in sub-folders named 1..n_runs.
n_runs = len(os.listdir(path_))
# A named loop variable is used instead of `_`, which IPython reserves for the
# last cell output.
for run_id in range(1, n_runs + 1):
    run_dir = os.path.join(path_, str(run_id))
    pred_alpha = np.load(os.path.join(run_dir, 'pred_alpha.npy'))
    pred_theta = np.load(os.path.join(run_dir, 'pred_theta.npy'))
    # Align the estimate with the true positions before comparing across runs.
    Q = find_optimal_rotation_matrix(true_theta, pred_theta)
    pred_theta = pred_theta @ Q.T

    # Row 0 of (theta, alpha) is the estimate for node 0.
    H_temp = np.hstack((pred_theta, pred_alpha.reshape(-1, 1)))
    H.append(H_temp[0])
H = np.array(H)
true_matrix = np.cov(H, rowvar=False)
true_matrix

array([[ 9.11285147e-03,  5.42220120e-04,  1.50342883e-04],
       [ 5.42220120e-04,  1.00306871e-02, -5.75818208e-07],
       [ 1.50342883e-04, -5.75818208e-07,  9.84990637e-03]])

#### 计算sandwich的值

In [11]:
# For the first 200 replications, measure how far the sandwich variance
# estimate for node 0 is from the empirical covariance `true_matrix`.
haha = {}
# A named loop variable is used instead of `_`, which IPython reserves for the
# last cell output.
for run_id in range(1, 201):
    run_dir = os.path.join(base_folder, f'n_{number}', str(run_id))
    A = np.load(os.path.join(run_dir, 'adjacency_matrix.npy'))
    pred_alpha = np.load(os.path.join(run_dir, 'pred_alpha.npy'))
    pred_theta = np.load(os.path.join(run_dir, 'pred_theta.npy'))
    Q = find_optimal_rotation_matrix(true_theta, pred_theta)
    pred_theta = pred_theta @ Q.T

    M = generate_M_matrix(pred_alpha, pred_theta)
    Z = pred_theta

    left = compute_sigma_hat(0, M, Z)
    mid = compute_omega_hat(0, M, Z, A)
    a = np.linalg.inv(left)
    sandwich = a @ mid @ a

    # Sum of absolute entry-wise differences, keyed by replication id.
    haha[run_id] = np.sum(np.abs(sandwich/number - true_matrix))

In [12]:
# 找寻表现最好的值
# Replication ids with the smallest and the largest discrepancy stored in `haha`.
min_key = min(haha.items(), key=lambda item: item[1])[0]
max_key = max(haha.items(), key=lambda item: item[1])[0]
(min_key, max_key)

(191, 188)

In [13]:
# Show the sandwich estimate for the replication with the smallest discrepancy.
# The id comes from `min_key` (computed in the previous cell) rather than a
# hard-coded literal, so it stays correct if the results change.
best_run = min_key
run_dir = os.path.join(base_folder, f'n_{number}', str(best_run))
A = np.load(os.path.join(run_dir, 'adjacency_matrix.npy'))
pred_alpha = np.load(os.path.join(run_dir, 'pred_alpha.npy'))
pred_theta = np.load(os.path.join(run_dir, 'pred_theta.npy'))
Q = find_optimal_rotation_matrix(true_theta, pred_theta)
pred_theta = pred_theta @ Q.T

M = generate_M_matrix(pred_alpha, pred_theta)
Z = pred_theta

left = compute_sigma_hat(0, M, Z)
mid = compute_omega_hat(0, M, Z, A)
a = np.linalg.inv(left)
sandwich = a @ mid @ a
sandwich/number

array([[ 9.84378817e-03,  2.64818534e-04,  1.19784448e-04],
       [ 2.64818534e-04,  1.02383525e-02, -1.55657977e-05],
       [ 1.19784448e-04, -1.55657977e-05,  8.61146571e-03]])

#### coverage rates

In [None]:
def calculate_sandwich(base_path):
    """Sandwich variance estimate for the first latent coordinate of node 0.

    Loads one replication from ``base_path`` (``adjacency_matrix.npy``,
    ``pred_alpha.npy``, ``pred_theta.npy``, ``true_theta.npy``), aligns the
    estimated positions with the true ones, and evaluates
    ``sigma_hat^{-1} @ omega_hat @ sigma_hat^{-1} / n`` for node 0, where ``n``
    is the number of rows of the estimated positions.

    Args:
        base_path: folder of a single replication.

    Returns:
        Tuple ``(variance_estimate, aligned_estimate, true_value)`` for entry
        ``[0, 0]`` of the sandwich matrix, the aligned ``pred_theta`` and
        ``true_theta`` respectively.
    """
    A = np.load(f'{base_path}/adjacency_matrix.npy')
    pred_alpha = np.load(f'{base_path}/pred_alpha.npy')
    pred_theta = np.load(f'{base_path}/pred_theta.npy')
    true_theta = np.load(f'{base_path}/true_theta.npy')

    Q = find_optimal_rotation_matrix(true_theta, pred_theta)
    aligned_theta = pred_theta @ Q.T
    n_nodes, latent_dim = aligned_theta.shape

    M = generate_M_matrix(pred_alpha, aligned_theta)

    # Pass the latent dimension explicitly so it always matches the data.
    sigma_hat = compute_sigma_hat(0, M, aligned_theta, r=latent_dim)
    omega_hat = compute_omega_hat(0, M, aligned_theta, A, r=latent_dim)
    sigma_inv = np.linalg.inv(sigma_hat)
    sandwich = sigma_inv @ omega_hat @ sigma_inv / n_nodes

    return sandwich[0, 0], aligned_theta[0, 0], true_theta[0, 0]

def check_confidence_interval(base_path):
    """Tell whether the 95% normal interval for one replication covers the truth.

    The interval is ``estimate +/- z * sqrt(variance)`` with ``z`` the 0.975
    standard-normal quantile, using the values returned by
    ``calculate_sandwich(base_path)``.

    Returns:
        Truthy when the true value lies inside the closed interval.
    """
    variance, estimate, truth = calculate_sandwich(base_path)

    critical_value = norm.ppf(0.975)
    margin = critical_value * np.sqrt(variance)
    lower_bound = estimate - margin
    upper_bound = estimate + margin

    return truth >= lower_bound and truth <= upper_bound

# coverage rates 
# Empirical coverage of the 95% interval for every network size found on disk.
base_folder = r'/home/user/CYH/Code_For_MDS/Project/para_result/parameter estimation/inner-product/Binomial/Simulation10000_2_1.0relative_1e-08'
# Sub-folders are named n_<size>; collect the sizes in increasing order.
n_values = sorted(
    int(entry.split('_')[-1])
    for entry in os.listdir(base_folder)
    if entry.startswith('n_')
)
coverage_dict = {}

for number in n_values:
    path = os.path.join(base_folder, f'n_{number}')
    # List the folder once so the numerator and denominator use the same count.
    # Replications are expected to live in sub-folders named 1..n_runs.
    n_runs = len(os.listdir(path))
    # Named counters replace `_` / `__`, which IPython reserves for cell outputs.
    covered = sum(
        1
        for run_id in range(1, n_runs + 1)
        if check_confidence_interval(os.path.join(path, str(run_id)))
    )
    # An empty folder yields NaN instead of aborting the loop with ZeroDivisionError.
    coverage_dict[number] = covered / n_runs if n_runs else float('nan')

coverage_dict