In [6]:
import os
os.environ['KMP_DUPLICATE_LIB_OK']='True'
import sys
# 将 JumpGP_code_py 所在的目录添加到 Python 路径
# sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
# sys.path.append(os.path.dirname(os.path.abspath(__file__)))

import torch
import numpy as np
import math

from utils1 import jumpgp_ld_wrapper

from VI_utils_gpu_acc_U import *
from JumpGP_test import *

Using device: cuda


In [14]:
import torch

# 1. 设备选择：优先使用 GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# 2. 加载并搬到 device
folder_name = "2025_04_02_00_45"
dataset = load_dataset(folder_name)
X_train = dataset["X_train"].to(device)   # (N_train, D)
Y_train = dataset["Y_train"].to(device)
X_test  = dataset["X_test"].to(device)    # (N_test, D)
Y_test  = dataset["Y_test"].to(device)

# 3. 自动推导维度
N_test, D = X_test.shape
N_train   = X_train.shape[0]
Q   = 2     # 潜在维度
m1  = 10     # 每个 region 的 inducing points 数量
m2  = 20     # 全局 inducing points 数量
T   = N_test
n   = 100   # 每个 region 的邻居数量

# 4. 构造 neighborhoods，并把它们搬到 GPU
neighborhoods = find_neighborhoods(X_test.cpu(), X_train.cpu(), Y_train.cpu(), M=n)
# （find_neighborhoods 内部可能要求 CPU 张量，所以我们传入 cpu()，然后再搬回 GPU）
regions = []
for i in range(T):
    X_nb = neighborhoods[i]['X_neighbors'].to(device)  # (n, D)
    y_nb = neighborhoods[i]['y_neighbors'].to(device)  # (n,)
    regions.append({
        'X': X_nb,
        'y': y_nb,
        # 'U': 1.0,                        # 常数
        'C': torch.randn(m1, Q, device=device)  # 随机初始化
    })

# 5. 初始化 V_params（放到 GPU）
V_params = {
    'mu_V':    torch.randn(m2, Q, D, device=device, requires_grad=True),
    'sigma_V': torch.rand( m2, Q, D, device=device, requires_grad=True),
}

# 6. 初始化 u_params（放到 GPU）
u_params = []
for _ in range(T):
    u_params.append({
        'U_logit': torch.zeros(1, device=device, requires_grad=True),  # 初始 logit=0 -> U=0.5
        'mu_u':         torch.randn(m1, device=device, requires_grad=True),
        'Sigma_u':      torch.eye(m1, device=device, requires_grad=True),
        'sigma_noise':  torch.tensor(0.5, device=device, requires_grad=True),
        'omega':        torch.randn(Q+1, device=device, requires_grad=True),
    })

# 7. 初始化 hyperparams（放到 GPU）
#    用 X_train 的分布来初始化 Z
X_train_mean = X_train.mean(dim=0)
X_train_std  = X_train.std(dim=0)
Z = X_train_mean + torch.randn(m2, D, device=device) * X_train_std

hyperparams = {
    'Z':             Z,                     # (m2, D)
    'X_test':        X_test,                # (T, D)
    'lengthscales':  torch.rand(Q, device=device, requires_grad=True),
    'var_w':         torch.tensor(1.0, device=device, requires_grad=True),
}

print("Everything set!")

# 8. 计算 ELBO、反向传播、训练、预测（所有计算都在 GPU 上）
L = compute_ELBO(regions, V_params, u_params, hyperparams)
print("ELBO L =", L.item())
L.backward()
print("Gradients OK")

V_params, u_params, hyperparams = train_vi(
    regions=regions,
    V_params=V_params,
    u_params=u_params,
    hyperparams=hyperparams,
    lr=1e-3,
    num_steps=200,
    log_interval=50
)
print("train OK")

mu_pred, var_pred = predict_vi_analytic(regions, V_params, u_params, hyperparams)
print("Prediction OK")
print("mu_pred:", mu_pred.shape)
print("var_pred:", var_pred.shape)

rmse, q25, q50, q75 = compute_metrics(mu_pred, var_pred, Y_test)
rmse, q25, q50, q75


Using device: cuda
Everything set!
ELBO L = -130342.83052780842
Gradients OK
Step 1/200, ELBO=-130342.8305
Step 50/200, ELBO=-122763.7900
Step 100/200, ELBO=-115547.6143
Step 150/200, ELBO=-108801.5474
Step 200/200, ELBO=-102488.3187
train OK
Prediction OK
mu_pred: torch.Size([500, 500])
var_pred: torch.Size([500, 500])


(tensor(4.8036, device='cuda:0', dtype=torch.float64, grad_fn=<SqrtBackward0>),
 tensor(249.8943, device='cuda:0', dtype=torch.float64,
        grad_fn=<SqueezeBackward4>),
 tensor(304.9419, device='cuda:0', dtype=torch.float64,
        grad_fn=<SqueezeBackward4>),
 tensor(382.8676, device='cuda:0', dtype=torch.float64,
        grad_fn=<SqueezeBackward4>))

In [10]:
Y_test.max(), Y_test.min(), mu_pred.max(), mu_pred.min(), var_pred.max(), var_pred.min()

(tensor(5.2880, device='cuda:0'),
 tensor(-5.2400, device='cuda:0'),
 tensor(0.5069, device='cuda:0', dtype=torch.float64, grad_fn=<MaxBackward1>),
 tensor(-0.0012, device='cuda:0', dtype=torch.float64, grad_fn=<MinBackward1>),
 tensor(0.2829, device='cuda:0', dtype=torch.float64, grad_fn=<MaxBackward1>),
 tensor(0.0394, device='cuda:0', dtype=torch.float64, grad_fn=<MinBackward1>))

In [3]:
mu_p, var_p = predict_vi(regions, V_params, hyperparams, M=2)

11it [00:14,  1.28it/s]

maximize_PD func, we fail at iteration 0


21it [00:20,  1.65it/s]

maximize_PD func, we fail at iteration 0


31it [00:31,  1.06s/it]

maximize_PD func, we fail at iteration 4


62it [00:58,  1.50it/s]

maximize_PD func, we fail at iteration 0


71it [01:06,  1.17it/s]

maximize_PD func, we fail at iteration 0


85it [01:16,  1.41it/s]

maximize_PD func, we fail at iteration 3


90it [01:22,  1.13s/it]

maximize_PD func, we fail at iteration 2


106it [01:40,  1.05s/it]

maximize_PD func, we fail at iteration 3


132it [02:02,  1.32it/s]

maximize_PD func, we fail at iteration 2


142it [02:09,  1.54it/s]

maximize_PD func, we fail at iteration 0


158it [02:25,  1.08it/s]

maximize_PD func, we fail at iteration 0


171it [02:37,  1.16it/s]

maximize_PD func, we fail at iteration 0


243it [03:42,  1.48it/s]

maximize_PD func, we fail at iteration 0


247it [03:45,  1.43it/s]

maximize_PD func, we fail at iteration 0


270it [04:08,  1.45it/s]

maximize_PD func, we fail at iteration 0


289it [04:24,  1.03s/it]

maximize_PD func, we fail at iteration 5


304it [04:37,  1.58it/s]

maximize_PD func, we fail at iteration 0


360it [05:29,  1.05it/s]

maximize_PD func, we fail at iteration 1


365it [05:33,  1.34it/s]

maximize_PD func, we fail at iteration 0


369it [05:37,  1.13it/s]

maximize_PD func, we fail at iteration 1


374it [05:39,  2.12it/s]

maximize_PD func, we fail at iteration 0


426it [06:28,  1.30it/s]

maximize_PD func, we fail at iteration 0


461it [07:08,  1.12it/s]

maximize_PD func, we fail at iteration 2


487it [07:38,  1.13s/it]

maximize_PD func, we fail at iteration 2


500it [07:51,  1.06it/s]
39it [00:38,  1.74it/s]

maximize_PD func, we fail at iteration 0


47it [00:44,  1.61it/s]

maximize_PD func, we fail at iteration 0


56it [00:53,  1.37it/s]

maximize_PD func, we fail at iteration 0


110it [01:35,  2.66it/s]

maximize_PD func, we fail at iteration 0


127it [01:48,  1.56it/s]

maximize_PD func, we fail at iteration 0


131it [01:50,  1.72it/s]

maximize_PD func, we fail at iteration 0


142it [01:58,  1.38it/s]

maximize_PD func, we fail at iteration 3


198it [02:39,  1.53it/s]

maximize_PD func, we fail at iteration 0


208it [02:46,  1.39it/s]

maximize_PD func, we fail at iteration 2


216it [02:52,  1.69it/s]

maximize_PD func, we fail at iteration 0


268it [03:29,  1.89it/s]

maximize_PD func, we fail at iteration 0


306it [03:55,  1.36it/s]

maximize_PD func, we fail at iteration 6


320it [04:04,  1.85it/s]

maximize_PD func, we fail at iteration 0


346it [04:21,  1.96it/s]

maximize_PD func, we fail at iteration 0


349it [04:23,  1.63it/s]

maximize_PD func, we fail at iteration 0


399it [05:02,  1.11it/s]

maximize_PD func, we fail at iteration 0


418it [05:18,  1.55it/s]

maximize_PD func, we fail at iteration 0


419it [05:19,  1.83it/s]

maximize_PD func, we fail at iteration 1


424it [05:22,  2.09it/s]

maximize_PD func, we fail at iteration 0


495it [06:22,  1.76it/s]

maximize_PD func, we fail at iteration 0


500it [06:27,  1.29it/s]


In [20]:
mu_pred.max(), mu_pred.min()

(tensor(0.1392, device='cuda:0', grad_fn=<MaxBackward1>),
 tensor(-0.2203, device='cuda:0', grad_fn=<MinBackward1>))

In [5]:
mu_p.max(), mu_p.min()
rmse, q25, q50, q75 = compute_metrics(mu_p, var_p, Y_test)
rmse, q25, q50, q75

(tensor(3.8759, device='cuda:0'),
 tensor(2.8957, device='cuda:0'),
 tensor(3.5684, device='cuda:0'),
 tensor(3.8811, device='cuda:0'))