### Step 1: 导入必要的库并添加项目路径

In [None]:
import sys
import os
import logging
import pickle

sys.path.append("/root/Uni-Mof-Main-PaddlePaddle")

import paddle
from paddle_utils import *
from unicore import checkpoint_utils, distributed_utils, options, tasks, utils
from unicore.logging import progress_bar

### Step 2: 设置推理任务所需参数

In [None]:
# ---------------------------------------------------------------------------
# Inference configuration. Everything a reader may want to tune lives here.
# ---------------------------------------------------------------------------

# Root of the project checkout; all paths below are derived from it so the
# notebook can be pointed at another location by editing a single line.
PROJECT_ROOT = "/root/Uni-Mof-Main-PaddlePaddle"

DATA_PATH = f"{PROJECT_ROOT}/single-system_gas_adsorption_property_prediction"
USER_DIR = f"{PROJECT_ROOT}/unimof"
# NOTE(review): "bset" looks like a typo for "best" — confirm against the
# actual checkpoint file name on disk before renaming.
MODEL_PATH = f"{PROJECT_ROOT}/save_finetune_single_gpu/CoRE_LCD_bset.pdparams"
RESULTS_PATH = f"{PROJECT_ROOT}/results"

TASK_NAME = "CoRE_LCD"
TASK_TYPE = "unimof_v1"
LOSS = "mof_v1_mse"
ARCH = "unimat_base"
SUBSET = "test"
BATCH_SIZE = 16
NUM_WORKERS = 4
DEVICE_ID = 0

# Single source of truth for the half-precision switch. FP16 is the name the
# argument-building cell reads; USE_FP16 is kept as an alias so both names
# always agree.
USE_FP16 = True
FP16 = USE_FP16

# Register the user-dir at the front of the import path. Any previous entry
# is dropped first, so re-running this cell does not pile up duplicates while
# USER_DIR still takes priority.
sys.path[:] = [entry for entry in sys.path if entry != USER_DIR]
sys.path.insert(0, USER_DIR)

### Step 3: 模拟命令行参数并解析

In [None]:
# Build the validation parser and extend it with the model arguments.
parser = options.get_validation_parser()
options.add_model_args(parser)

# Emulated command line: the positional data path first, then every
# "--flag value" pair in the order given below (dicts keep insertion order).
cli_options = {
    "--user-dir": USER_DIR,
    "--task": TASK_TYPE,
    "--task-name": TASK_NAME,
    "--arch": ARCH,
    "--valid-subset": SUBSET,
    "--path": MODEL_PATH,
    "--batch-size": BATCH_SIZE,
    "--num-workers": NUM_WORKERS,
    "--results-path": RESULTS_PATH,
    "--device-id": DEVICE_ID,
    "--loss": LOSS,
}
args_list = [
    DATA_PATH,
    *(token for flag, value in cli_options.items() for token in (flag, str(value))),
]
# "--fp16" is a bare switch: it is present only when half precision is requested.
args_list.extend(["--fp16"] if FP16 else [])

args = options.parse_args_and_arch(parser, input_args=args_list)

### Step 4: 加载模型参数并设置设备

In [None]:
# --- Device selection -------------------------------------------------------
# NOTE(review): use_fp16 is read from the parsed args but nothing in this cell
# casts the model to half precision — confirm whether that is intended.
use_fp16 = args.fp16
# Run on the GPU only when at least one is visible and --cpu was not passed.
use_cuda = paddle.device.cuda.device_count() >= 1 and not args.cpu
if use_cuda:
    paddle.device.set_device(device=device2str(args.device_id))


# --- Checkpoint, task and model --------------------------------------------
print("开始加载模型...")
state = checkpoint_utils.load_checkpoint_to_cpu(args.path)
print("模型权重已加载。")

print("设置任务和模型结构...")
task = tasks.setup_task(args)
model = task.build_model(args)

# Parameter-name fragments whose tensors are swapped on their two axes before
# being loaded into the model.
# NOTE(review): presumably the checkpoint stores linear weights as (out, in)
# while the Paddle layers expect (in, out) — confirm against the converter.
TRANSPOSED_WEIGHT_MARKERS = (
    'fc1.weight',
    'fc2.weight',
    'in_proj.weight',
    'linear1.weight',
    'linear2.weight',
    'out_proj.weight',
)


def _needs_transpose(param_name):
    """Return True when ``param_name`` contains one of the transposed-weight markers."""
    return any(marker in param_name for marker in TRANSPOSED_WEIGHT_MARKERS)


# Snapshot the matching names first so the dict is not iterated while its
# values are being replaced.
for param_name in [name for name in state.keys() if _needs_transpose(name)]:
    state[param_name] = state[param_name].transpose([1, 0])

model.set_state_dict(state_dict=state)
print("模型结构和权重已就绪。")


# --- Move to device and switch to evaluation mode ---------------------------
if use_cuda:
    print(f"使用GPU: {args.device_id}")
    model.cuda()

model.eval()
print("构建损失函数...")
loss = task.build_loss(args)
loss.eval()
print("损失函数已就绪。")

开始加载模型...
模型权重已加载。
设置任务和模型结构...
模型结构和权重已就绪。
使用半精度(fp16)推理。
使用GPU: 0
构建损失函数...
损失函数已就绪。


### Step 5: 加载数据并执行推理

In [None]:
# Load the evaluation split through the task object.
subset = SUBSET
print(f"加载数据集: {subset}")
task.load_dataset(subset, combine=False, epoch=1)
dataset = task.dataset(subset)

# exist_ok=True replaces the check-then-create pair, which could fail if the
# directory appeared between the two calls.
os.makedirs(args.results_path, exist_ok=True)

# Output file for the raw per-batch logging outputs.
save_path = os.path.join(args.results_path, "predict_{}_{}.pkl".format(TASK_NAME, subset))

# Single-shard, unshuffled iterator over the split.
# NOTE(review): num_workers is hard-coded to 0 here, so the --num-workers
# value passed to the parser has no effect on this loader.
itr = task.get_batch_iterator(
    dataset=dataset,
    batch_size=args.batch_size,
    ignore_invalid_inputs=True,
    required_batch_size_multiple=args.required_batch_size_multiple,
    seed=args.seed,
    num_shards=1,
    shard_id=0,
    num_workers=0,
    data_buffer_size=args.data_buffer_size,
).next_epoch_itr(shuffle=False)

progress = progress_bar.progress_bar(
    itr,
    log_format=args.log_format,
    log_interval=args.log_interval,
    prefix=f"valid on '{subset}' subset",
    default_log_format="tqdm" if not args.no_progress_bar else "simple",
)

# Run the validation step batch by batch and collect each logging output.
log_outputs = []
print(f"开始推理，结果将保存到: {save_path}")
for i, sample in enumerate(progress):
    sample = utils.move_to_cuda(sample) if use_cuda else sample
    if len(sample) == 0:
        # Empty batch: nothing to evaluate.
        continue
    _, _, log_output = task.valid_step(sample, model, loss, test=True)
    progress.log({}, step=i)
    log_outputs.append(log_output)

# Copy of the outputs in which every value exposing `.numpy()` is converted.
# NOTE(review): this copy is NOT what gets pickled below; the evaluation cell
# calls `.cpu()` on the loaded values, so the unconverted outputs are written.
processed_log_outputs = [
    {k: (v.numpy() if hasattr(v, 'numpy') else v) for k, v in log.items()}
    for log in log_outputs
]

# The context manager guarantees the file is flushed and closed even if
# pickling raises part-way through.
with open(save_path, "wb") as f:
    pickle.dump(log_outputs, f)
print(f"推理完成，结果已保存到: {save_path}")

加载数据集: test
开始推理，结果将保存到: /home/results/save_finetune_single_gpu_test.out.pkl
推理完成，结果已保存到: /home/results/save_finetune_single_gpu_test.out.pkl


### Step 6: 推理结果读取与R²评估

In [15]:
import pickle
import numpy as np
from sklearn.metrics import r2_score


def _as_flat_array(value):
    """Return ``value`` as a 1-D NumPy array.

    Accepts either a tensor-like object (anything exposing ``.cpu()`` and/or
    ``.numpy()``) or something NumPy can already convert, so the cell works
    whether the pickle holds framework tensors or plain arrays.
    """
    if hasattr(value, "cpu"):
        value = value.cpu()
    if hasattr(value, "numpy"):
        value = value.numpy()
    return np.asarray(value).reshape(-1)


# Read back the file the inference cell wrote (`save_path`) rather than a
# hard-coded location, so the score always belongs to this run.
# pickle.load executes arbitrary code from the file — only load files produced
# by this notebook.
with open(save_path, "rb") as f:
    results = pickle.load(f)  # list with one logging-output dict per batch

if not results:
    raise ValueError(f"no prediction batches found in {save_path}")

# Concatenate the per-batch predictions and targets into flat arrays.
all_preds = np.concatenate([_as_flat_array(batch["predict"]) for batch in results])
all_targets = np.concatenate([_as_flat_array(batch["target"]) for batch in results])

r2 = r2_score(all_targets, all_preds)
print(f"R² Score: {r2:.4f}")

R² Score: 0.7729
