In [1]:
# =======================================================
# |             !!! Environment setup !!!               |
# =======================================================
import os
# Point the HF_ENDPOINT environment variable at the hf-mirror.com mirror.
# Presumably this has to happen before the Hugging Face libraries are imported
# further down, so keep it at the very top — TODO confirm.
os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'
import sys
# Add the parent directory to the import path so the `src` package imported
# below can be resolved (assumes the kernel's working directory sits one level
# below the project root).
sys.path.append('..')
# =======================================================

import torch
import time
import pandas as pd
from tqdm.auto import tqdm

# Core modules written for this project
from src.models.model_loader import load_main_llm
from src.utils.data_loader import load_agnews_dataset # <--- modification point
from src.pipeline import AhpPipeline
from src.utils.metrics import calculate_accuracy

# Select "cuda" when PyTorch reports an available GPU, otherwise "cpu",
# and print the choice.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"当前使用的设备: {device}")

当前使用的设备: cuda


In [2]:
# Fixed parameters of the experiment plan.
DATASET_NAME = "AG News"  # <--- modification point
NUM_SAMPLES_TO_TEST = 100  # upper bound on the number of test rows evaluated
M_CANDIDATES, K_PRUNED = 10, 3  # handed to the pipeline as m_val / k_val

# Pruning rules compared against each other in this notebook.
PRUNER_TYPES = [
    "semantic",
    "perplexity",
    "nli",
    "clustering",
]

In [3]:
# Path the main model is loaded from (machine-specific absolute path).
local_model_path = "/root/autodl-tmp/vicuna-7b-v1.5"
print(f"准备从本地路径加载主模型: {local_model_path}")

# Load the main LLM (Vicuna-7B) and its tokenizer, requesting 4-bit loading.
main_model, main_tokenizer = load_main_llm(model_name=local_model_path, use_4bit=True)

# Load the AG News test split.  <--- modification point
agnews_test_dataset = load_agnews_dataset(split='test')

# Convert to a pandas DataFrame for convenience and keep at most the first
# NUM_SAMPLES_TO_TEST rows.
agnews_test_df = agnews_test_dataset.to_pandas().head(NUM_SAMPLES_TO_TEST)
# Report the number of rows actually kept: head() returns fewer rows than
# requested when the split is shorter than NUM_SAMPLES_TO_TEST.
print(f"\n成功加载AG News测试集的前 {len(agnews_test_df)} 条数据。")
agnews_test_df.head()

`torch_dtype` is deprecated! Use `dtype` instead!
The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


准备从本地路径加载主模型: /root/autodl-tmp/vicuna-7b-v1.5
正在加载主模型: /root/autodl-tmp/vicuna-7b-v1.5...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


主模型加载成功。
正在加载AG News数据集 (test split)...


README.md: 0.00B [00:00, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/18.6M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/1.23M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/120000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/7600 [00:00<?, ? examples/s]

Map:   0%|          | 0/7600 [00:00<?, ? examples/s]

AG News数据集加载成功。

成功加载AG News测试集的前 100 条数据。


Unnamed: 0,text,label,label_text
0,Fears for T N pension after talks Unions repre...,2,Business
1,The Race is On: Second Private Team Sets Launc...,3,Sci/Tech
2,Ky. Company Wins Grant to Study Peptides (AP) ...,3,Sci/Tech
3,Prediction Unit Helps Forecast Wildfires (AP) ...,3,Sci/Tech
4,Calif. Aims to Limit Farm-Related Smog (AP) AP...,3,Sci/Tech


In [4]:
# Experiment 1 on AG News: run the AHP pipeline once per pruning rule and
# record task accuracy and average per-sample latency for each rule.
# `results` is rebuilt from scratch, so re-running this cell is idempotent.
#
# NOTE(review): in the recorded run all four pruners scored exactly 0.5400.
# Worth confirming that the M candidates actually differ from one another
# (the model loader warns that 'temperature'/'top_p' may be ignored);
# otherwise the pruning rule cannot influence the prediction.
results = []

for pruner_name in PRUNER_TYPES:
    print(f"\n{'='*20} 正在测试剪枝器: {pruner_name.upper()} {'='*20}")

    # Build a pipeline for this pruning rule; task='ag_news' selects the
    # AG News task.  <--- key modification point
    pipeline = AhpPipeline(
        main_model=main_model,
        main_tokenizer=main_tokenizer,
        pruner_name=pruner_name,
        k_val=K_PRUNED,
        m_val=M_CANDIDATES,
        task='ag_news'
    )

    predictions = []
    # perf_counter() is monotonic, so the measured interval cannot be skewed
    # by system clock adjustments the way time.time() can.
    start_time = time.perf_counter()

    # AG News stores the input in the 'text' column; iterate it directly
    # instead of materialising a full row per sample with iterrows().
    for sentence in tqdm(agnews_test_df['text'], total=len(agnews_test_df), desc=f"Processing with {pruner_name}"):
        predictions.append(pipeline.predict_single(sentence))

    total_time = time.perf_counter() - start_time

    # Average over the samples that were actually processed. The DataFrame
    # can hold fewer rows than NUM_SAMPLES_TO_TEST, and an empty one must not
    # raise ZeroDivisionError.
    num_processed = len(predictions)
    avg_time_per_sample = total_time / num_processed if num_processed else float('nan')

    # Accuracy is computed against the 'label_text' column.
    accuracy = calculate_accuracy(agnews_test_df['label_text'].tolist(), predictions)

    # One record per pruning rule; the keys become the result table's columns.
    results.append({
        "筛选规则 (Pruning Rule)": pruner_name,
        "任务准确率 (Task Accuracy)": accuracy,
        "计算开销 (s/sample)": avg_time_per_sample
    })

    print(f"剪枝器 '{pruner_name}' 完成测试。")
    print(f" -> 准确率: {accuracy:.4f}")
    print(f" -> 平均耗时: {avg_time_per_sample:.4f} 秒/样本")


正在加载剪枝模型: sentence-transformers/all-mpnet-base-v2...
剪枝模型加载成功。


Processing with semantic:   0%|          | 0/100 [00:00<?, ?it/s]

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


剪枝器 'semantic' 完成测试。
 -> 准确率: 0.5400
 -> 平均耗时: 4.1616 秒/样本

正在加载主模型: gpt2...
主模型加载成功。


Processing with perplexity:   0%|          | 0/100 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

`loss_type=None` was set in the config but it is unrecognized. Using the default loss: `ForCausalLMLoss`.


正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

剪枝器 'perplexity' 完成测试。
 -> 准确率: 0.5400
 -> 平均耗时: 4.2483 秒/样本

正在加载NLI模型: roberta-large-mnli...
NLI模型加载成功。


Processing with nli:   0%|          | 0/100 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

剪枝器 'nli' 完成测试。
 -> 准确率: 0.5400
 -> 平均耗时: 4.2584 秒/样本

正在加载剪枝模型: sentence-transformers/all-mpnet-base-v2...
剪枝模型加载成功。


Processing with clustering:   0%|          | 0/100 [00:00<?, ?it/s]

剪枝器 'clustering' 完成测试。
 -> 准确率: 0.5400
 -> 平均耗时: 4.1565 秒/样本


In [5]:
# Build a table with one row per pruning rule from the collected records.
results_df_agnews = pd.DataFrame(results)

# Report header: title, heavy rule, run configuration, light rule.
print(
    "\n实验一 (AG News)：最优筛选规则确定 - 结果汇总",
    "=" * 60,
    f"数据集: {DATASET_NAME}, 测试样本数: {NUM_SAMPLES_TO_TEST}, M={M_CANDIDATES}, K={K_PRUNED}",
    "-" * 60,
    sep="\n",
)

# Rank the rules from highest to lowest accuracy and print the table as
# plain text without the row index.
results_df_sorted = results_df_agnews.sort_values(
    by="任务准确率 (Task Accuracy)",
    ascending=False,
)
print(results_df_sorted.to_string(index=False))


实验一 (AG News)：最优筛选规则确定 - 结果汇总
数据集: AG News, 测试样本数: 100, M=10, K=3
------------------------------------------------------------
筛选规则 (Pruning Rule)  任务准确率 (Task Accuracy)  计算开销 (s/sample)
           semantic                   0.54         4.161566
         perplexity                   0.54         4.248329
                nli                   0.54         4.258351
         clustering                   0.54         4.156475


In [6]:
# (Added cell 6) - save the AG News experiment results

# Make sure the results directory exists. exist_ok=True makes creation
# race-free and idempotent; the prior isdir() check only decides whether the
# "created" message is printed.
results_dir = '../results'
results_dir_existed = os.path.isdir(results_dir)
os.makedirs(results_dir, exist_ok=True)
if not results_dir_existed:
    print("创建 'results' 文件夹。")

# Output file inside the results directory.
save_path_agnews = f"{results_dir}/experiment_1_agnews_results.csv"

# Write the sorted table as CSV; index=False keeps the DataFrame's row index
# out of the file.
results_df_sorted.to_csv(save_path_agnews, index=False)

print(f"\n实验结果已成功保存到: {save_path_agnews}")

创建 'results' 文件夹。

实验结果已成功保存到: ../results/experiment_1_agnews_results.csv
