In [1]:
import sys
sys.path.append('..')

import os
os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'

import torch
import time
import pandas as pd
from tqdm.auto import tqdm

# 导入我们自己编写的核心模块
from src.models.model_loader import load_main_llm
from src.utils.data_loader import load_sst2_dataset
from src.pipeline import AhpPipeline
from src.utils.metrics import calculate_accuracy

# 确保PyTorch能使用GPU
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"当前使用的设备: {device}")

当前使用的设备: cuda


In [2]:
# 根据方案设置固定参数
M_CANDIDATES = 10  # 候选生成数量
K_PRUNED = 3       # 快速剪枝后保留数量
DATASET_NAME = "SST-2"
NUM_SAMPLES_TO_TEST = 100 # 为了快速得到结果，我们先在100个样本上测试

# 定义我们要对比的剪枝器列表
PRUNER_TYPES = ['semantic', 'perplexity', 'nli', 'clustering']

In [3]:
# 由于模型被下载到了/root/autodl-tmp/，我们需要使用这个“绝对路径”来加载它
local_model_path = "/root/autodl-tmp/vicuna-7b-v1.5" 

print(f"准备从本地路径加载主模型: {local_model_path}")

# 加载主LLM
# use_4bit=True 选项会使用4-bit量化，极大节省显存
main_model, main_tokenizer = load_main_llm(model_name=local_model_path, use_4bit=True)

# 加载SST-2测试集
# 第一次运行时会自动从Hugging Face Hub下载
sst2_test_dataset = load_sst2_dataset(split='test')

# 为了方便，我们将数据转换为Pandas DataFrame，并只取前N个样本
sst2_test_df = sst2_test_dataset.to_pandas().head(NUM_SAMPLES_TO_TEST)
print(f"\n成功加载SST-2测试集的前 {NUM_SAMPLES_TO_TEST} 条数据。")
sst2_test_df.head()

`torch_dtype` is deprecated! Use `dtype` instead!
The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


准备从本地路径加载主模型: /root/autodl-tmp/vicuna-7b-v1.5
正在加载主模型: /root/autodl-tmp/vicuna-7b-v1.5...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


主模型加载成功。
正在加载SST-2数据集 (test split)...
SST-2数据集加载成功。

成功加载SST-2测试集的前 100 条数据。


Unnamed: 0,sentence,label,idx,label_text
0,uneasy mishmash of styles and genres .,-1,0,positive
1,this film 's relationship to actual tension is...,-1,1,positive
2,"by the end of no such thing the audience , lik...",-1,2,positive
3,director rob marshall went out gunning to make...,-1,3,positive
4,lathan and diggs have considerable personal ch...,-1,4,positive


In [4]:
# # (单元格 4 - 新版调试专用)

# pruner_name_to_debug = 'nli'
# pipeline = AhpPipeline(
#     main_model=main_model,
#     main_tokenizer=main_tokenizer,
#     pruner_name=pruner_name_to_debug,
#     k_val=K_PRUNED,
#     m_val=M_CANDIDATES
# )

# sample_sentence = sst2_test_df.iloc[0]['sentence']
# sample_label = sst2_test_df.iloc[0]['label_text']

# print(f"正在调试样本: '{sample_sentence}'")
# print(f"真实标签: '{sample_label}'")

# # (修改点) 不再传入 sample_label
# prediction = pipeline.predict_single(sample_sentence)

# print(f"\n最终聚合预测结果: '{prediction}'")

In [5]:
# (单元格 4 - 恢复为正式实验代码)

results = []

# 遍历每一种剪枝方法
for pruner_name in PRUNER_TYPES:
    print(f"\n{'='*20} 正在测试剪枝器: {pruner_name.upper()} {'='*20}")

    pipeline = AhpPipeline(
        main_model=main_model,
        main_tokenizer=main_tokenizer,
        pruner_name=pruner_name,
        k_val=K_PRUNED,
        m_val=M_CANDIDATES
    )

    predictions = []
    start_time = time.time()

    # 遍历测试数据集
    for index, row in tqdm(sst2_test_df.iterrows(), total=len(sst2_test_df), desc=f"Processing with {pruner_name}"):
        sentence = row['sentence']

        # (注意) 不再需要传入真实标签
        prediction = pipeline.predict_single(sentence)
        predictions.append(prediction)

    end_time = time.time()

    # 计算指标
    total_time = end_time - start_time
    avg_time_per_sample = total_time / NUM_SAMPLES_TO_TEST

    accuracy = calculate_accuracy(sst2_test_df['label_text'].tolist(), predictions)

    # 保存结果
    results.append({
        "筛选规则 (Pruning Rule)": pruner_name,
        "任务准确率 (Task Accuracy)": accuracy,
        "计算开销 (s/sample)": avg_time_per_sample
    })

    print(f"剪枝器 '{pruner_name}' 完成测试。")
    print(f" -> 准确率: {accuracy:.4f}")
    print(f" -> 平均耗时: {avg_time_per_sample:.4f} 秒/样本")


正在加载剪枝模型: sentence-transformers/all-mpnet-base-v2...
剪枝模型加载成功。


Processing with semantic:   0%|          | 0/100 [00:00<?, ?it/s]

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


剪枝器 'semantic' 完成测试。
 -> 准确率: 0.5300
 -> 平均耗时: 2.6996 秒/样本

正在加载主模型: gpt2...
主模型加载成功。


Processing with perplexity:   0%|          | 0/100 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

`loss_type=None` was set in the config but it is unrecognized. Using the default loss: `ForCausalLMLoss`.


正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的困惑度...


Calculating PPL:   0%|          | 0/10 [00:00<?, ?it/s]

剪枝器 'perplexity' 完成测试。
 -> 准确率: 0.5700
 -> 平均耗时: 2.7481 秒/样本

正在加载NLI模型: roberta-large-mnli...
NLI模型加载成功。


Processing with nli:   0%|          | 0/100 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

正在计算候选句子的NLI蕴含分数...


Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

剪枝器 'nli' 完成测试。
 -> 准确率: 0.5500
 -> 平均耗时: 2.7460 秒/样本

正在加载剪枝模型: sentence-transformers/all-mpnet-base-v2...
剪枝模型加载成功。


Processing with clustering:   0%|          | 0/100 [00:00<?, ?it/s]

剪枝器 'clustering' 完成测试。
 -> 准确率: 0.5600
 -> 平均耗时: 2.6934 秒/样本


In [6]:
# 将结果转换为DataFrame
results_df = pd.DataFrame(results)

print("\n实验一：最优筛选规则确定 - 结果汇总")
print("=" * 60)
print(f"数据集: {DATASET_NAME}, 测试样本数: {NUM_SAMPLES_TO_TEST}, M={M_CANDIDATES}, K={K_PRUNED}")
print("-" * 60)

# 按照准确率降序排序
results_df_sorted = results_df.sort_values(by="任务准确率 (Task Accuracy)", ascending=False)
print(results_df_sorted.to_string(index=False))


实验一：最优筛选规则确定 - 结果汇总
数据集: SST-2, 测试样本数: 100, M=10, K=3
------------------------------------------------------------
筛选规则 (Pruning Rule)  任务准确率 (Task Accuracy)  计算开销 (s/sample)
         perplexity                   0.57         2.748090
         clustering                   0.56         2.693377
                nli                   0.55         2.745959
           semantic                   0.53         2.699591


In [7]:
# (新增单元格 6) - 保存SST-2实验结果

# 确保我们的results目录存在
if not os.path.exists('../results'):
    os.makedirs('../results')
    print("创建 'results' 文件夹。")

# 定义保存路径
save_path_sst2 = '../results/experiment_1_sst2_results.csv'

# 使用 to_csv 方法保存DataFrame
# index=False 表示我们不把DataFrame的行索引写入到文件中
results_df_sorted.to_csv(save_path_sst2, index=False)

print(f"\n实验结果已成功保存到: {save_path_sst2}")


实验结果已成功保存到: ../results/experiment_1_sst2_results.csv
