In [1]:
# ====================================================================
# |      !!! 终极解决方案：“直捣黄龙”之最终极补丁 !!!             |
# ====================================================================
import sys
import os
from unittest.mock import patch
import types
from importlib.machinery import ModuleSpec

# --- 补丁1：注入“全功能”伪模块 ---
def create_full_fake_module(name, attributes_to_add):
    spec = ModuleSpec(name, None)
    module = types.ModuleType(name)
    module.__spec__ = spec
    for attr in attributes_to_add:
        setattr(module, attr, lambda *args, **kwargs: None)
    return module

sys.modules['num2words'] = create_full_fake_module('num2words', ['num2words'])
sys.modules['word2number'] = create_full_fake_module('word2number', ['w2n'])
print(">>> 补丁1生效：已成功注入“全功能”伪模块。")

# --- 补丁2：“直捣黄龙”，直接替换nltk.download函数 ---
def dummy_nltk_download(*args, **kwargs):
    print(">>> 补丁2生效：已成功拦截并跳过 nltk.download() 调用！<<<")
    return True # 返回成功状态

# 使用正确的函数路径进行替换
patcher = patch('nltk.download', dummy_nltk_download)
patcher.start()
print(">>> 补丁2生效：已成功替换 nltk.download 函数。")


# --- 补丁3：手动为NLTK“指路” ---
nltk_data_dir = os.path.expanduser('~/nltk_data')
import nltk
if nltk_data_dir not in nltk.data.path:
    nltk.data.path.append(nltk_data_dir)
    print(f"成功将 '{nltk_data_dir}' 添加到NLTK的搜索路径。")

# --- 补丁4：设置其他环境变量 ---
os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'
os.environ['TOKENIZERS_PARALLELISM'] = 'false' 
sys.path.append('..')
# ====================================================================

import torch
import pandas as pd
from tqdm.auto import tqdm

# --- 正常导入所有模块 ---
# 此时，所有已知的环境问题都已被我们的终极补丁修复
from src.models.model_loader import load_main_llm
from src.utils.data_loader import load_sst2_dataset
from src.defenses import BasePredictor, NoDefense, AhpDefense, SelfDenoiseDefense
from src.attacks import AttackerWrapper
from src.utils.metrics import calculate_accuracy, calculate_asr

print("\n--- 恭喜！环境设置最终完成，所有模块导入成功！---")

>>> 补丁1生效：已成功注入“全功能”伪模块。
>>> 补丁2生效：已成功替换 nltk.download 函数。


  import pkg_resources



--- 恭喜！环境设置最终完成，所有模块导入成功！---


In [2]:
# --- 1. 实验设置 ---
TASK = 'sst2'
DATASET_NAME = 'SST-2'
NUM_SAMPLES_TO_TEST = 50 # 鲁棒性实验较慢，先用少量样本测试
ATTACK_RECIPE = 'deepwordbug' # 'textbugger' 或 'deepwordbug'

In [3]:
# (单元格 2 - 最终修正)

# --- 2. 加载模型和数据 ---
local_model_path = "/root/autodl-tmp/vicuna-7b-v1.5"

# ===================== 终极修正：关闭4-bit量化 =====================
# 我们将使用全精度的float16模型，这将占用更多显存（约13-14GB）
# 但这是让TextAttack正常工作的唯一方法。
main_model, main_tokenizer = load_main_llm(model_name=local_model_path, use_4bit=False)
# =============================================================

dataset = load_sst2_dataset(split='test').select(range(NUM_SAMPLES_TO_TEST))
dataset_df = dataset.to_pandas()

`torch_dtype` is deprecated! Use `dtype` instead!


正在加载主模型: /root/autodl-tmp/vicuna-7b-v1.5...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


主模型加载成功。
正在加载SST-2数据集 (test split)...
SST-2数据集加载成功。


In [4]:
# --- 3. 初始化防御策略和攻击器 ---

# 创建基础预测器，所有防御和攻击都将基于它
base_predictor = BasePredictor(main_model, main_tokenizer, task=TASK)

# 定义要对比的防御策略
defenses = {
    "No Defense (Baseline)": NoDefense(base_predictor),
    "AHP-NLI Defense": AhpDefense(base_predictor, k_val=3, m_val=10),
    "Self-Denoise Defense": SelfDenoiseDefense(base_predictor, num_samples=10)
}

# ===================== 修正开始 =====================
# 初始化攻击器时，应该传入我们创建好的 base_predictor 对象
attacker = AttackerWrapper(base_predictor)
# ===================== 修正结束 =====================

正在加载NLI模型: roberta-large-mnli...
NLI模型加载成功。


In [5]:
# (单元格 4 - 最终修正版)

# --- 4. 生成对抗样本 ---
# 注意：这一步会非常慢！
adversarial_df = attacker.attack(dataset, attack_recipe_name=ATTACK_RECIPE)
print(f"\nTextAttack成功生成了 {len(adversarial_df)} / {NUM_SAMPLES_TO_TEST} 个对抗样本。")

# --- 4.5. 对齐攻击数据 (关键修复) ---
# 创建一个从“原始文本”到“对抗文本”的映射字典
attack_map = pd.Series(adversarial_df.perturbed_text.values, index=adversarial_df.original_text).to_dict()

# 创建一个与原始数据集(50个样本)完全对齐的、完整的对抗样本列表
# 如果一个样本在attack_map中找不到，说明攻击失败或被跳过，我们就使用原始句子本身。
full_perturbed_texts = [attack_map.get(sent, sent) for sent in dataset_df['sentence']]

print(f"已生成与原始数据对齐的完整对抗样本列表，长度为: {len(full_perturbed_texts)}")

textattack: Unknown if model of class <class 'src.attacks.attacks.ClassificationModelForAttack'> compatible with goal function <class 'textattack.goal_functions.classification.untargeted_classification.UntargetedClassification'>.


Attack(
  (search_method): GreedyWordSwapWIR(
    (wir_method):  unk
  )
  (goal_function):  UntargetedClassification
  (transformation):  CompositeTransformation(
    (0): WordSwapNeighboringCharacterSwap(
        (random_one):  True
      )
    (1): WordSwapRandomCharacterSubstitution(
        (random_one):  True
      )
    (2): WordSwapRandomCharacterDeletion(
        (random_one):  True
      )
    (3): WordSwapRandomCharacterInsertion(
        (random_one):  True
      )
    )
  (constraints): 
    (0): LevenshteinEditDistance(
        (max_edit_distance):  30
        (compare_against_original):  True
      )
    (1): RepeatModification
    (2): StopwordModification
  (is_black_box):  True
) 



[Succeeded / Failed / Skipped / Total] 0 / 0 / 10 / 10: 100%|██████████| 10/10 [00:00<00:00, 13.68it/s]

--------------------------------------------- Result 1 ---------------------------------------------

uneasy mishmash of styles and genres .


--------------------------------------------- Result 2 ---------------------------------------------

this film 's relationship to actual tension is the same as what christmas-tree flocking in a spray can is to actual snow : a poor -- if durable -- imitation .


--------------------------------------------- Result 3 ---------------------------------------------

by the end of no such thing the audience , like beatrice , has a watchful affection for the monster .


--------------------------------------------- Result 4 ---------------------------------------------

director rob marshall went out gunning to make a great one .


--------------------------------------------- Result 5 ---------------------------------------------

lathan and diggs have considerable personal charm , and their screen rapport makes the old story seem new .


-----------


  average_perc_words_perturbed = self.perturbed_word_percentages.mean()
  ret = ret.dtype.type(ret / rcount)
  avg_num_queries = self.num_queries.mean()


  0%|          | 0/50 [00:00<?, ?it/s]


TextAttack成功生成了 10 / 50 个对抗样本。
已生成与原始数据对齐的完整对抗样本列表，长度为: 50


In [6]:
# (单元格 5 - 最终修正版)

results = []

for defense_name, defense_method in defenses.items():
    print(f"\n{'='*20} 正在评估防御策略: {defense_name} {'='*20}")

    # a. 评估Clean Accuracy (在原始数据上)
    clean_preds = [defense_method(text) for text in tqdm(dataset_df['sentence'], desc="Clean Eval")]
    clean_accuracy = calculate_accuracy(dataset_df['label_text'], clean_preds)

    # b. 评估Accuracy under Attack (在对齐后的完整对抗样本上)
    # 使用我们新创建的 full_perturbed_texts 列表
    attack_preds = [defense_method(text) for text in tqdm(full_perturbed_texts, desc="Attack Eval")]
    # 真实标签直接使用原始的、对齐的标签即可
    attack_accuracy = calculate_accuracy(dataset_df['label_text'], attack_preds)

    # c. 计算ASR
    if "baseline_clean_preds" not in locals():
        baseline_clean_preds = [defenses["No Defense (Baseline)"](text) for text in tqdm(dataset_df['sentence'], desc="Baseline Eval")]
    
    # 现在所有列表长度都为50，不再有IndexError
    attack_success_rate = calculate_asr(baseline_clean_preds, attack_preds, dataset_df['label_text'].tolist())

    results.append({
        "防御方法 (Defense)": defense_name,
        "原始准确率 (Clean Acc)": clean_accuracy,
        "攻击后准确率 (Attack Acc)": attack_accuracy,
        "攻击成功率 (ASR)": attack_success_rate
    })




Clean Eval:   0%|          | 0/50 [00:00<?, ?it/s]

Attack Eval:   0%|          | 0/50 [00:00<?, ?it/s]

Baseline Eval:   0%|          | 0/50 [00:00<?, ?it/s]




Clean Eval:   0%|          | 0/50 [00:00<?, ?it/s]

Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

Calculating NLI Scores:   0%|          | 0/10 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [11]:
# --- 6. 展示结果 ---
results_df = pd.DataFrame(results)
print("\n实验二：经验鲁棒性评估 - 结果汇总")
print("=" * 70)
print(results_df.to_string(index=False))


实验二：经验鲁棒性评估 - 结果汇总
       防御方法 (Defense)  原始准确率 (Clean Acc)  攻击后准确率 (Attack Acc)  攻击成功率 (ASR)
No Defense (Baseline)               0.54                 0.54     0.000000
      AHP-NLI Defense               0.56                 0.56     0.000000
 Self-Denoise Defense               0.56                 0.52     0.037037


In [8]:
# 保存结果
if not os.path.exists('../results'):
    os.makedirs('../results')
save_path_exp2 = f'../results/experiment_2_robustness_{ATTACK_RECIPE}.csv'
results_df.to_csv(save_path_exp2, index=False)
print(f"\n实验结果已成功保存到: {save_path_exp2}")


实验结果已成功保存到: ../results/experiment_2_robustness_deepwordbug.csv


In [9]:
#下载NLtk

In [10]:
# import subprocess
# import os
# import zipfile

# # =================================================================
# # |          !!! 终极解决方案：启用AutoDL官方网络加速 !!!           |
# # =================================================================

# print("--- 正在启用AutoDL官方学术网络加速... ---")
# # 执行官方教程提供的命令，加载并应用代理设置到当前环境
# result = subprocess.run('bash -c "source /etc/network_turbo && env | grep proxy"', shell=True, capture_output=True, text=True)
# output = result.stdout
# for line in output.splitlines():
#     if '=' in line:
#         var, value = line.split('=', 1)
#         os.environ[var] = value
#         print(f"成功设置环境变量: {var}")

# print("--- 网络加速已启用！开始下载NLTK数据包... ---\n")

# # =================================================================

# # --- NLTK数据将被保存到的路径 ---
# nltk_data_path = os.path.expanduser('~/nltk_data')
# print(f"NLTK数据将被下载到: {nltk_data_path}\n")

# # --- TextAttack所需的核心NLTK包列表 ---
# packages_to_download = [
#     ('averaged_perceptron_tagger', 'taggers'),
#     ('stopwords', 'corpora'),
#     ('omw-1.4', 'corpora'),
#     ('universal_tagset', 'taggers'),
#     ('wordnet', 'corpora'),
#     ('punkt', 'tokenizers')
# ]

# # --- NLTK数据的官方下载源 (不再需要任何代理) ---
# base_url = "https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/"

# # --- 使用wget循环下载并解压 ---
# for package, subdir in packages_to_download:
    
#     zip_url = f"{base_url}{subdir}/{package}.zip"
#     zip_path = os.path.join(nltk_data_path, f"{package}.zip")
#     extract_dir = os.path.join(nltk_data_path, subdir)
#     final_path = os.path.join(extract_dir, package)

#     if os.path.exists(final_path):
#          print(f"'{package}' 已存在，跳过。")
#          continue

#     os.makedirs(extract_dir, exist_ok=True)
    
#     try:
#         # --- 使用wget命令进行下载 (不再需要任何代理或证书参数) ---
#         print(f"--- 正在下载: {package} ---")
#         download_command = f"wget -O {zip_path} {zip_url}"
        
#         exit_code = os.system(download_command)
        
#         if exit_code != 0:
#             raise ConnectionError(f"wget下载失败，退出码: {exit_code}")

#         print(f" -> 正在解压 '{package}'...")
#         with zipfile.ZipFile(zip_path, 'r') as zip_ref:
#             zip_ref.extractall(extract_dir)
        
#         os.remove(zip_path)
#         print(f" -> '{package}' 准备就绪！\n")

#     except Exception as e:
#         print(f"处理 '{package}' 时发生错误: {e}")
#         if os.path.exists(zip_path):
#             os.remove(zip_path)

# print("--- 所有NLTK依赖包已处理完毕 ---")

# # =================================================================
# # |             !!! 实验结束后记得取消加速 !!!                     |
# # =================================================================
# print("\n重要提示：下载任务完成后，为避免影响其他网络连接，")
# print("您可以在终端中运行 'unset http_proxy && unset https_proxy' 来取消加速。")
# # =================================================================

# import os
# import zipfile

# print("--- 开始离线安装NLTK数据包 ---")

# # --- NLTK数据包的目标安装路径 ---
# nltk_data_path = os.path.expanduser('~/nltk_data')
# print(f"NLTK数据将被解压到: {nltk_data_path}\n")

# # --- 定义包名、zip文件名及其对应的目标子目录 ---
# packages_to_unzip = {
#     'averaged_perceptron_tagger': ('averaged_perceptron_tagger.zip', 'taggers'),
#     'stopwords': ('stopwords.zip', 'corpora'),
#     'omw-1.4': ('omw-1.4.zip', 'corpora'),
#     'universal_tagset': ('universal_tagset.zip', 'taggers'),
#     'wordnet': ('wordnet.zip', 'corpora'),
#     'punkt': ('punkt.zip', 'tokenizers')
# }

# # --- 循环检查、解压并清理 ---
# all_successful = True
# for package_name, (zip_filename, subdir) in packages_to_unzip.items():
    
#     # zip文件在我们项目根目录下的路径
#     # '..' 代表 notebooks 文件夹的上一级目录
#     zip_path_in_project = os.path.join('..', zip_filename) 

#     # 最终解压后的文件夹路径
#     extract_dir = os.path.join(nltk_data_path, subdir)
#     final_path = os.path.join(extract_dir, package_name)

#     print(f"处理: {package_name}")

#     # 检查zip文件是否存在
#     if not os.path.exists(zip_path_in_project):
#         print(f" -> 错误: 未在项目根目录找到 '{zip_filename}'。请确保文件已上传。")
#         all_successful = False
#         continue

#     # 如果目标文件夹已存在，则跳过
#     if os.path.exists(final_path):
#          print(f" -> '{package_name}' 已经存在，跳过。")
#          continue
    
#     # 确保目标文件夹存在
#     os.makedirs(extract_dir, exist_ok=True)
    
#     try:
#         # 解压缩
#         print(f" -> 正在将 '{zip_filename}' 解压到 '{extract_dir}'...")
#         with zipfile.ZipFile(zip_path_in_project, 'r') as zip_ref:
#             zip_ref.extractall(extract_dir)
#         print(f" -> '{package_name}' 解压成功！")

#     except Exception as e:
#         print(f" -> 解压时发生错误: {e}")
#         all_successful = False

# if all_successful:
#     print("\n--- 所有NLTK依赖包已成功离线安装！---")
# else:
#     print("\n--- 部分NLTK依赖包安装失败，请检查错误信息。 ---")