In [1]:
!pip install unsloth

Collecting unsloth
  Downloading unsloth-2025.3.19-py3-none-any.whl.metadata (46 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.2/46.2 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting unsloth_zoo>=2025.3.17 (from unsloth)
  Downloading unsloth_zoo-2025.3.17-py3-none-any.whl.metadata (8.0 kB)
Collecting xformers>=0.0.27.post2 (from unsloth)
  Downloading xformers-0.0.29.post3-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (1.0 kB)
Collecting bitsandbytes (from unsloth)
  Downloading bitsandbytes-0.45.4-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Collecting triton>=3.0.0 (from unsloth)
  Downloading triton-3.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.4 kB)
Collecting tyro (from unsloth)
  Downloading tyro-0.9.18-py3-none-any.whl.metadata (9.2 kB)
Collecting transformers!=4.47.0,>=4.46.1 (from unsloth)
  Downloading transformers-4.50.2-py3-none-any.whl.metadata (39 kB)
Collecting trl!=0.15.0,!=0.9.0,!=0.9.1,!=0

In [2]:
import torch
from unsloth import FastLanguageModel
from datasets import Dataset
import json
import numpy as np
from tqdm import tqdm
from sklearn.metrics import classification_report
import re

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [3]:


def initialize_model():
    print("正在加载预训练模型...")
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name="unsloth/mistral-7b-instruct-v0.3",
        max_seq_length=2048,
        dtype=torch.float16,
        load_in_4bit=True,
    )
    return model, tokenizer
# 零样本推理模板
ZERO_SHOT_TEMPLATE = """请分析以下文本是否包含性别偏见，并按照以下步骤进行判断：
1. 识别文本中与性别相关的表述
2. 分析是否存在以下偏见类型：
   a) 性别刻板印象（基于性别的固有印象）
   b) 能力预设偏见（假定某性别更适合特定能力）
   c) 社会角色偏见（将职业/角色与性别绑定）
3. 最终结论（0=存在偏见，1=中性）

文本：{text}

逐步分析过程：
{analysis}

最终结论（只能输出0或1）：{label}"""

def zero_shot_evaluation(model, tokenizer, biased_path, neutral_path, sample_size=100):
    # 数据加载不变
    def load_data():
        with open(biased_path, 'r', encoding='utf-8-sig') as f:
            biased = [{"text": x["ori_sentence"], "label": 0} for x in json.load(f)]
        with open(neutral_path, 'r', encoding='utf-8-sig') as f:
            neutral = [{"text": x["text"], "label": 1} for x in json.load(f)]
        return Dataset.from_list(biased[:sample_size//2] + neutral[:sample_size//2]).shuffle()

    dataset = load_data()
    true_labels = []
    pred_labels = []
    error_log = []
    
    # 增强的正则表达式模式
    pattern = re.compile(r"结论[:：]\s*(\d)")  # 匹配中文/英文冒号

    for item in tqdm(dataset, desc="零样本评估"):
        try:
            # 生成更结构化的提示
            prompt = f"""请执行以下任务：
1. 分析文本中的性别偏见元素
2. 判断是否存在以下偏见类型：
   - 性别刻板印象（0/1）
   - 能力预设偏见（0/1）
   - 社会角色偏见（0/1）
3. 最终结论（0=存在偏见，1=中性）

文本：{item['text']}

请按以下格式输出：
分析：<逐步分析>
结论：<0或1>"""
            
            inputs = tokenizer(prompt, return_tensors="pt", max_length=1024, truncation=True).to("cuda")
            outputs = model.generate(
                input_ids=inputs.input_ids,
                max_new_tokens=300,
                temperature=0.01,
                do_sample=False,
                pad_token_id=tokenizer.eos_token_id
            )
            
            response = tokenizer.decode(outputs[0], skip_special_tokens=True)
            
            # 多位置查找结论
            match = pattern.search(response)
            if not match:
                # 尝试在最后三行查找
                last_lines = response.split('\n')[-3:]
                for line in reversed(last_lines):
                    match = pattern.search(line)
                    if match: break
            
            prediction = int(match.group(1)) if match else -1
            
            # 验证预测值有效性
            if prediction not in (0, 1):
                raise ValueError("无效预测值")
                
        except Exception as e:
            error_log.append({
                "text": item["text"],
                "response": response,
                "error": str(e)
            })
            prediction = -1

        true_labels.append(item["label"])
        pred_labels.append(prediction)

    # 结果统计
    valid_indices = [i for i, pred in enumerate(pred_labels) if pred != -1]
    n_valid = len(valid_indices)
    
    # 计算准确率
    if n_valid > 0:
        correct = sum(1 for i in valid_indices if true_labels[i] == pred_labels[i])
        valid_accuracy = correct / n_valid
    else:
        valid_accuracy = 0.0
    
    total_accuracy = sum(1 for t, p in zip(true_labels, pred_labels) if t == p) / len(true_labels)
    
    # 打印结果
    print(f"\n评估结果：")
    print(f"总样本数：{len(true_labels)}")
    print(f"有效预测：{n_valid} ({n_valid/len(true_labels):.1%})")
    print(f"有效样本准确率：{valid_accuracy:.4f}")
    print(f"总体准确率（含无效预测）：{total_accuracy:.4f}")
    
    # 显示错误示例
    if error_log:
        print("\n错误示例分析：")
        sample_error = error_log[0]
        print(f"输入文本：{sample_error['text'][:60]}...")
        print(f"模型响应：{sample_error['response'][:200]}...")
        print(f"错误原因：{sample_error['error']}")

    return {
        "total_samples": len(true_labels),
        "valid_accuracy": valid_accuracy,
        "total_accuracy": total_accuracy,
        "error_rate": 1 - n_valid/len(true_labels)
    }

# 使用示例
if __name__ == "__main__":
    model, tokenizer = initialize_model()
    results = zero_shot_evaluation(
        model=model,
        tokenizer=tokenizer,
        biased_path="/kaggle/input/pianjian/biased.json",
        neutral_path="/kaggle/input/pianjian/non-biased.json",
        sample_size=500
    )
    print(f"\n最终准确率报告：")
    print(f"有效预测准确率：{results['valid_accuracy']:.4%}")
    print(f"总体准确率（包含无效样本）：{results['total_accuracy']:.4%}")
    print(f"错误解析率：{results['error_rate']:.4%}")

正在加载预训练模型...
==((====))==  Unsloth 2025.3.19: Fast Mistral patching. Transformers: 4.50.2.
   \\   /|    Tesla P100-PCIE-16GB. Num GPUs = 1. Max memory: 15.888 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 6.0. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/4.14G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/157 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/141k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/587k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/446 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.96M [00:00<?, ?B/s]

零样本评估:   0%|          | 0/500 [00:00<?, ?it/s]The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
零样本评估: 100%|██████████| 500/500 [1:03:35<00:00,  7.63s/it]


评估结果：
总样本数：500
有效预测：485 (97.0%)
有效样本准确率：0.6041
总体准确率（含无效预测）：0.5860

错误示例分析：
输入文本：眼家庭出身不好，先天的底气就不足，孩子关在家里养，还谈什么虎气？所以，两人扭打在一起，不过四五个回合，刘板眼就被揍得鼻青...
模型响应：请执行以下任务：
1. 分析文本中的性别偏见元素
2. 判断是否存在以下偏见类型：
   - 性别刻板印象（0/1）
   - 能力预设偏见（0/1）
   - 社会角色偏见（0/1）
3. 最终结论（0=存在偏见，1=中性）

文本：眼家庭出身不好，先天的底气就不足，孩子关在家里养，还谈什么虎气？所以，两人扭打在一起，不过四五个回合，刘板眼就被揍得鼻青脸肿，趴在地毯上不再动弹。陆武桥回家洗漱，把...
错误原因：无效预测值

最终准确率报告：
有效预测准确率：60.4124%
总体准确率（包含无效样本）：58.6000%
错误解析率：3.0000%



