Skip to content

Xuan-Van/ExpertHead

Folders and files

Name
Last commit message
Last commit date

Latest commit

 

History

6 Commits
 
 
 
 
 
 
 
 
 
 
 
 

Repository files navigation

环境

conda create -n expert python=3.10 -y
conda activate expert
pip install torch numpy pandas tqdm transformers vllm rank_bm25 accelerate

或者

pip install -r requirements.txt

模型

名称 下载地址
BM25 https://github.com/dorianbrown/rank_bm25
DPR (query) https://huggingface.co/facebook/dpr-question_encoder-single-nq-base
DPR (context) https://huggingface.co/facebook/dpr-ctx_encoder-single-nq-base
Contriever https://huggingface.co/facebook/contriever
MiniLM https://huggingface.co/cross-encoder/ms-marco-MiniLM-L6-v2
GTR https://huggingface.co/sentence-transformers/gtr-t5-base
ColBERTv2 https://huggingface.co/colbert-ir/colbertv2.0
BGE https://huggingface.co/BAAI/bge-base-en-v1.5
Qwen3 https://huggingface.co/Qwen/Qwen3-Embedding-0.6B
Llama-3-8B-Instruct https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct
Mistral-7B-Instruct-v0.3 https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3
Qwen-2.5-7B-Instruct https://huggingface.co/Qwen/Qwen2.5-7B-Instruct
Qwen-2.5-14B-Instruct https://huggingface.co/Qwen/Qwen2.5-14B-Instruct
Qwen-2.5-32B-Instruct https://huggingface.co/Qwen/Qwen2.5-32B-Instruct

数据集

名称 下载地址
HotpotQA https://hotpotqa.github.io/
2WikiMultiHopQA https://github.com/Alab-NII/2wikimultihop
MuSiQue https://github.com/stonybrooknlp/musique

脚本执行

模型响应

#!/bin/bash
# Generate model responses: run src/response.py once per model in MODELS,
# writing data/response/<name>.json for each.
set -euo pipefail

# Dataset and input paths
DATASET_NAME="HotpotQA"
DATA_PATH="data/dataset/HotpotQA.json"
OUTPUT_DIR="data/response"

# Ensure the output directory exists
mkdir -p "${OUTPUT_DIR}"

# Model name -> local checkpoint directory
declare -A MODELS
MODELS["Llama"]="model/Llama-3-8B-Instruct"
MODELS["Mistral"]="model/Mistral-7B-Instruct-v0.3"
MODELS["Qwen"]="model/Qwen2.5-7B-Instruct"

# Run the response script for every model; all expansions are quoted so
# paths containing spaces or glob characters cannot be word-split.
for name in "${!MODELS[@]}"; do
    MODEL_PATH="${MODELS[$name]}"

    python src/response.py \
        --data_path "${DATA_PATH}" \
        --dataset_name "${DATASET_NAME}" \
        --output_path "${OUTPUT_DIR}/${name}.json" \
        --model_path "${MODEL_PATH}"
done

注意力分析

#!/bin/bash
# Attention analysis: for each model, run src/attention.py twice over its
# response file — once from the question side and once (--response) from
# the response side — writing one CSV per run under result/<name>/.
set -euo pipefail

# Model name -> local checkpoint directory
declare -A MODELS
MODELS["Llama"]="model/Llama-3-8B-Instruct"
MODELS["Mistral"]="model/Mistral-7B-Instruct-v0.3"
MODELS["Qwen"]="model/Qwen2.5-7B-Instruct"

DATA_DIR="data/response"

for name in "${!MODELS[@]}"; do
    MODEL_PATH="${MODELS[$name]}"
    DATA_PATH="${DATA_DIR}/${name}.json"
    OUTPUT_DIR="result/${name}"
    mkdir -p "${OUTPUT_DIR}"

    # Attention measured from the question tokens
    python src/attention.py \
        --model_path "${MODEL_PATH}" \
        --data_path "${DATA_PATH}" \
        --csv_path "${OUTPUT_DIR}/attention_from_question.csv"

    # Attention measured from the response tokens
    python src/attention.py \
        --model_path "${MODEL_PATH}" \
        --data_path "${DATA_PATH}" \
        --csv_path "${OUTPUT_DIR}/attention_from_response.csv" \
        --response
done

评估

#!/bin/bash
# Evaluation: for every dataset, (1) run the attention-head method with each
# model and both head files (question/response), then (2) run each retrieval
# baseline method. Results land under baseline/<dataset>/.
set -euo pipefail

# Model name -> local checkpoint directory
declare -A MODELS
MODELS["Llama"]="model/Llama-3-8B-Instruct"
MODELS["Mistral"]="model/Mistral-7B-Instruct-v0.3"
MODELS["Qwen"]="model/Qwen2.5-7B-Instruct"

# Loop-invariant: root directory holding all retriever/model checkpoints
MODEL_DIR="model/"

for dataset in "HotpotQA" "2WikiMultiHopQA" "MuSiQue"; do
    DATA_PATH="data/dataset/${dataset}.json"

    # Attention-head evaluation: every model x both head selections
    for name in "${!MODELS[@]}"; do
        MODEL_PATH="${MODELS[$name]}"

        for head_type in "question_head.json" "response_head.json"; do
            OUTPUT_DIR="baseline/$dataset/$name"
            mkdir -p "$OUTPUT_DIR"
            HEAD_PATH="result/$name/$head_type"

            python src/test.py \
                --data_path "$DATA_PATH" \
                --dataset_name "$dataset" \
                --model_dir "$MODEL_DIR" \
                --output_path "$OUTPUT_DIR/$head_type" \
                --method "bge" \
                --attention \
                --model_path "$MODEL_PATH" \
                --head_path "$HEAD_PATH" \
                --llm_rerank
        done
    done

    # Retrieval baselines (no attention heads involved)
    METHODS=("bm25" "dpr" "contriever" "minilm" "gtr" "colbert" "bge" "qwen")

    for method in "${METHODS[@]}"; do
        python src/test.py \
            --data_path "$DATA_PATH" \
            --dataset_name "$dataset" \
            --model_dir "$MODEL_DIR" \
            --output_path "baseline/$dataset/${method}.json" \
            --method "$method"
    done
done

层级消融实验

#!/bin/bash
# Layer ablation: for each layer 0..31, write a temporary head file listing
# all 32 heads of that layer as JSON pairs [layer, head], then evaluate it
# with src/test.py. The temp head file is removed at the end.
set -euo pipefail

OUTPUT_DIR="baseline/layer"

mkdir -p "$OUTPUT_DIR"

HEAD_PATH="baseline/head.json"

for layer in {0..31}; do
    OUTPUT_PATH="$OUTPUT_DIR/${layer}.json"

    # Build the JSON array in one grouped redirection (single write,
    # no repeated open/append of the file). Heads 0..30 get a trailing
    # comma; head 31 does not, keeping the JSON valid.
    {
        echo "["
        for head in {0..30}; do
            echo "  [$layer, $head],"
        done
        echo "  [$layer, 31]"
        echo "]"
    } > "$HEAD_PATH"

    python src/test.py \
        --data_path "data/dataset/HotpotQA.json" \
        --dataset_name "HotpotQA" \
        --model_dir "model/" \
        --output_path "$OUTPUT_PATH" \
        --method "bge" \
        --attention \
        --model_path "model/Llama-3-8B-Instruct" \
        --head_path "$HEAD_PATH"
done

rm -f -- "$HEAD_PATH"

阈值敏感性分析

#!/bin/bash
# Threshold sensitivity: evaluate every pre-computed head file under
# result/threshold/ and write one result JSON per threshold setting.
set -euo pipefail

OUTPUT_DIR="baseline/threshold"

# Bug fix: OUTPUT_DIR was never created (every sibling script does
# mkdir -p), so src/test.py would fail writing into a missing directory
# on a fresh checkout.
mkdir -p "$OUTPUT_DIR"

# nullglob: if no head files exist the loop body is skipped instead of
# iterating once over the literal pattern 'result/threshold/*.json'.
shopt -s nullglob

for head_file in "result/threshold"/*.json; do
    filename=$(basename -- "$head_file" .json)

    python src/test.py \
        --data_path "data/dataset/HotpotQA.json" \
        --dataset_name "HotpotQA" \
        --model_dir "model/" \
        --output_path "$OUTPUT_DIR/$filename.json" \
        --method "bge" \
        --attention \
        --model_path "model/Llama-3-8B-Instruct" \
        --head_path "$head_file"
done

About

[ICLR 2026 Poster] Expert Heads: Robust Evidence Identification For Large Language Models

Topics

Resources

Stars

Watchers

Forks

Contributors