In [1]:
try:
    import google.colab # type: ignore
    from google.colab import output
    COLAB = True
    %pip install sae-lens transformer-lens
except:
    COLAB = False
    from IPython import get_ipython # type: ignore
    ipython = get_ipython(); assert ipython is not None
    ipython.run_line_magic("load_ext", "autoreload")
    ipython.run_line_magic("autoreload", "2")

# Standard imports
import os
import torch
import tqdm
import plotly.express as px  
import random
from datasets import Dataset, DatasetDict, IterableDataset, load_dataset,load_from_disk
from transformer_lens import HookedTransformer
from typing import Any, Generator, Iterator, Literal, cast
from sae_lens import SAE
from transformers import (
    AutoTokenizer,
    LlavaNextForConditionalGeneration,
    LlavaNextProcessor,
    AutoModelForCausalLM,
)
import numpy as np
# Set the http_proxy / https_proxy environment variables to a proxy listening
# on 127.0.0.1:7890, unconditionally overwriting any existing values.
# NOTE(review): this is machine-specific; on a host without that local proxy,
# later network requests will presumably fail — remove or adjust before sharing.
os.environ["http_proxy"] = "http://127.0.0.1:7890"
os.environ["https_proxy"] = "http://127.0.0.1:7890"
import concurrent.futures
from transformer_lens.HookedLlava import HookedLlava
from sae_lens.activation_visualization import (
    load_llava_model,
    load_sae,
    separate_feature,
    run_model,
    cal_top_cosimilarity,
)

# Model identifier plus the local locations of the model weights and the SAE
# checkpoint. The paths are absolute and specific to the original machine.
model_name = "llava-hf/llava-v1.6-mistral-7b-hf"
model_path = "/data/models/llava-v1.6-mistral-7b-hf"
sae_path = "/data/changye/model/llavasae_obliec100k_SAEV"


In [2]:
# os.environ["CUDA_VISIBLE_DEVICES"] = "6,7"

# Device passed to the model loader, and the device passed to the SAE loader.
device = "cuda:0"
sae_device = "cuda:7"

# Load the processor and the hooked language model; the loader is asked for
# 8 devices and to stop at layer 17.
processor, hook_language_model = load_llava_model(
    model_name,
    model_path,
    device,
    n_devices=8,
    stop_at_layer=17,
)

# Load the sparse autoencoder from its checkpoint directory.
sae = load_sae(sae_path, sae_device)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Quantization check time: 0.00s
Configuration loading time: 0.00s


config.json:   0%|          | 0.00/1.28k [00:00<?, ?B/s]

Model configuration processing time: 1.16s
State dict loading time: 0.02s


tokenizer_config.json:   0%|          | 0.00/2.07k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/41.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/552 [00:00<?, ?B/s]

Tokenizer setup time: 5.00s
Embedding setup time: 65.93s
Move device time: 0.00s
Set up time: 0.00s
Model creation time: 70.94s
State dict processing time: 31.23s
Device moving time: 13.26s
Total loading time: 116.61s


This SAE has non-empty model_from_pretrained_kwargs. 
For optimal performance, load the model like so:
model = HookedSAETransformer.from_pretrained_no_processing(..., **cfg.model_from_pretrained_kwargs)


In [3]:
# 1. Score SAE features using a sampled dataset
## 1.1 Load the sampled dataset
dataset_path = "/data/changye/data/RLAIF-V-Dataset1k"

# Prompt template pieces; {input} and {output} are filled in with str.format.
system_prompt = " "
user_prompt = 'USER: \n<image> {input}'
assistant_prompt = '\nASSISTANT: {output}'
split_token = 'ASSISTANT:'

# Read the dataset previously saved to disk and remember how many rows it has.
eval_dataset = load_from_disk(dataset_path)
total_size = len(eval_dataset)

print(eval_dataset)



Dataset({
    features: ['ds_name', 'image', 'question', 'chosen', 'rejected', 'origin_dataset', 'origin_split', 'idx', 'image_path'],
    num_rows: 1000
})


In [4]:
## 1.2 Build the model inputs

def prepare_inputs(i, formatted_sample, processor):
    """Process one sample and prepare its model inputs.

    The prompt is assembled from the module-level templates ``system_prompt``,
    ``user_prompt`` and ``assistant_prompt`` with an empty assistant output.
    The image is resized to 336x336 and converted to RGBA before being handed
    to ``processor``.

    Args:
        i: Index of the sample to prepare.
        formatted_sample: Either a row-indexable dataset (``formatted_sample[i]``
            yields a mapping with 'question' and 'image') or a plain ``dict``
            of columns (``{'question': [...], 'image': [...]}``).
        processor: Callable invoked as
            ``processor(text=..., images=..., return_tensors='pt')``.

    Returns:
        Whatever ``processor`` returns for this sample.
    """
    if isinstance(formatted_sample, dict):
        # Plain column-major mapping: indexing a column list is cheap.
        prompt = formatted_sample['question'][i]
        image = formatted_sample['image'][i]
    else:
        # Row-first access. On a Hugging Face Dataset, asking for the column
        # first (dataset['image'][i]) materialises the whole column for every
        # call; selecting the row first touches only this one sample.
        row = formatted_sample[i]
        prompt = row['question']
        image = row['image']

    formatted_prompt = f'{system_prompt}{user_prompt.format(input=prompt)}{assistant_prompt.format(output="")}'
    image = image.resize((336, 336)).convert('RGBA')
    return processor(
        text=formatted_prompt,
        images=image,
        return_tensors='pt',
    )

# Pre-process every sample on a thread pool.
# Results are written into a pre-sized list by sample index, so inputs[k]
# always corresponds to eval_dataset[k] no matter which worker finishes first
# (appending in completion order would make the ordering vary between runs).
max_threads = 60  # number of worker threads; tune as needed
inputs = [None] * total_size
with tqdm.tqdm(total=total_size, desc="Processing batches") as pbar:
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_threads) as executor:
        # Submit all tasks up front and remember which sample each future belongs to.
        future_to_index = {
            executor.submit(prepare_inputs, sample_index, eval_dataset, processor): sample_index
            for sample_index in range(total_size)
        }

        # as_completed lets the progress bar advance as soon as any task finishes;
        # future.result() re-raises any exception raised inside the worker.
        for future in concurrent.futures.as_completed(future_to_index):
            inputs[future_to_index[future]] = future.result()
            pbar.update(1)

# print(inputs[0])

Processing batches:   0%|          | 0/1000 [00:00<?, ?it/s]

Processing batches: 100%|██████████| 1000/1000 [32:29<00:00,  1.95s/it] 


In [5]:
# for input in inputs:
# tmp_cache,image_indices, feature_act = run_model(inputs[0], hook_language_model, sae, sae_device,stop_at_layer=17)
# print(tmp_cache.shape)
# print(image_indices.shape)
# print(feature_act.shape)

In [6]:
# Run each prepared sample through the model and SAE, then collect the
# per-sample text-token and image-token results returned by
# cal_top_cosimilarity.
text_token_meta_list, image_token_meta_list = [], []
for sample_inputs in tqdm.tqdm(inputs, desc="Processing cosimilarity"):
    tmp_cache, image_indices, feature_act = run_model(
        sample_inputs, hook_language_model, sae, sae_device, stop_at_layer=17
    )
    # Only element 0 of each output is used (presumably the single batch entry — TODO confirm).
    text_token_list, image_token_list = cal_top_cosimilarity(
        tmp_cache[0], image_indices[0], feature_act[0]
    )
    text_token_meta_list.append(text_token_list)
    image_token_meta_list.append(image_token_list)



Processing cosimilarity: 100%|██████████| 1000/1000 [46:20<00:00,  2.78s/it] 


In [7]:
# Disabled helper, kept for reference: saves a large list as several .pt slices.
# def save_sliced_pt(data_list, output_folder, file_prefix, slice_size):
#     """
#     Split a large list into several smaller files saved in .pt format.
#
#     Args:
#     - data_list: the list to save.
#     - output_folder: path of the output folder.
#     - file_prefix: file-name prefix for the saved files.
#     - slice_size: maximum number of elements per file.
#     """
#     os.makedirs(output_folder, exist_ok=True)  # make sure the folder exists
#
#     num_slices = (len(data_list) + slice_size - 1) // slice_size  # number of slices (ceiling division)
#     for i in range(num_slices):
#         start_idx = i * slice_size
#         end_idx = min((i + 1) * slice_size, len(data_list))
#         slice_data = data_list[start_idx:end_idx]
#
#         # build the file name
#         file_path = os.path.join(output_folder, f"{file_prefix}_part_{i + 1}.pt")
#         torch.save(slice_data, file_path)
#         print(f"Saved slice {i + 1} to {file_path}")

# Flatten the per-sample results into one list of text tokens and one list of
# image tokens: np.concatenate joins the per-sample sequences end to end and
# .tolist() turns the result back into a Python list.
# NOTE(review): elements are presumably the per-token dicts consumed by the
# next cell (keys 'features' and 'logits') — confirm against cal_top_cosimilarity.
flattened_text_list = np.concatenate(text_token_meta_list).tolist()
flattened_image_list = np.concatenate(image_token_meta_list).tolist()

# # Save parameters
# output_folder = "/data/changye/output_folder_path"  # replace with the actual path
# text_prefix = "flattened_text_list"
# image_prefix = "flattened_image_list"
# slice_size = 50000  # number of entries stored per file

# # Save the sliced files
# save_sliced_pt(flattened_text_list, output_folder, text_prefix, slice_size)
# save_sliced_pt(flattened_image_list, output_folder, image_prefix, slice_size)


In [8]:
# Goal: for every SAE feature, rank the tokens it fires on and keep the top_k
# with the highest activation, remembering whether each one is a text token or
# an image token.
#
# Each element of flattened_text_list / flattened_image_list is a token dict
# with keys 'features' and 'logits'; token['features'] maps
# feature_index -> activation value. A feature that is absent from that mapping
# counts as activation 0 on that token and therefore never enters the ranking.
feature_num = 65536  # total number of features
top_k = 30  # number of highest-activation tokens kept per feature
features_top = [[] for _ in range(feature_num)]  # per-feature list of (activation, token, type)

# Collect the candidates, text tokens first and image tokens second. Iterating
# the tqdm wrapper lets tqdm throttle redraws itself instead of forcing a
# refresh on every one of the ~1.2M tokens.
for token_list, token_type, list_name in (
    (flattened_text_list, 'text', 'flattened_text_list'),
    (flattened_image_list, 'image', 'flattened_image_list'),
):
    for token in tqdm.tqdm(token_list, desc=f"Processing {list_name}"):
        for feature_index, activation_value in token['features'].items():
            features_top[feature_index].append((activation_value, token, token_type))

# Sort every feature's candidates by activation, highest first, and keep top_k.
# The sort is stable, so equal activations keep insertion order (text before
# image). Features with no candidates simply stay as empty lists.
for i in tqdm.tqdm(range(feature_num), desc="Processing feature_num"):
    features_top[i] = sorted(
        features_top[i], key=lambda entry: entry[0], reverse=True
    )[:top_k]



Processing flattened_text_list: 100%|██████████| 26395/26395 [01:31<00:00, 289.00it/s] 
Processing flattened_image_list: 100%|██████████| 1176000/1176000 [22:35<00:00, 867.72it/s] 
Processing feature_num: 100%|██████████| 65536/65536 [02:08<00:00, 508.18it/s]


In [9]:
def _mean_pairwise_cosine(text_logits, image_logits):
    """Return the mean cosine similarity over every (text, image) vector pair.

    A pair whose norm product is zero contributes a similarity of 0.
    """
    text_vectors = [np.array(vec) for vec in text_logits]
    image_vectors = [np.array(vec) for vec in image_logits]
    # Convert and take norms once per vector rather than once per pair.
    text_norms = [np.linalg.norm(vec) for vec in text_vectors]
    image_norms = [np.linalg.norm(vec) for vec in image_vectors]

    similarities = []
    for t_vec, t_norm in zip(text_vectors, text_norms):
        for i_vec, i_norm in zip(image_vectors, image_norms):
            denominator = t_norm * i_norm
            if denominator == 0:
                similarities.append(0)
            else:
                similarities.append(np.dot(t_vec, i_vec) / denominator)
    return np.mean(similarities)


text_feature_list = []  # indices of features whose kept tokens are all text
image_feature_list = []  # indices of features whose kept tokens are all image
cosi_feature_list = []  # (feature index, mean cosine similarity) for mixed features

# Iterate the tqdm wrapper instead of calling pbar.update by hand: features
# skipped via `continue` still advance the bar, so it reaches 100%.
for i in tqdm.tqdm(range(feature_num), desc="Processing feature_num"):
    tokens = features_top[i]  # (activation, token, type) entries kept for feature i
    if not tokens:
        continue  # feature never activated on any token

    # Split the entries by token type, preserving their order in `tokens`.
    text_tokens = [(act, tok) for act, tok, kind in tokens if kind == 'text']
    image_tokens = [(act, tok) for act, tok, kind in tokens if kind == 'image']

    if len(text_tokens) == len(tokens):
        text_feature_list.append(i)
    elif len(image_tokens) == len(tokens):
        image_feature_list.append(i)
    elif len(text_tokens) >= 5 and len(image_tokens) >= 5:
        # Use the first 5 entries of each type; these are the 5 strongest as
        # long as features_top[i] is sorted by descending activation.
        text_logits = [tok['logits'] for _, tok in text_tokens[:5]]
        image_logits = [tok['logits'] for _, tok in image_tokens[:5]]
        cosi_feature_list.append((i, _mean_pairwise_cosine(text_logits, image_logits)))
    # Mixed features with fewer than 5 text or fewer than 5 image tokens are
    # not recorded in any of the three lists.


Processing feature_num:  48%|████▊     | 31500/65536 [00:37<00:40, 849.19it/s] 


In [10]:
# Persist the three feature groups as plain-text files, one entry per line.
output_dir = '/data/changye/data/RLAIF-V_interp/RLAIF-V_cosi_weight'
# open(..., 'w') does not create missing parent directories, so make sure the
# target folder exists before writing.
os.makedirs(output_dir, exist_ok=True)

# Save text_feature_list: one feature index per line.
with open(os.path.join(output_dir, 'text_feature_list.txt'), 'w') as f:
    for feature_index in text_feature_list:
        f.write(f"{feature_index}\n")

# Save image_feature_list: one feature index per line.
with open(os.path.join(output_dir, 'image_feature_list.txt'), 'w') as f:
    for feature_index in image_feature_list:
        f.write(f"{feature_index}\n")

# Save cosi_feature_list: "<feature index>,<average cosine similarity>" per line.
with open(os.path.join(output_dir, 'cosi_feature_list.txt'), 'w') as f:
    for feature_index, average_cosine_similarity in cosi_feature_list:
        f.write(f"{feature_index},{average_cosine_similarity}\n")