In [2]:
import glob
import json
from transformers import AutoProcessor, Blip2ForConditionalGeneration, BitsAndBytesConfig


orignial_prompts=[]
img_paths=[]
# 文件夹路径
folder_path = "./imgs"

# 找到所有 .json 文件
json_files = glob.glob(f"{folder_path}/*.json")

# 遍历读取
for file in json_files:
    with open(file, 'r', encoding='utf-8') as f:
        data = json.load(f)
        orignial_prompts.append(data['prompt'])
        img_paths.append('./imgs/'+data['local_path'].split('/')[-1])

print(len(orignial_prompts))
# best_models/sd_blip2_76_50 img2text
#  RL text2text

300


In [2]:
u_processor = AutoProcessor.from_pretrained("Salesforce/blip2-opt-2.7b")

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


In [3]:
from pathlib import Path
from PIL import Image
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"


# 收集 pixel_values 的 list
pixel_values_list = []

# 遍历每张图
for file in img_paths:
    # 打开图片 & 转成 RGB
    image = Image.open(file).convert('RGB')
    
    # 用 u_processor 处理
    inputs = u_processor(images=image, return_tensors="pt").to(device)
    
    # 获取 pixel_values (通常是 tensor)
    pixel_values = inputs.pixel_values  # 形状一般是 [1, C, H, W]
    
    # 加到 list 里（你要的是 list）
    pixel_values_list.append(pixel_values)

# 最终得到 list[pixel_values]
print(f"总共处理了 {len(pixel_values_list)} 张图片")

总共处理了 300 张图片


In [4]:
from peft import PeftModel,PeftConfig

peft_model_id='../best_models/sd_blip2_76_50/best'
config=PeftConfig.from_pretrained(peft_model_id)
bstmodel=Blip2ForConditionalGeneration.from_pretrained("ybelkada/blip2-opt-2.7b-fp16-sharded", device_map="auto", load_in_8bit=True)
u_model=PeftModel.from_pretrained(bstmodel,peft_model_id)

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

In [5]:
import requests
from PIL import Image
from tqdm import tqdm
import io

def generate_cap(img_vecs,u_model,u_processor)->list:
    u_model.eval()
    generation = []
    for index in tqdm(range(len(img_vecs))):
        pixel_values = img_vecs[index]
        generated_ids = u_model.generate(pixel_values=pixel_values)
        generated_text = u_processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
        generation.append(generated_text)
    
    return generation

In [6]:
test_generated_caption = generate_cap(pixel_values_list,u_model,u_processor)

  0%|          | 0/300 [00:00<?, ?it/s]Expanding inputs for image tokens in BLIP-2 should be done in processing. Please follow instruction here (https://gist.github.com/zucchini-nlp/e9f20b054fa322f84ac9311d9ab67042) to update your BLIP-2 model. Using processors without these attributes in the config is deprecated and will throw an error in v4.50.
100%|██████████| 300/300 [12:01<00:00,  2.41s/it]


In [7]:
print(type(orignial_prompts),type(test_generated_caption))

<class 'list'> <class 'list'>


In [8]:
import pandas as pd

test_compare_df = pd.DataFrame({'generated_caption':test_generated_caption,'reference_prompt':orignial_prompts})
test_compare_df.to_parquet(f'blip2_ft_sd/result/other_generative_model_test.parquet')

In [9]:
compare_promts = pd.read_parquet(f'blip2_ft_sd/result/other_generative_model_test.parquet')
compare_promts

Unnamed: 0,generated_caption,reference_prompt
0,a beautiful fantasy digital painting of a Nors...,a beautiful Norse Warrior based on Hades with ...
1,"a blueprints of new york city, concept art, co...","blueprints for New York City, concept art, blu..."
2,"a spider with eyes of blood, concept art, high...","d d monster, huge spider monster covered in ey..."
3,"concept art of medieval knight armor, 4 k, hig...","different views of medieval knights, beautiful..."
4,lofi underwater bioshock steampunk portrait of...,"lofi underwater steampunk bioshock bikini, oct..."
...,...,...
295,"a beautiful painting of a medieval knight, by ...",a robot wearing medieval armor in the style of...
296,steampunk portrait of a man with a top hat and...,"Lofi Steampunk portrait, Pixar style, by Trist..."
297,a beautiful painting of a dinosaur in a candyl...,"A dinosaur in a candy forest, digital art, tre..."
298,"a beautiful painting of a cute cat smiling, bl...",cute blue striped cat of cheshire from alice i...


In [6]:
import pandas as pd

compare_promts = pd.read_parquet(f'blip2_ft_sd/result/other_generative_model_test.parquet')
compare_promts['img_path']=img_paths
compare_promts.to_parquet(f'blip2_ft_sd/result/other_generative_model_test.parquet')