In [None]:
import os
import numpy as np
import json
import matplotlib.pyplot as plt
from eval_utils import CropCLIPScore
from tqdm import tqdm
import cv2
from PIL import Image

In [None]:
# Shared scorer instance (class imported from eval_utils); sample_clip_score
# calls its compute_score_wbbox method once per (caption, box) pair.
clip_scorer = CropCLIPScore()

In [None]:
from glob import glob
# Numeric suffixes (1000..1007 inclusive) of the `batch_{i}` directories that
# the load_* helpers scan for annotation JSON files.
candidate_folders = range(1000, 1008)
def find_files(folders, max_num_results=1000):
    """Collect up to ``max_num_results`` ``*.json`` paths from ``folders``.

    Folders are visited in the order given and the matches inside each folder
    are sorted, so the returned list (and therefore which files survive the
    truncation) is reproducible across runs and file systems. ``glob`` on its
    own returns matches in arbitrary order.

    Args:
        folders: Iterable of directory paths to search (non-recursive).
        max_num_results: Maximum number of paths to return.

    Returns:
        List of matching paths, at most ``max_num_results`` long.
    """
    files = []
    for folder in folders:
        files.extend(sorted(glob(f'{folder}/*.json')))
        # Stop as soon as the quota is met; later folders cannot contribute.
        if len(files) >= max_num_results:
            break
    return files[:max_num_results]
def parse_file(file, mode):
    """Read one annotation JSON file and extract per-box text with its box.

    Only annotations whose ``area`` exceeds 32*32 are kept. The same filter is
    applied to the text and to the boxes in every mode, so ``captions[i]``
    always belongs to ``bboxes[i]`` (callers zip the two lists together).

    Args:
        file: Path to a JSON file with ``file_name`` and ``annos`` keys; each
            annotation provides ``area``, ``bbox`` and the text field selected
            by ``mode``.
        mode: ``'caption'`` to read each annotation's ``caption`` field, or
            ``'cate'`` to read its ``category_name`` field.

    Returns:
        Dict with keys ``captions``, ``image_path`` and ``bboxes``.

    Raises:
        ValueError: If ``mode`` is neither ``'caption'`` nor ``'cate'``.
    """
    text_key_by_mode = {'caption': 'caption', 'cate': 'category_name'}
    if mode not in text_key_by_mode:
        raise ValueError(f"unknown mode {mode!r}; expected 'caption' or 'cate'")
    text_key = text_key_by_mode[mode]

    with open(file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Filter once so text and boxes cannot get out of step with each other.
    kept = [item for item in data['annos'] if item['area'] > 32 * 32]

    return {
        'captions': [item[text_key] for item in kept],
        'image_path': data['file_name'],
        'bboxes': [item['bbox'] for item in kept]
    }
def load_qwen_results(max_num_results=1000):
    """Parse caption records from the ``generated_data_512/batch_*`` folders.

    Looks in one ``batch_{i}`` directory per entry of ``candidate_folders``,
    gathers at most ``max_num_results`` JSON files via ``find_files`` and
    returns the ``parse_file(..., 'caption')`` record of each, in order.
    """
    batch_dirs = []
    for batch_id in candidate_folders:
        batch_dirs.append(os.path.join('generated_data_512', f'batch_{batch_id}'))
    json_paths = find_files(batch_dirs, max_num_results)
    return [parse_file(path, 'caption') for path in json_paths]

def load_blip_results(max_num_results=1000):
    """Parse caption records from the ``generated_data_blip/batch_*`` folders.

    Looks in one ``batch_{i}`` directory per entry of ``candidate_folders``,
    gathers at most ``max_num_results`` JSON files via ``find_files`` and
    returns the ``parse_file(..., 'caption')`` record of each, in order.
    """
    batch_dirs = []
    for batch_id in candidate_folders:
        batch_dirs.append(os.path.join('generated_data_blip', f'batch_{batch_id}'))
    json_paths = find_files(batch_dirs, max_num_results)
    return [parse_file(path, 'caption') for path in json_paths]

def load_grounding_dino_results(max_num_results=1000):
    """Parse category-name records from the ``generated_data_512/batch_*`` folders.

    Looks in one ``batch_{i}`` directory per entry of ``candidate_folders``,
    gathers at most ``max_num_results`` JSON files via ``find_files`` and
    returns the ``parse_file(..., 'cate')`` record of each, in order.
    """
    batch_dirs = []
    for batch_id in candidate_folders:
        batch_dirs.append(os.path.join('generated_data_512', f'batch_{batch_id}'))
    json_paths = find_files(batch_dirs, max_num_results)
    return [parse_file(path, 'cate') for path in json_paths]

In [None]:
def sample_clip_score(sample):
    """Score every (caption, box) pair of one sample with the shared scorer.

    Args:
        sample: Dict with ``image_path``, ``captions`` and ``bboxes`` keys
            (the shape returned by ``parse_file``). Each box is unpacked as
            ``(x1, y1, w, h)`` and converted to corner form before scoring.

    Returns:
        List with one ``clip_scorer.compute_score_wbbox`` result per
        (caption, box) pair, in input order.

    Raises:
        FileNotFoundError: If the image at the resolved path cannot be read.
    """
    image_path, captions, bboxes = sample['image_path'], sample['captions'], sample['bboxes']
    # Paths lacking the 'cjiaxin_16T' component are remapped onto that mount.
    # NOTE(review): str.replace rewrites every 'ubuntu' occurrence in the path,
    # not just the first — confirm no path contains it more than once.
    if 'cjiaxin_16T' not in image_path:
        image_path = image_path.replace('ubuntu', 'ubuntu/cjiaxin_16T')

    image = cv2.imread(image_path)
    # cv2.imread signals failure by returning None instead of raising; fail
    # here with the offending path rather than with a TypeError on the slice.
    if image is None:
        raise FileNotFoundError(f'Could not read image: {image_path}')
    image = image[..., ::-1]  # reverse the channel axis (imread yields BGR order)

    scores = []
    for caption, box in zip(captions, bboxes):
        x1, y1, w, h = box
        x2, y2 = x1 + w, y1 + h
        score = clip_scorer.compute_score_wbbox(image, caption, [x1, y1, x2, y2])
        scores.append(score)
    return scores

In [None]:
# Load the parsed annotation records for each source, using each loader's
# default cap of 1000 files.
# NOTE(review): no visible cell consumes these three variables; the scores
# plotted below are read from clip_scores.json — confirm the cell that runs
# sample_clip_score over them was not dropped from the notebook.
gd_res = load_grounding_dino_results()
qwen_res = load_qwen_results()
blip_res = load_blip_results()

In [None]:
import json
# Read the previously saved scores; the cells below index the result by the
# 'qwen', 'blip' and 'gd' keys. The encoding is pinned so the read does not
# depend on the platform's locale default.
with open('clip_scores.json', 'r', encoding='utf-8') as f:
    res = json.load(f)

In [None]:
# Two views of the saved scores for each model key:
#   'macro' — every score from every inner list, flattened into one list;
#   'micro' — the mean of each non-empty inner list (empty lists are skipped
#             so np.mean never sees an empty input).
# Presumably each inner list holds the per-box scores of one image — TODO confirm
# against the code that wrote clip_scores.json.
# NOTE(review): pooled-over-instances is conventionally called "micro" and
# mean-of-group-means "macro"; the keys here follow the notebook's own usage.
clip_res = {
    'macro': {
        # Linear-time flatten; sum(lists, []) copies the accumulator on every step.
        name: [score for per_item in res[name] for score in per_item]
        for name in ('qwen', 'blip', 'gd')
    },
    'micro': {
        name: [np.mean(per_item) for per_item in res[name] if len(per_item) > 0]
        for name in ('qwen', 'blip', 'gd')
    }
}

In [None]:
# Overlaid, density-normalised histograms of the 'macro' scores for the three
# models, saved as a small PDF figure.
scope = 'macro'
plt.figure(figsize=(4, 3))
# Shared styling: 30 bins over [10, 50]; values outside that range are not drawn.
hist_kwargs = {'edgecolor':'black', 'alpha':0.5, 'density':True, 'bins':30, 'range':(10, 50)}
# NOTE(review): labels are passed but this cell never calls plt.legend(), so
# they are not shown in the saved figure — confirm that is intended.
plt.hist(clip_res[scope]['gd'], label='GroundingDINO', **hist_kwargs)
# NOTE(review): the BLIP2 scores are scaled by 0.97 before plotting; the
# reason for this factor is not documented anywhere in the notebook.
plt.hist(
    np.array(clip_res[scope]['blip']) * 0.97,
    label='BLIP2', **hist_kwargs
)
plt.hist(clip_res[scope]['qwen'], label='Qwen', **hist_kwargs)
# plt.title('Crop CLIP Score (Macro)')
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
# savefig does not create missing directories, so make sure 'pdf/' exists.
os.makedirs('pdf', exist_ok=True)
plt.savefig('pdf/macro_clip.pdf', bbox_inches='tight')

In [None]:
# Overlaid, density-normalised histograms of the 'micro' scores for the three
# models, with title and legend, saved as a PDF figure.
scope = 'micro'
# Start a fresh figure explicitly so this plot never draws onto whatever figure
# is still current (e.g. the macro one when the cells run outside the inline
# backend or are merged into a script).
plt.figure()
# Shared styling: 30 bins over [10, 50]; values outside that range are not drawn.
hist_kwargs = {'edgecolor':'black', 'alpha':0.5, 'density':True, 'bins':30, 'range':(10, 50)}
plt.hist(clip_res[scope]['qwen'], label='Qwen', **hist_kwargs)
# NOTE(review): the BLIP2 scores are scaled by 0.97 before plotting; the
# reason for this factor is not documented anywhere in the notebook.
plt.hist(
    np.array(clip_res[scope]['blip']) * 0.97,
    label='BLIP2', **hist_kwargs
)
plt.hist(clip_res[scope]['gd'], label='GroundingDINO', **hist_kwargs)
plt.title('Crop CLIP Score (Micro)')
plt.legend()
# savefig does not create missing directories, so make sure 'pdf/' exists.
os.makedirs('pdf', exist_ok=True)
plt.savefig('pdf/micro_clip.pdf', bbox_inches='tight')