In [2]:
import sys
from nltk.stem import *
import nltk
import json
from pattern.en import singularize
import argparse
import tqdm
import time
import random
# Module-level WordNet lemmatizer instance. None of the definitions visible in
# this file refer to `lemma`.
lemma = nltk.wordnet.WordNetLemmatizer()

def combine_coco_captions(annotation_path):
    """Parse the JSON file at ``annotation_path`` and return its content.

    Despite the name, nothing is combined: the file is read as-is and the
    decoded object is handed back unchanged.
    """
    with open(annotation_path) as handle:
        return json.load(handle)


def combine_coco_instances(annotation_path):
    """Read the object/instance annotation JSON stored at ``annotation_path``.

    The file is decoded with ``json.load`` and the resulting object is
    returned without any merging or post-processing.
    """
    with open(annotation_path) as source:
        parsed = json.load(source)
    return parsed

class CHAIR(object):

    def __init__(self, imids):

        self.imid_to_objects = {imid: [] for imid in imids}
        synonyms = open('/data/dtt/project/LLaVA-main/hallucination/code/annotations/synonyms.txt').readlines()
        synonyms = [s.strip().split(', ') for s in synonyms]
        self.synomyms = {}
        for s_line_ls in synonyms:
            for single_s in s_line_ls:
                self.synomyms[single_s] = s_line_ls[0]
        #read in synonyms
        # synonym_path = ""
        # synonyms = open('data/synonyms.txt').readlines()
        # synonyms = [s.strip().split(', ') for s in synonyms]
        self.mscoco_objects = [] #mscoco objects and *all* synonyms
        self.inverse_synonym_dict = {}
        self.mscoco_objects_total = []
        self.inverse_synonym_dict_total = {}
        synonym_path = ""
        synonyms = open('/data/dtt/project/LLaVA-main/hallucination/code/annotations/synonyms.txt').readlines()
        synonyms = [s.strip().split(', ') for s in synonyms]
        for synonym in synonyms:
            self.mscoco_objects_total.extend(synonym) #["all names"]
            for s in synonym:
                self.inverse_synonym_dict_total[s] = synonym[0]

        #read in synonyms
        synonym_path = "/data/dtt/project/LLaVA-main/hallucination/code/annotations/object_synsets.json"
        with open(synonym_path) as file:
            synonyms = json.load(file)
        for object_key,object_value in synonyms.items():
            self.mscoco_objects.append(object_key)
            self.inverse_synonym_dict[object_key] = object_value.split('.')[0]
            #"black hole": "hole.n.01"

        #Some hard coded rules for implementing CHAIR metrics on MSCOCO
        
        # #common 'double words' in MSCOCO that should be treated as a single word
        # coco_double_words = ['motor bike', 'motor cycle', 'air plane', 'traffic light', 'street light', 'traffic signal', 'stop light', 'fire hydrant', 'stop sign', 'parking meter', 'suit case', 'sports ball', 'baseball bat', 'baseball glove', 'tennis racket', 'wine glass', 'hot dog', 'cell phone', 'mobile phone', 'teddy bear', 'hair drier', 'potted plant', 'bow tie', 'laptop computer', 'stove top oven', 'hot dog', 'teddy bear', 'home plate', 'train track']
        
        # #Hard code some rules for special cases in MSCOCO
        # #qualifiers like 'baby' or 'adult' animal will lead to a false fire for the MSCOCO object 'person'.  'baby bird' --> 'bird'.
        # animal_words = ['bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'animal', 'cub']
        # #qualifiers like 'passenger' vehicle will lead to a false fire for the MSCOCO object 'person'.  'passenger jet' --> 'jet'.
        # vehicle_words = ['jet', 'train']
        
        # #double_word_dict will map double words to the word they should be treated as in our analysis
        
        # self.double_word_dict = {}
        # for double_word in coco_double_words:
        #     self.double_word_dict[double_word] = double_word
        # for animal_word in animal_words:
        #     self.double_word_dict['baby %s' %animal_word] = animal_word
        #     self.double_word_dict['adult %s' %animal_word] = animal_word
        # for vehicle_word in vehicle_words:
        #     self.double_word_dict['passenger %s' %vehicle_word] = vehicle_word
        # self.double_word_dict['bow tie'] = 'tie'
        # self.double_word_dict['toilet seat'] = 'toilet'
        # self.double_word_dict['wine glas'] = 'wine glass'

    def _load_generated_captions_into_evaluator(self, cap_file):
        """Load the generated captions from ``cap_file`` into ``self.caps``.

        Kept as a separate step so that ``imid_to_objects`` does not have to
        be recomputed for every caption file.  The image ids found in the
        file must be exactly the ids this evaluator was created with.
        """
        loaded_caps, loaded_ids = load_generated_captions(cap_file)
        self.caps = loaded_caps

        expected_ids = set(self.imid_to_objects)
        assert loaded_ids == expected_ids

    def caption_to_words(self, caption):
    
        '''
        Input: caption
        Output: MSCOCO words in the caption
        '''
    
        #standard preprocessing
        if caption == None:
            return [],[],[],[]
        words = nltk.word_tokenize(caption.lower())
        words = [singularize(w) for w in words]
        words = list(set(words))
        raw_words = words
        #replace double words
        #i = 0
        # double_words = []
        # idxs = []
        # while i < len(words):
        #    idxs.append(i) 
        #    double_word = ' '.join(words[i:i+2])
        #    if double_word in self.double_word_dict: 
        #        double_words.append(self.double_word_dict[double_word])
        #        i += 2
        #    else:
        #        double_words.append(words[i])
        #        i += 1
        # words = double_words

        
        #toilet seat is not chair (sentences like "the seat of the toilet" will fire for "chair" if we do not include this line)
        #if ('toilet' in words) & ('seat' in words): words = [word for word in words if word != 'seat']
    
        #get synonyms for all words in the caption
        idxs = [i for i in range(len(words))]
        idxs = [idxs[idx] for idx, word in enumerate(words) \
                if word in set(self.mscoco_objects)]
        words = [word for word in words if word in set(self.mscoco_objects)]
        node_words = []
        for word in words:
            node_words.append(self.inverse_synonym_dict[word])
        #return all the MSCOCO objects in the caption
        return words, node_words, idxs,raw_words


    def caption_to_words_total(self,caption):
        if caption == None:
            return [],[],[],[]
        words = nltk.word_tokenize(caption.lower())
        words = [singularize(w) for w in words]
        words = list(set(words))
        raw_words = words

        idxs = [i for i in range(len(words))]
        idxs = [idxs[idx] for idx, word in enumerate(words) \
                if word in set(self.mscoco_objects_total)]
        words = [word for word in words if word in set(self.mscoco_objects_total)]
        node_words = []
        for word in words:
            node_words.append(self.inverse_synonym_dict_total[word])
        return words, node_words, idxs,raw_words
    def get_annotations_from_segments(self,object_json_path):
        """Collect ground-truth object names from the object annotation file.

        The file at ``object_json_path`` is read through
        ``combine_coco_instances`` and indexed as a list of records, each
        with an ``'image_id'`` and an ``'objects'`` list whose entries carry
        a ``'synsets'`` list.  For every image tracked in
        ``imid_to_objects``, the first synset of each object is cut down to
        the text before its first '.', replaced by its canonical name when
        ``self.synomyms`` knows it, and appended to that image's entry.
        Finally every entry of ``imid_to_objects`` is converted to a set.
        """
        records = combine_coco_instances(object_json_path)
        for position in tqdm.tqdm(range(len(records)),colour = 'blue'):
            record = records[position]
            image_id = record['image_id']
            # Looked up for every record, whether or not the image is tracked.
            objects = record['objects']
            if image_id not in self.imid_to_objects:
                continue

            collected = self.imid_to_objects[image_id]
            for entry in objects:
                synsets = entry['synsets']
                if synsets == []:
                    continue
                # Only the first synset is used, e.g. "dog.n.01" -> "dog".
                name = synsets[0].split('.')[0]
                collected.append(self.synomyms.get(name, name))

        # Re-assigning existing keys while iterating does not resize the dict.
        for image_id, names in self.imid_to_objects.items():
            self.imid_to_objects[image_id] = set(names)
    
    def get_annotations_from_captions(self,question_answer_path):
        """Add objects mentioned in the question/answer annotation file.

        The file at ``question_answer_path`` is read through
        ``combine_coco_captions`` and iterated as a sequence of records, each
        with a ``'qas'`` list whose entries carry ``'image_id'`` and
        ``'question'``.  For every tracked image, the object words found in
        the question text by ``caption_to_words_total`` are canonicalised
        through ``self.synomyms`` and added to that image's ground-truth set.
        """
        # Entries start out as lists (see __init__) and only become sets once
        # get_annotations_from_segments has run.  Normalise up front so this
        # method no longer fails on ``list.add`` when called on its own.
        for image_id, names in self.imid_to_objects.items():
            if not isinstance(names, set):
                self.imid_to_objects[image_id] = set(names)

        qa_records = combine_coco_captions(question_answer_path)
        for record in tqdm.tqdm(qa_records, colour='green'):
            for qa in record['qas']:
                image_id = qa['image_id']
                if image_id not in self.imid_to_objects:
                    continue
                _, node_words, _, _ = self.caption_to_words_total(qa['question'])
                gt_objects = self.imid_to_objects[image_id]
                for node_word in node_words:
                    gt_objects.add(self.synomyms.get(node_word, node_word))

    def get_annotations(self):

        '''
        Get annotations from both segmentation and captions.  Need both annotation types for CHAIR metric.
        '''

        self.get_annotations_from_segments("/data/dtt/project/LLaVA-main/hallucination/code/annotations/objects.json") 
        self.get_annotations_from_captions("/data/dtt/project/LLaVA-main/hallucination/code/annotations/new_qa.json") 

    def compute_chair(self, cap_file, verbose=True):
        """Score the generated captions in ``cap_file`` for hallucinated objects.

        Each caption's object words are extracted with
        ``caption_to_words_total``; an object counts as hallucinated when its
        canonical name (via ``self.synomyms``) is not among the ground-truth
        objects stored for that image in ``self.imid_to_objects``.

        Args:
            cap_file: path of the generated-caption JSON file.
            verbose: when true (the default), print every caption and every
                hallucinated object name while scoring.

        Returns:
            A dict with ``'sentences'`` (one record per caption holding the
            caption, its ground-truth and generated objects, the hallucinated
            ``(word, node_word)`` pairs, their indices and per-caption
            metrics) and ``'overall_metrics'``.  ``CHAIRs`` is the fraction
            of captions with at least one hallucinated object, ``CHAIRi`` the
            fraction of mentioned objects that are hallucinated; each is 0.0
            when its denominator is zero.  All other metric fields are
            placeholders fixed at 0.
        """
        self._load_generated_captions_into_evaluator(cap_file)

        imid_to_objects = self.imid_to_objects
        caps = self.caps  # list of {'caption': ..., 'image_id': ...}

        num_caps = 0.
        num_hallucinated_caps = 0.
        hallucinated_word_count = 0.
        coco_word_count = 0.

        output = {'sentences': []}

        for cap_eval in tqdm.tqdm(caps, colour='red'):
            cap = cap_eval['caption']
            imid = cap_eval['image_id']

            # object words of this caption plus their canonical node words
            words, node_words, idxs, raw_words = self.caption_to_words_total(cap)

            gt_objects = imid_to_objects[imid]
            cap_dict = {'image_id': cap_eval['image_id'],
                        'caption': cap,
                        'mscoco_hallucinated_words': [],
                        'mscoco_gt_words': list(gt_objects),
                        'mscoco_generated_words': list(node_words),
                        'hallucination_idxs': [],
                        'words': raw_words
                        }

            cap_dict['metrics'] = {'Bleu_1': 0,
                                   'Bleu_2': 0,
                                   'Bleu_3': 0,
                                   'Bleu_4': 0,
                                   'METEOR': 0,
                                   'CIDEr': 0,
                                   'SPICE': 0,
                                   'ROUGE_L': 0,
                                   'CHAIRs': 0,
                                   'CHAIRi': 0}

            # number of object mentions found in the generated caption
            coco_word_count += len(node_words)
            hallucinated = False
            if verbose:
                print("cap:{}".format(cap))
            for word, node_word, idx in zip(words, node_words, idxs):
                node_word = self.synomyms.get(node_word, node_word)
                if node_word in gt_objects:
                    continue
                if verbose:
                    print("negetive node_word:{}".format(node_word))
                hallucinated_word_count += 1
                cap_dict['mscoco_hallucinated_words'].append((word, node_word))
                cap_dict['hallucination_idxs'].append(idx)
                hallucinated = True

            # count hallucinated caps
            num_caps += 1
            if hallucinated:
                num_hallucinated_caps += 1

            cap_dict['metrics']['CHAIRs'] = int(hallucinated)
            cap_dict['metrics']['CHAIRi'] = 0.
            if len(words) > 0:
                cap_dict['metrics']['CHAIRi'] = len(cap_dict['mscoco_hallucinated_words'])/float(len(words))
            output['sentences'].append(cap_dict)

        # Guard the ratios: an empty caption file, or captions that mention
        # no known object at all, used to raise ZeroDivisionError here.
        chair_s = num_hallucinated_caps / num_caps if num_caps else 0.
        chair_i = hallucinated_word_count / coco_word_count if coco_word_count else 0.

        output['overall_metrics'] = {'Bleu_1': 0,
                                     'Bleu_2': 0,
                                     'Bleu_3': 0,
                                     'Bleu_4': 0,
                                     'METEOR': 0,
                                     'CIDEr': 0,
                                     'SPICE': 0,
                                     'ROUGE_L': 0,
                                     'CHAIRs': chair_s,
                                     'CHAIRi': chair_i}

        return output

def load_generated_captions(cap_file,top_beam=True):
    """Load one generated caption per image from ``cap_file``.

    ``cap_file`` is a JSON object keyed by image file name.  Each value is
    expected to be a dict with the keys ``"top_k_text"`` and ``"beam_text:"``
    (the trailing colon is part of the key), each holding a list of candidate
    captions.  Entries whose value is a list, or whose candidate lists are
    missing (``None``) or empty, are skipped.

    For every remaining entry one candidate is drawn at random from the first
    four top-k texts and one from the first two beam texts; entries with
    fewer candidates than that no longer raise ``IndexError``.

    Args:
        cap_file: path of the JSON file with the generated texts.
        top_beam: when true (the default) the top-k candidate is returned as
            the caption, otherwise the beam candidate.

    Returns:
        ``(caps, imids)`` where ``caps`` is a list of
        ``{'caption': str, 'image_id': int}`` dicts and ``imids`` is the set
        of their image ids.
    """
    with open(cap_file) as file:
        generations = json.load(file)

    caps = []
    for image_name, image_value in tqdm.tqdm(generations.items()):
        if isinstance(image_value, list):
            continue
        top_k_list = image_value["top_k_text"]
        beam_list = image_value['beam_text:']
        if not top_k_list or not beam_list:
            continue

        # Both draws are made regardless of top_beam, as before.
        rs_top_k = random.choice(top_k_list[:4])
        rs_beam = random.choice(beam_list[:2])

        # The id is the key minus its last four characters -- presumably an
        # extension such as ".jpg"; TODO confirm against the data.
        image_id = int(image_name[:-4])
        caps.append({'caption': rs_top_k if top_beam else rs_beam,
                     'image_id': image_id})

    imids = {cap['image_id'] for cap in caps}
    return caps, imids

def save_hallucinated_words(cap_file, cap_dict, output_dir='/data/dtt/project/LLaVA-main/hallucination/code/output'):
    """Write ``cap_dict`` as JSON to ``<output_dir>/hallucinated_words_<name>``.

    Args:
        cap_file: path of the caption file that was evaluated; only its base
            name is used to name the output file.
        cap_dict: JSON-serialisable result, e.g. the dict returned by
            ``CHAIR.compute_chair``.
        output_dir: directory receiving the file; defaults to the project's
            output directory.

    The file is overwritten on every call.  It used to be opened in append
    mode, so a second run left two concatenated JSON documents that
    ``json.load`` cannot parse.
    """
    # Local import: ``os`` is not among the imports at the top of this file.
    import os

    tag = os.path.basename(cap_file)
    target = os.path.join(output_dir, 'hallucinated_words_%s' % tag)
    with open(target, 'w') as f:
        json.dump(cap_dict, f)

def print_metrics(hallucination_cap_dict, quiet=False):
    """Report the overall SPICE, METEOR, CIDEr, CHAIRs and CHAIRi scores.

    The five values are read from ``hallucination_cap_dict['overall_metrics']``,
    scaled by 100 and rendered with one decimal place, tab-separated.  With
    ``quiet`` false a header line and the value line are printed and nothing
    is returned; with ``quiet`` true the value line is returned instead.
    """
    overall = hallucination_cap_dict['overall_metrics']
    columns = ('SPICE', 'METEOR', 'CIDEr', 'CHAIRs', 'CHAIRi')
    percentages = tuple(overall[name] * 100 for name in columns)
    metric_string = "%0.01f\t%0.01f\t%0.01f\t%0.01f\t%0.01f" % percentages

    if quiet:
        return metric_string

    print ("SPICE\tMETEOR\tCIDEr\tCHAIRs\tCHAIRi")
    print (metric_string)
 



KeyboardInterrupt: 

In [None]:

# Driver cell: compute CHAIR metrics for the generations stored in cap_file.
cap_file = "/data/dtt/project/LLaVA-main/hallucination/code/original_details/1000_captions.json"
#cap_file = "C:/Users/hutter_sadan/Desktop/UCAS/blip2/data/VG_BLIP2_500_6.json"
# Only the image-id set is kept from this first load; compute_chair() reads
# cap_file again itself, and because load_generated_captions picks candidates
# with random.choice, the captions it scores are drawn independently.
_, imids = load_generated_captions(cap_file,True)
print(imids) 
evaluator = CHAIR(imids) 
# Fill the per-image ground-truth object sets (objects file, then QA file).
evaluator.get_annotations()
print(len(imids))
cap_dict = evaluator.compute_chair(cap_file)     
print_metrics(cap_dict)
# NOTE: `args` is not defined in the visible cells; pass `cap_file` instead if
# this call is re-enabled.
#save_hallucinated_words(args.cap_file, cap_dict)

100%|██████████| 499/499 [00:00<00:00, 250054.68it/s]


{2365441, 2412553, 2344971, 2357261, 2334734, 2351119, 2349075, 2416667, 2367520, 2316323, 2094, 2332720, 2353201, 2349105, 2379830, 2369602, 2361413, 61513, 2396236, 2365521, 2345047, 2336859, 2361437, 2318429, 2334815, 2355297, 2394217, 2359410, 2392180, 2377845, 2402423, 2336889, 2355322, 2332801, 2326662, 2367625, 2369674, 2347152, 2416784, 2338980, 2343083, 2412716, 2400432, 2379952, 2379960, 2322620, 190, 2400447, 2373826, 2375883, 2347213, 207, 2384079, 2406615, 2330845, 2373855, 2345189, 2394341, 2398439, 2392298, 2398446, 2355445, 2414850, 2359559, 2392331, 2377997, 2318614, 2404631, 2378006, 2398486, 2324762, 2335005, 2416934, 2347306, 2382124, 2349, 2320689, 2345270, 2410806, 2378041, 2332986, 2345277, 2318667, 2388317, 2400606, 2328928, 713057, 2361698, 2363755, 2361709, 2392431, 2369904, 2316658, 2374002, 2326905, 2406783, 4479, 2351487, 2341257, 2386314, 4494, 2400655, 4503, 2367895, 2363805, 2363806, 2341280, 2351526, 2363815, 2320815, 2367925, 2337205, 2345406, 2396607,

100%|[34m██████████[0m| 108077/108077 [00:00<00:00, 1403760.00it/s]
100%|[32m██████████[0m| 396/396 [00:00<00:00, 385548.84it/s]


499


100%|██████████| 499/499 [00:00<00:00, 249043.04it/s]
 34%|[31m███▍      [0m| 171/499 [00:00<00:00, 1709.94it/s]

cap:white linens,
cap:the ostrich is long and black with a white patch of skin
cap:black poles and trees
cap:tray has vegetables and meat on it
cap:stone buildings, a green grassy field, a tower and towering trees
cap:i see a stop light and a street sign near some trees
cap:a pier in the water near the ocean
cap:man is doing a skateboard trick
cap:boy in tie
negetive node_word:tie
negetive node_word:person
cap:Several spectators are on the tennis court
cap:people in the picture are standing on the court watching
cap:people are walking in the road
negetive node_word:person
cap:three men looking at the view
cap:the room has curtains
cap:airplane in the sky, with landing apron
cap:man standing on the motorcycle
cap:The player is playing tennis. His tennis racket is in front of him.
negetive node_word:tennis racket
negetive node_word:person
cap:person is walking in snow
cap:a teen is riding a skateboard
cap:the man is wearing a black shirt
cap:of a man and woman smiling together
cap:four b

100%|[31m██████████[0m| 499/499 [00:00<00:00, 1643.51it/s]


negetive node_word:pizza
cap:in brackets
cap:horses grazing
cap:the black parking meter
cap:A woman is taking a picture of a woman with the baby on her lap
cap:the toothbrushes are red
cap:a room with two beds one with a wooden top
negetive node_word:bed
cap:woman
cap:A tennis player is playing a game
cap:street lights
cap:a laptop on a desk
cap:people are walking along the sidewalk
negetive node_word:person
cap:the person is sitting in a parking lot while standing on a skateboard
negetive node_word:skateboard
cap:the bricks in the ground were bricks
cap:a brown plate of food with broccoli, parsley and cheese
cap:- the horses are pulling a carriage, the people are sitting on the ground
negetive node_word:person
cap:the man is wearing a business suit
negetive node_word:person
cap:a city street with tall buildings
cap:people are wearing t-shirts
cap:a group of people are walking around with horses on the side of the street
cap:there is a building with signs
cap:man kneels down on top of




In [None]:
# Driver cell: same evaluation as the previous cell, pointed at a different
# generation file.
#cap_file = "C:/Users/hutter_sadan/Desktop/UCAS/blip2/data/VG_BLIP2_6_original.json"
cap_file = "C:/Users/hutter_sadan/Desktop/UCAS/blip2/data/VG_BLIP2_500_6.json"
# Only the image-id set is kept from this first load; compute_chair() reads
# cap_file again itself.
_, imids = load_generated_captions(cap_file,True)
print(imids) 
evaluator = CHAIR(imids) 
# NOTE(review): get_annotations() and CHAIR() read fixed /data/... paths even
# though cap_file here is a Windows path -- confirm those files exist on the
# machine running this cell.
evaluator.get_annotations()
print(len(imids))
cap_dict = evaluator.compute_chair(cap_file)     
print_metrics(cap_dict)

100%|██████████| 499/499 [00:00<00:00, 166305.74it/s]


{2365441, 2412553, 2344971, 2357261, 2334734, 2351119, 2349075, 2416667, 2367520, 2316323, 2094, 2332720, 2353201, 2349105, 2379830, 2369602, 2361413, 61513, 2396236, 2365521, 2345047, 2336859, 2361437, 2318429, 2334815, 2355297, 2394217, 2359410, 2392180, 2377845, 2402423, 2336889, 2355322, 2332801, 2326662, 2367625, 2369674, 2347152, 2416784, 2338980, 2343083, 2412716, 2400432, 2379952, 2379960, 2322620, 190, 2400447, 2373826, 2375883, 2347213, 207, 2384079, 2406615, 2330845, 2373855, 2345189, 2394341, 2398439, 2392298, 2398446, 2355445, 2414850, 2359559, 2392331, 2377997, 2318614, 2404631, 2378006, 2398486, 2324762, 2335005, 2416934, 2347306, 2382124, 2349, 2320689, 2345270, 2410806, 2378041, 2332986, 2345277, 2318667, 2388317, 2400606, 2328928, 713057, 2361698, 2363755, 2361709, 2392431, 2369904, 2316658, 2374002, 2326905, 2406783, 4479, 2351487, 2341257, 2386314, 4494, 2400655, 4503, 2367895, 2363805, 2363806, 2341280, 2351526, 2363815, 2320815, 2367925, 2337205, 2345406, 2396607,

100%|[34m██████████[0m| 108077/108077 [00:00<00:00, 1286636.56it/s]
100%|[32m██████████[0m| 396/396 [00:00<00:00, 195359.25it/s]


499


100%|██████████| 499/499 [00:00<00:00, 248186.61it/s]
 31%|[31m███       [0m| 154/499 [00:00<00:00, 1539.59it/s]

cap:a bed in modern living room with a big television
cap:ostriches are eating in the grass near their zebra counterparts
negetive node_word:zebra
cap:tall buildings in a city with a white fence and a blue sky
cap:some indian food on metal tray
cap:the stone tombstone of st michael church
cap:a street corner with a traffic light and street sign
cap:some flags are hung by the water and on a pier
cap:a boy does a trick on his skateboard and doing a trick in the street
cap:a young boy holds up a tie
negetive node_word:person
negetive node_word:tie
cap:about 100 and many people are watching the tennis match
cap:a basketball game with two basketball players in motion
cap:a man is walking by a sidewalk with benches
negetive node_word:bench
negetive node_word:person
cap:men look at a bear
cap:a bed and pillows next to a window
cap:A small plane in the sky with a runway
cap:a man sitting on a dirt bike next to a tree
negetive node_word:bicycle
cap:a tennis player is about to hit a tennis ball 

 62%|[31m██████▏   [0m| 308/499 [00:00<00:00, 1496.02it/s]


negetive node_word:suitcase
negetive node_word:person
cap:traffic signal lights on t-post next to a building
cap:a bus that has been overturned
cap:A man descending a mountain with skis
negetive node_word:skis
cap:a black train parked next to a white picket fence
cap:there is soap, two sinks, a shower, and a toilet in the bathroom
cap:two trucks in an open lot and in front of a building
cap:A large pizza is sitting on top of a BBQ grill
cap:two decker buses on a road in london
negetive node_word:bus
cap:apple in a pile
cap:a wooden train tracks next to a house and a house
cap:a woman wearing a blue shirt and green shorts is on a tennis court swinging a ball and hitting a tennis racket
cap:a bench in the woods
cap:three men on a tennis court playing with tennis rackets
negetive node_word:tennis racket
cap:motorcycles on motorcycles on the road
cap:a group of people standing in one large room
cap:boats are docked in the water in a marina
cap:there is one of a giraffe and several other g

 92%|[31m█████████▏[0m| 458/499 [00:00<00:00, 1175.54it/s]


cap:a small group of ceramic and plastic bulls
cap:an airplane is parked on the tarmac
cap:a fire hydrant next to a house in front of a house
negetive node_word:fire hydrant
cap:a very decorative tall clock in a church
cap:a group of people are all sitting atop a horse.
negetive node_word:horse
negetive node_word:person
cap:vegetable soup and vegetables are cooking indoors in a pan on the stove
cap:the young boy has a teddy bear in his mouth and is looking at it in the medical room
negetive node_word:person
cap:A white horse pulling a carriage
negetive node_word:horse
cap:a sign that says no bottles are allowed on the street in new orleans
negetive node_word:bottle
cap:there is a bed in this room that does not fit in the pictures
cap:People fly kites in the park with a butterfly kite
cap:many bags sit outside storefront
cap:policemen sitting on motorcycles
negetive node_word:person
negetive node_word:motorcycle
cap:white cat sitting on desk
negetive node_word:dining table
cap:a square

100%|[31m██████████[0m| 499/499 [00:00<00:00, 1238.21it/s]


cap:a bedroom with a bed that is made up and a bed
cap:a vase in a metal vase is on the table in the lobby of a business building
cap:ham and eggs on a plate
cap:a frisbee thrown by a woman playing frisbee
cap:A teddy bear wearing a bow tie is laying on a chair surrounded with a blue fabric
negetive node_word:tie
negetive node_word:chair
negetive node_word:tie
negetive node_word:bear
cap:a man drank a water bottle while a dog ate it from his hand
negetive node_word:bottle
cap:a kite with a red octopus flying above a man
cap:in a zoo and in a zoo
cap:many bananas are hanging on a tree
cap:vases of a lot of different kind of flowers are on a table
negetive node_word:dining table
cap:flock of birds atop a beach surrounded by the sea and lighthouse
negetive node_word:bird
cap:A person watching a baseball game
cap:a lot of oranges in a bowl
negetive node_word:orange
cap:there are many suitcases that are not in order on the ground
negetive node_word:suitcase
cap:a knight riding a horse
cap:




100%|██████████| 100/100 [00:00<?, ?it/s]
100%|[31m██████████[0m| 100/100 [00:00<00:00, 1886.78it/s]

gt_objects:{'tv', 'ottoman', 'quilt', 'mirror', 'couch', 'wall', 'pillow', 'lamp', 'room', 'bed', 'curtain'}
cap:i like the white
gt_objects:{'leg', 'wall', 'feather', 'grass', 'head', 'shadow', 'gun_muzzle', 'animal', 'neck', 'eating', 'log', 'nose', 'trunk', 'bird', 'land', 'fur'}
cap:of the ostrich eating
gt_objects:set()
cap:a black fence
gt_objects:{'coconut_milk', 'handle', 'pretzel', 'noodle', 'meat', 'compartment', 'utensil', 'gravy', 'dining table', 'corn', 'metallic_element', 'tortilla', 'vegetable', 'dish', 'bowl', 'food', 'rice', 'part'}
cap:the plate is silver
gt_objects:{'railing', 'gravestone', 'cemetery', 'grass', 'building', 'house', 'roof', 'sky', 'bench', 'post', 'window', 'cross', 'chimney'}
cap:sky
gt_objects:{'tree', 'sidewalk', 'plaque', 'traffic_light', 'cable', 'building', 'car', 'sign', 'street', 'sky', 'traffic light', 'pole', 'tire', 'light', 'cloud'}
cap:i see a stop light and a street sign near some trees
gt_objects:{'water', 'boat', 'fence', 'sign', 'dock


