In [1]:
import pandas as pd
from datasets import load_dataset
from IPython.display import display
import json

In [2]:
data_files = {"train": "train.jsonl", "dev": "dev.jsonl", "test": "test.jsonl"}
dataset = load_dataset("cambridgeltl/vsr_random", data_files=data_files)

with open('annotations/instances_train2017.json', 'r') as f:
    coco_data = json.load(f)

unique_cats = {category['name'] for category in coco_data['categories']}
print(len(unique_cats))

80


In [3]:
all_relations = []

for subset in ['train', 'dev', 'test']:
    for image in dataset[subset]:
        if image['relation'] not in all_relations:
            all_relations.append(image['relation'])

pd.DataFrame(all_relations, columns=['Relation']).to_csv('relation_list.csv')
len(all_relations)

66

In [4]:
true_images = []
true_image_relations = []
false_images = []
false_image_relations = []

for subset in ['train', 'dev', 'test']:
    for image in dataset[subset]:
        if image['label'] == 1:
            true_images.append(image)
            if image['relation'] not in true_image_relations:
                true_image_relations.append(image['relation'])
        else:
            false_images.append(image)
            if image['relation'] not in false_image_relations:
                false_image_relations.append(image['relation'])

print(len(true_images))
print(len(false_images))

5621
5351


In [26]:
joint_relations = set(true_image_relations).intersection(set(false_image_relations))
len(joint_relations)

In [22]:
def get_object_pairs(image_list, relation):

    object_pairs = []
    captions = []
    image_links = []

    single_word_cats = {cat for cat in unique_cats if len(cat.split()) == 1}
    multi_word_cats = {cat for cat in unique_cats if len(cat.split()) == 2}

    for i in image_list:
        if i['relation'] == relation:
            caption_words = i['caption'].rstrip('.').split()
            matching_words = []
            j = 0
            while j < len(caption_words):
                if j < len(caption_words) - 1 and ' '.join(caption_words[j:j+2]) in multi_word_cats:
                    matching_words.append(' '.join(caption_words[j:j+2]))
                    j += 2  # Skip the next word
                elif caption_words[j] in single_word_cats:
                    matching_words.append(caption_words[j])
                    j += 1  # Move to the next word
                else:
                    j += 1  # Move to the next word if no match is found
            object_pairs.append(tuple(matching_words))
            captions.append(i['caption'])
            image_links.append(i['image_link'])
            
    return object_pairs, captions, image_links

In [23]:
def make_clickable(val):
    return '<a href="{}">{}</a>'.format(val, val)

In [30]:
def generate_relation_df(true_images, false_images, relation):
    true_pairs = get_object_pairs(true_images, relation)
    false_pairs = get_object_pairs(false_images, relation)

    false_matches = []
    true_matches = []

    for false_index, false_pair in enumerate(false_pairs[0]):
        for true_index, true_pair in enumerate(true_pairs[0]):
            if false_pair == true_pair:
                false_match = (false_pair, false_pairs[1][false_index], false_pairs[2][false_index])
                true_match = (true_pair, true_pairs[1][true_index], true_pairs[2][true_index])
                false_matches.append(false_match)
                true_matches.append(true_match)

    df = pd.DataFrame({
        'Object Pair': [i[0] for i in false_matches],
        'Input Caption': [i[1].replace(relation, 'not ' + relation) for i in false_matches],
        'Input Image': [i[2] for i in false_matches],
        'Edit Instruction': [f'Move the {i[0][0]} {relation} the {i[0][1]}.' for i in false_matches],
        'Output Caption': [i[1] for i in true_matches],
        'Output Image': [i[2] for i in true_matches]
    })
    
    return df

In [31]:
relation = 'right of'

df = generate_relation_df(true_images, false_images, relation)
df.style.format({'Input Image': make_clickable, 'Output Image': make_clickable})

Unnamed: 0,Object Pair,Input Caption,Input Image,Edit Instruction,Output Caption,Output Image
0,"('person', 'dining table')",The person is not right of the dining table.,http://images.cocodataset.org/train2017/000000065806.jpg,Move the person right of the dining table.,The person is right of the dining table.,http://images.cocodataset.org/train2017/000000502440.jpg
1,"('person', 'dining table')",The person is not right of the dining table.,http://images.cocodataset.org/train2017/000000065806.jpg,Move the person right of the dining table.,The person is right of the dining table.,http://images.cocodataset.org/train2017/000000199962.jpg
2,"('cat', 'umbrella')",The cat is not right of the umbrella.,http://images.cocodataset.org/train2017/000000541158.jpg,Move the cat right of the umbrella.,The cat is right of the umbrella.,http://images.cocodataset.org/train2017/000000038837.jpg
3,"('teddy bear', 'suitcase')",The teddy bear is not right of the suitcase.,http://images.cocodataset.org/train2017/000000336668.jpg,Move the teddy bear right of the suitcase.,The teddy bear is right of the suitcase.,http://images.cocodataset.org/train2017/000000248168.jpg
4,"('bench', 'bed')",The bench is not right of the bed.,http://images.cocodataset.org/train2017/000000017921.jpg,Move the bench right of the bed.,The bench is right of the bed.,http://images.cocodataset.org/train2017/000000262118.jpg
5,"('backpack', 'cat')",The backpack is not right of the cat.,http://images.cocodataset.org/train2017/000000506586.jpg,Move the backpack right of the cat.,The backpack is right of the cat.,http://images.cocodataset.org/train2017/000000005577.jpg
6,"('cow', 'bus')",The cow is not right of the bus.,http://images.cocodataset.org/train2017/000000162355.jpg,Move the cow right of the bus.,The cow is right of the bus.,http://images.cocodataset.org/train2017/000000084499.jpg
7,"('handbag', 'cat')",The handbag is not right of the cat.,http://images.cocodataset.org/train2017/000000470862.jpg,Move the handbag right of the cat.,The handbag is right of the cat.,http://images.cocodataset.org/train2017/000000014766.jpg
8,"('handbag', 'bed')",The handbag is not right of the bed.,http://images.cocodataset.org/train2017/000000084324.jpg,Move the handbag right of the bed.,The handbag is right of the bed.,http://images.cocodataset.org/train2017/000000069214.jpg
9,"('refrigerator', 'person')",The refrigerator is not right of the person.,http://images.cocodataset.org/train2017/000000442356.jpg,Move the refrigerator right of the person.,The refrigerator is right of the person.,http://images.cocodataset.org/train2017/000000574769.jpg


In [34]:
df_list = []
for relation in joint_relations:
    df = generate_relation_df(true_images, false_images, relation)
    df_list.append(df)
    
all_relations_df = pd.concat(df_list)


In [35]:
len(all_relations_df)

12898

In [36]:
all_relations_df.sample(10).style.format({'Input Image': make_clickable, 'Output Image': make_clickable})

Unnamed: 0,Object Pair,Input Caption,Input Image,Edit Instruction,Output Caption,Output Image
76,"('car', 'parking meter')",The car is not facing away from the parking meter.,http://images.cocodataset.org/train2017/000000447849.jpg,Move the car facing away from the parking meter.,The car is facing away from the parking meter.,http://images.cocodataset.org/train2017/000000169200.jpg
97,"('person', 'giraffe')",The person is not beneath the giraffe.,http://images.cocodataset.org/train2017/000000401123.jpg,Move the person beneath the giraffe.,The person is beneath the giraffe.,http://images.cocodataset.org/train2017/000000388074.jpg
627,"('person', 'cat')",The person is not touching the cat.,http://images.cocodataset.org/train2017/000000076619.jpg,Move the person touching the cat.,The person is touching the cat.,http://images.cocodataset.org/train2017/000000044719.jpg
144,"('person', 'banana')",The person is not touching the banana.,http://images.cocodataset.org/train2017/000000411327.jpg,Move the person touching the banana.,The person is touching the banana.,http://images.cocodataset.org/train2017/000000347121.jpg
192,"('pizza', 'person')",The pizza is not at the right side of the person.,http://images.cocodataset.org/train2017/000000163186.jpg,Move the pizza at the right side of the person.,The pizza is at the right side of the person.,http://images.cocodataset.org/val2017/000000248400.jpg
34,"('refrigerator', 'person')",The refrigerator is not behind the person.,http://images.cocodataset.org/train2017/000000546952.jpg,Move the refrigerator behind the person.,The refrigerator is behind the person.,http://images.cocodataset.org/train2017/000000041710.jpg
635,"('cat', 'suitcase')",The cat is not on top of the suitcase.,http://images.cocodataset.org/train2017/000000111171.jpg,Move the cat on top of the suitcase.,The cat is on top of the suitcase.,http://images.cocodataset.org/val2017/000000432468.jpg
396,"('motorcycle', 'truck')",The motorcycle is not in front of the truck.,http://images.cocodataset.org/val2017/000000426372.jpg,Move the motorcycle in front of the truck.,The motorcycle is in front of the truck.,http://images.cocodataset.org/train2017/000000387136.jpg
321,"('cat', 'suitcase')",The cat is not in the suitcase.,http://images.cocodataset.org/train2017/000000502604.jpg,Move the cat in the suitcase.,The cat is in the suitcase.,http://images.cocodataset.org/train2017/000000005142.jpg
223,"('motorcycle', 'bus')",The motorcycle is not behind the bus.,http://images.cocodataset.org/train2017/000000370543.jpg,Move the motorcycle behind the bus.,The motorcycle is behind the bus.,http://images.cocodataset.org/train2017/000000106722.jpg


In [11]:
# Random collection of relations for any given image (not antagonistic)

# [i for i in dataset['train'] if i['image'] == '000000155777.jpg']
# [i for i in dataset['dev'] if i['image'] == '000000155777.jpg']
# [i for i in dataset['test'] if i['image'] == '000000155777.jpg']

# we could try this form, but may be too noisy: start with 'outside' pair, then move object 1 inside object 2
# however, no reason input image needs to be 'outside', unless you want to be 100% sure the objects are 'not inside'. Otherwise, just needs to be 'not inside'.