### Dataset Prepration

In [2]:
import pandas as pd

In [3]:
# This file has varied commands for each operation
df = pd.read_csv('sentences_iccv2.csv')
operation_command_map = df.groupby('operation')['sentence'].apply(list).to_dict()

In [4]:
# Loading the dataset info and merging it with normalized values
df = pd.read_csv("./IMAD/imad_dense_data.csv")
norm_val = pd.read_csv("imad_dense_data_value_normalized.csv")["value_normalized"].tolist()

df['value_normalized'] = norm_val

In [5]:
# Find the unique images
df["img_name"] = df["img_source"].apply(lambda x: x.split("/")[-1].split(".")[0])
all_images_list = df["img_name"].tolist()
print(len(all_images_list))
print(len(set(all_images_list)))
unique_images = set(all_images_list)
print(len(unique_images))

22131
5000
5000


In [6]:
# Sampling validation and test data
import random

random.seed(42)

val_imgs = random.sample(unique_images, 1000)
print(len(val_imgs))

test_imgs = random.sample(val_imgs, 500)
print(len(test_imgs))

1000
500


since Python 3.9 and will be removed in a subsequent version.
  val_imgs = random.sample(unique_images, 1000)


In [7]:
# Template sentences for converting outputs to text - for data with only one action
single_actions = ["An action is applied to this image. The action and its corresponding value is\n{action}: {value}",
                  "In this image, an action is applied, and its corresponding value is\n{action}: {value}",
                  "An action is implemented in this image, and its corresponding value is\n{action}: {value}",
                  "In this image, an action enacted and its corresponding value is\n{action}: {value}"]

In [8]:
# Template sentences for converting outputs to text - for data with multiple actions
multiple_actions = ["The applied actions and their corresponding values in this image are\n{actions_values}",
                    "In this image, the actions applied and their corresponding value are\n{actions_values}",
                    "The actions are implemented in this image, and their corresponding values are\n{actions_values}",
                    "In this image, the actions enacted and their corresponding value are\n{actions_values}"]

In [9]:
import ast


annot_train = {"annotations": []}
annot_val = {"annotations": []}
annot_test = {"annotations": []}


for index, row in df.iterrows():
    
    img_name = row["img_name"]
    actions = ast.literal_eval(row["action"])
    values = ast.literal_eval(row["value_normalized"])
    # values = ast.literal_eval(row["value"])
    values = [str(round(item, 2)) for item in values]
    inst = row["textv2"]
    if inst[-1] == " ":
        inst = inst[:-1]
    
    if len(actions) == 1:
        inst = random.choice(operation_command_map[actions[0]])
        pred = random.choice(single_actions)
        pred = pred.format(action=actions[0], value=values[0])
    elif len(actions) > 1:
        pred = random.choice(multiple_actions)
        temp_txt = ""
        for act, val in zip(actions, values):
            temp_txt += act + ": " + val + ",\n"
            # temp_txt += act + ",\n"
        temp_txt = temp_txt[:-2]
        pred = pred.format(actions_values=temp_txt)
    
    temp_dict = {}
    temp_dict = {"image_id":img_name, "prediction":pred + " ", "command": inst}
    
    if img_name in test_imgs:
        annot_test["annotations"].append(temp_dict)
    elif img_name in val_imgs:
        annot_val["annotations"].append(temp_dict)
    else:
        annot_train["annotations"].append(temp_dict)

In [10]:
print(len(annot_train["annotations"]))
print(len(annot_test["annotations"]))
print(len(annot_val["annotations"]))
print(len(annot_train["annotations"]) + len(annot_val["annotations"]) + len(annot_test["annotations"]))

17700
2230
2201
22131


In [11]:
import json

with open('./IMAD/filter_cap_train_comp_sen_norm.json', 'w') as f:
    json.dump(annot_train, f)
    
with open('./IMAD/filter_cap_test_comp_sen_norm.json', 'w') as f:
    json.dump(annot_test, f)
    
with open('./IMAD/filter_cap_valid_comp_sen_norm.json', 'w') as f:
    json.dump(annot_val, f)