## Query Generation from `query_templates`

In [4]:
import json
import os
from collections import defaultdict

# Define specific attribute vocabularies
known_colors = {
    "red", "blue", "green", "white", "black", "yellow", "brown",
    "orange", "pink", "purple", "gray"
}

known_shapes = {
    "round", "square", "triangular", "circular", "rectangular"
}

known_materials = {
    "wooden", "metal", "plastic", "glass", "leather", "fabric"
}

known_conditions = {
    "broken", "dirty", "clean", "wet", "dry", "old", "new"
}

wearable_parts = {
    "shirt", "t-shirt", "jacket", "hat", "pants", "shoes", "scarf",
    "dress", "jeans", "sneakers", "boots"
}

def get_position_descriptor(obj, objects):
    obj_type = obj['names'][0]
    same_type = [o for o in objects if o['names'][0] == obj_type]
    if len(same_type) <= 1:
        return ""
    x_center = obj['x'] + obj['w'] / 2
    sorted_centers = sorted([o['x'] + o['w'] / 2 for o in same_type])
    rank = sorted_centers.index(x_center)
    if rank == 0:
        return " on the left"
    elif rank == len(sorted_centers) - 1:
        return " on the right"
    else:
        return " in the center"

def classify_attribute(attr):
    attr = attr.lower()
    if attr in known_colors:
        return "color"
    if attr in known_shapes:
        return "shape"
    if attr in known_materials:
        return "material"
    if attr in known_conditions:
        return "condition"
    return None

def attribute_query(obj_name, attr, attr_type, position_desc):
    if attr_type == "color":
        return f"What color is the {obj_name}{position_desc}?"
    elif attr_type == "shape":
        return f"What is the shape of the {obj_name}{position_desc}?"
    elif attr_type == "material":
        return f"What material is the {obj_name}{position_desc} made of?"
    elif attr_type == "condition":
        return f"What is the condition of the {obj_name}{position_desc}?"
    return None

In [11]:
import os
os.environ["HF_HOME"] = "/scratch/anony_ai/cache/sentence_transformers"

import json
import time
import ijson
import random
from typing import List, Dict
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
from models.mistral import get_correlated_objects, get_alternate_relationships
from tqdm import tqdm

class QueryGenerator:
    def __init__(self, templates_path="query_templates.json"):
        
        with open(templates_path) as f:
            self.templates = json.load(f)

        self.levels = {
            1: self.generate_level_1_queries,
            2: self.generate_level_2_queries,
            3: self.generate_level_3_queries,
            4: self.generate_level_4_queries,
            5: self.generate_level_5_queries,
        }

        print("Loading TF!")
        self.tf_model = SentenceTransformer('all-MiniLM-L6-v2', cache_folder='/scratch/anony_ai/cache/sentence_transformers')
        print("TF loaded!")

    def generate_level_1_queries(self, scene_graphs: List[Dict], num_objects: int = 5) -> List[Dict]:
        
        queries = []
        level_templates = self.templates["level_1"]["templates"]
        
        for sg in tqdm(scene_graphs):
            # Get unique objects (avoid duplicates like multiple "bike" entries)
            unique_objects = list({obj["name"].lower() for obj in sg["objects"]})
            sampled_objects = random.sample(unique_objects, min(num_objects, len(unique_objects)))
            
            for obj in sampled_objects:
                template = random.choice(level_templates)
                query = template.replace("<object>", obj)
                
                queries.append({
                    "image_id": sg.get("url", ""),
                    "question": query,
                    "answer": "Yes",  # Always "Yes" since objects come from scene graph
                    "instruction": self.templates["level_1"]["instruction"],
                    "level": 1
                })
        
        return queries
    
    def generate_level_2_queries(self, scene_graphs: List[Dict], num_objects: int = 5) -> List[Dict]:
        queries = []
        level_templates = self.templates["level_2"]["templates"]
        
        for sg in tqdm(scene_graphs):
            # Get unique objects (avoid duplicates like multiple "bike" entries)
            unique_objects = list({obj["name"].lower() for obj in sg["objects"]})
            unique_objects = random.sample(unique_objects, min(num_objects, len(unique_objects)))
            
            sampled_objects = []
            
            for obj in unique_objects:
                correlated_objs = get_correlated_objects(obj, k=3)
                sampled_objects.extend(correlated_objs)
                time.sleep(0.5)

            sampled_objects_dummy=sampled_objects
            sampled_objects=[]
            
            similar_flag = False
            for corr_obj in sampled_objects_dummy:
                for obj in unique_objects:
                    embeddings = self.tf_model.encode([obj, corr_obj])
                    if cosine_similarity([embeddings[0]], [embeddings[1]])[0][0] > 0.7:
                        similar_flag = True
                        continue
                
                if not similar_flag:
                    sampled_objects.append(corr_obj)
                else:
                    similar_flag = False

            sampled_objects = list(set(sampled_objects))
            sampled_objects = random.sample(sampled_objects, min(num_objects, len(sampled_objects)))
            
            for obj in sampled_objects:
                template = random.choice(level_templates)
                query = template.replace("<object>", obj)
                
                queries.append({
                    "image_id": sg.get("url", ""),
                    "question": query,
                    "answer": "No",  # Also check if the object is present in the image or not
                    "instruction": self.templates["level_1"]["instruction"],
                    "level": 2
                })
        
        return queries

    def generate_level_3_queries(self, scene_graphs: List[Dict], num_objects: int = 5) -> List[Dict]:
        level_templates = self.templates["level_3"]["templates"]

        queries = []
        seen = set()

        for entry in tqdm(scene_graphs):
            image_id = entry.get("url")
        
            # Create mapping from object index to name
            obj_index_to_name = {i: obj["name"] for i, obj in enumerate(entry.get("objects", []))}
            
            for rel in entry.get("relationships", []):
                predicate = rel.get("predicate")
                subject_idx = rel.get("subject")
                object_idx = rel.get("object")
                
                # Get names using the indices
                subject = obj_index_to_name.get(subject_idx)
                obj = obj_index_to_name.get(object_idx)
                
                key = (image_id, subject, obj, predicate)
                if key in seen or subject == obj or not subject or not obj:
                    continue
                
                seen.add(key)

                template = random.choice(level_templates)
                query = template.replace("<object>", obj)
                query = query.replace("<subject>", subject)

                queries.append({
                    "image_id": image_id,
                    "question": query,
                    "answer": predicate,
                    "subject": subject,
                    "object": obj,
                    "level": 3
                })

        return queries

    def generate_level_4_queries(self, scene_graphs: List[Dict], num_relations: int = 5) -> List[Dict]:
        level_templates = self.templates["level_4"]["templates"]
        queries = []
        seen = set()

        for entry in tqdm(scene_graphs):
            image_id = entry.get("url")
            obj_index_to_name = {i: obj["name"] for i, obj in enumerate(entry.get("objects", []))}
            relationships = entry.get("relationships", [])

            # Extract unique relationships for the current scene graph
            unique_relationships = set(rel.get("predicate") for rel in relationships if rel.get("predicate"))
            formatted_relationships = ', '.join(f"'{rel}'" for rel in unique_relationships)

            sampled_relations = random.sample(relationships, min(num_relations, len(relationships)))
            
            for rel in sampled_relations:
                predicate = rel.get("predicate")
                subject_idx = rel.get("subject")
                object_idx = rel.get("object")

                subject = obj_index_to_name.get(subject_idx)
                obj = obj_index_to_name.get(object_idx)
                
                key = (image_id, subject, obj, predicate)
                
                if key in seen or subject == obj or not subject or not obj:
                    continue
                
                seen.add(key)

                # Pass unique relationships specific to the current scene graph
                alternate_relationships = get_alternate_relationships(
                    relationship=rel, object_name=obj, existing_relationships=formatted_relationships, subject_name=subject, k=3
                )
                time.sleep(0.5)

                if len(alternate_relationships) == 0:
                    continue
                
                template = random.choice(level_templates)
                query = template.replace("<object>", obj)
                query = query.replace("<subject>", subject)

                queries.append({
                    "image_id": image_id,
                    "question": query,
                    "answer": predicate,
                    "options": alternate_relationships,
                    "subject": subject,
                    "object": obj,
                    "level": 4
                })
        
        return queries

    def generate_level_5_queries(self, scene_graphs, num_attributes=5):
        
        queries_by_type = defaultdict(list)
        print("Level 5")
        
        for graph in tqdm(scene_graphs):
            
            objects = graph['objects']
            objects = random.sample(objects, min(10, len(objects)))
            for obj in objects:
                if 'names' not in obj or not obj['names']:
                    continue

                obj_name = obj['names'][0].lower()
                attributes = obj.get('attributes', [])
                if not attributes:
                    continue

                attributes = random.sample(attributes, min(3, len(attributes)))

                position_desc = get_position_descriptor(obj, objects)

                for attr in attributes:
                    attr_type = classify_attribute(attr)
                    if not attr_type:
                        continue

                    query = attribute_query(obj_name, attr, attr_type, position_desc)
                    if query:
                        queries_by_type[attr_type].append({
                            "image_id": graph['image_id'],
                            "object": obj_name,
                            "attribute": attr,
                            "query": query,
                            "answer": attr
                        })
                        
        return queries_by_type

    def generate_queries(self, scene_graphs_path: str, output_path: str, level: int = 1):
       
        with open(scene_graphs_path) as f:
            scene_graphs = json.load(f)
        
        if level == 5:
            image_ids = [item["url"] for item in scene_graphs]
            filtered_scene_graphs = []
            with open("./VisualGenome/scene_graphs.json") as f:
                parser = ijson.items(f, "item")
                for graph in parser:
                    if graph["image_id"] in image_ids:
                        filtered_scene_graphs.append(graph)

            queries = self.levels[level](filtered_scene_graphs)
            
        else:              
                
            queries = self.levels[level](scene_graphs)
        
        with open(output_path, 'w') as f:
            json.dump(queries, f, indent=2)
        
        print(f"Generated {len(queries)} Level {level} queries at {output_path}")

In [2]:
qg = QueryGenerator()
qg.generate_queries(
    scene_graphs_path="./assets/final_subset_19.json",
    output_path="./queries/queries_level_1.json",
    level=1
)

Loading TF!
TF loaded!


100%|██████████| 211/211 [00:00<00:00, 50175.65it/s]

Generated 1055 Level 1 queries at ./queries/queries_level_1.json





In [2]:
qg = QueryGenerator()
qg.generate_queries(
    scene_graphs_path="./assets/final_subset_19.json",
    output_path="./queries/queries_level_2.json",
    level=2
)

Loading TF!
TF loaded!


100%|██████████| 211/211 [17:24<00:00,  4.95s/it]

Generated 1051 Level 2 queries at ./queries/queries_level_2.json





In [3]:
qg = QueryGenerator()
qg.generate_queries(
    scene_graphs_path="./assets/final_subset_19.json",
    output_path="./queries/queries_level_3.json",
    level=3
)

Loading TF!
TF loaded!


100%|██████████| 211/211 [00:00<00:00, 27662.24it/s]

Generated 935 Level 3 queries at ./queries/queries_level_3.json





In [4]:
qg = QueryGenerator()
qg.generate_queries(
    scene_graphs_path="./assets/final_subset_19.json",
    output_path="./queries/queries_level_4.json",
    level=4
)

Loading TF!
TF loaded!


100%|██████████| 211/211 [10:09<00:00,  2.89s/it]

Generated 352 Level 4 queries at ./queries/queries_level_4.json





: 

In [12]:
qg = QueryGenerator()
qg.generate_queries(
    scene_graphs_path="./assets/final_subset_19.json",
    output_path="./queries/queries_attr.json",
    level=5
)

Loading TF!
TF loaded!
Level 5


100%|██████████| 211/211 [00:00<00:00, 30459.41it/s]

Generated 4 Level 5 queries at ./queries/queries_attr.json





: 

## Extrinsic Object Hallucination

Generated 93 Level 4 queries at ./queries/queries_level_4.json


## Attributes

In [None]:
import json
import os
from collections import defaultdict

# Define specific attribute vocabularies
known_colors = {
    "red", "blue", "green", "white", "black", "yellow", "brown",
    "orange", "pink", "purple", "gray"
}

known_shapes = {
    "round", "square", "triangular", "circular", "rectangular"
}

known_materials = {
    "wooden", "metal", "plastic", "glass", "leather", "fabric"
}

known_conditions = {
    "broken", "dirty", "clean", "wet", "dry", "old", "new"
}

wearable_parts = {
    "shirt", "t-shirt", "jacket", "hat", "pants", "shoes", "scarf",
    "dress", "jeans", "sneakers", "boots"
}

def get_position_descriptor(obj, objects):
    obj_type = obj['names'][0]
    same_type = [o for o in objects if o['names'][0] == obj_type]
    if len(same_type) <= 1:
        return ""
    x_center = obj['x'] + obj['w'] / 2
    sorted_centers = sorted([o['x'] + o['w'] / 2 for o in same_type])
    rank = sorted_centers.index(x_center)
    if rank == 0:
        return " on the left"
    elif rank == len(sorted_centers) - 1:
        return " on the right"
    else:
        return " in the center"

def classify_attribute(attr):
    attr = attr.lower()
    if attr in known_colors:
        return "color"
    if attr in known_shapes:
        return "shape"
    if attr in known_materials:
        return "material"
    if attr in known_conditions:
        return "condition"
    return None

def attribute_query(obj_name, attr, attr_type, position_desc):
    if attr_type == "color":
        return f"What color is the {obj_name}{position_desc}?"
    elif attr_type == "shape":
        return f"What is the shape of the {obj_name}{position_desc}?"
    elif attr_type == "material":
        return f"What material is the {obj_name}{position_desc} made of?"
    elif attr_type == "condition":
        return f"What is the condition of the {obj_name}{position_desc}?"
    return None

def generate_attribute_hallucination_queries(scene_graphs_path, image_ids, output_path):
    with open(scene_graphs_path, 'r') as f:
        scene_graphs = json.load(f)

    queries_by_type = defaultdict(list)

    for graph in scene_graphs:
        if graph['image_id'] not in image_ids:
            continue

        objects = graph['objects']
        for obj in objects:
            if 'names' not in obj or not obj['names']:
                continue

            obj_name = obj['names'][0].lower()
            attributes = obj.get('attributes', [])
            if not attributes:
                continue

            position_desc = get_position_descriptor(obj, objects)

            for attr in attributes:
                attr_type = classify_attribute(attr)
                if not attr_type:
                    continue

                query = attribute_query(obj_name, attr, attr_type, position_desc)
                if query:
                    queries_by_type[attr_type].append({
                        "image_id": graph['image_id'],
                        "object": obj_name,
                        "attribute": attr,
                        "query": query,
                        "answer": attr
                    })

    with open(output_path, 'w') as f:
        json.dump(queries_by_type, f, indent=2)
    print(f"Saved {sum(len(v) for v in queries_by_type.values())} queries with answers to {output_path}")

# Example usage
image_ids = [1, 3, 7, 8, 10, 18, 19, 20, 21, 22, 23, 24, 25]
scene_graphs_path = "./VisualGenome/scene_graphs.json"
output_path = "attribute_hallucination_queries_with_answers.json"

generate_attribute_hallucination_queries(scene_graphs_path, image_ids, output_path)
