In [None]:
!pip install datasets
from datasets import load_dataset

In [None]:
# Load the train split of the three Visual Genome configurations used in this notebook.
objects_dataset = load_dataset(path="visual_genome", name="objects_v1.2.0", split='train')
attributes_dataset = load_dataset(path="visual_genome", name="attributes_v1.2.0", split='train')
relationships_dataset = load_dataset(path="visual_genome", name="relationships_v1.2.0", split='train')

In [None]:
from datasets import Dataset

# Module-level accumulator lists; the process_* callbacks below append the
# flattened per-image records to them.
extracted_objects, extracted_attributes, extracted_relationships = [], [], []

# Define the batch processing functions
def process_objects(batch):
    """Flatten ``batch['objects']`` into the global ``extracted_objects`` list.

    ``batch['objects']`` is iterated as a sequence of per-row collections;
    rows that are ``None`` are skipped. Nothing is returned, the function only
    has the side effect of growing ``extracted_objects``.
    """
    extracted_objects.extend(
        record
        for row_records in batch['objects']
        if row_records is not None  # rows without an 'objects' value are ignored
        for record in row_records
    )

def process_attributes(batch):
    """Collect attribute records into the global ``extracted_attributes`` list.

    Every truthy record found in ``batch['attributes']`` is tagged in place
    with the ``image_id`` of the row it belongs to and then appended.
    ``batch['image_id']`` may be a list (one id per row) or a single value
    shared by all rows. Nothing is returned.
    """
    for row, row_records in enumerate(batch['attributes']):
        # Resolve the id of the image this row belongs to.
        ids = batch['image_id']
        owner_id = ids[row] if isinstance(ids, list) else ids
        if not row_records:
            # None or empty: this row has nothing to collect.
            continue
        for record in row_records:
            if not record:
                continue
            record['image_id'] = owner_id
            extracted_attributes.append(record)

def process_relationships(batch):
    """Collect relationship records into the global ``extracted_relationships`` list.

    Mirrors ``process_attributes``: every kept record is tagged in place with
    the ``image_id`` of the row it came from, because the filter functions in
    this notebook read ``rel['image_id']``. When the batch has no
    ``'image_id'`` entry the records are collected untouched.

    Rows that are ``None``/empty and falsy records inside a row are skipped.
    Nothing is returned.
    """
    has_image_ids = 'image_id' in batch
    for row, row_records in enumerate(batch['relationships']):
        if not row_records:
            continue
        if has_image_ids:
            ids = batch['image_id']
            # A batched call passes a list of ids; accept a single value as well.
            current_image_id = ids[row] if isinstance(ids, list) else ids
        for rel in row_records:
            if not rel:
                continue
            if has_image_ids:
                rel['image_id'] = current_image_id
            extracted_relationships.append(rel)

In [None]:
# Empty the accumulator first so that re-running this cell does not append
# every object a second time.
extracted_objects.clear()
objects_dataset.map(process_objects, batched=True, batch_size=1000)

In [None]:
# Empty the accumulator first so that re-running this cell does not append
# every attribute record a second time.
extracted_attributes.clear()
attributes_dataset.map(process_attributes, batched=True, batch_size=1000)

In [None]:
# Empty the accumulator first so that re-running this cell does not append
# every relationship a second time.
extracted_relationships.clear()
relationships_dataset.map(process_relationships, batched=True, batch_size=1000)

In [None]:
#question 1: Identify images that show different weather conditions impacting daily life, such as people using umbrellas during rain, wearing sunglasses on sunny days, or wrapping in scarves during snow.

In [None]:
def filter_images_with_weather_impacts(extracted_objects, extracted_attributes, extracted_relationships):
    """Return ids of images where a person is related to weather gear or sky elements.

    The search runs in three steps:

    1. Collect object ids whose ``names`` contain a person label
       (person/man/woman), a weather accessory (umbrella/sunglasses/scarf) or
       an outdoor element (sky/cloud/sun).
    2. Keep the ``image_id`` of every relationship that joins a person with a
       weather accessory or an outdoor element, in either direction.
    3. Confirm an image when one of its weather/outdoor objects carries a
       weather attribute (rainy, sunny, snowy, cold, warm; case-insensitive).

    Args:
        extracted_objects: iterable of dicts with ``object_id`` and ``names``.
        extracted_attributes: iterable of dicts with ``object_id``,
            ``image_id`` and ``attributes``. ``attributes`` may be ``None``
            and may contain ``None`` entries; both are skipped.
        extracted_relationships: iterable of dicts with ``image_id`` and
            nested ``subject`` / ``object`` dicts that each hold an
            ``object_id``.

    Returns:
        list: the confirmed image ids, in arbitrary (set) order.
    """
    weather_attribute_values = ('rainy', 'sunny', 'snowy', 'cold', 'warm')

    people_ids = set()
    weather_related_objects_ids = set()
    outdoor_elements_ids = set()
    image_ids = set()

    # Step 1: Filter for People, Weather-related objects, and Outdoor elements from extracted objects
    for obj in extracted_objects:
        names = obj['names']
        if any(label in names for label in ('person', 'man', 'woman')):
            people_ids.add(obj['object_id'])
        if any(label in names for label in ('umbrella', 'sunglasses', 'scarf')):
            weather_related_objects_ids.add(obj['object_id'])
        if any(label in names for label in ('sky', 'cloud', 'sun')):
            outdoor_elements_ids.add(obj['object_id'])

    print(f"Step 1 - People IDs: {people_ids}")
    print(f"Step 1 - Weather-related Objects IDs: {weather_related_objects_ids}")
    print(f"Step 1 - Outdoor Elements IDs: {outdoor_elements_ids}")

    # Step 2: Identify relationships between people and weather-related objects or outdoor elements
    context_ids = weather_related_objects_ids | outdoor_elements_ids
    for rel in extracted_relationships:
        subject_id = rel['subject']['object_id']
        object_id = rel['object']['object_id']
        if (subject_id in people_ids and object_id in context_ids) or \
           (object_id in people_ids and subject_id in context_ids):
            image_ids.add(rel['image_id'])

    print(f"Step 2 - Potential Image IDs from relationships: {image_ids}")

    # Step 3: Confirm these image IDs based on attributes of weather and outdoor presence
    confirmed_image_ids = set()
    for attr in extracted_attributes:
        if attr['object_id'] not in context_ids or attr['image_id'] not in image_ids:
            continue
        # The attribute list itself, or single entries in it, can be None;
        # treat both as "no attribute" instead of raising.
        values = attr['attributes'] or ()
        if any(value and value.lower() in weather_attribute_values for value in values):
            confirmed_image_ids.add(attr['image_id'])

    print(f"Step 3 - Confirmed Image IDs: {confirmed_image_ids}")

    return list(confirmed_image_ids)

# Run the question-1 filter on the three extracted_* lists filled earlier in the notebook.
image_ids = filter_images_with_weather_impacts(
    extracted_objects=extracted_objects,
    extracted_attributes=extracted_attributes,
    extracted_relationships=extracted_relationships,
)

In [None]:
# Materialise the attributes dataset as a DataFrame, then show the URLs of the matching images.
df = attributes_dataset.to_pandas()
df.loc[df['image_id'].isin(image_ids), 'url'].to_numpy()

In [None]:
#question 2: Find images of people (men or women) in urban settings where they are interacting with nature, such as walking dogs on grassy patches beside skyscrapers, sitting under trees in city squares, or jogging past large water fountains.

In [None]:
def filter_images_with_people_nature_urban(extracted_objects, extracted_attributes, extracted_relationships):
    """Return ids of images where a person is related to nature, urban or sky elements.

    The search runs in three steps:

    1. Collect object ids whose ``names`` contain a person label
       (person/man/woman), a nature element (dog/tree/grass/fountain), an
       urban element (building/skyscraper/square/street) or an outdoor element
       (sky/cloud/sun).
    2. Keep the ``image_id`` of every relationship that joins a person with
       any nature, urban or outdoor element, in either direction.
    3. Confirm an image when one of its nature/urban/outdoor objects carries
       one of the attributes urban, natural, outdoor, green or architectural
       (case-insensitive).

    Args:
        extracted_objects: iterable of dicts with ``object_id`` and ``names``.
        extracted_attributes: iterable of dicts with ``object_id``,
            ``image_id`` and ``attributes``. ``attributes`` may be ``None``
            and may contain ``None`` entries; both are skipped.
        extracted_relationships: iterable of dicts with ``image_id`` and
            nested ``subject`` / ``object`` dicts that each hold an
            ``object_id``.

    Returns:
        list: the confirmed image ids, in arbitrary (set) order.
    """
    setting_attribute_values = ('urban', 'natural', 'outdoor', 'green', 'architectural')

    people_ids = set()
    nature_elements_ids = set()
    urban_elements_ids = set()
    outdoor_elements_ids = set()
    image_ids = set()

    # Step 1: Filter for People, Nature Elements, Urban Elements, and Outdoor Elements from extracted objects
    for obj in extracted_objects:
        names = obj['names']
        if any(label in names for label in ('person', 'man', 'woman')):
            people_ids.add(obj['object_id'])
        if any(label in names for label in ('dog', 'tree', 'grass', 'fountain')):
            nature_elements_ids.add(obj['object_id'])
        if any(label in names for label in ('building', 'skyscraper', 'square', 'street')):
            urban_elements_ids.add(obj['object_id'])
        if any(label in names for label in ('sky', 'cloud', 'sun')):
            outdoor_elements_ids.add(obj['object_id'])

    print(f"Step 1 - People IDs: {people_ids}")
    print(f"Step 1 - Nature Elements IDs: {nature_elements_ids}")
    print(f"Step 1 - Urban Elements IDs: {urban_elements_ids}")
    print(f"Step 1 - Outdoor Elements IDs: {outdoor_elements_ids}")

    # Step 2: Identify relationships that connect people with nature, urban or outdoor elements
    context_ids = nature_elements_ids | urban_elements_ids | outdoor_elements_ids
    for rel in extracted_relationships:
        subject_id = rel['subject']['object_id']
        object_id = rel['object']['object_id']
        if (subject_id in people_ids and object_id in context_ids) or \
           (object_id in people_ids and subject_id in context_ids):
            image_ids.add(rel['image_id'])

    print(f"Step 2 - Potential Image IDs from relationships: {image_ids}")

    # Step 3: Confirm these image IDs by ensuring attributes meet the urban and nature interaction criteria
    confirmed_image_ids = set()
    for attr in extracted_attributes:
        if attr['object_id'] not in context_ids or attr['image_id'] not in image_ids:
            continue
        # The attribute list itself, or single entries in it, can be None;
        # treat both as "no attribute" instead of raising.
        values = attr['attributes'] or ()
        if any(value and value.lower() in setting_attribute_values for value in values):
            confirmed_image_ids.add(attr['image_id'])

    print(f"Step 3 - Confirmed Image IDs: {confirmed_image_ids}")

    return list(confirmed_image_ids)

# Run the question-2 filter on the three extracted_* lists filled earlier in the notebook.
image_ids = filter_images_with_people_nature_urban(
    extracted_objects=extracted_objects,
    extracted_attributes=extracted_attributes,
    extracted_relationships=extracted_relationships,
)



In [None]:
# URLs of the images selected for question 2.
df.loc[df['image_id'].isin(image_ids), 'url'].to_numpy()

In [None]:
#question 3: Show scenes where various forms of transportation are depicted in close proximity to different architectural styles, such as old wooden boats near modern glass buildings, classic cars parked next to colonial houses, or bicycles leaning against brick walls.

In [None]:
def filter_images_with_transportation_architecture(extracted_objects, extracted_attributes, extracted_relationships):
    """Return ids of images where a vehicle is related to an architectural element.

    The search runs in three steps:

    1. Collect object ids whose ``names`` contain a transportation label
       (boat/car/bicycle), an architectural label (building/house/wall) or an
       outdoor element (sky/cloud/sun). The outdoor ids are only printed; they
       take no part in the filtering.
    2. Keep the ``image_id`` of every relationship that joins a transportation
       object with an architectural object, in either direction.
    3. Confirm an image when one of its transportation/architectural objects
       carries one of the attributes wooden, modern, glass, colonial or brick
       (case-insensitive).

    Args:
        extracted_objects: iterable of dicts with ``object_id`` and ``names``.
        extracted_attributes: iterable of dicts with ``object_id``,
            ``image_id`` and ``attributes``. ``attributes`` may be ``None``
            and may contain ``None`` entries; both are skipped.
        extracted_relationships: iterable of dicts with ``image_id`` and
            nested ``subject`` / ``object`` dicts that each hold an
            ``object_id``.

    Returns:
        list: the confirmed image ids, in arbitrary (set) order.
    """
    style_attribute_values = ('wooden', 'modern', 'glass', 'colonial', 'brick')

    transportation_ids = set()
    architecture_ids = set()
    outdoor_elements_ids = set()
    image_ids = set()

    # Step 1: Filter for Transportation and Architectural Elements from extracted objects
    for obj in extracted_objects:
        names = obj['names']
        if any(label in names for label in ('boat', 'car', 'bicycle')):
            transportation_ids.add(obj['object_id'])
        if any(label in names for label in ('building', 'house', 'wall')):
            architecture_ids.add(obj['object_id'])
        if any(label in names for label in ('sky', 'cloud', 'sun')):
            outdoor_elements_ids.add(obj['object_id'])

    print(f"Step 1 - Transportation IDs: {transportation_ids}")
    print(f"Step 1 - Architectural IDs: {architecture_ids}")
    print(f"Step 1 - Outdoor Elements IDs: {outdoor_elements_ids}")

    # Step 2: Identify relationships that connect transportation with architectural styles
    for rel in extracted_relationships:
        subject_id = rel['subject']['object_id']
        object_id = rel['object']['object_id']
        if (subject_id in transportation_ids and object_id in architecture_ids) or \
           (object_id in transportation_ids and subject_id in architecture_ids):
            image_ids.add(rel['image_id'])

    print(f"Step 2 - Potential Image IDs from relationships: {image_ids}")

    # Step 3: Confirm these image IDs through the style/material attributes of the involved objects
    confirmed_image_ids = set()
    for attr in extracted_attributes:
        relevant_object = attr['object_id'] in architecture_ids or attr['object_id'] in transportation_ids
        if not relevant_object or attr['image_id'] not in image_ids:
            continue
        # The attribute list itself, or single entries in it, can be None;
        # treat both as "no attribute" instead of raising.
        values = attr['attributes'] or ()
        if any(value and value.lower() in style_attribute_values for value in values):
            confirmed_image_ids.add(attr['image_id'])

    print(f"Step 3 - Confirmed Image IDs: {confirmed_image_ids}")

    return list(confirmed_image_ids)

# Run the question-3 filter on the three extracted_* lists filled earlier in the notebook.
image_ids = filter_images_with_transportation_architecture(
    extracted_objects=extracted_objects,
    extracted_attributes=extracted_attributes,
    extracted_relationships=extracted_relationships,
)


In [None]:
# URLs of the images selected for question 3.
df.loc[df['image_id'].isin(image_ids), 'url'].to_numpy()

In [None]:
#question 4: Show images of colorful gardens where you can see different types of flowers in both urban and rural settings, possibly with people enjoying the scenery or tending to the plants.

In [None]:
def filter_images_with_colorful_gardens(extracted_objects, extracted_attributes, extracted_relationships):
    """Return ids of garden images whose flowers are tagged colorful or diverse.

    The search runs in three steps:

    1. Collect object ids whose ``names`` contain ``garden``, ``flower``, a
       person label (man/woman/person) or an outdoor element (sky/cloud/sun).
       The outdoor ids are only printed; they take no part in the filtering.
    2. Keep the ``image_id`` of every relationship that joins a garden with a
       flower (either direction) or has a person as subject and a garden as
       object.
    3. Confirm an image when one of its flower objects carries the attribute
       ``colorful`` or ``diverse`` (case-insensitive).

    Args:
        extracted_objects: iterable of dicts with ``object_id`` and ``names``.
        extracted_attributes: iterable of dicts with ``object_id``,
            ``image_id`` and ``attributes``. ``attributes`` may be ``None``
            and may contain falsy entries; both are skipped.
        extracted_relationships: iterable of dicts with ``image_id`` and
            nested ``subject`` / ``object`` dicts that each hold an
            ``object_id``.

    Returns:
        list: the confirmed image ids, in arbitrary (set) order.
    """
    garden_ids, flower_ids, people_ids = set(), set(), set()
    outdoor_elements_ids = set()
    image_ids = set()

    # Step 1: Filter for Gardens, Flowers, and People from extracted objects
    for obj in extracted_objects:
        names = obj['names']
        if 'garden' in names:
            garden_ids.add(obj['object_id'])
        if 'flower' in names:
            flower_ids.add(obj['object_id'])
        if any(label in names for label in ('man', 'woman', 'person')):
            people_ids.add(obj['object_id'])
        if any(label in names for label in ('sky', 'cloud', 'sun')):
            outdoor_elements_ids.add(obj['object_id'])

    print(f"Step 1 - Garden IDs: {garden_ids}")
    print(f"Step 1 - Flower IDs: {flower_ids}")
    print(f"Step 1 - People IDs: {people_ids}")
    print(f"Step 1 - Outdoor Elements IDs: {outdoor_elements_ids}")

    # Step 2: Identify relationships involving gardens and flowers, and possibly people
    for rel in extracted_relationships:
        subject_id = rel['subject']['object_id']
        object_id = rel['object']['object_id']
        if (subject_id in garden_ids and object_id in flower_ids) or \
           (object_id in garden_ids and subject_id in flower_ids) or \
           (subject_id in people_ids and object_id in garden_ids):
            image_ids.add(rel['image_id'])

    print(f"Step 2 - Potential Image IDs from relationships: {image_ids}")

    # Step 3: Confirm these image IDs by ensuring attributes meet the specified criteria of colorful and diverse
    confirmed_image_ids = set()
    for attr in extracted_attributes:
        if attr['object_id'] not in flower_ids or attr['image_id'] not in image_ids:
            continue
        # The attribute list can be None; treat that as "no attributes".
        # Lower-case once instead of once per keyword.
        lowered = [value.lower() for value in (attr['attributes'] or ()) if value]
        if any(keyword in lowered for keyword in ('colorful', 'diverse')):
            confirmed_image_ids.add(attr['image_id'])

    print(f"Step 3 - Confirmed Image IDs: {confirmed_image_ids}")

    return list(confirmed_image_ids)

# Run the question-4 filter on the three extracted_* lists filled earlier in the notebook.
image_ids = filter_images_with_colorful_gardens(
    extracted_objects=extracted_objects,
    extracted_attributes=extracted_attributes,
    extracted_relationships=extracted_relationships,
)



In [None]:
# URLs of the images selected for question 4.
df.loc[df['image_id'].isin(image_ids), 'url'].to_numpy()

In [None]:
#question 5: Depict scenes where individuals are engaged in outdoor sports, such as playing basketball outside, skateboarding down city sidewalks, or practicing yoga in serene park settings

In [None]:
def filter_images_with_outdoor_sports(extracted_objects, extracted_attributes, extracted_relationships):
    """Return ids of images that relate a sports object to an outdoor element.

    Step 1 gathers the ids of objects named after a sport and of objects named
    grass/park/sky/cloud/sun. Step 2 keeps the ``image_id`` of every
    relationship joining a sports object with an outdoor object, in either
    direction. Step 3 confirms an image when one of its sports objects has a
    non-empty attribute list containing ``sport``, ``playing`` or
    ``practicing`` (case-insensitive, falsy entries ignored).

    Returns the confirmed image ids as a list in arbitrary (set) order.
    """
    sport_labels = ['basketball', 'skateboarding', 'yoga', 'soccer', 'tennis', 'baseball', 'running',
                    'cycling', 'volleyball', 'golf', 'surfing', 'swimming', 'rowing', 'hiking',
                    'rock climbing', 'rugby', 'frisbee', 'hockey', 'martial arts', 'archery']
    outdoor_labels = ['grass', 'park', 'sky', 'cloud', 'sun']
    activity_words = ['sport', 'playing', 'practicing']

    # Step 1: sports-related objects and general outdoor elements
    sports_ids = set()
    outdoor_elements_ids = set()
    for entry in extracted_objects:
        if any(label in entry['names'] for label in sport_labels):
            sports_ids.add(entry['object_id'])
        if any(label in entry['names'] for label in outdoor_labels):  # confirms an outdoor setting
            outdoor_elements_ids.add(entry['object_id'])

    print(f"Step 1 - Sports-related IDs: {sports_ids}")
    print(f"Step 1 - Outdoor Elements IDs: {outdoor_elements_ids}")

    # Step 2: relationships that tie a sports object to an outdoor element
    image_ids = set()
    for link in extracted_relationships:
        subject_id = link['subject']['object_id']
        target_id = link['object']['object_id']
        sport_then_outdoor = subject_id in sports_ids and target_id in outdoor_elements_ids
        outdoor_then_sport = target_id in sports_ids and subject_id in outdoor_elements_ids
        if sport_then_outdoor or outdoor_then_sport:
            image_ids.add(link['image_id'])

    print(f"Step 2 - Potential Image IDs from relationships: {image_ids}")

    # Step 3: keep only images whose sports objects carry an activity attribute
    confirmed_image_ids = set()
    for record in extracted_attributes:
        if record['object_id'] not in sports_ids or record['image_id'] not in image_ids:
            continue
        if not record['attributes']:
            continue
        lowered = [value.lower() for value in record['attributes'] if value]
        if any(word in lowered for word in activity_words):
            confirmed_image_ids.add(record['image_id'])

    print(f"Step 3 - Confirmed Image IDs: {confirmed_image_ids}")

    return list(confirmed_image_ids)

# Run the question-5 filter on the three extracted_* lists filled earlier in the notebook.
image_ids = filter_images_with_outdoor_sports(
    extracted_objects=extracted_objects,
    extracted_attributes=extracted_attributes,
    extracted_relationships=extracted_relationships,
)



In [None]:
# URLs of the images selected for question 5.
df.loc[df['image_id'].isin(image_ids), 'url'].to_numpy()