In [1]:
# Imports
import openai
import os
import requests
from utils import check_affordance_func
from typing import List
import requests


# Notebook Imports
from IPython.display import Image
from IPython.display import display
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import random
from utils import generate_image
from utils import get_completion
from pathlib import Path
from segment import GibsonSAM
from config import CLASSES
from dataclasses import dataclass



In [2]:
@dataclass
class ModelConfig:
    """Filesystem locations and settings for the GroundingDINO and SAM models.

    Any path field left empty is derived from ``HOME`` when the instance is
    created, so ``ModelConfig(HOME=...)`` relocates every default path
    together. A path passed explicitly is kept as given.

    Note: the class-level values of the path fields are empty placeholders;
    read the paths from an instance.
    """

    # Root directory of the project checkout; the default paths live under it.
    HOME: str = "/home/aregbs/Desktop/gibson-afford"
    # Empty string means "derive from HOME in __post_init__".
    GROUNDING_DINO_CONFIG_PATH: str = ""
    GROUNDING_DINO_CHECKPOINT_PATH: str = ""
    SAM_CHECKPOINT_PATH: str = ""
    SAM_ENCODER_VERSION: str = "vit_h"

    def __post_init__(self):
        # Resolve defaults per instance rather than once at class-definition
        # time; otherwise overriding HOME would leave these under the old root.
        if not self.GROUNDING_DINO_CONFIG_PATH:
            self.GROUNDING_DINO_CONFIG_PATH = os.path.join(
                self.HOME, "GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py"
            )
        if not self.GROUNDING_DINO_CHECKPOINT_PATH:
            self.GROUNDING_DINO_CHECKPOINT_PATH = os.path.join(
                self.HOME, "weights", "groundingdino_swint_ogc.pth"
            )
        if not self.SAM_CHECKPOINT_PATH:
            self.SAM_CHECKPOINT_PATH = os.path.join(
                self.HOME, "weights", "sam_hq_vit_h.pth"
            )

In [3]:
# Take the OpenAI key from the environment instead of assigning it in the
# notebook. setdefault keeps the variable defined (as an empty string) when it
# is unset, but never overwrites a key exported before the kernel started.
os.environ.setdefault("OPENAI_API_KEY", "")
openai.api_key = os.getenv("OPENAI_API_KEY")

In [4]:
# Scene image to process and the per-scene output directory, which is created
# under the current working directory when it does not exist yet.
image_path = "/home/aregbs/Desktop/gibson-afford/gen_data/demo/tabletop-scene.png"
output_folder = "tabletop-scene"
DATA_DIR = Path.cwd().joinpath(output_folder)
DATA_DIR.mkdir(exist_ok=True)

# Build the GibsonSAM pipeline for this image and collect its annotations.
# Presumably this also writes per-object files into DATA_DIR — TODO confirm
# against segment.GibsonSAM.
gibson = GibsonSAM(image_path, CLASSES, model_config=ModelConfig())
labels = gibson.get_image_annotations(DATA_DIR)

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


final text_encoder_type: bert-base-uncased


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


<All keys matched successfully>




In [5]:
labels

['plate', 'knife', 'mug', 'tomato']

In [6]:
# PosixPath is needed by later cells that spell out PosixPath(...) literals.
from pathlib import PosixPath

# Collect the entries of DATA_DIR. iterdir() yields them in arbitrary order,
# so sort the listing to make it reproducible from run to run.
paths = Path(DATA_DIR)
paths_ = sorted(paths.iterdir())

In [7]:
def get_completion_func(template, model="gpt-3.5-turbo-0613"):
    """
    Send one user prompt to the OpenAI chat completion endpoint.

    params:
        template (str): text sent as the user message
        model (str): name of the chat model passed to the API
    return (str): the message content of the first choice in the response
    """
    system_turn = {"role": "system", "content": "You are a helpful assistant."}
    user_turn = {"role": "user", "content": f"{template}"}

    # temperature=0 requests the least random output from the model
    completion = openai.ChatCompletion.create(
        model=model,
        messages=[system_turn, user_turn],
        temperature=0,
    )

    first_choice = completion.choices[0]
    return first_choice.message["content"]
  

In [8]:
def extract_actions(text, paths_list):
    """
    Parse a '###'-delimited action listing and attach file paths to its targets.

    Every section of ``text`` that follows a '###' marker is scanned for lines
    starting with 'Performed on:' and 'Effect:'. The 'Performed on' object name
    is then replaced by the path (as a string) of the entry in ``paths_list``
    whose stem equals that name, compared case-insensitively. Names with no
    matching stem are left unchanged.

    params:
        text (str): listing to parse; anything before the first '###' is ignored
        paths_list (iterable of pathlib paths): files named after objects,
            e.g. 'tomato.png' for the object 'tomato'
    return (List[Dict[str, str]]): one dict per section that contained at least
        one recognised field, with the keys 'Performed on' and/or 'Effect'.
        Sections with no recognised field (such as the text after a closing
        '###') are skipped.
    """
    def field_value(line):
        # Keep everything after the first colon: this accepts 'Key:value' as
        # well as 'Key: value', and preserves values that contain ': '.
        return line.partition(':')[2].strip()

    def map_paths_to_items(actions, paths_list):
        # Lower-case stems so the lookup below is case-insensitive on both sides.
        path_dict = {path.stem.lower(): str(path) for path in paths_list}

        for action in actions:
            target = action.get('Performed on')
            if target is None:
                # Section had an 'Effect' but no target line; nothing to map.
                continue
            path = path_dict.get(target.lower())
            if path is not None:
                action['Performed on'] = path

        return actions

    actions_dict = []
    # The first split result is whatever precedes the first '###'; drop it.
    for section in text.split('###')[1:]:
        action_dict = {}
        for raw_line in section.splitlines():
            line = raw_line.strip()
            if line.startswith('Performed on:'):
                action_dict['Performed on'] = field_value(line)
            elif line.startswith('Effect:'):
                action_dict['Effect'] = field_value(line)
        if action_dict:
            actions_dict.append(action_dict)

    return map_paths_to_items(actions_dict, paths_list)

In [9]:
def process_dalle_images(response, filename, image_dir, timeout=60):
    """
    Download every image referenced by an image-API response and save it.

    params:
        response (dict): response whose "data" list holds {"url": ...} entries
        filename (str): stem of the saved files; image number i (1-based) is
            written to "<filename>_<i>.png"
        image_dir (str or path): existing directory that receives the files
        timeout (float): seconds to wait for each download before giving up
    return (List[str]): paths of the written files, in response order
    raises: the exception raised by requests when a download fails or the
        server answers with an HTTP error status
    """
    # NOTE(review): callers build `filename` from free-form prompt text; a path
    # separator in it would change the target directory — confirm inputs.
    filepaths = []
    for index, datum in enumerate(response["data"], start=1):
        reply = requests.get(datum["url"], timeout=timeout)
        # Fail loudly on an HTTP error instead of saving the error body as a ".png".
        reply.raise_for_status()

        filepath = os.path.join(image_dir, f"{filename}_{index}.png")
        with open(filepath, "wb") as image_file:
            image_file.write(reply.content)
        filepaths.append(filepath)

    return filepaths

In [10]:
# Rebind labels as a tuple; its repr is what the prompt template interpolates.
labels = tuple(labels)

In [11]:
# Scene type named in the prompt.
environment  = 'tabletop'

# Prompt asking the model to list object interactions in a '###'-delimited
# Action / Performed on / Instrument / Effect layout. extract_actions() keys on
# the 'Performed on:' and 'Effect:' line prefixes of that layout.
# NOTE(review): {labels} is interpolated through its Python repr (a tuple
# literal) and "begining" is misspelled in the prompt text; both are left
# untouched here because editing the string changes what is sent to the model.
query_template =  f"""Imagine {labels} on a {environment} 
What are the possible interactions between these objects? Specify answer by giving names of Action, object performed on, instrument used to perform the action and the effect on the object. 
An example is:
###
Action:Peel
Performed on: Onion 
Instrument: knife
Effect: peeled(onion)
### 
present the result in the format of the example starting with ### at the begining Action
"""
    

In [12]:
query_template

"Imagine ('plate', 'knife', 'mug', 'tomato') on a tabletop \nWhat are the possible interactions between these objects? Specify answer by giving names of Action, object performed on, instrument used to perform the action and the effect on the object. \nAn example is:\n###\nAction:Peel\nPerformed on: Onion \nInstrument: knife\nEffect: peeled(onion)\n### \npresent the result in the format of the example starting with ### at the begining Action\n"

In [13]:
# Send the prompt to the chat model and keep the reply text for parsing.
afford = get_completion_func(query_template)

In [14]:
afford

'### Action: Cut\nPerformed on: Tomato\nInstrument: Knife\nEffect: Cut(tomato)\n\n### Action: Slice\nPerformed on: Tomato\nInstrument: Knife\nEffect: Sliced(tomato)\n\n### Action: Stir\nPerformed on: Mug\nInstrument: Spoon\nEffect: Stirred(mug)\n\n### Action: Pour\nPerformed on: Mug\nInstrument: None\nEffect: Filled(mug)\n\n### Action: Serve\nPerformed on: Plate\nInstrument: None\nEffect: Placed(tomato) on plate\n\n### Action: Place\nPerformed on: Plate\nInstrument: None\nEffect: Placed(knife) on plate\n\n### Action: Place\nPerformed on: Plate\nInstrument: None\nEffect: Placed(mug) on plate'

In [15]:
# Parse the model reply and replace each "Performed on" object name with the
# matching file from paths_ (names without a matching file stem stay as-is).
updated_actions = extract_actions(afford, paths_)

print(updated_actions)

[{'Performed on': '/home/aregbs/Desktop/gibson-afford/gen_data/tabletop-scene/tomato.png', 'Effect': 'Cut(tomato)'}, {'Performed on': '/home/aregbs/Desktop/gibson-afford/gen_data/tabletop-scene/tomato.png', 'Effect': 'Sliced(tomato)'}, {'Performed on': '/home/aregbs/Desktop/gibson-afford/gen_data/tabletop-scene/mug.png', 'Effect': 'Stirred(mug)'}, {'Performed on': '/home/aregbs/Desktop/gibson-afford/gen_data/tabletop-scene/mug.png', 'Effect': 'Filled(mug)'}, {'Performed on': '/home/aregbs/Desktop/gibson-afford/gen_data/tabletop-scene/plate.png', 'Effect': 'Placed(tomato) on plate'}, {'Performed on': '/home/aregbs/Desktop/gibson-afford/gen_data/tabletop-scene/plate.png', 'Effect': 'Placed(knife) on plate'}, {'Performed on': '/home/aregbs/Desktop/gibson-afford/gen_data/tabletop-scene/plate.png', 'Effect': 'Placed(mug) on plate'}]


In [16]:
updated_actions

[{'Performed on': '/home/aregbs/Desktop/gibson-afford/gen_data/tabletop-scene/tomato.png',
  'Effect': 'Cut(tomato)'},
 {'Performed on': '/home/aregbs/Desktop/gibson-afford/gen_data/tabletop-scene/tomato.png',
  'Effect': 'Sliced(tomato)'},
 {'Performed on': '/home/aregbs/Desktop/gibson-afford/gen_data/tabletop-scene/mug.png',
  'Effect': 'Stirred(mug)'},
 {'Performed on': '/home/aregbs/Desktop/gibson-afford/gen_data/tabletop-scene/mug.png',
  'Effect': 'Filled(mug)'},
 {'Performed on': '/home/aregbs/Desktop/gibson-afford/gen_data/tabletop-scene/plate.png',
  'Effect': 'Placed(tomato) on plate'},
 {'Performed on': '/home/aregbs/Desktop/gibson-afford/gen_data/tabletop-scene/plate.png',
  'Effect': 'Placed(knife) on plate'},
 {'Performed on': '/home/aregbs/Desktop/gibson-afford/gen_data/tabletop-scene/plate.png',
  'Effect': 'Placed(mug) on plate'}]

In [18]:
# NOTE(review): mask_path is only assigned inside the editing loop of the next
# cell, so this cell raises NameError on a fresh Restart & Run All. The value
# displayed for it looks like a leftover from an earlier session — confirm and
# remove this cell.
mask_path

'/'

In [19]:
# Apply every parsed interaction to the scene through the image-edit endpoint:
# the "Effect" text is the prompt and the "Performed on" file is the mask.
for edit in updated_actions:
    prompt = edit["Effect"]
    mask_path = edit["Performed on"]
    print(f"Performed on: {mask_path}")
    print(f"Effect: {prompt}\n")

    # extract_actions leaves the bare object name in place when no file matched
    # it; skip such entries instead of aborting the remaining edits.
    if not os.path.isfile(mask_path):
        print(f"Skipping: no mask file at {mask_path}\n")
        continue

    # Context managers close both file handles once the request has returned.
    with open(image_path, "rb") as image_file, open(mask_path, "rb") as mask_file:
        edit_response = openai.Image.create_edit(
            image=image_file,
            mask=mask_file,
            prompt=prompt,
            size="512x512",
            response_format="url"
        )

    # Holds the files written for the current edit only (rebound every iteration).
    edit_filepaths = process_dalle_images(edit_response, f"Image_{prompt}", DATA_DIR)

Performed on: /home/aregbs/Desktop/gibson-afford/gen_data/tabletop-scene/tomato.png
Effect: Cut(tomato)

Performed on: /home/aregbs/Desktop/gibson-afford/gen_data/tabletop-scene/tomato.png
Effect: Sliced(tomato)

Performed on: /home/aregbs/Desktop/gibson-afford/gen_data/tabletop-scene/mug.png
Effect: Stirred(mug)

Performed on: /home/aregbs/Desktop/gibson-afford/gen_data/tabletop-scene/mug.png
Effect: Filled(mug)

Performed on: /home/aregbs/Desktop/gibson-afford/gen_data/tabletop-scene/plate.png
Effect: Placed(tomato) on plate

Performed on: /home/aregbs/Desktop/gibson-afford/gen_data/tabletop-scene/plate.png
Effect: Placed(knife) on plate

Performed on: /home/aregbs/Desktop/gibson-afford/gen_data/tabletop-scene/plate.png
Effect: Placed(mug) on plate



In [None]:
from pathlib import Path
import json

# NOTE(review): an earlier cell already defines extract_actions; once this cell
# runs, this definition is the one in effect. Keep a single definition.
def extract_actions(text, paths_list):
    """
    Parse a '###'-delimited action listing and attach file paths to its targets.

    Every section of ``text`` that follows a '###' marker is scanned for lines
    starting with 'Performed on:' and 'Effect:'. The 'Performed on' object name
    is then replaced by the path (as a string) of the entry in ``paths_list``
    whose stem equals that name, compared case-insensitively. Names with no
    matching stem are left unchanged.

    params:
        text (str): listing to parse; anything before the first '###' is ignored
        paths_list (iterable of pathlib paths): files named after objects,
            e.g. 'tomato.png' for the object 'tomato'
    return (List[Dict[str, str]]): one dict per section that contained at least
        one recognised field, with the keys 'Performed on' and/or 'Effect'.
        Sections with no recognised field (such as the text after a closing
        '###') are skipped.
    """
    def field_value(line):
        # Keep everything after the first colon: this accepts 'Key:value' as
        # well as 'Key: value', and preserves values that contain ': '.
        return line.partition(':')[2].strip()

    def map_paths_to_items(actions, paths_list):
        # Lower-case stems so the lookup below is case-insensitive on both sides.
        path_dict = {path.stem.lower(): str(path) for path in paths_list}

        for action in actions:
            target = action.get('Performed on')
            if target is None:
                # Section had an 'Effect' but no target line; nothing to map.
                continue
            path = path_dict.get(target.lower())
            if path is not None:
                action['Performed on'] = path

        return actions

    actions_dict = []
    # The first split result is whatever precedes the first '###'; drop it.
    for section in text.split('###')[1:]:
        action_dict = {}
        for raw_line in section.splitlines():
            line = raw_line.strip()
            if line.startswith('Performed on:'):
                action_dict['Performed on'] = field_value(line)
            elif line.startswith('Effect:'):
                action_dict['Effect'] = field_value(line)
        if action_dict:
            actions_dict.append(action_dict)

    return map_paths_to_items(actions_dict, paths_list)

# Hand-written stand-in for the directory listing: one file path per object.
# NOTE(review): PosixPath is imported in an earlier cell (this cell imports only
# Path and json), so this cell depends on that cell having been run.
paths_list = [
    PosixPath('/home/aregbs/Desktop/gibson-afford/gen_data/tabletop-scene/knife.png'),
    PosixPath('/home/aregbs/Desktop/gibson-afford/gen_data/tabletop-scene/mug.png'),
    PosixPath('/home/aregbs/Desktop/gibson-afford/gen_data/tabletop-scene/tomato.png'),
    PosixPath('/home/aregbs/Desktop/gibson-afford/gen_data/tabletop-scene/plate.png')
]


# Sample reply in the '###'-delimited format, used to exercise the parser.
text = '### Action: Cut\nPerformed on: Tomato\nInstrument: Knife\nEffect: Cut tomato\n\n### Action: Slice\nPerformed on: Tomato\nInstrument: Knife\nEffect: Sliced tomato\n\n### Action: Pour\nPerformed on: Mug\nInstrument: N/A\nEffect: Filled mug\n\n### Action: Place\nPerformed on: Tomato\nInstrument: N/A\nEffect: Placed tomato on plate\n\n### Action: Place\nPerformed on: Mug\nInstrument: N/A\nEffect: Placed mug on table\n\n### Action: Arrange\nPerformed on: Plate\nInstrument: N/A\nEffect: Arranged objects on plate (plate, knife, mug, tomato)'


# Parse the sample reply against the hand-written path list and show the result.
updated_act = extract_actions(text, paths_list)

print(updated_act)


In [None]:
# NOTE(review): 'tomatoe.png' differs from the 'tomato.png' used in the other
# path lists, so a 'Tomato' target would not be mapped to it. paths_l is not
# referenced by any cell shown here — confirm whether this is intentional.
paths_l = [
 PosixPath('/home/aregbs/Desktop/gibson-afford/gen_data/tabletop-scene/knife.png'),
 PosixPath('/home/aregbs/Desktop/gibson-afford/gen_data/tabletop-scene/mug.png'),
 PosixPath('/home/aregbs/Desktop/gibson-afford/gen_data/tabletop-scene/tomatoe.png'),
 PosixPath('/home/aregbs/Desktop/gibson-afford/gen_data/tabletop-scene/plate.png')
 
 ]

In [None]:
# Same four file paths as paths_list under another name; a later cell passes
# this list to extract_actions.
paths_li = [
    PosixPath('/home/aregbs/Desktop/gibson-afford/gen_data/tabletop-scene/knife.png'),
    PosixPath('/home/aregbs/Desktop/gibson-afford/gen_data/tabletop-scene/mug.png'),
    PosixPath('/home/aregbs/Desktop/gibson-afford/gen_data/tabletop-scene/tomato.png'),
    PosixPath('/home/aregbs/Desktop/gibson-afford/gen_data/tabletop-scene/plate.png')
]


In [None]:
list(paths_)

In [None]:
# Parse the sample reply against paths_li; the returned list is the cell output.
extract_actions(text, paths_li)