In [149]:
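# Setup -- run these in a terminal before executing the notebook:
#
#   source proxypicker-env/bin/activate
#   cd /Users/jamaman/Documents/GitHub/ProxyPicker/images
#   python3 -m http.server
#
# The cells below download their images from http://localhost:8000, so the
# server has to stay running in the images directory.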

In [150]:
import os, getpass
import operator
import base64

import httpx
from typing import Optional
from typing_extensions import Annotated, TypedDict
from pydantic import BaseModel, Field
from langchain_core.messages import HumanMessage, SystemMessage
from langgraph.constants import Send
from IPython.display import Image
from langgraph.graph import END, StateGraph, START, MessagesState
from langchain_openai import ChatOpenAI
from typing import List


def _set_env(var: str):
    if not os.environ.get(var):
        os.environ[var] = getpass.getpass(f"{var}: ")


# Ask for the OpenAI key interactively only if it is not already set in the environment.
_set_env("OPENAI_API_KEY")

In [151]:
# Chat model that the proxy-picking prompt is sent to further down in the notebook.
proxy_picker_llm = ChatOpenAI(model="o1-2024-12-17")

In [152]:
# proxy_picker_llm = ChatOpenAI(model="gpt-4", temperature=0.1, base_url="https://reverse.onechats.top/v1")

In [153]:
# Prompt for the LangChain API key if it is missing, then set the tracing
# flag and project name in the environment.
# NOTE(review): presumably these enable LangSmith tracing for the run -- confirm.
_set_env("LANGCHAIN_API_KEY")
os.environ.update({
    "LANGCHAIN_TRACING_V2": "true",
    "LANGCHAIN_PROJECT": "proxy_picker",
})

In [154]:
def _fetch_image_b64(url: str) -> str:
    """Download ``url`` and return the response body as base64 text.

    Raises ``httpx.HTTPStatusError`` on a 4xx/5xx response. Without this
    check a missing file on the local server would yield an HTML error page
    that gets base64-encoded and sent to the model as if it were a JPEG.
    """
    response = httpx.get(url)
    response.raise_for_status()
    return base64.b64encode(response.content).decode("utf-8")


# Photos of the surrounding physical objects, served by the local HTTP server.
physical_url01 = "http://localhost:8000/p6.jpg"
image_data01 = _fetch_image_b64(physical_url01)
physical_url02 = "http://localhost:8000/s5.jpg"
image_data02 = _fetch_image_b64(physical_url02)
physical_url03 = "http://localhost:8000/s6.jpg"
image_data03 = _fetch_image_b64(physical_url03)

# Image of the target virtual object.
prefab_url = "http://localhost:8000/f15.jpg"
prefab_data = _fetch_image_b64(prefab_url)

In [155]:
# def assistant(state: MessagesState):
#     prompt_system = SystemMessage(content="You are a haptic proxy picker, please choose a physical object from the given picture as the most suitable haptic proxy for the given task in virtual reality. The image for the target virtual object may also be provided.")
#     prompt_human = HumanMessage(content=[
#         {
#             "type": "text",
#             "text": "driving",
#         },
#         {
#             "type": "text",
#             "text": "image(s) of surrounding objects:",
#         },
#         {
#             "type": "image_url",
#             "image_url": {"url": f"data:image/jpeg;base64,{image_data01}"},
#         },
#         {
#             "type": "text",
#             "text": "image of interacted virtual object:",
#         },
#         {
#             "type": "image_url",
#             "image_url": {"url": f"data:image/jpeg;base64,{prefab_data01}"},
#         }
#     ])

In [156]:
# System prompt: describes the VR task, the target virtual object and the
# prioritised haptic properties, then the selection rules and the required
# output format. The "Final Output Requirements" items are numbered 1-4
# (the list previously contained two items labelled "2.").
prompt_system = SystemMessage(content="""You are a haptic proxy picker, please choose top three physical objects from the given images of surrounding physical objects as the most suitable haptic proxies for the virtual object. The context is described as follow:
    1. The given task: illuminating
    2. Target virtual objects: torch
    3. Highly expected haptic feedback from virtual object: shape of holding part; wood texture

Proxy Picking Instructions:
    1. Base Your Decisions on the Provided Data
        *Refer to the given images of virtual and physical objects, as well as any written context and usage descriptions for expected haptic feedback
        *When picking a haptic proxy, think about shape, texture, hardness, temperature, weight, interactivity
    2. Choose with Focus
        *"Highly expected haptic feedback" indicates which properties are especially prioritized by the VR developer. Although you should consider every property that might matter for immersion, prioritize these highlighted properties first if there is a trade-off.
        *A "no further expectation" means there is no specific emphasis from the developer--however, you must still propose a proxy for that virtual object.
    
Final Output Requirements: 
    1. Find the top three most suitable candidate among the physical objects for the target virtual object.
    2. Rank them based on their suitability. 
    3. Justify your proxy selection for the three proxy candidates.
    4. Explain how to hold or use each selected physical object so it simulate the expected haptic feedback.""")
# Human message: a caption followed by the three photos of the surrounding
# physical objects, then a caption followed by the virtual object's image.
# Every image is embedded as a base64 JPEG data URL with "detail": "high".
prompt_human = HumanMessage(content=[
    {"type": "text", "text": "image(s) of surrounding physical objects:"},
    *(
        {
            "type": "image_url",
            "image_url": {"url": f"data:image/jpeg;base64,{encoded}", "detail": "high"},
        }
        for encoded in (image_data01, image_data02, image_data03)
    ),
    {"type": "text", "text": "image of virtual object:"},
    {
        "type": "image_url",
        "image_url": {"url": f"data:image/jpeg;base64,{prefab_data}", "detail": "high"},
    },
])

# Conversation sent to the model: system instructions first, then the images.
msgs = [prompt_system, prompt_human]

In [157]:
# prompt_system = SystemMessage(content="You are a haptic proxy picker, please choose top three physical objects from the given picture as the most suitable haptic proxies for the given task in virtual reality. Rank them based on their suitability. You should list all the recognizable physical objects from the given picture first. The image for the target virtual object may also be provided. \n The given task: hugging the pot and moving to another place. \n The target virtual object: a pot of plant. \n Highly expected haptic feedbacks: the outline of grabbed or held area; matched weight")
# prompt_human = HumanMessage(content=[
#     {
#         "type": "text",
#         "text": "image(s) of surrounding physical objects:",
#     },
#     {
#         "type": "image_url",
#         "image_url": {"url": f"data:image/jpeg;base64,{image_data01}", "detail": "high"},
#     },
#     {
#         "type": "image_url",
#         "image_url": {"url": f"data:image/jpeg;base64,{image_data02}", "detail": "high"},
#     },
#     {
#         "type": "image_url",
#         "image_url": {"url": f"data:image/jpeg;base64,{image_data03}", "detail": "high"},
#     },
#     {
#         "type": "text",
#         "text": "image of virtual object:",
#     },
#     {
#         "type": "image_url",
#         "image_url": {"url": f"data:image/jpeg;base64,{prefab_data}", "detail": "high"},
#     }
# ])
# msgs = [prompt_system, prompt_human]

In [158]:
# Send the system + human messages to the model; the returned AIMessage is shown as the cell output.
proxy_picker_llm.invoke(msgs)

AIMessage(content='1) A carved wooden statuette (on top of the speaker).  \n   • Why Picked: It appears to be solid wood with a hand‐carved feel; the narrow lower portion can serve as a handle while the top portion simulates a torch head. This directly provides the “wood texture” and a roughly “torch‐like” shape.  \n   • How to Hold/Use: Grasp it by the narrow end (as if it were the bottom of the torch shaft) so that the thicker, sculpted top imitates the torch’s head.\n\n2) The neck of the standing wooden‐bodied guitar.  \n   • Why Picked: The back of the guitar neck is smoothly curved, wooden, and long enough to simulate the sensation of holding a torch shaft. Although it is lacquered, it still provides a firm wooden feel.  \n   • How to Hold/Use: Gently wrap one hand around the neck (taking care not to apply pressure to the strings), keeping the headstock pointed upward to mimic the torch’s lit end.\n\n3) The small wooden speaker box on the desk.  \n   • Why Picked: Despite a more b