In [1]:
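# Setup - run these in a terminal before executing the notebook:
#   source proxypicker-env/bin/activate
#   cd /Users/jamaman/Documents/GitHub/ProxyPicker/images   # adjust to your local checkout
#   python3 -m http.server   # serves this directory at http://localhost:8000, which the image cell fetches from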

In [2]:
import os, getpass
import operator
import base64

import httpx
from typing import Optional
from typing_extensions import Annotated, TypedDict
from pydantic import BaseModel, Field
from langchain_core.messages import HumanMessage, SystemMessage
from langgraph.constants import Send
from IPython.display import Image
from langgraph.graph import END, StateGraph, START, MessagesState
from langchain_openai import ChatOpenAI
from typing import List


def _set_env(var: str):
    """Make sure the environment variable ``var`` holds a non-empty value.

    When ``var`` is unset or empty, the value is read through a
    ``getpass`` prompt labelled ``"<var>: "`` and stored in ``os.environ``.
    An existing non-empty value is left untouched.
    """
    if os.environ.get(var):
        return  # already configured - nothing to ask for
    os.environ[var] = getpass.getpass(f"{var}: ")


# Prompt for OPENAI_API_KEY only when it is not already set (non-empty) in the environment.
_set_env("OPENAI_API_KEY")

In [3]:
# Chat model used for proxy picking; the model name is pinned to "o1-2024-12-17".
proxy_picker_llm = ChatOpenAI(model="o1-2024-12-17")

In [4]:
# Alternative model configuration (disabled), kept for reference:
# proxy_picker_llm = ChatOpenAI(model="gpt-4", temperature=0.1, base_url="https://reverse.onechats.top/v1")

In [5]:
# LangChain-related environment configuration: ask for the API key if it is
# missing, then set the tracing flag and the project name.
# NOTE(review): variable names suggest LangSmith tracing - confirm.
_set_env("LANGCHAIN_API_KEY")
os.environ.update(
    LANGCHAIN_TRACING_V2="true",
    LANGCHAIN_PROJECT="proxy_picker",
)

In [6]:
def _fetch_image_b64(url: str) -> str:
    """Download ``url`` and return the response body as a base64 text string.

    Args:
        url: Address of the image to fetch.

    Returns:
        The raw response bytes, base64-encoded and decoded as UTF-8 text.

    Raises:
        httpx.HTTPStatusError: If the server answers with a 4xx/5xx status.
            Without this check a missing file would silently yield the
            base64 of an error page, which would then be sent to the model
            as if it were a JPEG.
    """
    response = httpx.get(url)
    response.raise_for_status()
    return base64.b64encode(response.content).decode("utf-8")


# Images sent under "image(s) of surrounding physical objects:" in the prompt.
physical_url01 = "http://localhost:8000/s1.jpg"
image_data01 = _fetch_image_b64(physical_url01)
physical_url02 = "http://localhost:8000/p36.jpg"
image_data02 = _fetch_image_b64(physical_url02)
physical_url03 = "http://localhost:8000/p6.jpg"
image_data03 = _fetch_image_b64(physical_url03)

# Images sent under "image of virtual object:" in the prompt.
prefab_url01 = "http://localhost:8000/f30.jpg"
prefab_data01 = _fetch_image_b64(prefab_url01)
prefab_url02 = "http://localhost:8000/f31.jpg"
prefab_data02 = _fetch_image_b64(prefab_url02)
prefab_url03 = "http://localhost:8000/f29.jpg"
prefab_data03 = _fetch_image_b64(prefab_url03)

In [7]:
# System prompt for the proxy picker. It defines the assistant's role, annotates
# each input field, lists the proxy-picking instructions, and states the required
# output format. The text is sent to the model verbatim.
# NOTE(review): the prompt text contains a few typos ("ping pang", "normally end up",
# "so it simulate"); they are left untouched here because editing them changes the
# prompt - fix deliberately if desired.
prompt_system = SystemMessage(content="""You are a haptic proxy picker. Your primary goal is to select suitable physical proxies from the images of the real-world surroundings of a VR user so that the user experiences the intended haptic feedback when interacting with target virtual objects.

Annotation of Input Information:
    -VR Activity: Describes the scenario and overall purpose of the target virtual objects.
    -Target Virtual Objects: The items in the VR scene for which you must propose haptic proxies. 
    -Highly Expected Haptic Feedback from Direct Contact: Feedback that the VR developer deems essential when the user's body directly touches or grasps the virtual object.
    -Highly Expected Haptic Feedback from Interaction Pairs: Feedback deemed critical when multiple virtual objects collide or interact (e.g., a ball being struck by a bat).
    -Images of Surrounding Physical Objects: Depictions of the user’s real-world environment, showing what potential proxies are available. 
    -Images of Virtual Objects: Snapshots or 3D renders of the virtual objects needing proxies.

Proxy Picking Instructions:
    1. Base Your Decisions on the Provided Data
        *Construct the interaction scenario and anticipate the expected haptic feedback by reviewing the provided images, text descriptions, and usage instructions.
        *Then, think in reverse: envision which physical proxies from the environment can supply the needed haptic sensations.
    2. Think Differently by Contact Type
        *Direct Contact Objects
            -These are virtual objects the user's body directly contacts (e.g., tennis racket, shovel, chair)
            -Strive for close matching across key haptic dimensions (shape, weight, texture, hardness), because contact is immediate.
            -If “highly expected haptic feedback” is specified, prioritize simulating those properties.
        *Tool-Mediated Objects
            -These objects interact with the user indirectly via another tool (e.g., a golf ball being struck by a club).
            -Be more flexible and creative when picking a proxy, as long as the user perceives the correct collisions, vibrations or force through the direct contact tool (e.g., a christmas tree could be a haptic proxy of a ping pang ball since the bat normally end up colliding with the tree with every swing; the scissors placed in a pen holder can serve as the haptic proxy for the lock when simulating the feedback of prying the lock open with a crowbar).
    3. Choose with Focus
        *"Highly expected haptic feedback" indicates which properties are especially prioritized by the VR developer. Although you should consider every property that might matter for immersion, prioritize these highlighted properties first if there is a trade-off.
        *A "no further expectation" means there is no specific emphasis from the developer--however, you must still propose a proxy for that virtual object.
    4. Consider Multi-Object Interactions
        *Deduce the mentioned interaction pairs and how they might physically collide, transfer, interact with and so on.
        *Think carefully about any haptic feedback that arises from these interactions.
    
Final Output Requirements: 
    1. Assign the most suitable physical object to each target virtual object (one proxy per virtual item).
    2. Specify the location of each chosen haptic proxy (i.e., where it is found in the provided images)
    3. Justify your proxy selection for each virtual object.
    4. Describe how to hold or manipulate the chosen proxies so it simulate the expected haptic feedback.""")

def _jpeg_part(encoded: str) -> dict:
    """Wrap base64-encoded JPEG data as a high-detail ``image_url`` content part."""
    return {
        "type": "image_url",
        "image_url": {"url": f"data:image/jpeg;base64,{encoded}", "detail": "high"},
    }


# Image parts, in the order they appear in the message.
_surrounding_parts = [_jpeg_part(b64) for b64 in (image_data01, image_data02, image_data03)]
_virtual_parts = [_jpeg_part(b64) for b64 in (prefab_data01, prefab_data02, prefab_data03)]

# Human message layout: task description, caption + surrounding-object images,
# caption + virtual-object images.
# NOTE(review): the direct-contact feedback list names "hair", while the target
# list and the interaction pair name "man with short hair" - confirm both refer
# to the same object.
prompt_human = HumanMessage(content=[
    {"type": "text", "text": """
        VR Activity: hair dressing
        
        Target Virtual Objects: 
            *tail comb (Direct Contact Objects)
            *hairbrush (Direct Contact Objects)
            *hair clipper (Direct Contact Objects)
            *folding razor (Direct Contact Objects)
            *scissors (Direct Contact Objects)
            *handheld mirror (Direct Contact Objects)
            *man with short hair (Tool-Mediated Objects)
            
        Highly Expected Haptic Feedback from Direct Contact:
            *tail comb: its thin and fine tail
            *hairbrush: overall grasping shape
            *hair clipper: the shape where grasp; a switch button; slightly heavy weight
            *folding razor: foldable mechanism
            *scissors: flexible hinge that can be driven by two fingers
            *handheld mirror: its two handles 
            *hair: no further expectation
            
        Highly Expected Haptic Feedback from Interaction Pairs:
            *man with short hair--folding razor: counter-acting soft force when contacting；soft scraping sensation of trimming hair

    """},
    {"type": "text", "text": "image(s) of surrounding physical objects:"},
    *_surrounding_parts,
    {"type": "text", "text": "image of virtual object:"},
    *_virtual_parts,
])

# Message list passed to the model: system instructions first, then the task message with images.
msgs = [prompt_system, prompt_human]

In [10]:
# Run the proxy picker on the assembled messages; the returned message is shown as the cell output.
proxy_picker_llm.invoke(msgs)

AIMessage(content='1) Tail Comb → The thin green pencil on the desk.  \n   • Location: In the pencil holder or lying on the desktop in the first image.  \n   • Rationale: The pencil’s long, slim shaft mimics the fine‐tail of the comb; you can “part” and “section” hair in VR by using the pencil as if it were the comb’s narrow tip.  \n   • Usage: Hold it exactly as you would a tail comb—pinched near the tip for detail work or by the writing end for longer strokes.  \n\n2) Hairbrush → The cylindrical metal pen cup on the left side of the desk.  \n   • Location: On the wooden desk in the first image, where the pens and pencils are stored.  \n   • Rationale: While it lacks bristles, it provides a similarly sized object to grip, approximating the “overall grasping shape” of a hairbrush’s handle and body.  \n   • Usage: Wrap your fingers around it as though gripping a brush handle and move it in brushing motions against the “hair.”  \n\n3) Hair Clipper → The small stereo or CD‐player remote i