In [2]:
from openai import OpenAI
# Create the API client used for the chat-completion request later in this notebook.
# No credentials are passed explicitly, so they must come from the client's own
# defaults (presumably the OPENAI_API_KEY environment variable — confirm).
client = OpenAI()

In [15]:
# System prompt for the colour-mixing agent. It describes the environment, the
# JSON action schema the model must emit (from_beaker, to_beaker,
# transfer_amount, is_done, compare_beaker) and the subtractive mixing rule the
# environment applies, so the model can plan its pours.
# The embedded SubtractiveModel snippet is kept as valid, correctly indented
# Python so the model reads it as the environment's real mixing code.
system_prompt = """
You are an agent in a "color-mixing environment". The environment consists
    of several beakers, each containing a paint of a certain color and amount.
    Your objective is to mix colors in beakers such that one beaker matches
    a target beaker as close as possible (in both color and amount).

    At each timestep, you will be given a textual representation of the environment
    which tells you the RGB value and amount of paints in each beaker as well
    as the target beaker you are trying to achieve.

    Given this description of the environment, your task is to generate an
    action in a specific JSON format. Each action is a JSON object that
    represents a move in the environment. The action should consist of the
    following elements:

    - "from_beaker": An integer representing the index of the beaker from which the paint is poured.
    - "to_beaker": An integer representing the index of the beaker to which the paint is poured.
    - "transfer_amount": An integer (0 to 100) indicating how much paint (in ml) is transferred from the 'from_beaker' to the 'to_beaker'.
    - "is_done": A boolean value (true or false). Set to true if this is the final action and you want to compare the current state with the target. Set to false otherwise.
    - "compare_beaker": An integer representing the index of the beaker to compare with the target when 'is_done' is true. This field is only relevant if 'is_done' is true.

    Your actions in the environment should work towards producing a beaker
    which is as close as possible to the target beaker (in both color and
    amount) in as few steps as possible.

    Note that the environment uses a subtractive color mixing model. You should
    leverage this fact to predict the result of mixing two colors for the
    purposes of planning. For reference, this is the function used to mix colors:

    class SubtractiveModel:
        @staticmethod
        def _rgb_to_cmy(rgb: Tuple[int, int, int]) -> Tuple[int, int, int]:
            return tuple(255 - value for value in rgb)

        @staticmethod
        def _cmy_to_rgb(cmy: Tuple[int, int, int]) -> Tuple[int, int, int]:
            return tuple(255 - value for value in cmy)

        @staticmethod
        def mix_colors(paint1: 'Paint', paint2: 'Paint') -> Tuple[int, int, int]:
            cmy1 = SubtractiveModel._rgb_to_cmy(paint1.color)
            cmy2 = SubtractiveModel._rgb_to_cmy(paint2.color)
            total_amount = paint1.amount + paint2.amount
            if paint1.amount == 0:
                return paint2.color
            if paint2.amount == 0:
                return paint1.color

            mixed_cmy = tuple(int((cmy1[i] * paint1.amount + cmy2[i] * paint2.amount) / total_amount) for i in range(3))
            return SubtractiveModel._cmy_to_rgb(mixed_cmy)
"""

In [22]:
# Textual snapshot of the environment handed to the model: one line per beaker
# (RGB colour and volume), then the target beaker and the request for an action.
obs = (
    "Given the following beakers: \n"
    "Beaker 0: RGB(252, 251, 0), 107ml\n"
    "Beaker 1: RGB(0, 0, 0), 0ml\n"
    "Beaker 2: RGB(255, 0, 255), 80ml\n"
    "Beaker 3: RGB(200, 108, 52), 97ml\n"
    "Beaker 4: RGB(5, 255, 255), 101ml\n"
    "Target beaker: RGB(44, 220, 226), 63ml\n"
    "Please output your action given this state."
)

# Conversation sent to the model: the system instructions first, then the observation.
messages = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": obs},
]

# Ask for a single completion, constrained to a JSON object reply.
response = client.chat.completions.create(
    messages=messages,
    model="gpt-3.5-turbo-1106",
    response_format={"type": "json_object"},
)

In [17]:
# Display the raw message text of the first returned choice; the request asked
# for a JSON-object reply, so this is the string parsed into an action later.
response.choices[0].message.content

'{\n  "from_beaker": 0,\n  "to_beaker": 1,\n  "transfer_amount": 63,\n  "is_done": true,\n  "compare_beaker": 1\n}'

In [14]:
import json


def _as_int(value, default=0):
    """Coerce a decoded JSON value to ``int``; return ``default`` if it is not numeric."""
    # bool is a subclass of int, but true/false is never a meaningful index or amount.
    if isinstance(value, bool):
        return default
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        # TypeError: None/list/dict; ValueError: non-numeric text or NaN;
        # OverflowError: Infinity (json.loads accepts NaN/Infinity literals).
        return default


def _as_bool(value):
    """Interpret a decoded JSON value as a flag, accepting the strings "true"/"false"."""
    if isinstance(value, str):
        # A non-empty string such as "false" would otherwise be truthy.
        return value.strip().lower() == "true"
    return bool(value)


def parse_action(action_data, num_beakers=None, max_transfer=100):
    """Build a sanitised action tuple from the model's decoded JSON reply.

    The reply is untrusted model output, so every field is coerced to the
    expected type and clamped to a legal range instead of being used verbatim.
    Missing or malformed fields fall back to 0 / False.

    Args:
        action_data: Dict decoded from the model's JSON reply.
        num_beakers: Optional number of beakers; when given, beaker indices are
            clamped to ``[0, num_beakers - 1]``. When ``None`` only the lower
            bound (0) is enforced.
        max_transfer: Largest allowed ``transfer_amount`` (the system prompt
            documents the range 0 to 100).

    Returns:
        Tuple ``(from_beaker, to_beaker, transfer_amount, is_done, compare_beaker)``
        where ``is_done`` is a bool and the other entries are ints.
        ``compare_beaker`` is 0 unless ``is_done`` is true.

    Raises:
        ValueError: If ``action_data`` is not a dict.
    """
    if not isinstance(action_data, dict):
        raise ValueError(
            f"expected a JSON object for the action, got {type(action_data).__name__}"
        )

    def beaker_index(key):
        index = max(_as_int(action_data.get(key, 0)), 0)
        if num_beakers is not None:
            index = min(index, max(num_beakers - 1, 0))
        return index

    is_done = _as_bool(action_data.get("is_done", False))
    transfer_amount = min(max(_as_int(action_data.get("transfer_amount", 0)), 0), max_transfer)
    # compare_beaker is only meaningful on the final action.
    compare_beaker = beaker_index("compare_beaker") if is_done else 0

    return (
        beaker_index("from_beaker"),
        beaker_index("to_beaker"),
        transfer_amount,
        is_done,
        compare_beaker,
    )


# Parse the JSON string returned by the model.
action_json = response.choices[0].message.content
action_data = json.loads(action_json)

# Construct the validated action tuple and expose its components by name.
action = parse_action(action_data)
from_beaker, to_beaker, transfer_amount, is_done, compare_beaker = action
print(action)

(0, 1, 63, True, 1)
