In [1]:
import os
import dotenv


# Step up to the parent folder exactly once per kernel session. `_SETUP` only
# exists after the first run of this cell, so re-running the cell does not keep
# climbing the directory tree (presumably the parent is the project root).
try:
    _SETUP
except NameError:
    _SETUP = True
    os.chdir("..")
print("Running on folder: ", os.getcwd())

from llm.structured import create

dotenv.load_dotenv()

Running on folder:  /Users/andre/test/ODSCB


* 'fields' has been removed


True

# States!


Let's consider two situations:

1. The alarm went off, I snoozed it twice. Then I woke up, spent 30 minutes doomscrolling on Instagram, ate a cookie, then started working.
2. The alarm went off, I woke up immediately, had a big glass of water, had oats for breakfast, went to the gym, then started working.

To make this more concrete, we can think about these situations in the following way:

- There is an agent (myself), who is deciding between different **policies**: i.e., actions to take.
- At each step, my **policies**, as well as the environment, take me to a different **state**, in which I need to evaluate again what to do next.

Encoded in the state we also have information about:

- The environment: was it raining? how late did I go to bed yesterday?
- The reward/cost: how good was that cookie?


Because we're an impatient bunch, let's get down to some coding.


In [2]:
import uuid
from typing import Literal, Optional
from pydantic import BaseModel, Field
from rich import print as rprint
import copy


class StateVariables(BaseModel):
    """State variables that describe the internal conditions of the agent.

    Attributes:
        context: A paragraph providing full context about the internal state of the agent
        pain_level: Level of physical or emotional pain/discomfort (0-100)
        pleasure_level: Level of pleasure or satisfaction being experienced (0-100)
        health_level: Overall physical and mental health condition (0-100)
        focus_level: Current ability to concentrate and stay on task (0-100)
    """

    # NOTE: pydantic puts the class docstring and every field description into
    # the model's JSON schema, and this class is handed to `create(...)` as the
    # structured-output model, so these texts presumably reach the LLM. The
    # description below used to talk about "the environment", which belongs to
    # EnvironmentVariables; it now matches the docstring (internal state).
    context: str = Field(
        default="",
        description="A paragraph providing full context about my own internal state",
    )

    # The 0-100 ranges are documented but not validated here; callers that
    # adjust the levels are expected to clamp them themselves.
    pain_level: int = Field(
        default=50, description="How much pain or discomfort I'm in, from 0 to 100"
    )
    pleasure_level: int = Field(
        default=50, description="How much pleasure I'm feeling right now, from 0 to 100"
    )
    health_level: int = Field(default=50, description="How healthy I am, from 0 to 100")
    focus_level: int = Field(default=50, description="How focused I am, from 0 to 100")


class EnvironmentVariables(BaseModel):
    """Environment variables that describe the external conditions.

    Attributes:
        context: A paragraph providing full context about the current environment
        weather: The current weather condition (sunny, cloudy, rainy, or snowy)
        hour_of_day: The current time of day in hours on a 24-hour clock; may be
            fractional (e.g. 8.5 for 8:30am)
        day_of_week: The current day of the week
    """

    # Every field carries a description, like StateVariables does, so that the
    # JSON schema generated from this model documents each value. The ranges
    # are descriptive only; nothing here validates them.
    context: str = Field(
        default="",
        description="Full context about the current environment",
    )
    weather: Literal["sunny", "cloudy", "rainy", "snowy"] = Field(
        default="sunny", description="The current weather condition"
    )
    # A float rather than an int: action durations are expressed in fractional
    # hours (e.g. 0.5) and get added to this value.
    hour_of_day: float = Field(
        default=0,
        description="The current time of day in hours on a 24-hour clock, e.g. 8.5 for 8:30am",
    )
    day_of_week: Literal[
        "monday", "tuesday", "wednesday", "thursday", "friday", "saturday", "sunday"
    ] = Field(default="monday", description="The current day of the week")


class MyOwnState(BaseModel):
    # One snapshot of the agent: its internal variables, the environment it is
    # in, the action that led here and a link to the snapshot it came from.
    # (Documented with comments rather than a docstring so that the model's
    # JSON schema stays exactly as it was.)

    # Unique identifier, generated when the state is created.
    uuid: str = Field(default_factory=lambda: str(uuid.uuid4()))
    last_action: str = Field(default="", description="The last action I took")
    previous_state_id: Optional[str] = Field(
        default=None, description="The id of the previous state I was in"
    )
    state_variables: StateVariables = Field(default_factory=StateVariables)
    environment_variables: EnvironmentVariables = Field(
        default_factory=EnvironmentVariables
    )

    def mutate(self):
        """Return a deep copy of this state that is linked back to it.

        The copy gets a freshly generated ``uuid`` and has its
        ``previous_state_id`` set to this state's ``uuid``; every other field
        is duplicated, so changing the copy never alters the original.
        """
        return self.model_copy(
            deep=True,
            update={
                "previous_state_id": self.uuid,
                "uuid": str(uuid.uuid4()),
            },
        )

In [3]:
# Build a state from the field defaults only (despite the name, nothing is
# randomised apart from the generated uuid) and pretty-print it.
random_state = MyOwnState()

rprint(random_state)

In [4]:
def eat_cookie(state: MyOwnState) -> MyOwnState:
    """Return the successor state reached by eating a cookie.

    The input state is left untouched: every change is applied to the copy
    produced by ``state.mutate()``.

    Effects on the new state:
        - ``last_action`` records the cookie.
        - ``pleasure_level`` goes up by 10, capped at 100.
        - ``health_level`` goes down by 10, floored at 0.
        - The clock advances by one hour. ``hour_of_day`` wraps around at 24
          and ``day_of_week`` moves forward when midnight is crossed, so the
          hour never leaves the 24-hour clock.

    Args:
        state: The state the agent is in before eating the cookie.

    Returns:
        The new state, as created by ``state.mutate()`` and then updated.
    """
    new_state = state.mutate()
    new_state.last_action = "Eating a delicious chocolate cookie"

    internal = new_state.state_variables
    internal.pleasure_level = min(100, internal.pleasure_level + 10)
    internal.health_level = max(0, internal.health_level - 10)

    # eating a cookie takes around an hour because we like to savor it.
    environment = new_state.environment_variables
    days_passed, environment.hour_of_day = divmod(environment.hour_of_day + 1, 24)
    if days_passed:
        # Midnight was crossed: roll the weekday forward as well, otherwise the
        # clock and the calendar would disagree.
        week = (
            "monday",
            "tuesday",
            "wednesday",
            "thursday",
            "friday",
            "saturday",
            "sunday",
        )
        today = week.index(environment.day_of_week)
        environment.day_of_week = week[(today + int(days_passed)) % len(week)]
    return new_state

In [5]:
# Apply the action to the default state; `random_state` itself is not modified
# because eat_cookie() works on the copy returned by mutate().
next_state = eat_cookie(random_state)
rprint(next_state)

Great, so now we defined a state, and also we created an "action" that mutates the state and advances the history.

It's a neat abstraction but not necessarily useful, unless we have a pretty well defined:

- Set of states,
- Set of actions that we can take conditional on the state and the environment,
- A way to compute the reward/cost of each action.

So essentially we need something akin to a "World Model"... i.e.: some way of modelling the dynamics of the environment and how the agent's actions interact with it - and also how its actions change its own internal variables.

Another thing to take into account is that the "reward" of an action may not be entirely quantifiable. I.e., while in this example we're using a simple toy model where "pleasure" and "pain" are just a number, in reality we often need to make trade-offs between qualitative outcomes whose logic is not entirely clear and relies on personal preferences.

i.e.: the clear signal we've reached AGI is when a model becomes a masochist or develops a taste for spicy food.


Now, once again because we're an impatient bunch, let's get down to some more coding and try to bootstrap a world model that's specific to our problem.


In [6]:
class Action(BaseModel):
    # One action taken by the agent, i.e. the edge between two states.
    # Documented with comments rather than a docstring: this model is passed
    # to `create(...)` as the structured-output schema, and a class docstring
    # would become part of that schema.

    name: str = Field(default="", description="The name of the action")
    details: str = Field(default="", description="The details of the action")
    # Duration in (possibly fractional) hours.
    time_spent: float = Field(
        default=0,
        description="How long this action took, in hours (e.g., 0.5 for half an hour)",
    )
    # The two ids below are bookkeeping filled in by the notebook code after
    # the action has been generated, hence the "Leave this empty." instruction.
    from_state_id: Optional[str] = Field(
        default=None,
        description="The id of the state I was in before taking this action. Leave this empty.",
    )
    to_state_id: Optional[str] = Field(
        default=None,
        description="The id of the state I'm in after taking this action. Leave this empty.",
    )

In [7]:
from llm.structured import create, create_iterable


# Step 1: have the LLM fill in the agent's internal variables from a persona prompt.
messages = [
    {
        "role": "user",
        "content": "You are Edgar Allan Poe and just woke up from a hangover. Create a state for yourself. Be detailed in the context.",
    }
]


new_state_variables = create(messages, StateVariables)

# Step 2: same idea for the environment. Note that `messages` is rebound here,
# so from now on it holds the environment prompt, not the persona prompt.
messages = [
    {
        "role": "user",
        "content": "It is 8am and it's a calm and cold day in Baltimore..",
    }
]

new_environment_variables = create(messages, EnvironmentVariables)


# Step 3: combine both halves into the initial state; uuid, last_action and
# previous_state_id keep their defaults.
state = MyOwnState(
    state_variables=new_state_variables,
    environment_variables=new_environment_variables,
)

rprint(state)

In [8]:
from llm.structured import create


# Show the LLM the current state serialised as JSON and ask it for one action.
messages = [
    {
        "role": "user",
        "content": f"This is your current state:\n\n{state.model_dump_json()}\n\nSuggest an action to take.",
    },
]

new_action = create(messages, Action)

# The model is told to leave the ids empty; record where the action starts from.
new_action.from_state_id = state.uuid

rprint(new_action)

In [9]:
# Ask the LLM to act as a world model: given the current state and the chosen
# action, predict the agent's next internal variables.
messages = [
    {
        "role": "system",
        # The line break and the four spaces after it are part of the prompt
        # text and are spelled out explicitly here.
        "content": (
            "You are a World Model. Your role is to observe a set of states and actions, "
            "and then predict the next internal state of the agent. Below you will find the \n"
            "    data describing these states and actions."
        ),
    },
    {"role": "user", "content": f"State: {state}"},
    {"role": "user", "content": f"Action: {new_action}"},
    {"role": "user", "content": "What is the next state of the agent?"},
]

next_state_variables = create(messages, StateVariables)

# mutate() hands back a deep copy with a fresh uuid whose previous_state_id
# already points at `state`, so only the remaining fields need updating.
next_state = state.mutate()
next_state.last_action = new_action.name
next_state.state_variables = next_state_variables
# The environment is carried over unchanged except for the elapsed time.
next_state.environment_variables.hour_of_day += new_action.time_spent

In [10]:
# Close the link: the action now points at the state it produced.
new_action.to_state_id = next_state.uuid

# Print the full transition: state before, action, state after.
rprint(state)
rprint(new_action)
rprint(next_state)

In [11]:
from tasks.prompts import (
    INITIALIZE_PROMPT,
    ACTION_SYSTEM_PROMPT,
    EVAL_SYSTEM_PROMPT,
    ENVIRONMENT_SYSTEM_PROMPT,
)

In [29]:
class Life(BaseModel):
    """An LLM-driven simulation made of alternating states and actions.

    ``initialize()`` creates the first state from ``context``. Each call to
    ``iterate()`` then asks the LLM for an action (``take_action``) and for the
    state that results from it (``evolve_state``). The most recent
    ``memory_size`` history entries are replayed to the LLM on every call.

    Attributes:
        context: Free-text description of the world and the person.
        states: Every state reached so far, oldest first.
        actions: Every action taken so far, oldest first.
        history: States and actions interleaved in the order they happened.
        memory_size: How many trailing ``history`` entries are put in prompts.
    """

    context: str = Field(
        default="", description="Initial context of the world and the person."
    )
    states: list[MyOwnState] = Field(default_factory=list)
    actions: list[Action] = Field(default_factory=list)

    history: list[MyOwnState | Action] = Field(default_factory=list)

    # Counts history entries (states AND actions), not just states.
    memory_size: int = Field(
        default=10,
        description="How many of the most recent history entries (states and actions) to include in prompts",
    )

    def _latest_state(self) -> MyOwnState:
        """Return the newest state, or raise IndexError if there is none yet."""
        if not self.states:
            raise IndexError("Life has no states yet; call initialize() first")
        return self.states[-1]

    def _latest_action(self) -> Action:
        """Return the newest action, or raise IndexError if there is none yet."""
        if not self.actions:
            raise IndexError("Life has no actions yet; call take_action() first")
        return self.actions[-1]

    def _prompt(self, system_prompt: str) -> list[dict]:
        """Build a message list: the system prompt followed by the recent history."""
        return [{"role": "system", "content": system_prompt}] + self.get_history()

    def initialize(self):
        """Create the first state from ``self.context`` and record it.

        The same prompt is sent twice: once to obtain the internal variables
        and once to obtain the environment variables.
        """
        messages = [
            {
                "role": "user",
                "content": INITIALIZE_PROMPT.format(context=self.context),
            }
        ]

        new_state_variables = create(messages, StateVariables)
        new_environment_variables = create(messages, EnvironmentVariables)

        new_state = MyOwnState(
            state_variables=new_state_variables,
            environment_variables=new_environment_variables,
        )
        self.states.append(new_state)
        self.history.append(new_state)

    def get_history(self):
        """Return the last ``memory_size`` history entries as user messages.

        Each entry becomes ``{"role": "user", "content": ...}`` where the
        content is the label ``State`` or ``Action`` followed by the entry's
        JSON dump. A ``memory_size`` of zero or less yields an empty list.
        """
        # history[-0:] would be the WHOLE list, so "no memory" needs its own branch.
        if self.memory_size <= 0:
            return []
        return [
            {
                "role": "user",
                "content": f"{'State' if isinstance(entry, MyOwnState) else 'Action'}:\n {entry.model_dump_json()}",
            }
            for entry in self.history[-self.memory_size :]
        ]

    def take_action(self):
        """Ask the LLM for the next action and append it to the records.

        Raises:
            IndexError: If no state exists yet (``initialize()`` was not called).
        """
        # Look the state up first so a missing initialize() fails before an
        # LLM call is spent.
        current_state = self._latest_state()

        new_action = create(self._prompt(ACTION_SYSTEM_PROMPT), Action)
        new_action.from_state_id = current_state.uuid
        self.actions.append(new_action)
        self.history.append(new_action)

    def sample_actions(self, n_actions: int = 10):
        """Lazily sample candidate actions together with their predicted outcomes.

        This is a generator and records nothing on the instance. For every
        candidate it yields two items in a row: first the ``Action``, then the
        ``StateVariables`` the LLM predicts for it.

        Args:
            n_actions: How many actions the prompt asks the LLM to sample.
        """
        new_actions = create_iterable(
            self._prompt(ACTION_SYSTEM_PROMPT + f"\n\nSample {n_actions} actions."),
            Action,
        )

        for action in new_actions:
            yield action

            messages = self._prompt(EVAL_SYSTEM_PROMPT) + [
                {
                    "role": "user",
                    "content": f"Action: {action}",
                }
            ]
            yield create(messages, StateVariables)

    def evolve_state(self):
        """Ask the LLM what the latest action leads to and record the new state.

        Internal variables and environment are predicted by two separate LLM
        calls over the same history. The clock in particular comes from the
        predicted environment; it is not computed from the action's
        ``time_spent``.

        Raises:
            IndexError: If there is no state or no action to evolve from.
        """
        # Validate before making any LLM call.
        current_state = self._latest_state()
        current_action = self._latest_action()

        next_state_variables = create(self._prompt(EVAL_SYSTEM_PROMPT), StateVariables)

        # now we evolve the environment too
        next_environment_variables = create(
            self._prompt(ENVIRONMENT_SYSTEM_PROMPT), EnvironmentVariables
        )

        # mutate() returns a copy with a fresh uuid whose previous_state_id
        # already points at current_state.
        next_state = current_state.mutate()
        next_state.state_variables = next_state_variables
        next_state.environment_variables = next_environment_variables
        next_state.last_action = current_action.name
        current_action.to_state_id = next_state.uuid

        self.states.append(next_state)
        self.history.append(next_state)

    def iterate(self):
        """Advance the simulation by one step: pick an action, then apply it."""
        self.take_action()
        self.evolve_state()

In [30]:
# Only the free-text context is supplied; states, actions and history start
# empty and memory_size keeps its default of 10.
life = Life(
    context="You are a person named Edgar Allan Poe, who just woke up from a hangover. It's Sunday morning in Baltimore in a winter day"
)

In [31]:
# Creates the first state from the context (two create() calls) and records it.
life.initialize()

In [32]:
# At this point the history holds just the initial state.
rprint(life.history)

In [33]:
# Run ten action -> new-state steps, printing each action and the state it led to.
# Every step makes several create() calls, so this cell is slow to re-run.
for n in range(10):
    life.iterate()
    rprint(life.actions[-1])
    rprint(life.states[-1])

In [34]:
# sample_actions() is a generator that alternates between a candidate Action
# and the StateVariables predicted for it, so despite its name `action` is an
# Action on one iteration and a StateVariables on the next.
actions = life.sample_actions()
for action in actions:
    rprint(action)

In [18]:
import pandas

# Use plotly for every DataFrame.plot() call that follows.
pandas.options.plotting.backend = "plotly"

# One row per recorded state: flatten the internal and the environment
# variables into a single record (environment values win on the shared
# "context" key), then keep only the numeric levels plus the clock.
df = pandas.DataFrame(
    [
        snapshot.state_variables.model_dump()
        | snapshot.environment_variables.model_dump()
        for snapshot in life.states
    ]
).loc[
    :,
    [
        "pain_level",
        "pleasure_level",
        "health_level",
        "focus_level",
        "hour_of_day",
    ],
]

In [19]:
# Plot the four levels against the hour of day, one line per column.
df.set_index("hour_of_day").plot()