
## Agents example


In [9]:

import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer


In [10]:

# Define the LLM Agent
class LLM_Agent:
    def __init__(self, model_name="gpt2"):
        
        self.tokenizer = GPT2Tokenizer.from_pretrained(model_name)
        self.model     = GPT2LMHeadModel.from_pretrained(model_name)

    def act(self, state):
        """
        Generate an action based on the state (text-based input).
        Args:
            state (str): Current state description.
        Returns:
            action (str): Generated action.
        """
        input_ids = self.tokenizer.encode(state, return_tensors="pt")
        output    = self.model.generate(input_ids, max_length=50, num_return_sequences=1, pad_token_id=self.tokenizer.eos_token_id)
        action    = self.tokenizer.decode(output[0], skip_special_tokens=True)
        
        return action.split("\n")[0]  # Take the first sentence/action


In [11]:

# Define a simple text-based environment
class SimpleTextEnv:
    def __init__(self):
        self.state = "You are in a room with two doors. One door leads to a treasure, the other to a trap."

    def reset(self):
        self.state = "You are in a room with two doors. One door leads to a treasure, the other to a trap."
        return self.state

    def step(self, action):
        """
        Take a step in the environment based on the agent's action.
        Args:
            action (str): The agent's action.
        Returns:
            next_state (str): The resulting state after the action.
            reward (float): The reward for the action.
            done (bool): Whether the episode has ended.
        """
        if "left" in action.lower():
            next_state = "You chose the left door and found a treasure!"
            reward = 1.0
            done = True
        elif "right" in action.lower():
            next_state = "You chose the right door and fell into a trap."
            reward = -1.0
            done = True
        else:
            next_state = "You are still in the room with two doors. Choose left or right."
            reward = 0.0
            done = False
        return next_state, reward, done



In [12]:


# Main Loop: LLM Agent interacting with the environment
def main():
    env = SimpleTextEnv()
    agent = LLM_Agent()

    state = env.reset()
    done = False
    total_reward = 0

    print(f"Initial state: {state}\n")

    while not done:
        # Agent acts
        action = agent.act(state)
        print(f"Agent's action: {action}")

        # Environment responds
        next_state, reward, done = env.step(action)
        print(f"Next state: {next_state}")
        print(f"Reward: {reward}\n")

        state = next_state
        total_reward += reward

    print(f"Episode finished with total reward: {total_reward}")

if __name__ == "__main__":
    main()



Initial state: You are in a room with two doors. One door leads to a treasure, the other to a trap.

Agent's action: You are in a room with two doors. One door leads to a treasure, the other to a trap. The trap is a trapdoor. The treasure is a treasure. The trapdoor is a trapdoor. The treasure is a treasure. The
Next state: You are still in the room with two doors. Choose left or right.
Reward: 0.0

Agent's action: You are still in the room with two doors. Choose left or right.
Next state: You chose the left door and found a treasure!
Reward: 1.0

Episode finished with total reward: 1.0
