<a href="https://colab.research.google.com/drive/1WmPECHLZcidTU6CnwWyaJMgL1e1AClHu?usp=sharing" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"></a>

In [1]:
!pip install -qU google-generativeai

In [2]:
import getpass
import json
import random
import re

import google.generativeai as genai

Get a free-tier Google Gemini API key here: https://aistudio.google.com/app/apikey

In [3]:
# Prompt for the API key at run time instead of hard-coding it in the notebook.
API_KEY = getpass.getpass("Enter your Google API key: ")

Enter your Google API key: ··········


In [4]:
# Pass the key collected in API_KEY to the google.generativeai module (`genai`).
genai.configure(api_key=API_KEY)

In [5]:
class RAPAgent:
    """Reasoning-via-Planning agent: simulate with an LLM, then act for real.

    A single Gemini model is used in three roles: proposing candidate actions
    (``generate_actions``), predicting the state an action leads to
    (``world_model``), and scoring a state against the goal
    (``reward_model``). ``plan_and_execute`` ties these together and runs the
    chosen actions against ``environment``.

    Args:
        environment: Object exposing ``get_state()`` and ``execute(action)``;
            the return value of each is used as the current state.
    """

    def __init__(self, environment):
        self.model = genai.GenerativeModel("gemini-2.0-flash-exp")
        self.environment = environment

    @staticmethod
    def _parse_json_object(text):
        """Return the JSON object embedded in ``text`` as a dict, or ``None``."""
        candidates = [text.strip()]

        # Chat models frequently wrap JSON in a Markdown code fence.
        fenced = re.search(r"```(?:json)?\s*(.*?)```", text, re.DOTALL | re.IGNORECASE)
        if fenced:
            candidates.append(fenced.group(1).strip())

        # Last resort: the outermost {...} span, ignoring surrounding prose.
        start, end = text.find("{"), text.rfind("}")
        if 0 <= start < end:
            candidates.append(text[start:end + 1])

        for candidate in candidates:
            try:
                parsed = json.loads(candidate)
            except ValueError:  # json.JSONDecodeError is a ValueError subclass
                continue
            # Only a JSON object can serve as a state; lists/numbers/strings
            # would break the `.copy()` calls made on states during planning.
            if isinstance(parsed, dict):
                return parsed
        return None

    def world_model(self, state, action):
        """LLM simulates state transition.

        Args:
            state: Current state; interpolated into the prompt as text.
            action: Action description (string) to simulate.

        Returns:
            The dict decoded from the model's JSON reply when one can be
            found. Otherwise a heuristic fallback: a copy of ``state`` whose
            numeric values are raised by 10 if the action mentions
            "increase", or lowered by 10 (floored at 0) if it mentions
            "decrease". A non-dict ``state`` is returned unchanged by the
            fallback.
        """
        prompt = f"""Simulate what happens after this action:

        Current State: {state}
        Action: {action}

        Predict new state (JSON format):"""

        response = self.model.generate_content(prompt).text

        predicted = self._parse_json_object(response)
        if predicted is not None:
            return predicted

        # Fallback: simple keyword-driven simulation on a copy of the state.
        if not isinstance(state, dict):
            return state
        new_state = state.copy()

        action_lower = action.lower()
        increase = "increase" in action_lower
        decrease = not increase and "decrease" in action_lower
        if not (increase or decrease):
            return new_state

        for key, value in new_state.items():
            # bool is a subclass of int; flags must not be treated as counters.
            if isinstance(value, bool) or not isinstance(value, (int, float)):
                continue
            new_state[key] = value + 10 if increase else max(0, value - 10)

        return new_state

    def reward_model(self, state, goal):
        """Evaluate how good a state is.

        Asks the model for a 0-10 score and rescales it.

        Returns:
            A float clamped to ``[0, 1]``. When no number can be found in the
            reply, the neutral value ``0.5`` is returned.
        """
        prompt = f"""Rate this state toward the goal (0-10):

        Goal: {goal}
        Current State: {state}

        Score (just number):"""

        response = self.model.generate_content(prompt).text

        score = None
        tokens = response.split()
        if tokens:
            try:
                score = float(tokens[0])
            except ValueError:
                score = None

        if score is None:
            # Tolerate replies such as "8/10", "**7**" or "Score: 6.5".
            number = re.search(r"-?\d+(?:\.\d+)?", response)
            if number is None:
                return 0.5
            score = float(number.group())

        return min(max(score / 10, 0), 1)

    def generate_actions(self, state):
        """Generate possible actions from current state.

        Returns:
            Up to three non-empty, whitespace-stripped lines of the model's
            reply (possibly an empty list).
        """
        prompt = f"""Given this state, suggest 3 possible actions:

      State: {state}

      Actions (one per line):"""

        response = self.model.generate_content(prompt).text
        actions = [line.strip() for line in response.split("\n") if line.strip()][:3]
        return actions

    def mcts_search(self, state, goal, depth=3, n_rollouts=2):
        """Monte Carlo Tree Search with LLM world model.

        As implemented this is a rollout-based lookahead: every candidate
        action is scored by the average cumulative reward of ``n_rollouts``
        simulated trajectories of length ``depth``. Each trajectory starts
        with the candidate and continues with the first action the model
        suggests for each simulated state.

        Args:
            state: Real current state; copied (``state.copy()``) per rollout.
            goal: Goal text passed to the reward model.
            depth: Number of simulated steps per rollout.
            n_rollouts: Number of rollouts averaged per candidate (>= 1).

        Returns:
            ``(best_action, best_reward)``; ``(None, -1)`` when the model
            proposed no actions.

        Raises:
            ValueError: If ``n_rollouts`` is less than 1.
        """
        if n_rollouts < 1:
            raise ValueError("n_rollouts must be at least 1")

        best_action = None
        best_reward = -1

        print(f"🌲 MCTS Planning (depth={depth})...\n")

        # Generate possible actions
        candidates = self.generate_actions(state)

        for candidate in candidates:
            print(f"  Testing: {candidate}")
            total_reward = 0

            # Simulate multiple rollouts
            for _ in range(n_rollouts):
                sim_state = state.copy()
                # Track the rollout's action separately so that every rollout
                # starts from the candidate and the candidate itself is what
                # gets credited with the reward.
                current_action = candidate
                rollout_reward = 0

                # Simulate future steps
                for step in range(depth):
                    # World model predicts next state
                    sim_state = self.world_model(sim_state, current_action)

                    # Reward model evaluates state
                    rollout_reward += self.reward_model(sim_state, goal)

                    # Generate next action for continued simulation
                    if step < depth - 1:
                        next_actions = self.generate_actions(sim_state)
                        if next_actions:
                            current_action = next_actions[0]

                total_reward += rollout_reward

            avg_reward = total_reward / n_rollouts
            print(f"    → Avg reward: {avg_reward:.2f}\n")

            # Strict comparison: on ties the earliest candidate wins.
            if avg_reward > best_reward:
                best_reward = avg_reward
                best_action = candidate

        return best_action, best_reward

    def execute_action(self, action):
        """Execute action in real environment and return the resulting state."""
        print(f"⚡ Executing: {action}")
        new_state = self.environment.execute(action)
        print(f"   New state: {new_state}\n")
        return new_state

    def plan_and_execute(self, goal, max_steps=5):
        """Main RAP loop: simulate, plan, execute.

        Repeats plan -> execute -> evaluate for at most ``max_steps`` steps.
        Stops early once the reward model scores the real state above 0.8,
        or when planning yields no action to execute.

        Returns:
            The last state returned by the environment (the initial state if
            nothing was executed).
        """
        print(f"\n{'='*60}")
        print(f"🎯 Goal: {goal}")
        print(f"{'='*60}\n")

        state = self.environment.get_state()
        print(f"📊 Initial state: {state}\n")

        for step in range(max_steps):
            print(f"{'─'*60}")
            print(f"STEP {step + 1}/{max_steps}")
            print(f"{'─'*60}\n")

            # Planning: Use MCTS with world model
            best_action, expected_reward = self.mcts_search(state, goal)

            if best_action is None:
                # The model proposed nothing; there is no action to execute.
                print("⚠️ No candidate actions generated; stopping early.\n")
                break

            print(f"🏆 Selected action: {best_action}")
            print(f"   Expected reward: {expected_reward:.2f}\n")

            # Execution: Take real action
            state = self.execute_action(best_action)

            # Evaluate real outcome
            actual_reward = self.reward_model(state, goal)
            print(f"📈 Actual reward: {actual_reward:.2f}\n")

            # Check if goal reached
            if actual_reward > 0.8:
                print("✅ Goal achieved!\n")
                break

        print(f"{'='*60}")
        print(f"📊 Final state: {state}")
        print(f"{'='*60}\n")

        return state

In [6]:
# Simple environment simulator
class Environment:
    """Toy keyword-driven environment whose state is a flat dict of metrics.

    Args:
        initial_state: Dict used as the starting state. It is never mutated;
            ``execute`` always works on a copy.
    """

    def __init__(self, initial_state):
        self.state = initial_state

    def get_state(self):
        """Return the current state dict."""
        return self.state

    def execute(self, action):
        """Execute action and return new state.

        The action text is matched case-insensitively against keywords; the
        first matching rule is applied:

        * "collect"/"gather": ``resources`` += 15
        * "build"/"construct": if ``resources`` >= 20, ``structures`` += 1
          and ``resources`` -= 20; otherwise nothing changes
        * "research"/"study": ``knowledge`` += 10
        * "train"/"practice": ``skill`` += 5
        * "explore": ``explored`` += 1 and ``resources`` += 5
        * "optimize"/"improve": every positive numeric value grows by 10%
          (ints are truncated back to int, floats stay floats)

        Unrecognised or non-string actions leave the values unchanged.

        Returns:
            The new state dict, which also becomes the stored state.
        """
        new_state = self.state.copy()

        # Parse action and update state; a non-string action matches no rule.
        action_lower = action.lower() if isinstance(action, str) else ""

        if "collect" in action_lower or "gather" in action_lower:
            new_state["resources"] = new_state.get("resources", 0) + 15

        elif "build" in action_lower or "construct" in action_lower:
            cost = 20
            if new_state.get("resources", 0) >= cost:
                new_state["structures"] = new_state.get("structures", 0) + 1
                new_state["resources"] -= cost

        elif "research" in action_lower or "study" in action_lower:
            new_state["knowledge"] = new_state.get("knowledge", 0) + 10

        elif "train" in action_lower or "practice" in action_lower:
            new_state["skill"] = new_state.get("skill", 0) + 5

        elif "explore" in action_lower:
            new_state["explored"] = new_state.get("explored", 0) + 1
            new_state["resources"] = new_state.get("resources", 0) + 5

        elif "optimize" in action_lower or "improve" in action_lower:
            for key, value in new_state.items():
                # bool is a subclass of int; leave flags alone.
                if isinstance(value, bool) or not isinstance(value, (int, float)):
                    continue
                if value > 0:
                    boosted = value * 1.1
                    # Keep float metrics as floats: truncating e.g. 1.0 -> 1
                    # would mean such a value could never improve.
                    new_state[key] = boosted if isinstance(value, float) else int(boosted)

        self.state = new_state
        return new_state

In [None]:
# Demo scenarios, run in order. Each entry is
# (banner title, initial environment state, goal text, step budget).
SCENARIOS = [
    # Example 1: Resource Management
    (
        "EXAMPLE 1: Resource Management Task",
        {"resources": 10, "structures": 0, "population": 5},
        "Build 3 structures",
        4,
    ),
    # Example 2: Skill Development
    (
        "EXAMPLE 2: Skill Development Path",
        {"knowledge": 0, "skill": 0, "experience": 0},
        "Reach 50 skill points",
        4,
    ),
    # Example 3: Exploration & Expansion
    (
        "EXAMPLE 3: Exploration Strategy",
        {"explored": 0, "resources": 20, "territory": 1},
        "Explore 3 new areas and gather resources",
        4,
    ),
    # Example 4: Multi-objective Task
    (
        "EXAMPLE 4: Complex Multi-step Task",
        {"resources": 5, "knowledge": 0, "structures": 0, "efficiency": 1.0},
        "Build 2 structures while maintaining resources above 10",
        5,
    ),
]

BANNER = "=" * 60

for index, (title, initial_state, goal, max_steps) in enumerate(SCENARIOS):
    # Only the banners after the first are preceded by a blank line, to
    # separate them from the previous scenario's output.
    print(BANNER if index == 0 else "\n" + BANNER)
    print(title)
    print(BANNER)

    # Every scenario gets a fresh environment and a fresh agent.
    agent = RAPAgent(Environment(initial_state))
    agent.plan_and_execute(goal, max_steps=max_steps)

print("✅ RAP: Reasoning via Planning Complete!")

EXAMPLE 1: Resource Management Task

🎯 Goal: Build 3 structures

📊 Initial state: {'resources': 10, 'structures': 0, 'population': 5}

────────────────────────────────────────────────────────────
STEP 1/4
────────────────────────────────────────────────────────────

🌲 MCTS Planning (depth=3)...

  Testing: 1.  Build Structure (cost: 5 resources)
