<a href="https://colab.research.google.com/github/frank-morales2020/Cloud_curious/blob/master/MEMENTO_GROK.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install xai-sdk -q

In [None]:
# notebook-python
# Improved Memento-style agent with real embeddings + growing trajectories

# ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
# 0. INSTALL DEPENDENCIES (run once in Colab)
# !pip install -q sentence-transformers

import numpy as np
from typing import List, Optional
from pydantic import BaseModel, Field, ConfigDict
from xai_sdk import Client
from xai_sdk.chat import user, system
from google.colab import userdata
from sentence_transformers import SentenceTransformer

# ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
# 1. INITIALIZE
XAI_key = userdata.get('XAI_KEY')
client = Client(api_host="api.x.ai", api_key=XAI_key)

# Real embedding model (small & fast)
embedder = SentenceTransformer('all-MiniLM-L6-v2')

# ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
# 2. DATA SCHEMA
class TrajectoryStep(BaseModel):
    action: str
    observation: str = Field(..., description="Result or feedback from the action")

class ExperienceCase(BaseModel):
    model_config = ConfigDict(arbitrary_types_allowed=True)
    problem: str
    embedding: np.ndarray
    trajectory: List[TrajectoryStep] = Field(default_factory=list)
    success: bool = False
    final_observation: Optional[str] = None

# ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
# 3. MEMORY ENGINE
class MementoMemory:
    def __init__(self, similarity_threshold: float = 0.65):
        self.cases: List[ExperienceCase] = []
        self.threshold = similarity_threshold

    def _embed(self, text: str) -> np.ndarray:
        return embedder.encode(text, convert_to_numpy=True)

    def store(self, problem: str, trajectory: List[TrajectoryStep], success: bool, final_observation: str):
        # Simple dedup: don't store if very similar problem already exists
        emb = self._embed(problem)
        for case in self.cases:
            sim = np.dot(emb, case.embedding) / (np.linalg.norm(emb) * np.linalg.norm(case.embedding))
            if sim > 0.97:  # almost identical
                return

        case = ExperienceCase(
            problem=problem,
            embedding=emb,
            trajectory=trajectory,
            success=success,
            final_observation=final_observation
        )
        self.cases.append(case)
        print(f"  ‚Üí Stored new experience ({'SUCCESS' if success else 'FAILURE'})")

    def retrieve_context(self, current_problem: str, top_k: int = 3) -> str:
        if not self.cases:
            return "No prior experience available."

        current_emb = self._embed(current_problem)
        scored_cases = []
        for case in self.cases:
            sim = np.dot(current_emb, case.embedding) / (
                np.linalg.norm(current_emb) * np.linalg.norm(case.embedding) + 1e-8
            )
            if sim >= self.threshold:
                scored_cases.append((sim, case))

        if not scored_cases:
            return "No sufficiently similar past experiences found."

        # Sort by similarity descending
        scored_cases.sort(key=lambda x: x[0], reverse=True)
        top_cases = scored_cases[:top_k]

        memory_blocks = []
        for sim, case in top_cases:
            status = "SUCCESSFUL" if case.success else "FAILED"
            traj_str = "\n".join([f"  ‚Ä¢ {s.action} ‚Üí {s.observation}" for s in case.trajectory])
            block = (
                f"Similarity: {sim:.3f} | Status: {status}\n"
                f"Task: {case.problem}\n"
                f"Trajectory:\n{traj_str}\n"
                f"Final outcome: {case.final_observation}"
            )
            memory_blocks.append(block)

        return "\n\n".join(["--- PAST EXPERIENCE ---"] + memory_blocks) + "\n"

# ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
# 4. AGENT
class MementoAgent:
    def __init__(self, memory: MementoMemory):
        self.memory = memory

    def execute(self, task: str):
        print(f"\n‚ïî‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïó")
        print(  f"‚ïë TARGET: {task}")
        print(  f"‚ïö‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïù")

        mem_context = self.memory.retrieve_context(task)

        prompt = (
            "You are an adaptive agent that learns from past experience.\n"
            "Use the provided memory to avoid repeating mistakes.\n"
            "Always decide between API_V1 (old, often broken) and API_V2 (current, preferred).\n\n"
            f"{mem_context}\n"
            f"Current Task: {task}\n\n"
            "Respond concisely. End your answer with exactly one of:\n"
            "ACTION: API_V1\n"
            "ACTION: API_V2\n"
        )

        # Call Grok
        chat_response = client.chat.create(
            model="grok-4-1-fast-reasoning",  # or grok-beta / grok-3 / etc.
            messages=[
                system("You are a helpful, reasoning-focused agent."),
                user(prompt)
            ]
        )

        # Robust content extraction
        if hasattr(chat_response, 'choices') and chat_response.choices:
            content = chat_response.choices[0].message.content
        elif hasattr(chat_response, 'message'):
            content = chat_response.message.content
        else:
            content = str(chat_response)  # fallback

        print("\n[GROK REASONING]")
        print(content.strip())
        print("-" * 50)

        # Parse final action
        action = "API_V1"
        if "ACTION: API_V2" in content.upper():
            action = "API_V2"

        # Simulated environment
        success = (action == "API_V2")
        observation = (
            "API call succeeded ‚Äì report retrieved"
            if success else
            "Error 404: API_V1 is deprecated and no longer available"
        )

        print(f"[ACTION TAKEN]   {action}")
        print(f"[OBSERVATION]    {observation}\n")

        # Build trajectory (in real system you would append more steps)
        trajectory = [
            TrajectoryStep(action=f"Selected endpoint: {action}", observation=observation)
        ]

        # Store full experience
        self.memory.store(
            problem=task,
            trajectory=trajectory,
            success=success,
            final_observation=observation
        )

# ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
# 5. DEMO RUN
if __name__ == "__main__":
    memory_bank = MementoMemory(similarity_threshold=0.68)
    agent = MementoAgent(memory_bank)

    print("=== DEMONSTRATION ===")

    print("\nRUN 1 ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ")
    agent.execute("Fetch quarterly sales report_alpha")

    print("\nRUN 2 ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ")
    agent.execute("Retrieve inventory status report_beta")

    print("\nRUN 3 ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ")
    agent.execute("Download financial summary report_gamma")

    print("\nRUN 4 (very similar to first) ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ")
    agent.execute("Fetch quarterly sales report_delta")   # should reuse memory

In [9]:
# ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
# MEMENTO-INSPIRED AGENT ‚Äî FINAL TUNED VERSION (stronger adaptation)
# Removed API_V1 choice entirely + early-stop + reflection prompt
# ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ

# !pip install -q sentence-transformers xai-sdk

import numpy as np
from typing import List, Tuple
from pydantic import BaseModel, Field, ConfigDict
from xai_sdk import Client
from xai_sdk.chat import user, system
from google.colab import userdata
from sentence_transformers import SentenceTransformer
import time

# ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
# 1. SETUP
XAI_KEY = userdata.get('XAI_KEY')
client = Client(api_host="api.x.ai", api_key=XAI_KEY)

embedder = SentenceTransformer('all-MiniLM-L6-v2')

# ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
# 2. DATA MODELS
class Step(BaseModel):
    thought: str
    action: str                    # Now only "USE_ENDPOINT" or "FINISH"
    observation: str

class Case(BaseModel):
    model_config = ConfigDict(arbitrary_types_allowed=True)
    task: str
    task_embedding: np.ndarray
    trajectory: List[Step] = Field(default_factory=list)
    final_success: bool = False
    final_reward: float = 0.0
    created_at: float = Field(default_factory=time.time)

# ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
# 3. MEMORY BANK (prioritize high-reward cases)
class MementoCaseBank:
    def __init__(self, top_k: int = 3, sim_threshold: float = 0.60):
        self.cases: List[Case] = []
        self.top_k = top_k
        self.sim_threshold = sim_threshold

    def _embed(self, text: str) -> np.ndarray:
        return embedder.encode(text, convert_to_numpy=True)

    def add(self, task: str, trajectory: List[Step], success: bool, reward: float):
        emb = self._embed(task)
        for c in self.cases:
            sim = np.dot(emb, c.task_embedding) / (np.linalg.norm(emb) * np.linalg.norm(c.task_embedding) + 1e-8)
            if sim > 0.97:
                if success and reward > c.final_reward:
                    c.trajectory = trajectory
                    c.final_success = success
                    c.final_reward = reward
                    print(f"  ‚Üª Updated better case")
                return

        self.cases.append(Case(task=task, task_embedding=emb, trajectory=trajectory,
                               final_success=success, final_reward=reward))
        print(f"  ‚Üí Stored case | Success={success} | Reward={reward:.2f} | Steps={len(trajectory)}")

    def retrieve(self, current_task: str) -> str:
        if not self.cases:
            return "No past cases."

        emb = self._embed(current_task)
        scored = []
        for case in self.cases:
            sim = np.dot(emb, case.task_embedding) / (np.linalg.norm(emb) * np.linalg.norm(case.task_embedding) + 1e-8)
            if sim >= self.sim_threshold:
                scored.append((sim, case))

        if not scored:
            return "No similar cases found."

        scored.sort(key=lambda x: (-x[0], -x[1].final_reward))  # sim desc, then reward desc
        top = scored[:self.top_k]

        blocks = []
        for sim, case in top:
            traj = "\n".join(f"  ‚Ä¢ {s.thought[:60]}... ‚Üí {s.action} ‚Üí {s.observation[:60]}..." for s in case.trajectory)
            block = f"Sim: {sim:.3f} | Reward: {case.final_reward:.2f} | Success: {case.final_success}\nTask: {case.task}\n{traj}"
            blocks.append(block)

        return "--- RETRIEVED SUCCESSFUL STRATEGIES (prioritize these) ---\n\n" + "\n\n".join(blocks) + "\n"

# ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
# 4. AGENT
class MementoGrokAgent:
    def __init__(self, memory: MementoCaseBank):
        self.memory = memory
        self.max_steps = 4

    def run(self, task: str, force_first_failure: bool = False):
        print(f"\n{'‚ïê'*60}\nTASK: {task}\n{'‚ïê'*60}")

        context = self.memory.retrieve(task)
        trajectory: List[Step] = []
        success = False

        for step_idx in range(1, self.max_steps + 1):
            prompt = (
                "You are an adaptive Memento-style agent. Use memory to succeed fast.\n"
                "CRITICAL RULES:\n"
                "1. NEVER output API_V1 ‚Äî it is deprecated and forbidden.\n"
                "2. ALWAYS use the current reliable endpoint when acting.\n"
                "3. Output only 'USE_ENDPOINT' for actions or 'FINISH' when done.\n"
                "4. Start THOUGHT by summarizing past cases: 'From memory: ... Therefore I will...'\n\n"
                f"{context}\n\n"
                f"Task: {task}  (step {step_idx}/{self.max_steps})\n"
                "THOUGHT: <reasoning, reference memory explicitly>\n"
                "ACTION: USE_ENDPOINT   or   ACTION: FINISH\n"
            )

            resp = client.chat.create(
                model="grok-4-1-fast-reasoning",
                messages=[system("Precise memory-driven planner."), user(prompt)]
            )

            content = resp.choices[0].message.content if hasattr(resp, 'choices') else str(resp)

            print(f"\n[Step {step_idx}]")
            print(content.strip()[:500] + ("..." if len(content) > 500 else ""))

            thought = ""
            action_raw = ""
            for line in content.splitlines():
                line = line.strip()
                if line.upper().startswith("THOUGHT:"):
                    thought = line[8:].strip()
                elif line.upper().startswith("ACTION:"):
                    action_raw = line[7:].strip().upper()

            if "FINISH" in action_raw:
                trajectory.append(Step(thought=thought, action="FINISH", observation="Task complete"))
                success = True
                break

            action = "USE_ENDPOINT"

            # Simulated env
            if force_first_failure and len(self.memory.cases) == 0 and step_idx == 1:
                obs = "Initial endpoint attempt failed (forced for demo) ‚Äî retrying smarter"
            else:
                obs = "Success: data retrieved via reliable endpoint"

            trajectory.append(Step(thought=thought, action=action, observation=obs))
            print(f"‚Üí Action: {action}")
            print(f"‚Üí Obs: {obs}")

            if "Success" in obs:
                success = True
                break  # EARLY STOP on success

        final_reward = 1.0 if success else 0.0
        print(f"\n[FINAL] Success = {success} | Reward = {final_reward:.2f}\n")
        self.memory.add(task, trajectory, success, final_reward)

# ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
# 5. RUN DEMO
if __name__ == "__main__":
    bank = MementoCaseBank()
    agent = MementoGrokAgent(bank)

    tasks = [
        "Fetch quarterly sales report Q1 2025",
        "Retrieve inventory status Europe region",
        "Fetch quarterly sales report Q2 2025",
        "Get marketing campaign performance Q1 2025",
        "Fetch quarterly sales report Q3 2025"
    ]

    for i, task in enumerate(tasks, 1):
        force = (i == 1)
        agent.run(task, force_first_failure=force)

Loading weights:   0%|          | 0/103 [00:00<?, ?it/s]

BertModel LOAD REPORT from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.



‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê
TASK: Fetch quarterly sales report Q1 2025
‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê

[Step 1]
messages {
  content {
    text: "Precise memory-driven planner."
  }
  role: ROLE_SYSTEM
}
messages {
  content {
    text: "You are an adaptive Memento-style agent. Use memory to succeed fast.\nCRITICAL RULES:\n1. NEVER output API_V1 ‚Äî it is deprecated and forbidden.\n2. ALWAYS use the current reliable endpoint when acting.\n3. Output only \'USE_ENDPOINT\' for actions or \'FINISH\' when done.\n4. Start THOUGHT by summarizing past cases: \'From memory: ... Therefore I will...\'\n\nNo past cases...
‚Üí Action: USE_ENDPOINT
‚Üí Obs: Initial endpoint attempt failed (forced for 

## GPT4 AND GROK4.1

In [16]:
# ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
# MEMENTO COMPARISON ‚Äî Grok-4.1 vs GPT-4.1 + o4-mini (FINAL #1)
# All issues fixed: roles, temperature, max_tokens, executor prompt, syntax
# ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ

# !pip install -q sentence-transformers openai xai-sdk

import numpy as np
from typing import List
from pydantic import BaseModel, Field, ConfigDict
from xai_sdk import Client as XAIClient
from xai_sdk.chat import system, user
from openai import OpenAI
from google.colab import userdata
from sentence_transformers import SentenceTransformer

# ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
# 1. CLIENTS
XAI_KEY = userdata.get('XAI_KEY')
OPENAI_KEY = userdata.get('OPENAI_API_KEY')   # ‚Üê Must be set in Colab Secrets

xai_client = XAIClient(api_host="api.x.ai", api_key=XAI_KEY)
openai_client = OpenAI(api_key=OPENAI_KEY)

embedder = SentenceTransformer('all-MiniLM-L6-v2')

# ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
# 2. MODELS
class Step(BaseModel):
    thought: str
    action: str
    observation: str

class Case(BaseModel):
    model_config = ConfigDict(arbitrary_types_allowed=True)
    task: str
    task_embedding: np.ndarray
    trajectory: List[Step] = Field(default_factory=list)
    final_success: bool = False
    final_reward: float = 0.0

# ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
# 3. MEMORY
class MementoCaseBank:
    def __init__(self, top_k=3, sim_threshold=0.60):
        self.cases: List[Case] = []
        self.top_k = top_k
        self.sim_threshold = sim_threshold

    def _embed(self, text: str) -> np.ndarray:
        return embedder.encode(text, convert_to_numpy=True)

    def add(self, task: str, trajectory: List[Step], success: bool, reward: float):
        emb = self._embed(task)
        for c in self.cases:
            norm = np.linalg.norm(emb) * np.linalg.norm(c.task_embedding) + 1e-8
            sim = np.dot(emb, c.task_embedding) / norm
            if sim > 0.97:
                if success and reward > c.final_reward:
                    c.trajectory = trajectory
                    c.final_success = success
                    c.final_reward = reward
                return
        self.cases.append(Case(task=task, task_embedding=emb, trajectory=trajectory,
                               final_success=success, final_reward=reward))

    def retrieve(self, current_task: str) -> str:
        if not self.cases:
            return "No past cases."

        emb = self._embed(current_task)
        scored = []

        for c in self.cases:
            norm = np.linalg.norm(emb) * np.linalg.norm(c.task_embedding) + 1e-8
            sim = np.dot(emb, c.task_embedding) / norm
            if sim >= self.sim_threshold:
                scored.append((sim, c))

        if not scored:
            return "No similar cases found."

        scored.sort(key=lambda x: (-x[0], -x[1].final_reward))
        top = scored[:self.top_k]

        blocks = []
        for sim, c in top:
            traj = "\n".join(
                f"  ‚Ä¢ {s.thought[:65] + '...' if len(s.thought) > 65 else s.thought} ‚Üí "
                f"{s.action} ‚Üí {s.observation[:65] + '...' if len(s.observation) > 65 else s.observation}"
                for s in c.trajectory
            )
            blocks.append(f"Sim: {sim:.3f} | Reward: {c.final_reward:.2f}\nTask: {c.task}\n{traj}")

        return "--- RETRIEVED SUCCESSFUL STRATEGIES ---\n\n" + "\n\n".join(blocks) + "\n"

# ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
# 4. AGENT
class MementoAgent:
    def __init__(self, memory: MementoCaseBank, backend: str = "grok"):
        self.memory = memory
        self.backend = backend.lower()
        self.max_steps = 4

    def _call_planner(self, prompt: str) -> str:
        if self.backend == "grok":
            resp = xai_client.chat.create(
                model="grok-4-1-fast-reasoning",
                messages=[system("You are a precise memory-driven planner."), user(prompt)]
            )
            return resp.choices[0].message.content if hasattr(resp, 'choices') else str(resp)

        resp = openai_client.chat.completions.create(
            model="gpt-4.1",
            messages=[
                {"role": "system", "content": "You are a precise Memento-style planner."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.0,
            max_tokens=400
        )
        return resp.choices[0].message.content

    def _call_executor(self, action: str, task: str) -> str:
        if self.backend == "grok":
            return "Success: data retrieved via reliable endpoint"

        exe_prompt = (
            f"For task '{task}' and action '{action}':\n"
            "Return ONLY exactly one of these two lines and nothing else:\n"
            "Success: data retrieved\n"
            "Error: failed to retrieve data"
        )
        resp = openai_client.chat.completions.create(
            model="o4-mini",
            messages=[{"role": "user", "content": exe_prompt}],
            seed=42
        )
        return resp.choices[0].message.content.strip()

    def run(self, task: str, force_first_failure: bool = False):
        print(f"\n{'‚ïê'*75}\n{self.backend.upper()} ‚Üí {task}\n{'‚ïê'*75}")

        context = self.memory.retrieve(task)
        trajectory: List[Step] = []
        success = False

        for step_idx in range(1, self.max_steps + 1):
            prompt = (
                "CRITICAL:\n"
                "‚Ä¢ Start with: THOUGHT: From memory: ... Therefore I will...\n"
                "‚Ä¢ Then: ACTION: USE_ENDPOINT or ACTION: FINISH\n\n"
                f"{context}\n"
                f"Task: {task} (step {step_idx}/{self.max_steps})\n"
            )

            content = self._call_planner(prompt)

            print(f"[Step {step_idx} ‚Äî {self.backend.upper()} Planner]")
            print(content.strip()[:500] + ("..." if len(content) > 500 else ""))

            thought = "No thought parsed"
            action_raw = "USE_ENDPOINT"
            for line in content.splitlines():
                line = line.strip()
                if line.upper().startswith("THOUGHT:"):
                    thought = line[8:].strip()
                elif line.upper().startswith("ACTION:"):
                    action_raw = line[7:].strip().upper()

            if "FINISH" in action_raw:
                trajectory.append(Step(thought=thought, action="FINISH", observation="Complete"))
                success = True
                break

            if force_first_failure and len(self.memory.cases) == 0 and step_idx == 1:
                obs = "Forced demo failure ‚Äî learning unreliable endpoint"
            else:
                obs = self._call_executor("USE_ENDPOINT", task)

            trajectory.append(Step(thought=thought, action="USE_ENDPOINT", observation=obs))
            print(f"‚Üí Obs: {obs}")

            if "Success" in obs or "retrieved" in obs.lower():
                success = True
                break

        final_reward = 1.0 if success else 0.0
        print(f"\n[FINAL {self.backend.upper()}] Success = {success} | Reward = {final_reward:.2f}\n")
        self.memory.add(task, trajectory, success, final_reward)

# ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
# 5. COMPARISON
if __name__ == "__main__":
    print("üöÄ MEMENTO COMPARISON ‚Äî Grok vs GPT-4.1 + o4-mini\n")

    tasks = [
        "Fetch quarterly sales report Q1 2025",
        "Retrieve inventory status Europe region",
        "Fetch quarterly sales report Q2 2025",
        "Get marketing campaign performance Q1 2025",
        "Fetch quarterly sales report Q3 2025"
    ]

    # Grok
    bank_grok = MementoCaseBank()
    agent_grok = MementoAgent(bank_grok, backend="grok")
    print("=== GROK-4.1 MODE ===")
    for i, t in enumerate(tasks, 1):
        agent_grok.run(t, force_first_failure=(i == 1))

    # Paper
    bank_paper = MementoCaseBank()
    agent_paper = MementoAgent(bank_paper, backend="openai")
    print("\n=== PAPER MODE ===")
    for i, t in enumerate(tasks, 1):
        agent_paper.run(t, force_first_failure=(i == 1))

    # Table
    print("\n" + "="*100)
    print("COMPARISON SUMMARY")
    print("="*100)
    print(f"{'Task':<50} {'Grok Steps':<12} {'Paper Steps':<12} {'Grok Succ':<10} {'Paper Succ':<10} {'Faster'}")
    print("-"*100)
    for i, task in enumerate(tasks):
        g_steps = len(bank_grok.cases[i].trajectory) if i < len(bank_grok.cases) else "N/A"
        p_steps = len(bank_paper.cases[i].trajectory) if i < len(bank_paper.cases) else "N/A"
        g_succ = "Yes" if i < len(bank_grok.cases) and bank_grok.cases[i].final_success else "No"
        p_succ = "Yes" if i < len(bank_paper.cases) and bank_paper.cases[i].final_success else "No"
        faster = "Tie" if g_steps == p_steps else ("Paper" if isinstance(p_steps, int) and isinstance(g_steps, int) and p_steps < g_steps else "Grok")
        print(f"{task[:49]:<50} {g_steps:<12} {p_steps:<12} {g_succ:<10} {p_succ:<10} {faster}")
    print("="*100)

Loading weights:   0%|          | 0/103 [00:00<?, ?it/s]

BertModel LOAD REPORT from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.


üöÄ MEMENTO COMPARISON ‚Äî Grok vs GPT-4.1 + o4-mini

=== GROK-4.1 MODE ===

‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê
GROK ‚Üí Fetch quarterly sales report Q1 2025
‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê
[Step 1 ‚Äî GROK Planner]
messages {
  content {
    text: "You are a precise memory-driven planner."
  }
  role: ROLE_SYSTEM
}
messages {
  content {
    text: "CRITICAL:\n‚Ä¢ Start with: THOUGHT: From memory: ... Therefore I will...\n‚Ä¢ Then: ACTION: USE_ENDPOINT or ACTION: FINISH\n\nNo past cases.\nTask: Fetch quarterly sales report Q1 2025 (step 1/4)\n"
  }
  role: ROLE_USER
}
model: "grok-4-1-fast-reasoning"
‚Üí Ob