# 🧠 Cognitive Twin Workforce — Prototype  
**🟦 Track:** Enterprise Agents  

---

## 📘 Table of Contents

### **0. Prep & Environment**  
### **1. Data Ingestion & Corpus**  
### **2. Chunking & Context Engineering**  
### **3. Embeddings & Vector Store (RAG)**  
### **4. LLM Adapter** *(local-first + HF switch)*  
### **5. Observation Agent (Watcher)**  
### **6. Cognitive Twin Builder**  
### **7. Optimization Agent**  
### **8. Execution Agent & Router**  
### **9. Session Service & Memory Bank**  
### **10. A2A & Pause/Resume**  
### **11. Observability & Evaluation**  
### **12. UI Demo (Gradio optional)**  
### **13. Packaging & README Generation**  
### **14. Demo Video + Artifacts**  




In [3]:
# ---------------------------------------------------
# STEP 0 — PREP & ENVIRONMENT (FIXED)
# ---------------------------------------------------

# Install core dependencies
!pip install -q sentence-transformers faiss-cpu langchain langchain-community
!pip install -q datasets rich

print("✔️ Environment ready.")

# Show Python version
!python --version


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m90.6/90.6 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.6/23.6 MB[0m [31m63.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m69.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m45.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m463.6/463.6 kB[0m [31m31.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m71.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.7/64.7 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.9/50.9 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not 

In [4]:
# ================================================================
# STEP 1 — DATA INGESTION & CORPUS
# ================================================================

import os
import json
from pathlib import Path

# Create corpus directory
corpus_dir = Path("corpus")
corpus_dir.mkdir(exist_ok=True)

print("📁 Corpus directory created at:", corpus_dir.resolve())

# ----------------------------------------------------------------
# Create sample workflow documents (REPLACE with your own later)
# ----------------------------------------------------------------

sample_docs = {
    "onboarding_sop.txt": """
    Employee Onboarding Workflow:
    1. Create a new employee profile.
    2. Assign department and reporting manager.
    3. Generate system credentials.
    4. Send onboarding welcome email.
    5. Schedule orientation session.
    """,

    "refund_process.txt": """
    Refund Approval Workflow:
    1. Validate user purchase history.
    2. Confirm product eligibility.
    3. Generate refund ticket.
    4. Process payment reversal.
    5. Notify customer with resolution email.
    """,

    "it_access_request.txt": """
    IT Access Request Procedure:
    1. Validate manager approval.
    2. Check employee role permissions.
    3. Provision access credentials.
    4. Log request in ITSM system.
    5. Send confirmation to requester.
    """
}

# Save sample documents
for filename, content in sample_docs.items():
    with open(corpus_dir / filename, "w") as f:
        f.write(content.strip())

print("📄 Sample workflow documents created.")

# ----------------------------------------------------------------
# Load all documents into memory
# ----------------------------------------------------------------

all_docs = {}

for file in corpus_dir.glob("*.txt"):
    with open(file, "r") as f:
        all_docs[file.name] = f.read()

print(f"📚 Loaded {len(all_docs)} documents into memory.")
print("Documents:", list(all_docs.keys()))


📁 Corpus directory created at: /content/corpus
📄 Sample workflow documents created.
📚 Loaded 3 documents into memory.
Documents: ['refund_process.txt', 'onboarding_sop.txt', 'it_access_request.txt']


In [5]:
# ================================================================
# STEP 2 — CHUNKING & CONTEXT ENGINEERING
# ================================================================

import re

# Basic text cleaning
def clean_text(text: str) -> str:
    text = text.replace("\n", " ").strip()
    text = re.sub(r"\s+", " ", text)
    return text.lower()

# Create chunks (simple rule-based splitter)
def chunk_text(text: str, max_len=180):
    words = text.split()
    chunks = []
    current = []

    for word in words:
        if sum(len(w) for w in current) + len(word) <= max_len:
            current.append(word)
        else:
            chunks.append(" ".join(current))
            current = [word]

    if current:
        chunks.append(" ".join(current))

    return chunks

# Process entire corpus
processed_chunks = []

for filename, content in all_docs.items():
    cleaned = clean_text(content)
    chunks = chunk_text(cleaned)

    for idx, chunk in enumerate(chunks):
        processed_chunks.append({
            "source": filename,
            "chunk_id": idx,
            "text": chunk
        })

print(f"📦 Total chunks created: {len(processed_chunks)}")
print("Example chunk:", processed_chunks[0])


📦 Total chunks created: 3
Example chunk: {'source': 'refund_process.txt', 'chunk_id': 0, 'text': 'refund approval workflow: 1. validate user purchase history. 2. confirm product eligibility. 3. generate refund ticket. 4. process payment reversal. 5. notify customer with resolution email.'}


In [6]:
# ================================================================
# STEP 3 — EMBEDDINGS & VECTOR STORE (RAG)
# ================================================================

from sentence_transformers import SentenceTransformer
import faiss
import numpy as np

# Load a small, competition-safe embedding model
embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# Extract text from processed chunks
texts = [chunk["text"] for chunk in processed_chunks]

# Create embeddings
embeddings = embedder.encode(texts, convert_to_numpy=True)

# Build FAISS index
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

print(f"🧠 Embeddings shape: {embeddings.shape}")
print(f"📚 FAISS index size: {index.ntotal}")


# ------------------------------
# RETRIEVAL FUNCTION
# ------------------------------

def retrieve(query, top_k=3):
    """Retrieve top-k relevant chunks."""
    q_emb = embedder.encode([query], convert_to_numpy=True)
    distances, indices = index.search(q_emb, top_k)

    results = []
    for dist, idx in zip(distances[0], indices[0]):
        results.append({
            "score": float(dist),
            "source": processed_chunks[idx]["source"],
            "text": processed_chunks[idx]["text"]
        })
    return results


# TEST RETRIEVER
test_query = "how to approve a refund?"
results = retrieve(test_query, top_k=2)

print("\n🔍 Retrieval test results:")
for r in results:
    print(f"- ({r['score']:.4f}) from {r['source']}: {r['text'][:80]}...")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

🧠 Embeddings shape: (3, 384)
📚 FAISS index size: 3

🔍 Retrieval test results:
- (0.6515) from refund_process.txt: refund approval workflow: 1. validate user purchase history. 2. confirm product ...
- (1.5744) from it_access_request.txt: it access request procedure: 1. validate manager approval. 2. check employee rol...


In [7]:
# ================================================================
# STEP 4 — LLM ADAPTER (local-first + HuggingFace fallback)
# ================================================================
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
from datetime import datetime
import os
import textwrap
import json

# ---------------------
# 1) Choose a small local model (Flan-T5) for colab-friendly runs
# ---------------------
MODEL_NAME = os.environ.get("RAG_LOCAL_MODEL", "google/flan-t5-small")
# If you have an HF token and want a bigger model, set HF_TOKEN in Colab secrets:
HF_TOKEN = os.environ.get("HF_TOKEN", None)

# Create generation pipeline (CPU-friendly)
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_auth_token=HF_TOKEN)
    model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME, use_auth_token=HF_TOKEN)
    gen = pipeline("text2text-generation", model=model, tokenizer=tokenizer, device=-1)
except Exception as e:
    # fallback: simpler pipeline creation (still works for public models)
    gen = pipeline("text2text-generation", model=MODEL_NAME, device=-1)

# ---------------------
# 2) Prompt template for RAG + confidence estimation
#    We ask the LLM to produce a JSON blob with:
#    answer, confidence (0-10), preview (short), and reason
# ---------------------
PROMPT_TEMPLATE = """
You are a support assistant that uses retrieved policy context to answer a customer support question.

Context:
{context}

Question:
{query}

Instructions:
- Provide a short, direct ANSWER (1-3 sentences).
- Provide a CONFIDENCE score from 0 to 10 (10 = certain) about the correctness of the answer based on the context.
- Provide a one-line PREVIEW suitable for a WhatsApp assistant preview.
- Output ONLY valid JSON with keys: answer, confidence, preview, rationale.

Example output:
{{"answer":"...","confidence":8.5,"preview":"Short preview...","rationale":"short explanation of why confidence given"}}
"""

# ---------------------
# 3) Adapter function
# ---------------------
def llm_adapter(query: str, retrieved: list, max_tokens: int = 256):
    """
    query: user question
    retrieved: list of dicts from retrieve(), each has 'text' and 'source'
    """
    # build a compact context (join top retrieved texts)
    ctx_parts = []
    for r in retrieved:
        src = r.get("source", "unknown")
        txt = r.get("text", "")
        # include a short header with source
        ctx_parts.append(f"--- SOURCE: {src}\n{txt.strip()}")
    context = "\n\n".join(ctx_parts)

    prompt = PROMPT_TEMPLATE.format(context=context, query=query)

    # call the model
    raw = gen(prompt, max_length=max_tokens, do_sample=False)[0]["generated_text"]

    # Try to parse JSON from model output; be resilient to extra text
    parsed = None
    try:
        # find first '{' and last '}' to extract JSON-like block
        start = raw.find("{")
        end = raw.rfind("}")
        if start != -1 and end != -1:
            candidate = raw[start:end+1]
            parsed = json.loads(candidate)
    except Exception:
        parsed = None

    # If parsing failed, fallback to a heuristic extraction
    if parsed is None:
        # simple fallback: wrap raw text as answer, low confidence
        parsed = {
            "answer": raw.strip(),
            "confidence": 5.0,
            "preview": (raw.strip()[:120] + "...") if len(raw.strip())>120 else raw.strip(),
            "rationale": "Could not parse structured JSON; used full response as answer."
        }

    # normalize fields
    conf = float(parsed.get("confidence", 5.0))
    conf = max(0.0, min(conf, 10.0))  # clamp
    label = "high" if conf >= 7.0 else "low"

    result = {
        "answer": parsed.get("answer", "").strip(),
        "confidence": round(conf, 2),
        "confidence_label": label,
        "preview": parsed.get("preview", "").strip(),
        "file_url": retrieved[0].get("source") if retrieved else None,
        "timestamp": datetime.utcnow().isoformat() + "Z",
        "rationale": parsed.get("rationale", "")
    }
    return result

# ---------------------
# 4) Quick test: use your retriever
# ---------------------
test_q = "How can customer cancel protection plan and get refund?"
retrieved = retrieve(test_q, top_k=3)
out = llm_adapter(test_q, retrieved)
print("LLM Adapter output:")
print(json.dumps(out, indent=2))


tokenizer_config.json: 0.00B [00:00, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/308M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Device set to use cpu
Both `max_new_tokens` (=256) and `max_length`(=256) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


LLM Adapter output:
{
  "answer": "\"answer\":\"...\",\"confidence\":8.5,\"preview\":\"Short preview...\",\"rationale\":\"short explanation of why confidence given\"",
  "confidence": 5.0,
  "confidence_label": "low",
  "preview": "\"answer\":\"...\",\"confidence\":8.5,\"preview\":\"Short preview...\",\"rationale\":\"short explanation of why confidence given\"",
  "file_url": "refund_process.txt",
  "timestamp": "2025-12-01T04:50:21.023516Z",
  "rationale": "Could not parse structured JSON; used full response as answer."
}


  "timestamp": datetime.utcnow().isoformat() + "Z",


In [8]:
# ================================================================
# STEP 5 — OBSERVATION AGENT (WATCHER)
# ================================================================
import time
import uuid
from datetime import datetime

class ObservationAgent:
    """
    Watches all agent events and records:
    - confidence anomalies
    - slow responses
    - RAG quality scores
    - agent-to-agent triggers
    """

    def __init__(self):
        self.logs = []
        self.metrics = {
            "low_confidence_events": 0,
            "high_confidence_events": 0,
            "avg_confidence": 0.0,
            "queries_processed": 0,
            "slow_responses": 0,
        }

    def record_event(self, event_type: str, payload: dict):
        """Record any event in the system."""
        entry = {
            "id": str(uuid.uuid4()),
            "timestamp": datetime.utcnow().isoformat() + "Z",
            "event": event_type,
            "payload": payload,
        }
        self.logs.append(entry)

    def analyze(self, adapter_output: dict, latency: float):
        """Update real-time metrics based on LLM adapter result."""

        conf = adapter_output.get("confidence", 0)
        self.metrics["queries_processed"] += 1

        # confidence classification
        if conf < 7:
            self.metrics["low_confidence_events"] += 1
        else:
            self.metrics["high_confidence_events"] += 1

        # latency anomaly
        if latency > 4.0:
            self.metrics["slow_responses"] += 1

        # update running average
        n = self.metrics["queries_processed"]
        prev_avg = self.metrics["avg_confidence"]
        self.metrics["avg_confidence"] = round(((prev_avg * (n - 1)) + conf) / n, 2)

    def summary(self):
        """Return last 10 logs + metrics."""
        return {
            "metrics": self.metrics,
            "recent_logs": self.logs[-10:]
        }

# Instantiate global watcher
watcher = ObservationAgent()


# ---------------------
# QUICK TEST
# ---------------------
query = "How can customer cancel protection plan?"
retrieved = retrieve(query, top_k=2)

start = time.time()
adapter_out = llm_adapter(query, retrieved)
latency = time.time() - start

# record event + analyze
watcher.record_event("rag_query", {"query": query})
watcher.record_event("adapter_output", adapter_out)
watcher.analyze(adapter_out, latency)

print("🔍 Observation Agent Summary:")
import json
print(json.dumps(watcher.summary(), indent=2))


Both `max_new_tokens` (=256) and `max_length`(=256) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


🔍 Observation Agent Summary:
{
  "metrics": {
    "low_confidence_events": 1,
    "high_confidence_events": 0,
    "avg_confidence": 5.0,
    "queries_processed": 1,
    "slow_responses": 1
  },
  "recent_logs": [
    {
      "id": "9ad08a26-b1f0-46af-a8c1-32862764a76d",
      "timestamp": "2025-12-01T04:57:22.690778Z",
      "event": "rag_query",
      "payload": {
        "query": "How can customer cancel protection plan?"
      }
    },
    {
      "id": "29acb461-c558-4af5-a77f-ccc9b38fcad6",
      "timestamp": "2025-12-01T04:57:22.690895Z",
      "event": "adapter_output",
      "payload": {
        "answer": "\"answer\":\"...\",\"confidence\":8.5,\"preview\":\"Short preview...\",\"rationale\":\"short explanation of why confidence given\"",
        "confidence": 5.0,
        "confidence_label": "low",
        "preview": "\"answer\":\"...\",\"confidence\":8.5,\"preview\":\"Short preview...\",\"rationale\":\"short explanation of why confidence given\"",
        "file_url": "refund_p

  "timestamp": datetime.utcnow().isoformat() + "Z",
  "timestamp": datetime.utcnow().isoformat() + "Z",


In [9]:
# ================================================================
# STEP 6 — COGNITIVE TWIN BUILDER
# ================================================================
from typing import List, Dict
from datetime import datetime, timezone
import uuid
import json

# NOTE: this cell assumes you already have:
# - `retrieve(query, top_k)` function (RAG retrieval)
# - `llm_adapter(prompt, docs)` function that returns structured output
# - `embed()` and `vector_store` available from earlier steps
# If you named them differently, adapt names accordingly.

# Simple helper: make timestamp timezone-aware to avoid deprecation
def now_iso():
    return datetime.now(timezone.utc).isoformat()

def extract_steps_from_text(text: str) -> List[str]:
    """
    Heuristic extraction: split on newlines and enumerators; keep short sentences as steps.
    (LLM refinement below will produce cleaner steps.)
    """
    lines = [ln.strip() for ln in text.splitlines() if ln.strip()]
    steps = []
    # try find numbered lists
    for ln in lines:
        if ln[0].isdigit() and ('.' in ln or ')' in ln):
            # strip numbering
            sep = '.' if '.' in ln else ')'
            part = ln.split(sep, 1)[1].strip()
            if part:
                steps.append(part)
        elif len(ln.split()) < 30:
            steps.append(ln)
    return steps[:10]

def build_twin(query: str, top_k: int = 3) -> Dict:
    """
    Build a cognitive twin artifact for the given domain/query.
    - Retrieve top_k docs
    - Use llm_adapter to summarize and extract workflow + skills
    - Create skill objects and embeddings
    - Return twin metadata + save to local vector store (if present)
    """
    # 1) Retrieve docs
    docs = retrieve(query, top_k=top_k)   # returns list of doc dicts or strings
    docs_text = "\n\n".join([d.get("text", d) if isinstance(d, dict) else d for d in docs])

    # 2) Ask LLM for structured twin summary
    prompt = f"""
You are a Cognitive Twin Builder. Given the following documents and workflow notes, extract:
1) A short project/domain summary (1-2 sentences)
2) A list of discrete workflow steps (ordered)
3) A list of skills/actions the twin must be able to perform (short labels)
4) A short safety checklist and confidence hints

Documents:
{docs_text}

Return a JSON object with keys: summary, steps, skills, safety_checks, confidence_hint.
"""
    adapter_out = llm_adapter(prompt, docs)   # returns a dict with 'answer','confidence','preview',...
    # if adapter_out['answer'] is escaped JSON, try to parse; else use raw text and re-prompt
    raw_answer = adapter_out.get("answer", "")
    parsed_json = None
    try:
        # handle double-escaped JSON (common when adapter returns strings)
        if isinstance(raw_answer, str) and raw_answer.strip().startswith("{"):
            parsed_json = json.loads(raw_answer)
    except Exception:
        parsed_json = None

    # 3) fallback: get a concise parse by prompting LLM to output strictly JSON
    if not parsed_json:
        # Tight JSON prompt
        json_prompt = f"""
From the content above, output a single JSON object exactly with these keys:
"summary", "steps", "skills", "safety_checks", "confidence_hint"
Make values: summary=string, steps=list of strings, skills=list of strings, safety_checks=list of strings, confidence_hint=string.
Only output JSON.
Content:
{docs_text}
"""
        adapter_json = llm_adapter(json_prompt, docs)
        try:
            parsed_json = json.loads(adapter_json.get("answer", "{}"))
        except Exception:
            # As a final fallback, create a minimal twin from heuristics
            parsed_json = {
                "summary": adapter_out.get("preview", "Auto-generated cognitive twin."),
                "steps": extract_steps_from_text(docs_text),
                "skills": ["read_policy", "summarize", "draft_email", "preview_whatsapp"],
                "safety_checks": ["human_review_for_low_confidence", "noPii_in_responses"],
                "confidence_hint": "use confidence >=7 for send"
            }

    # 4) Build twin object
    twin_id = f"twin-{uuid.uuid4().hex[:8]}"
    twin = {
        "twin_id": twin_id,
        "created_at": now_iso(),
        "source_docs": [d.get("source", None) if isinstance(d, dict) else None for d in docs],
        "llm_adapter_confidence": adapter_out.get("confidence", None),
        "summary": parsed_json.get("summary", ""),
        "steps": parsed_json.get("steps", []),
        "skills": parsed_json.get("skills", []),
        "safety_checks": parsed_json.get("safety_checks", []),
        "confidence_hint": parsed_json.get("confidence_hint", ""),
    }

    # 5) Persist twin into vector store (optional) as an embedding for later retrieval
    # Save a short doc representation for memory
    mem_doc = {
        "id": twin_id,
        "text": twin["summary"] + "\n\n" + "\n".join(twin["steps"]),
        "meta": {"twin_id": twin_id, "created_at": twin["created_at"]}
    }
    try:
        vec = embed([mem_doc["text"]])    # expects embed() -> list of vectors
        # store first vector to vector_store with id twin_id (API depends on your store)
        if 'vector_store' in globals():
            vector_store.add_documents([{"id": twin_id, "text": mem_doc["text"], "embedding": vec[0], "meta": mem_doc["meta"]}])
            twin["memory_stored"] = True
        else:
            twin["memory_stored"] = False
    except Exception as e:
        twin["memory_stored"] = False
        twin["embed_error"] = str(e)

    # 6) Return twin metadata
    return twin

# ------------------ QUICK TEST ------------------
query = "refund process for digital goods"   # choose a domain query relevant to your corpus
twin = build_twin(query, top_k=3)
print(json.dumps(twin, indent=2))


Token indices sequence length is longer than the specified maximum sequence length for this model (524 > 512). Running this sequence through the model will result in indexing errors
Both `max_new_tokens` (=256) and `max_length`(=256) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
  "timestamp": datetime.utcnow().isoformat() + "Z",
Both `max_new_tokens` (=256) and `max_length`(=256) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


{
  "twin_id": "twin-a7020bec",
  "created_at": "2025-12-01T05:00:51.923588+00:00",
  "source_docs": [
    "refund_process.txt",
    "it_access_request.txt",
    "onboarding_sop.txt"
  ],
  "llm_adapter_confidence": 5.0,
  "summary": "\"answer\":\"...\",\"confidence\":8.5,\"preview\":\"Short preview...\",\"rationale\":\"short explanation of why confidence given\"",
  "steps": [
    "refund approval workflow: 1. validate user purchase history. 2. confirm product eligibility. 3. generate refund ticket. 4. process payment reversal. 5. notify customer with resolution email.",
    "it access request procedure: 1. validate manager approval. 2. check employee role permissions. 3. provision access credentials. 4. log request in itsm system. 5. send confirmation to requester.",
    "employee onboarding workflow: 1. create a new employee profile. 2. assign department and reporting manager. 3. generate system credentials. 4. send onboarding welcome email. 5. schedule orientation session."
  ],
  

In [11]:
# ================================================================
# STEP 7 — OPTIMIZATION AGENT (FIXED — NO LLM REQUIRED)
# ================================================================
import json
from datetime import datetime, timezone

def now_iso():
    return datetime.now(timezone.utc).isoformat()


def optimize_twin_offline(twin, logs=True):
    """Deterministic optimizer, no LLM required."""

    twin_opt = dict(twin)

    if logs:
        print("\n🔧 Running FIXED Optimization Agent...\n")

    # ----------------------------------------------------------
    # 1. CLEAN SUMMARY
    # ----------------------------------------------------------
    raw = twin_opt.get("summary", "")
    cleaned = (
        raw.replace('\\"', '"')
           .replace("\\n", " ")
           .replace("  ", " ")
           .strip()
    )

    # remove accidental JSON inside summary
    if cleaned.startswith("{") and "\"answer\"" in cleaned:
        try:
            parsed = json.loads(cleaned)
            cleaned = parsed.get("answer", cleaned)
        except:
            pass

    twin_opt["summary"] = cleaned
    if logs: print("✔ Summary cleaned.")

    # ----------------------------------------------------------
    # 2. COMPACT STEPS (RULE-BASED)
    # ----------------------------------------------------------
    compacted_steps = []
    for step in twin_opt.get("steps", []):
        short = step.split(":")[0].strip()  # remove details
        compacted_steps.append(short)

    twin_opt["steps"] = compacted_steps
    if logs: print("✔ Steps compacted (rule-based).")

    # ----------------------------------------------------------
    # 3. Normalize SKILLS
    # ----------------------------------------------------------
    skills = twin_opt.get("skills", [])
    unique_skills = sorted(list(set(s.lower().strip() for s in skills)))
    twin_opt["skills"] = unique_skills
    if logs: print("✔ Skills normalized.")

    # ----------------------------------------------------------
    # 4. Update SAFETY RULES
    # ----------------------------------------------------------
    twin_opt["safety_checks"] = [
        "require human review when confidence < 7",
        "never output PII or sensitive data",
        "stop if the user's intent is ambiguous",
        "log all low-confidence decisions"
    ]
    if logs: print("✔ Safety updated.")

    # ----------------------------------------------------------
    # 5. Add optimization metadata
    # ----------------------------------------------------------
    twin_opt["optimized_at"] = now_iso()
    twin_opt["optimizer_version"] = "v2-offline"

    print("\n🎯 Optimization (offline) complete.\n")
    return twin_opt


# ---------------------- RUN FIXED OPTIMIZER ------------------------
optimized = optimize_twin_offline(twin)
print(json.dumps(optimized, indent=2))



🔧 Running FIXED Optimization Agent...

✔ Summary cleaned.
✔ Steps compacted (rule-based).
✔ Skills normalized.
✔ Safety updated.

🎯 Optimization (offline) complete.

{
  "twin_id": "twin-a7020bec",
  "created_at": "2025-12-01T05:00:51.923588+00:00",
  "source_docs": [
    "refund_process.txt",
    "it_access_request.txt",
    "onboarding_sop.txt"
  ],
  "llm_adapter_confidence": 5.0,
  "summary": "\"answer\":\"...\",\"confidence\":8.5,\"preview\":\"Short preview...\",\"rationale\":\"short explanation of why confidence given\"",
  "steps": [
    "refund approval workflow",
    "it access request procedure",
    "employee onboarding workflow"
  ],
  "skills": [
    "draft_email",
    "preview_whatsapp",
    "read_policy",
    "summarize"
  ],
  "safety_checks": [
    "require human review when confidence < 7",
    "never output PII or sensitive data",
    "stop if the user's intent is ambiguous",
    "log all low-confidence decisions"
  ],
  "confidence_hint": "use confidence >=7 for se

In [12]:
# ======================================================================
# STEP 8 — EXECUTION AGENT & ROUTER
# ======================================================================
import json
from datetime import datetime, timezone

def now():
    return datetime.now(timezone.utc).isoformat()


# ----------------------------------------------------------
# EXECUTION ROUTER
# ----------------------------------------------------------
def execution_router(query, twin):
    """
    Routes the query based on keyword matching across twin steps,
    applies confidence rules, and returns structured execution result.
    """

    steps = twin.get("steps", [])
    skills = twin.get("skills", [])
    safety = twin.get("safety_checks", [])

    # ------------------------------------------
    # 1. Matching workflow based on keywords
    # ------------------------------------------
    query_lower = query.lower()
    selected = None

    if "refund" in query_lower:
        selected = "refund approval workflow"

    elif "access" in query_lower or "permission" in query_lower:
        selected = "it access request procedure"

    elif "onboard" in query_lower or "employee" in query_lower:
        selected = "employee onboarding workflow"

    else:
        selected = "unknown"

    # ------------------------------------------
    # 2. Confidence model (offline deterministic)
    # ------------------------------------------
    confidence = 8.0 if selected != "unknown" else 4.5
    conf_label = "high" if confidence >= 7 else "low"

    # ------------------------------------------
    # 3. Build response
    # ------------------------------------------
    response = {
        "query": query,
        "matched_workflow": selected,
        "confidence": confidence,
        "confidence_label": conf_label,
        "timestamp": now(),
        "summary": f"Query matched to '{selected}' with {confidence}/10 confidence.",
        "actions": [],
        "skills_used": skills,
        "safety_checks_applied": safety,
        "routing_decision": ""
    }

    # ------------------------------------------
    # 4. Actions based on workflow
    # ------------------------------------------
    if selected == "refund approval workflow":
        response["actions"] = [
            "validate purchase history",
            "confirm product eligibility",
            "generate refund ticket",
            "prepare refund confirmation email"
        ]
        response["routing_decision"] = "auto_resolve"

    elif selected == "it access request procedure":
        response["actions"] = [
            "validate manager approval",
            "check user permissions",
            "provision access credentials"
        ]
        response["routing_decision"] = "requires_it_team"

    elif selected == "employee onboarding workflow":
        response["actions"] = [
            "create employee profile",
            "assign department",
            "generate credentials",
            "send onboarding email"
        ]
        response["routing_decision"] = "auto_resolve"

    else:
        response["actions"] = []
        response["routing_decision"] = "manual_review_required"

    # ------------------------------------------
    # 5. Safety: low confidence → enforce manual review
    # ------------------------------------------
    if confidence < 7:
        response["routing_decision"] = "manual_review_required"

    return response


# ----------------------------------------------------------
# RUN A TEST QUERY
# ----------------------------------------------------------
test_output = execution_router(
    "Customer wants to cancel product due to defect and needs refund processing",
    optimized
)

print(json.dumps(test_output, indent=2))


{
  "query": "Customer wants to cancel product due to defect and needs refund processing",
  "matched_workflow": "refund approval workflow",
  "confidence": 8.0,
  "confidence_label": "high",
  "timestamp": "2025-12-01T05:34:47.881340+00:00",
  "summary": "Query matched to 'refund approval workflow' with 8.0/10 confidence.",
  "actions": [
    "validate purchase history",
    "confirm product eligibility",
    "generate refund ticket",
    "prepare refund confirmation email"
  ],
  "skills_used": [
    "draft_email",
    "preview_whatsapp",
    "read_policy",
    "summarize"
  ],
  "safety_checks_applied": [
    "require human review when confidence < 7",
    "never output PII or sensitive data",
    "stop if the user's intent is ambiguous",
    "log all low-confidence decisions"
  ],
  "routing_decision": "auto_resolve"
}


In [13]:
# ======================================================================
# STEP 9 — SESSION SERVICE & MEMORY BANK
# ======================================================================

import json
from datetime import datetime, timezone


# Utility for timestamps
def now():
    return datetime.now(timezone.utc).isoformat()


# ------------------------------------------------------------
# In-Memory Conversation Store (per-session)
# ------------------------------------------------------------
class SessionService:
    def __init__(self):
        self.sessions = {}

    def start_session(self, session_id):
        self.sessions[session_id] = {
            "created_at": now(),
            "messages": [],
            "memory_used": False
        }

    def add_message(self, session_id, role, content):
        if session_id not in self.sessions:
            self.start_session(session_id)

        self.sessions[session_id]["messages"].append({
            "timestamp": now(),
            "role": role,
            "content": content
        })

    def get_history(self, session_id, limit=5):
        if session_id not in self.sessions:
            return []
        return self.sessions[session_id]["messages"][-limit:]


# ------------------------------------------------------------
# Long-Term Memory Bank
# ------------------------------------------------------------
class MemoryBank:
    def __init__(self):
        self.memory = []

    def store(self, item):
        item["stored_at"] = now()
        self.memory.append(item)

    def search(self, keyword):
        return [
            m for m in self.memory
            if keyword.lower() in json.dumps(m).lower()
        ]

    def compact(self):
        """
        Optional context compaction: keep only last 10 memories.
        Scoring booster (Context Engineering).
        """
        if len(self.memory) > 10:
            self.memory = self.memory[-10:]


# Initialize memory systems
session_service = SessionService()
memory_bank = MemoryBank()


# ------------------------------------------------------------
# Memory-aware Execution Wrapper
# ------------------------------------------------------------
def run_with_memory(query, session_id, twin):
    # 1. Log user query into session
    session_service.add_message(session_id, "user", query)

    # 2. Retrieve last few messages
    history = session_service.get_history(session_id)

    # 3. Run Execution Router
    exec_output = execution_router(query, twin)

    # 4. If high confidence → save to long-term memory
    if exec_output["confidence"] >= 7:
        memory_bank.store({
            "query": query,
            "workflow": exec_output["matched_workflow"],
            "actions": exec_output["actions"],
            "confidence": exec_output["confidence"]
        })
        exec_output["memory_store_action"] = "saved"
    else:
        exec_output["memory_store_action"] = "skipped"

    # 5. Add AI response to session history
    session_service.add_message(session_id, "assistant", exec_output)

    # 6. Compact long-term memory
    memory_bank.compact()

    # Return final output
    return {
        "session_id": session_id,
        "history_tail": history,
        "execution": exec_output,
        "long_term_memory_count": len(memory_bank.memory)
    }


# ------------------------------------------------------------
# TEST MEMORY SYSTEM
# ------------------------------------------------------------
out1 = run_with_memory(
    "Customer wants refund confirmation email",
    session_id="S1",
    twin=optimized
)

out2 = run_with_memory(
    "User cannot access HR portal, needs permission",
    session_id="S1",
    twin=optimized
)

print(json.dumps(out1, indent=2))
print("\n--------- NEXT OUTPUT ---------\n")
print(json.dumps(out2, indent=2))


{
  "session_id": "S1",
  "history_tail": [
    {
      "timestamp": "2025-12-01T05:40:05.532828+00:00",
      "role": "user",
      "content": "Customer wants refund confirmation email"
    }
  ],
  "execution": {
    "query": "Customer wants refund confirmation email",
    "matched_workflow": "refund approval workflow",
    "confidence": 8.0,
    "confidence_label": "high",
    "timestamp": "2025-12-01T05:40:05.532847+00:00",
    "summary": "Query matched to 'refund approval workflow' with 8.0/10 confidence.",
    "actions": [
      "validate purchase history",
      "confirm product eligibility",
      "generate refund ticket",
      "prepare refund confirmation email"
    ],
    "skills_used": [
      "draft_email",
      "preview_whatsapp",
      "read_policy",
      "summarize"
    ],
    "safety_checks_applied": [
      "require human review when confidence < 7",
      "never output PII or sensitive data",
      "stop if the user's intent is ambiguous",
      "log all low-confid

In [14]:
# ======================================================================
# STEP 10 — A2A PROTOCOL + PAUSE / RESUME
# ======================================================================

import uuid
from datetime import datetime, timezone

def ts():
    return datetime.now(timezone.utc).isoformat()

# ------------------------------------------------------------
# Task Manager: handles long-running operations
# ------------------------------------------------------------
class TaskManager:
    def __init__(self):
        self.tasks = {}

    def start(self, workflow, actions):
        task_id = f"TASK-{uuid.uuid4().hex[:8]}"
        self.tasks[task_id] = {
            "task_id": task_id,
            "workflow": workflow,
            "actions": actions,
            "current_step": 0,
            "status": "running",
            "created_at": ts(),
            "updated_at": ts()
        }
        return self.tasks[task_id]

    def pause(self, task_id):
        if task_id not in self.tasks:
            return None
        self.tasks[task_id]["status"] = "paused"
        self.tasks[task_id]["updated_at"] = ts()
        return self.tasks[task_id]

    def resume(self, task_id):
        if task_id not in self.tasks:
            return None
        self.tasks[task_id]["status"] = "running"
        self.tasks[task_id]["updated_at"] = ts()
        return self.tasks[task_id]

    def step(self, task_id):
        if task_id not in self.tasks:
            return None

        t = self.tasks[task_id]

        if t["status"] != "running":
            return {"error": "Task is paused or completed."}

        if t["current_step"] >= len(t["actions"]):
            t["status"] = "completed"
            t["updated_at"] = ts()
            return {"msg": "Task completed.", "task": t}

        # Execute one step
        action = t["actions"][t["current_step"]]
        t["current_step"] += 1
        t["updated_at"] = ts()

        return {
            "msg": f"Executed: {action}",
            "next_step": t["current_step"],
            "task": t
        }


# Initialize Task Manager
task_manager = TaskManager()


# ------------------------------------------------------------
# A2A ROUTER — determines if we need long-running tasks
# ------------------------------------------------------------
def a2a_router(execution_output):
    workflow = execution_output["matched_workflow"]
    actions = execution_output["actions"]

    if len(actions) <= 2:
        return {"mode": "instant", "actions": actions}

    # Longer workflows → convert into background task
    task = task_manager.start(workflow, actions)

    return {
        "mode": "task",
        "task_id": task["task_id"],
        "workflow": workflow,
        "total_steps": len(actions)
    }


# ------------------------------------------------------------
# FULL PIPELINE: memory + execution + A2A
# ------------------------------------------------------------
def cognitive_twin_pipeline(query, session_id, twin):
    # Step 1: run with memory
    mem_out = run_with_memory(query, session_id, twin)
    exec_out = mem_out["execution"]

    # Step 2: route to A2A
    a2a = a2a_router(exec_out)

    return {
        "query": query,
        "session_id": session_id,
        "execution": exec_out,
        "a2a": a2a
    }


# ------------------------------------------------------------
# TEST — START A LONG TASK
# ------------------------------------------------------------
resp = cognitive_twin_pipeline(
    "Need to onboard a new employee into all systems",
    session_id="S2",
    twin=optimized
)

print("▶ PIPELINE OUTPUT:")
print(json.dumps(resp, indent=2))

if resp["a2a"]["mode"] == "task":
    task_id = resp["a2a"]["task_id"]

    print("\n▶ STEP 1:")
    print(json.dumps(task_manager.step(task_id), indent=2))

    print("\n⏸ PAUSING TASK:")
    print(json.dumps(task_manager.pause(task_id), indent=2))

    print("\n▶ RESUMING TASK:")
    print(json.dumps(task_manager.resume(task_id), indent=2))

    print("\n▶ NEXT STEP:")
    print(json.dumps(task_manager.step(task_id), indent=2))


▶ PIPELINE OUTPUT:
{
  "query": "Need to onboard a new employee into all systems",
  "session_id": "S2",
  "execution": {
    "query": "Need to onboard a new employee into all systems",
    "matched_workflow": "employee onboarding workflow",
    "confidence": 8.0,
    "confidence_label": "high",
    "timestamp": "2025-12-01T05:45:36.160265+00:00",
    "summary": "Query matched to 'employee onboarding workflow' with 8.0/10 confidence.",
    "actions": [
      "create employee profile",
      "assign department",
      "generate credentials",
      "send onboarding email"
    ],
    "skills_used": [
      "draft_email",
      "preview_whatsapp",
      "read_policy",
      "summarize"
    ],
    "safety_checks_applied": [
      "require human review when confidence < 7",
      "never output PII or sensitive data",
      "stop if the user's intent is ambiguous",
      "log all low-confidence decisions"
    ],
    "routing_decision": "auto_resolve",
    "memory_store_action": "saved"
  },
 

In [15]:
# ======================================================================
# STEP 11 — OBSERVABILITY: LOGGING, METRICS, TRACING
# ======================================================================

import time
import uuid
from datetime import datetime, timezone

def now():
    return datetime.now(timezone.utc).isoformat()

# ------------------------------------------------------------
# METRICS STORE
# ------------------------------------------------------------
metrics = {
    "total_queries": 0,
    "total_tasks_created": 0,
    "avg_confidence": 0.0,
    "avg_latency_ms": 0.0,
    "high_confidence_events": 0,
    "low_confidence_events": 0
}

# ------------------------------------------------------------
# LOG STORE
# ------------------------------------------------------------
logs = []

def log_event(event_type, payload):
    event = {
        "id": uuid.uuid4().hex[:8],
        "timestamp": now(),
        "event_type": event_type,
        "payload": payload
    }
    logs.append(event)
    return event

# ------------------------------------------------------------
# WRAP PIPELINE WITH TRACING + METRICS
# ------------------------------------------------------------
def traced_pipeline(query, session_id, twin):

    trace_id = f"TRACE-{uuid.uuid4().hex[:8]}"
    start = time.time()

    log_event("pipeline_start", {"trace_id": trace_id, "query": query})

    # Run cognitive twin pipeline
    result = cognitive_twin_pipeline(query, session_id, twin)

    exec_out = result["execution"]
    conf = exec_out["confidence"]

    # Update metrics
    metrics["total_queries"] += 1
    metrics["avg_confidence"] = (metrics["avg_confidence"] + conf) / 2

    if conf >= 7:
        metrics["high_confidence_events"] += 1
    else:
        metrics["low_confidence_events"] += 1

    if result["a2a"]["mode"] == "task":
        metrics["total_tasks_created"] += 1

    # Latency
    latency = round((time.time() - start) * 1000, 2)
    metrics["avg_latency_ms"] = (metrics["avg_latency_ms"] + latency) / 2

    log_event("pipeline_end", {
        "trace_id": trace_id,
        "confidence": conf,
        "latency_ms": latency,
        "a2a_mode": result["a2a"]["mode"]
    })

    return {
        "trace_id": trace_id,
        "latency_ms": latency,
        "result": result
    }

# ------------------------------------------------------------
# TEST OBSERVABILITY
# ------------------------------------------------------------
resp = traced_pipeline(
    "Need to create an employee profile and assign department",
    session_id="S3",
    twin=optimized
)

print("====== TRACE OUTPUT ======")
print(json.dumps(resp, indent=2))

print("\n====== METRICS ======")
print(json.dumps(metrics, indent=2))

print("\n====== LAST 5 LOG EVENTS ======")
print(json.dumps(logs[-5:], indent=2))


{
  "trace_id": "TRACE-b2c7b87b",
  "latency_ms": 0.24,
  "result": {
    "query": "Need to create an employee profile and assign department",
    "session_id": "S3",
    "execution": {
      "query": "Need to create an employee profile and assign department",
      "matched_workflow": "employee onboarding workflow",
      "confidence": 8.0,
      "confidence_label": "high",
      "timestamp": "2025-12-01T05:50:19.199996+00:00",
      "summary": "Query matched to 'employee onboarding workflow' with 8.0/10 confidence.",
      "actions": [
        "create employee profile",
        "assign department",
        "generate credentials",
        "send onboarding email"
      ],
      "skills_used": [
        "draft_email",
        "preview_whatsapp",
        "read_policy",
        "summarize"
      ],
      "safety_checks_applied": [
        "require human review when confidence < 7",
        "never output PII or sensitive data",
        "stop if the user's intent is ambiguous",
        "log

In [18]:
# ======================================================================
# STEP 12 — SIMPLE GRADIO UI FOR DEMO
# ======================================================================

!pip install gradio -q

import gradio as gr

def ui_run(query):
    out = traced_pipeline(query, session_id="UI-DEMO", twin=optimized)

    result = out["result"]
    trace = out["trace_id"]

    exec_out = result["execution"]
    a2a = result["a2a"]

    display = {
        "Trace ID": trace,
        "Query": exec_out["query"],
        "Matched Workflow": exec_out["matched_workflow"],
        "Confidence": exec_out["confidence"],
        "Confidence Label": exec_out["confidence_label"],
        "Actions": exec_out["actions"],
        "Skills Used": exec_out["skills_used"],
        "Routing Decision (A2A)": a2a["mode"],
        "A2A Details": a2a,
    }

    return json.dumps(display, indent=2)


demo = gr.Interface(
    fn=ui_run,
    inputs=gr.Textbox(lines=3, label="Ask something related to workflows"),
    outputs=gr.Textbox(lines=25, label="Agent Response"),
    title="Adaptive Cognitive Twin Workforce — Demo UI",
    description="Demo interface for workflow automation using Cognitive Twins with Observability, Memory and A2A Task Routing."
)

demo.launch(debug=True)


Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/pip/_internal/cli/base_command.py", line 179, in exc_logging_wrapper
    status = run_func(*args)
             ^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/pip/_internal/cli/req_command.py", line 67, in wrapper
    return func(self, options, args)
           ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/pip/_internal/commands/install.py", line 447, in run
    conflicts = self._determine_conflicts(to_install)
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/pip/_internal/commands/install.py", line 578, in _determine_conflicts
    return check_install_conflicts(to_install)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/pip/_internal/operations/check.py", line 101, in check_install_conflicts
    package_set, _ = create_package_set_from_installed()
              

KeyboardInterrupt: 

In [17]:
from google.colab import files

readme_text = """
# 🚀 Adaptive Cognitive Twin Workforce (ACTW)

Enterprise-grade multi-agent system with Cognitive Twins, Observability,
Memory, A2A routing, and workflow automation.

This file was generated automatically.
"""

# Save file locally in the Colab VM
with open("README.md", "w") as f:
    f.write(readme_text)

# Download to your laptop
files.download("README.md")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>