In [None]:
!pip -q install -U transformers sentence-transformers faiss-cpu accelerate

import os, time, json, math, sqlite3, hashlib
from dataclasses import dataclass
from typing import List, Dict, Any, Optional
import numpy as np
import faiss
import torch
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

def _now_ts():
    return int(time.time())

def _sha(s: str) -> str:
    return hashlib.sha256(s.encode("utf-8", errors="ignore")).hexdigest()[:16]

def _ensure_dir(p: str):
    os.makedirs(p, exist_ok=True)

def _safe_clip(text: str, max_chars: int = 1800) -> str:
    text = (text or "").strip()
    if len(text) <= max_chars:
        return text
    return text[:max_chars].rstrip() + " …"

@dataclass
class MemoryItem:
    mid: str
    role: str
    text: str
    created_ts: int
    importance: float
    tokens_est: int
    meta: Dict[str, Any]

In [None]:
class EverMemAgentOS:
    def __init__(
        self,
        workdir: str = "/content/evermem_agent_os",
        db_name: str = "evermem.sqlite",
        embedding_model: str = "sentence-transformers/all-MiniLM-L6-v2",
        gen_model: str = "google/flan-t5-small",
        stm_max_turns: int = 10,
        ltm_topk: int = 6,
        consolidate_every: int = 8,
        consolidate_trigger_tokens: int = 1400,
        compress_target_chars: int = 420,
        seed: int = 7,
    ):
        self.workdir = workdir
        _ensure_dir(self.workdir)
        self.db_path = os.path.join(self.workdir, db_name)

        self.embedder = SentenceTransformer(embedding_model)
        self.embed_dim = self.embedder.get_sentence_embedding_dimension()

        self.tokenizer = AutoTokenizer.from_pretrained(gen_model)
        self.model = AutoModelForSeq2SeqLM.from_pretrained(gen_model)
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)
        self.model.eval()

        self.stm_max_turns = stm_max_turns
        self.ltm_topk = ltm_topk
        self.consolidate_every = consolidate_every
        self.consolidate_trigger_tokens = consolidate_trigger_tokens
        self.compress_target_chars = compress_target_chars

        np.random.seed(seed)

        self._init_db()
        self._init_faiss()

        self.stm: List[Dict[str, str]] = []
        self.turns = 0

    def _init_db(self):
        conn = sqlite3.connect(self.db_path)
        cur = conn.cursor()
        cur.execute(
            """
            CREATE TABLE IF NOT EXISTS memories (
                mid TEXT PRIMARY KEY,
                role TEXT,
                text TEXT,
                created_ts INTEGER,
                importance REAL,
                tokens_est INTEGER,
                meta_json TEXT
            )
            """
        )
        cur.execute(
            """
            CREATE TABLE IF NOT EXISTS kv_store (
                k TEXT PRIMARY KEY,
                v_json TEXT,
                updated_ts INTEGER
            )
            """
        )
        cur.execute(
            """
            CREATE TABLE IF NOT EXISTS consolidations (
                cid TEXT PRIMARY KEY,
                created_ts INTEGER,
                summary TEXT,
                source_mids_json TEXT
            )
            """
        )
        conn.commit()
        conn.close()

    def _init_faiss(self):
        self.faiss_index_path = os.path.join(self.workdir, "faiss.index")
        self.faiss_map_path = os.path.join(self.workdir, "faiss_map.json")

        if os.path.exists(self.faiss_index_path) and os.path.exists(self.faiss_map_path):
            self.index = faiss.read_index(self.faiss_index_path)
            with open(self.faiss_map_path, "r", encoding="utf-8") as f:
                self.id_map = json.load(f)
            self.id_map = {int(k): v for k, v in self.id_map.items()}
            self.next_faiss_id = (max(self.id_map.keys()) + 1) if self.id_map else 0
            return

        self.index = faiss.IndexFlatIP(self.embed_dim)
        self.id_map: Dict[int, str] = {}
        self.next_faiss_id = 0
        self._persist_faiss()

    def _persist_faiss(self):
        faiss.write_index(self.index, self.faiss_index_path)
        with open(self.faiss_map_path, "w", encoding="utf-8") as f:
            json.dump({str(k): v for k, v in self.id_map.items()}, f)

    def _embed(self, texts: List[str]) -> np.ndarray:
        vecs = self.embedder.encode(texts, convert_to_numpy=True, normalize_embeddings=True)
        if vecs.ndim == 1:
            vecs = vecs.reshape(1, -1)
        return vecs.astype("float32")

    def _tokens_est(self, text: str) -> int:
        text = text or ""
        return max(1, int(len(text.split()) * 1.25))

    def _importance_score(self, role: str, text: str, meta: Dict[str, Any]) -> float:
        base = 0.35
        length_bonus = min(0.45, math.log1p(len(text)) / 20.0)
        role_bonus = 0.08 if role == "user" else 0.03
        pin = 0.35 if meta.get("pinned") else 0.0
        signal = meta.get("signal", "")
        signal_bonus = 0.18 if signal in {"decision", "preference", "fact", "task"} else 0.0
        q_bonus = 0.06 if "?" in text else 0.0
        number_bonus = 0.05 if any(ch.isdigit() for ch in text) else 0.0
        return float(min(1.0, base + length_bonus + role_bonus + pin + signal_bonus + q_bonus + number_bonus))

    def upsert_kv(self, k: str, v: Any):
        conn = sqlite3.connect(self.db_path)
        cur = conn.cursor()
        cur.execute(
            "INSERT INTO kv_store (k, v_json, updated_ts) VALUES (?, ?, ?) ON CONFLICT(k) DO UPDATE SET v_json=excluded.v_json, updated_ts=excluded.updated_ts",
            (k, json.dumps(v, ensure_ascii=False), _now_ts()),
        )
        conn.commit()
        conn.close()

    def get_kv(self, k: str, default=None):
        conn = sqlite3.connect(self.db_path)
        cur = conn.cursor()
        cur.execute("SELECT v_json FROM kv_store WHERE k=?", (k,))
        row = cur.fetchone()
        conn.close()
        if not row:
            return default
        try:
            return json.loads(row[0])
        except Exception:
            return default

    def add_memory(self, role: str, text: str, meta: Optional[Dict[str, Any]] = None) -> str:
        meta = meta or {}
        text = (text or "").strip()
        mid = meta.get("mid") or f"m:{_sha(f'{_now_ts()}::{role}::{text[:80]}::{np.random.randint(0, 10**9)}')}"
        created_ts = _now_ts()
        tokens_est = self._tokens_est(text)
        importance = float(meta.get("importance")) if meta.get("importance") is not None else self._importance_score(role, text, meta)

        conn = sqlite3.connect(self.db_path)
        cur = conn.cursor()
        cur.execute(
            "INSERT OR REPLACE INTO memories (mid, role, text, created_ts, importance, tokens_est, meta_json) VALUES (?, ?, ?, ?, ?, ?, ?)",
            (mid, role, text, created_ts, importance, tokens_est, json.dumps(meta, ensure_ascii=False)),
        )
        conn.commit()
        conn.close()

        vec = self._embed([text])
        fid = self.next_faiss_id
        self.next_faiss_id += 1
        self.index.add(vec)
        self.id_map[fid] = mid
        self._persist_faiss()

        return mid

In [None]:
    def _fetch_memories_by_ids(self, mids: List[str]) -> List[MemoryItem]:
        if not mids:
            return []
        placeholders = ",".join(["?"] * len(mids))
        conn = sqlite3.connect(self.db_path)
        cur = conn.cursor()
        cur.execute(
            f"SELECT mid, role, text, created_ts, importance, tokens_est, meta_json FROM memories WHERE mid IN ({placeholders})",
            mids,
        )
        rows = cur.fetchall()
        conn.close()

        items = []
        for r in rows:
            meta = {}
            try:
                meta = json.loads(r[6]) if r[6] else {}
            except Exception:
                meta = {}
            items.append(
                MemoryItem(
                    mid=r[0],
                    role=r[1],
                    text=r[2],
                    created_ts=int(r[3]),
                    importance=float(r[4]),
                    tokens_est=int(r[5]),
                    meta=meta,
                )
            )
        mid_pos = {m: i for i, m in enumerate(mids)}
        items.sort(key=lambda x: mid_pos.get(x.mid, 10**9))
        return items

    def retrieve_ltm(self, query: str, topk: Optional[int] = None) -> List[MemoryItem]:
        topk = topk or self.ltm_topk
        qv = self._embed([query])
        scores, ids = self.index.search(qv, topk + 8)
        mids = []
        for fid in ids[0].tolist():
            if fid == -1:
                continue
            mid = self.id_map.get(int(fid))
            if mid:
                mids.append(mid)
        mids = list(dict.fromkeys(mids))[:topk]
        return self._fetch_memories_by_ids(mids)

    def _format_stm(self) -> str:
        turns = self.stm[-self.stm_max_turns:]
        chunks = []
        for t in turns:
            chunks.append(f"{t['role'].upper()}: {t['content']}")
        return "\n".join(chunks).strip()

    def _format_ltm(self, ltm_items: List[MemoryItem]) -> str:
        if not ltm_items:
            return ""
        lines = []
        for i, it in enumerate(ltm_items, 1):
            ts_age = max(1, (_now_ts() - it.created_ts) // 3600)
            imp = f"{it.importance:.2f}"
            tag = it.meta.get("signal", "")
            tag = f" | {tag}" if tag else ""
            lines.append(f"[LTM {i}] (imp={imp}, age_h={ts_age}{tag}) {it.role}: {_safe_clip(it.text, 420)}")
        return "\n".join(lines).strip()

    @torch.inference_mode()
    def _gen(self, prompt: str, max_new_tokens: int = 180) -> str:
        inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024).to(self.device)
        out_ids = self.model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=0.6,
            top_p=0.92,
            num_beams=1,
        )
        out = self.tokenizer.decode(out_ids[0], skip_special_tokens=True)
        return (out or "").strip()

    def _compress_memories(self, items: List[MemoryItem], max_chars: int = 520) -> str:
        raw = "\n".join([f"- {it.role}: {it.text}" for it in items])
        raw = _safe_clip(raw, 3500)
        prompt = (
            "Summarize the following notes into a compact memory that preserves decisions, preferences, facts, and tasks. "
            f"Keep it under {max_chars} characters.\n\nNOTES:\n{raw}\n\nCOMPACT MEMORY:"
        )
        summ = self._gen(prompt, max_new_tokens=170).strip()
        if len(summ) > max_chars:
            summ = summ[:max_chars].rstrip() + "…"
        return summ

    def consolidate(self) -> Optional[str]:
        conn = sqlite3.connect(self.db_path)
        cur = conn.cursor()
        cur.execute("SELECT mid, role, text, created_ts, importance, tokens_est, meta_json FROM memories ORDER BY created_ts DESC LIMIT 160")
        rows = cur.fetchall()
        conn.close()

        items = []
        for r in rows:
            try:
                meta = json.loads(r[6]) if r[6] else {}
            except Exception:
                meta = {}
            items.append(MemoryItem(r[0], r[1], r[2], int(r[3]), float(r[4]), int(r[5]), meta))

        if not items:
            return None

        items_sorted = sorted(items, key=lambda x: (-(x.importance + 0.15 * (1.0 / (1.0 + (_now_ts() - x.created_ts) / 3600.0))), -x.created_ts))
        picked = items_sorted[:18]
        summary = self._compress_memories(picked, max_chars=520)

        cid = f"c:{_sha(f'{_now_ts()}::{summary[:120]}::{np.random.randint(0, 10**9)}')}"
        source_mids = [it.mid for it in picked]

        conn = sqlite3.connect(self.db_path)
        cur = conn.cursor()
        cur.execute(
            "INSERT OR REPLACE INTO consolidations (cid, created_ts, summary, source_mids_json) VALUES (?, ?, ?, ?)",
            (cid, _now_ts(), summary, json.dumps(source_mids, ensure_ascii=False)),
        )
        conn.commit()
        conn.close()

        self.add_memory(
            role="system",
            text=f"Consolidated memory: {summary}",
            meta={"signal": "consolidation", "pinned": True, "source_mids": source_mids, "cid": cid, "importance": 0.95},
        )
        return cid

In [None]:
    def _should_consolidate(self) -> bool:
        if self.turns > 0 and self.turns % self.consolidate_every == 0:
            return True
        conn = sqlite3.connect(self.db_path)
        cur = conn.cursor()
        cur.execute("SELECT SUM(tokens_est) FROM memories")
        s = cur.fetchone()[0]
        conn.close()
        s = int(s or 0)
        return s >= self.consolidate_trigger_tokens

    def chat(self, user_text: str, user_meta: Optional[Dict[str, Any]] = None, max_answer_tokens: int = 220) -> Dict[str, Any]:
        user_meta = user_meta or {}
        self.turns += 1

        self.stm.append({"role": "user", "content": user_text})
        self.stm = self.stm[-(self.stm_max_turns * 2):]
        self.add_memory("user", user_text, meta=user_meta)

        ltm = self.retrieve_ltm(user_text, topk=self.ltm_topk)
        stm_block = self._format_stm()
        ltm_block = self._format_ltm(ltm)

        sys_rules = (
            "You are an AI agent with persistent memory. Use retrieved long-term memories to stay consistent. "
            "If a memory conflicts with the user, ask a short clarifying question. Keep answers practical."
        )

        prompt = (
            f"{sys_rules}\n\n"
            f"SHORT-TERM CONTEXT:\n{_safe_clip(stm_block, 1800)}\n\n"
            f"RETRIEVED LONG-TERM MEMORIES:\n{ltm_block if ltm_block else '(none)'}\n\n"
            f"USER REQUEST:\n{user_text}\n\n"
            f"ANSWER:"
        )
        answer = self._gen(prompt, max_new_tokens=max_answer_tokens)

        self.stm.append({"role": "assistant", "content": answer})
        self.stm = self.stm[-(self.stm_max_turns * 2):]
        self.add_memory("assistant", answer, meta={"signal": "response"})

        consolidation_id = None
        if self._should_consolidate():
            consolidation_id = self.consolidate()

        return {
            "answer": answer,
            "retrieved_ltm": [
                {"mid": it.mid, "role": it.role, "importance": it.importance, "meta": it.meta, "text": _safe_clip(it.text, 320)}
                for it in ltm
            ],
            "consolidation_id": consolidation_id,
        }

    def inspect_recent_memories(self, n: int = 12) -> List[Dict[str, Any]]:
        conn = sqlite3.connect(self.db_path)
        cur = conn.cursor()
        cur.execute("SELECT mid, role, text, created_ts, importance, tokens_est, meta_json FROM memories ORDER BY created_ts DESC LIMIT ?", (n,))
        rows = cur.fetchall()
        conn.close()
        out = []
        for r in rows:
            try:
                meta = json.loads(r[6]) if r[6] else {}
            except Exception:
                meta = {}
            out.append({"mid": r[0], "role": r[1], "created_ts": int(r[3]), "importance": float(r[4]), "tokens_est": int(r[5]), "meta": meta, "text": _safe_clip(r[2], 520)})
        return out

    def inspect_consolidations(self, n: int = 5) -> List[Dict[str, Any]]:
        conn = sqlite3.connect(self.db_path)
        cur = conn.cursor()
        cur.execute("SELECT cid, created_ts, summary, source_mids_json FROM consolidations ORDER BY created_ts DESC LIMIT ?", (n,))
        rows = cur.fetchall()
        conn.close()
        out = []
        for r in rows:
            try:
                src = json.loads(r[3]) if r[3] else []
            except Exception:
                src = []
            out.append({"cid": r[0], "created_ts": int(r[1]), "summary": r[2], "source_mids": src})
        return out

In [3]:
agent = EverMemAgentOS()

agent.upsert_kv("profile", {"name": "User", "preferences": {"style": "concise"}})

demo_queries = [
    ("I prefer answers in bullet points and I’m working on a Colab tutorial.", {"signal": "preference", "pinned": True}),
    ("Remember that my project is about an EverMem-style agent OS with FAISS + SQLite.", {"signal": "fact", "pinned": True}),
    ("Give me a 5-step plan to add memory importance scoring and consolidation.", {"signal": "task"}),
    ("Now remind me what you know about my preferences and project, briefly.", {"signal": "task"}),
]

for q, meta in demo_queries:
    r = agent.chat(q, user_meta=meta, max_answer_tokens=180)
    print("\nUSER:", q)
    print("ASSISTANT:", r["answer"])
    if r["retrieved_ltm"]:
        print("RETRIEVED_LTM:", [(x["importance"], x["text"]) for x in r["retrieved_ltm"][:3]])
    if r["consolidation_id"]:
        print("CONSOLIDATED:", r["consolidation_id"])

print("\nRECENT MEMORIES:")
for m in agent.inspect_recent_memories(10):
    print(m["role"], m["importance"], m["text"])

print("\nRECENT CONSOLIDATIONS:")
for c in agent.inspect_consolidations(3):
    print(c["cid"], c["summary"])

Loading weights:   0%|          | 0/103 [00:00<?, ?it/s]

[1mBertModel LOAD REPORT[0m from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

[3mNotes:
- UNEXPECTED[3m	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.[0m


Loading weights:   0%|          | 0/190 [00:00<?, ?it/s]




USER: I prefer answers in bullet points and I’m working on a Colab tutorial.
ASSISTANT: are you using the Colab tutorial?
RETRIEVED_LTM: [(1.0, 'I prefer answers in bullet points and I’m working on a Colab tutorial.')]

USER: Remember that my project is about an EverMem-style agent OS with FAISS + SQLite.
ASSISTANT: answer: yes
RETRIEVED_LTM: [(1.0, 'Remember that my project is about an EverMem-style agent OS with FAISS + SQLite.'), (1.0, 'I prefer answers in bullet points and I’m working on a Colab tutorial.'), (0.6163180262308081, 'are you using the Colab tutorial?')]

USER: Give me a 5-step plan to add memory importance scoring and consolidation.
ASSISTANT: yes
RETRIEVED_LTM: [(0.8752032546602084, 'Give me a 5-step plan to add memory importance scoring and consolidation.'), (1.0, 'I prefer answers in bullet points and I’m working on a Colab tutorial.'), (1.0, 'Remember that my project is about an EverMem-style agent OS with FAISS + SQLite.')]

USER: Now remind me what you know abou