In [1]:
pip install pinecone

Collecting pinecone
  Downloading pinecone-7.3.0-py3-none-any.whl.metadata (9.5 kB)
Collecting pinecone-plugin-assistant<2.0.0,>=1.6.0 (from pinecone)
  Downloading pinecone_plugin_assistant-1.8.0-py3-none-any.whl.metadata (30 kB)
Collecting pinecone-plugin-interface<0.0.8,>=0.0.7 (from pinecone)
  Downloading pinecone_plugin_interface-0.0.7-py3-none-any.whl.metadata (1.2 kB)
Downloading pinecone-7.3.0-py3-none-any.whl (587 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m587.6/587.6 kB[0m [31m5.0 MB/s[0m  [33m0:00:00[0m
[?25hDownloading pinecone_plugin_assistant-1.8.0-py3-none-any.whl (259 kB)
Downloading pinecone_plugin_interface-0.0.7-py3-none-any.whl (6.2 kB)
Installing collected packages: pinecone-plugin-interface, pinecone-plugin-assistant, pinecone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3/3[0m [pinecone]2/3[0m [pinecone]
[1A[2KSuccessfully installed pinecone-7.3.0 pinecone-plugin-assistant-1.8.0 pinecone-plugin-interface-0.0.7



In [2]:
# 1. Data Ingest
import os
import time
import google.generativeai as genai
from sentence_transformers import SentenceTransformer
from pinecone import Pinecone, ServerlessSpec
from dotenv import load_dotenv

In [None]:
# --- 1. SETUP AND INITIALIZATION ---
print("--- Initializing clients and models ---")
load_dotenv()  # Load variables from the .env file into os.environ

# Read the API keys from the environment instead of hardcoding them in the
# notebook. A missing variable raises KeyError naming the key that must be set.
# Gemini is configured here once; the model itself is created in a later cell.
genai.configure(api_key=os.environ["GOOGLE_API_KEY"])

# Connect to Pinecone
pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])

--- Initializing clients and models ---


In [8]:
# --- 2. DEFINE CONSTANTS ---
# Shared configuration read by the index-creation, ingestion and retrieval cells.
EMBED_MODEL_NAME = 'all-MiniLM-L6-v2' # SentenceTransformer model name (384-dimensional embeddings)
DIMENSION = 384 # Vector dimension passed to create_index; kept equal to the embedding size above
INDEX_NAME = "coffeeindex" # Name of the Pinecone index this notebook creates and queries
DOCS_NS = "docs" # Namespace inside the index that holds the document vectors

In [9]:
# Our "Knowledge Base" of coffee documents
# Each entry carries:
#   "id"       - used as the vector ID when the document is upserted
#   "text"     - the passage that is embedded and later returned as context
#   "metadata" - descriptive fields; the ingestion cell stores "title" (plus the
#                text) in the index, while "category" is not written there
DOCUMENTS = [
    {
        "id": "doc-1",
        "text": "Ashwagandha coffee is a beverage that blends coffee with Ashwagandha root powder, an adaptogen used in Ayurvedic medicine. It's claimed to help reduce stress and anxiety.",
        "metadata": {"title": "Ashwagandha Coffee", "category": "Herbal Blends"}
    },
    {
        "id": "doc-2",
        "text": "A turmeric latte, also known as 'golden milk', is a traditional caffeine-free drink. It's made with milk (or a non-dairy alternative), turmeric, ginger, cinnamon, and a sweetener. It is prized for its anti-inflammatory properties.",
        "metadata": {"title": "Turmeric Latte", "category": "Caffeine-Free"}
    },
    {
        "id": "doc-3",
        "text": "For a standard espresso shot, the typical brew ratio is 1:2, meaning 18 grams of ground coffee yields a 36-gram liquid shot in about 25-30 seconds.",
        "metadata": {"title": "Espresso Brew Ratios", "category": "Brewing"}
    },
    {
        "id": "doc-4",
        "text": "Mushroom coffee, such as Chaga or Lion's Mane, is a blend that offers lower caffeine levels than regular coffee. It's often promoted for its cognitive and immune-boosting benefits.",
        "metadata": {"title": "Mushroom Coffee", "category": "Herbal Blends"}
    }
]

In [10]:
# --- 3. CREATE INDEX (IF NOT EXISTS) ---
print(f"Checking if index '{INDEX_NAME}' exists...")
index_exists = INDEX_NAME in [entry["name"] for entry in pc.list_indexes()]

if index_exists:
    print(f"Index '{INDEX_NAME}' already exists.")
else:
    print(f"Index not found. Creating a new serverless index: {INDEX_NAME}")
    pc.create_index(
        name=INDEX_NAME,
        dimension=DIMENSION,
        metric="cosine",  # cosine similarity suits semantic search over embeddings
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )
    # Poll the index status every 5 seconds until Pinecone reports it ready.
    while True:
        if pc.describe_index(INDEX_NAME).status['ready']:
            break
        print("Waiting for index to be ready...")
        time.sleep(5)

# Handle used by every later cell to upsert into and query the index.
index = pc.Index(INDEX_NAME)
print(f"Successfully connected to index: '{INDEX_NAME}'")

Checking if index 'coffeeindex' exists...
Index not found. Creating a new serverless index: coffeeindex
Successfully connected to index: 'coffeeindex'


In [14]:
# --- 4. EMBED AND UPSERT DOCUMENTS ---
print(f"Loading embedding model: {EMBED_MODEL_NAME}...")
embedder = SentenceTransformer(EMBED_MODEL_NAME)

print(f"Embedding {len(DOCUMENTS)} documents...")
texts = [doc['text'] for doc in DOCUMENTS]
embeddings = embedder.encode(texts, normalize_embeddings=True).tolist()

# One record per document: its ID, its embedding, and the metadata we want
# back at query time (the title and the original text).
vectors_to_upsert = [
    {
        "id": document['id'],
        "values": embedding,
        "metadata": {
            "title": document['metadata']['title'],
            "text": document['text'],
        },
    }
    for document, embedding in zip(DOCUMENTS, embeddings)
]
print(f"Upserting {len(vectors_to_upsert)} vectors into namespace '{DOCS_NS}'...")

# Start from a clean namespace, but only issue the delete when the namespace
# is already present in the index stats. Any failure here is reported and
# ingestion continues (best effort).
try:
    stats = index.describe_index_stats()
    if DOCS_NS not in stats.get("namespaces", {}):
        print(f"Namespace '{DOCS_NS}' does not exist yet. Skipping delete.")
    else:
        print(f"Clearing namespace '{DOCS_NS}' before upserting...")
        index.delete(delete_all=True, namespace=DOCS_NS)
except Exception as exc:
    print(f"Warning: Could not check or clear namespace '{DOCS_NS}'. Continuing anyway. ({exc})")

# Write the document vectors into the namespace.
index.upsert(vectors=vectors_to_upsert, namespace=DOCS_NS)

print("--- Ingestion Complete ---")
print(index.describe_index_stats())

Loading embedding model: all-MiniLM-L6-v2...
Embedding 4 documents...
Upserting 4 vectors into namespace 'docs'...
Namespace 'docs' does not exist yet. Skipping delete.
--- Ingestion Complete ---
{'dimension': 384,
 'index_fullness': 0.0,
 'metric': 'cosine',
 'namespaces': {'docs': {'vector_count': 4}},
 'total_vector_count': 4,
 'vector_type': 'dense'}


In [16]:
#2.basic_rag_bot.py
# Setup for the stateless RAG bot. The embedding model, index name and
# namespace repeat the values used by the ingestion cells above.
# NOTE(review): presumably repeated so this part can run without re-ingesting - confirm.

# Initialize models
EMBED_MODEL_NAME = 'all-MiniLM-L6-v2'
embedder = SentenceTransformer(EMBED_MODEL_NAME)  # embeds user queries for retrieval
llm = genai.GenerativeModel("gemini-2.5-flash")  # generates the final answer

# Connect to our Pinecone index
INDEX_NAME = "coffeeindex"
DOCS_NS = "docs"
index = pc.Index(INDEX_NAME)  # relies on the `pc` client created in the setup cell
print("Clients and models loaded.")

# --- 2. DEFINE THE PROMPT TEMPLATE ---
# The template is a plain Python string; basic_rag() fills the {query} and
# {context} placeholders with str.format().
BASIC_RAG_TEMPLATE = """
You are a helpful coffee expert. Use the following CONTEXT to answer the QUESTION.
If the answer is not in the context, say "I don't know from the provided documents."

QUESTION:
{query}

CONTEXT:
{context}
"""

Clients and models loaded.


In [19]:
# --- 3. THE BASIC RAG PIPELINE FUNCTION ---
def basic_rag(query: str, k: int = 2):
    """Answer ``query`` with one stateless retrieve-then-generate pass.

    The query is embedded, the top ``k`` matches are fetched from the
    ``DOCS_NS`` namespace, their stored text is joined into a context block,
    and the filled ``BASIC_RAG_TEMPLATE`` is sent to the Gemini model.

    Args:
        query: The user's question.
        k: Number of documents to retrieve.

    Returns:
        The model's answer text, or a fallback message when the model
        returned no usable text.
    """
    print("\n--- Basic RAG Query ---")
    print(f"User: {query}")

    # 1. RETRIEVE
    print(f"Retrieving top-{k} documents...")
    query_vector = embedder.encode([query], normalize_embeddings=True)[0].tolist()
    search_results = index.query(
        vector=query_vector,
        top_k=k,
        include_metadata=True,  # the document text lives in the metadata
        namespace=DOCS_NS
    )

    # 2. COMPOSE PROMPT
    contexts = []
    print("Retrieved Documents:")
    for match in search_results["matches"]:
        text = match['metadata'].get("text", "No text metadata found")
        contexts.append(text)
        print(f"  - (Score: {match['score']:.4f}) {text[:80]}...")

    joined_context = "\n\n---\n\n".join(contexts)

    # Fill the prompt template, then echo the context and the final prompt
    # so the demo shows exactly what is sent to the model.
    prompt = BASIC_RAG_TEMPLATE.format(query=query, context=joined_context)
    print("\n"*5)
    print(joined_context)
    print("\n"*5)
    print(prompt)
    print("\n"*5)

    # 3. GENERATE
    print("Calling Gemini to generate a grounded answer...")
    response = llm.generate_content(prompt)
    try:
        # `.text` is a convenience accessor that raises ValueError when the
        # response carries no text part (for example a blocked response).
        answer = response.text
    except ValueError:
        answer = "Sorry, I couldn't generate a response."

    print(f"\nAssistant: {answer}")
    return answer

In [20]:
# --- 4. RUN THE DEMO ---
# Ask one sample question through the stateless pipeline; basic_rag() prints
# the retrieved documents, the assembled prompt and the model's answer.
if __name__ == "__main__":
    basic_rag("What is Ashwagandha coffee?")


--- Basic RAG Query ---
User: What is Ashwagandha coffee?
Retrieving top-2 documents...
Retrieved Documents:
  - (Score: 0.8663) Ashwagandha coffee is a beverage that blends coffee with Ashwagandha root powder...
  - (Score: 0.4822) Mushroom coffee, such as Chaga or Lion's Mane, is a blend that offers lower caff...






Ashwagandha coffee is a beverage that blends coffee with Ashwagandha root powder, an adaptogen used in Ayurvedic medicine. It's claimed to help reduce stress and anxiety.

---

Mushroom coffee, such as Chaga or Lion's Mane, is a blend that offers lower caffeine levels than regular coffee. It's often promoted for its cognitive and immune-boosting benefits.







You are a helpful coffee expert. Use the following CONTEXT to answer the QUESTION.
If the answer is not in the context, say "I don't know from the provided documents."

QUESTION:
What is Ashwagandha coffee?

CONTEXT:
Ashwagandha coffee is a beverage that blends coffee with Ashwagandha root powder, an adaptogen

In [22]:
#+++++++++++++++++++++++++++++ RAG with Memory +++++++++++++++++++++++++++

In [27]:
import re
from typing import List, Dict, Tuple

# --- 3. HELPER FUNCTIONS (Memory, Retrieval, Formatting) ---

def extract_user_facts(user_text: str) -> List[str]:
    """Extract first-person preference statements from ``user_text``.

    A simple regex extractor for demo purposes. It matches, case-insensitively,
    phrases that start with "I like/love/prefer ..." or
    "I avoid/hate/dislike/usually/often/am ..." and run to the end of the
    sentence. A sentence ends at ``.``, ``!``, ``?`` or a newline, so a
    following question is not swallowed into the remembered fact.

    Args:
        user_text: Raw message typed by the user.

    Returns:
        The distinct matched phrases, stripped of surrounding whitespace and
        sorted; an empty list when nothing matches.
    """
    sentence_tail = r"[^.!?\n]+"
    pats = [
        r"\bI (?:like|love|prefer)\b" + sentence_tail,
        # Negative preferences ("I hate ...", "I dislike ...") are facts too.
        r"\bI (?:avoid|hate|dislike|usually|often|am)\b" + sentence_tail,
    ]
    findings = set()
    for pat in pats:
        findings.update(m.group(0).strip() for m in re.finditer(pat, user_text, flags=re.I))
    return sorted(findings)

def add_memory_facts(facts: List[str]) -> None:
    """Embed each fact and upsert it into the ``MEM_NS`` namespace.

    Does nothing when ``facts`` is empty. Each stored vector gets an ID of the
    form ``"<USER_ID>:<unix seconds>:<position>"`` and metadata holding the
    fact text, the user ID and the timestamp.

    NOTE(review): ``USER_ID`` and ``MEM_NS`` are module-level names that must
    be defined before this is called - confirm where they are set.
    """
    if facts:
        print(f"  [Memory System: Found {len(facts)} new fact(s) to remember.]")
        fact_vectors = embedder.encode(facts, normalize_embeddings=True).tolist()
        stamp = int(time.time())

        # Tuple form (id, vector, metadata), one record per fact.
        records = [
            (
                f"{USER_ID}:{stamp}:{pos}",
                fact_vectors[pos],
                {"fact": fact_text, "user_id": USER_ID, "ts": stamp},
            )
            for pos, fact_text in enumerate(facts)
        ]

        index.upsert(vectors=records, namespace=MEM_NS)

def _query_namespace(query_text: str, k: int, namespace: str) -> List[Dict]:
    """Embed ``query_text`` and return the top-``k`` matches from ``namespace``."""
    query_vector = embedder.encode([query_text], normalize_embeddings=True)[0].tolist()
    response = index.query(
        vector=query_vector,
        top_k=k,
        include_metadata=True,  # callers read the stored fact/text from metadata
        namespace=namespace,
    )
    return response.get("matches", [])

def retrieve_memory(query_text: str, k: int = 2) -> List[Dict]:
    """Retrieve the ``k`` most relevant facts from the ``MEM_NS`` namespace.

    Args:
        query_text: Text to search with.
        k: Number of matches to request.

    Returns:
        The matches from the query response, or an empty list if it has none.
    """
    return _query_namespace(query_text, k, MEM_NS)

def retrieve_docs(query_text: str, k: int = 3) -> List[Dict]:
    """Retrieve the ``k`` most relevant documents from the ``DOCS_NS`` namespace.

    Args:
        query_text: Text to search with.
        k: Number of matches to request.

    Returns:
        The matches from the query response, or an empty list if it has none.
    """
    return _query_namespace(query_text, k, DOCS_NS)

In [29]:
# --- Formatting helpers ---
def format_history(history: List[Dict]) -> str:
    """Render the chat history as one ``"Role: content"`` line per turn.

    Returns the literal string "None" when there is no history.
    """
    if not history:
        return "None"
    rendered_turns = []
    for turn in history:
        # Capitalise the role so each line is clearly labelled in the prompt.
        speaker = turn['role'].capitalize()
        rendered_turns.append(f"{speaker}: {turn['content']}")
    return "\n".join(rendered_turns)

def format_memory(mem_hits: List[Dict]) -> str:
    """Render memory matches as a bulleted list, one ``"- fact"`` line each.

    Returns the literal string "None" when there are no matches. A match
    whose metadata has no "fact" entry yields an empty bullet.
    """
    if mem_hits:
        bullets = []
        for hit in mem_hits:
            remembered = hit['metadata'].get('fact', '')
            bullets.append(f"- {remembered}")
        return "\n".join(bullets)
    return "None"

def format_context(doc_hits: List[Dict]) -> str:
    """Render document matches as ``"[doc id] text"`` lines.

    Returns the literal string "None" when there are no matches. A missing
    ID renders as empty brackets and missing text as "No text found".
    """
    if not doc_hits:
        return "None"
    return "\n".join(
        f"[{hit.get('id', '')}] {hit['metadata'].get('text', 'No text found')}"
        for hit in doc_hits
    )

def build_prompt(query: str, history: List[Dict], doc_hits: List[Dict], mem_hits: List[Dict]) -> str:
    """Fill ``MEMORY_RAG_TEMPLATE`` with the formatted prompt sections.

    The template receives four fields: ``history``, ``memory_block``,
    ``query`` and ``context_block``.

    NOTE(review): ``MEMORY_RAG_TEMPLATE`` is a module-level string that must
    be defined before this is called - confirm where it is set.
    """
    template = MEMORY_RAG_TEMPLATE
    history_text = format_history(history)
    memory_text = format_memory(mem_hits)
    context_text = format_context(doc_hits)
    return template.format(
        history=history_text,
        memory_block=memory_text,
        query=query,
        context_block=context_text,
    )

# --- 4. THE CONVERSATIONAL TURN ORCHESTRATOR ---
def memory_rag_turn(user_text: str) -> str:
    """Run one stateful RAG turn for ``user_text`` and return the answer.

    Steps: store any new user facts in long-term memory, retrieve documents
    and memories for the message, build the prompt from them plus the chat
    history, call the model, then append the user message and the answer to
    ``chat_history``.

    NOTE(review): ``chat_history`` is a module-level list that must be
    defined before this is called - confirm where it is set.

    Args:
        user_text: The message typed by the user.

    Returns:
        The assistant's answer, or a fallback message when the model
        returned no usable text.
    """
    # 1. Update long-term memory with new facts from the user's message
    new_facts = extract_user_facts(user_text)
    add_memory_facts(new_facts)

    # 2. Retrieve from both documents AND long-term memory
    print(f"  [Retrieving docs for: '{user_text}']")
    doc_hits = retrieve_docs(user_text, k=3)
    print(f"  [Retrieving memories for: '{user_text}']")
    mem_hits = retrieve_memory(user_text, k=2)

    # 3. Assemble the full prompt and echo it so the demo shows what is sent
    prompt = build_prompt(user_text, chat_history, doc_hits, mem_hits)

    print("\n"*10)
    print(prompt)
    print("\n"*10)

    # 4. Call the LLM
    print("  [Generating response...]")
    response = llm.generate_content(prompt)
    try:
        # `.text` is a property that raises ValueError when the response has
        # no text part (for example a blocked response). A getattr() default
        # would not catch that, so handle it explicitly.
        answer = response.text.strip()
    except (AttributeError, ValueError):
        answer = "Sorry, I couldn't generate a response."

    # 5. Update short-term memory (the conversation history)
    chat_history.append({"role": "user", "content": user_text})
    chat_history.append({"role": "assistant", "content": answer})

    # 6. Show the user the answer
    print(f"\nAssistant: {answer}")
    return answer

# --- 5. INTERACTIVE CHAT LOOP ---
# Read messages until the user types 'exit' or 'quit'; blank input is ignored.
if __name__ == "__main__":
    print("\n--- Coffee Memory-RAG Bot ---")
    print("Type 'exit' to quit. Try stating a preference, like 'I prefer low-caffeine drinks.'")
    while True:
        try:
            q = input("\nYou: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or an interrupted kernel ends the chat cleanly
            # instead of leaving a traceback in the cell output.
            print("\nGoodbye!")
            break
        if q.lower() in {"exit", "quit"}:
            print("Goodbye!")
            break
        if not q:
            continue
        memory_rag_turn(q)



--- Coffee Memory-RAG Bot ---
Type 'exit' to quit. Try stating a preference, like 'I prefer low-caffeine drinks.'



You:  what is the name of coffee i did not like 


  [Retrieving docs for: 'what is the name of coffee i did not like']
  [Retrieving memories for: 'what is the name of coffee i did not like']













You are a helpful coffee expert. Use the following CONTEXT, and when useful,
use RELEVANT_MEMORY to answer the QUESTION.
If the answer is not in the context, say "I don't know from the provided documents."
Don't create/invent imaginary sources. Cite source [DOC ID] when you see them.

Conversation so far :
User: i hate turmeric latte
Assistant: I understand you don't like turmeric latte. [doc-2] describes what a turmeric latte (also known as 'golden milk') is – a traditional caffeine-free drink made with milk, turmeric, ginger, cinnamon, and a sweetener, prized for its anti-inflammatory properties.

I don't know from the provided documents about alternatives or other information related to disliking turmeric lattes.
User: what i did like in coffee
Assistant: I don't know from the provided documents about what you liked in coffee.

Re


You:  exit


Goodbye!
