In [4]:
!pip install --user pinecone

Collecting pinecone
  Downloading pinecone-8.0.0-py3-none-any.whl.metadata (11 kB)
Collecting pinecone-plugin-assistant<4.0.0,>=3.0.1 (from pinecone)
  Downloading pinecone_plugin_assistant-3.0.1-py3-none-any.whl.metadata (30 kB)
Collecting packaging<25.0,>=24.2 (from pinecone-plugin-assistant<4.0.0,>=3.0.1->pinecone)
  Downloading packaging-24.2-py3-none-any.whl.metadata (3.2 kB)
Downloading pinecone-8.0.0-py3-none-any.whl (745 kB)
   ---------------------------------------- 0.0/745.9 kB ? eta -:--:--
   ---------------------------------------- 0.0/745.9 kB ? eta -:--:--
   -------------- ------------------------- 262.1/745.9 kB ? eta -:--:--
   -------------- ------------------------- 262.1/745.9 kB ? eta -:--:--
   -------------------------- ----------- 524.3/745.9 kB 699.0 kB/s eta 0:00:01
   ---------------------------------------- 745.9/745.9 kB 774.0 kB/s  0:00:01
Downloading pinecone_plugin_assistant-3.0.1-py3-none-any.whl (280 kB)
Downloading packaging-24.2-py3-none-any.whl (6

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
mediapipe 0.10.14 requires protobuf<5,>=4.25.3, but you have protobuf 6.33.2 which is incompatible.
streamlit 1.35.0 requires protobuf<5,>=3.20, but you have protobuf 6.33.2 which is incompatible.
tensorflow-intel 2.16.1 requires ml-dtypes~=0.3.1, but you have ml-dtypes 0.5.4 which is incompatible.
tensorflow-intel 2.16.1 requires protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3, but you have protobuf 6.33.2 which is incompatible.
tensorflow-intel 2.16.1 requires tensorboard<2.17,>=2.16, but you have tensorboard 2.20.0 which is incompatible.
tensorflow-serving-api 2.16.1 requires protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3, but you have protobuf 6.33.2 which is incompatible.


In [None]:

import os

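# Set the Pinecone and Hugging Face credentials (PINECONE_API_KEY, HF_TOKEN) in the environment before running the cells below.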

In [1]:
import os
import json
from typing import List, Literal
from typing_extensions import TypedDict
from pydantic import BaseModel

from huggingface_hub import InferenceClient
from pinecone import Pinecone, ServerlessSpec

from langchain_huggingface import HuggingFaceEmbeddings
from langchain_pinecone import PineconeVectorStore   # ✅ FIX
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

from langgraph.graph import StateGraph, START, END





In [3]:
# Paths & models
# Directory holding the .txt source documents. Set the LMKR_DATA_DIR
# environment variable to point elsewhere; the original local path is kept
# as the default so existing setups keep working.
DATA_DIR = os.getenv("LMKR_DATA_DIR", r"C:\Users\PMLS\Downloads\langgraph\lmkr_data")
# Name of the Pinecone index used for storage and retrieval.
INDEX_NAME = "lmkr-self-rag"

# Chat model requested from the Hugging Face inference client.
LLM_MODEL = "mistralai/Mistral-7B-Instruct-v0.2"
# LLM_MODEL = "meta-llama/Llama-3.1-8B-Instruct"


In [4]:
def hf_chat(messages, max_tokens=512, temperature=0.0):
    """Send one chat-completion request to the configured model.

    Args:
        messages: Chat messages, forwarded unchanged to the inference client.
        max_tokens: Generation limit forwarded to the API call.
        temperature: Sampling temperature forwarded to the API call.

    Returns:
        The "content" entry of the first returned choice's message.
    """
    # A new client is built on every call; the token is read from HF_TOKEN.
    hf_token = os.getenv("HF_TOKEN")
    client = InferenceClient(model=LLM_MODEL, token=hf_token)

    request_kwargs = {
        "messages": messages,
        "max_tokens": max_tokens,
        "temperature": temperature,
    }
    completion = client.chat.completions.create(**request_kwargs)

    first_choice = completion.choices[0]
    return first_choice.message["content"]


def extract_json(text: str) -> dict:
    """Return the first JSON object embedded in ``text``.

    Model replies often wrap the JSON in prose, repeat the schema, or contain
    stray braces. Instead of slicing from the first "{" to the last "}"
    (which breaks as soon as anything brace-like follows the object), every
    "{" is tried in order as the start of an object and the first one that
    decodes is returned.

    Args:
        text: Raw text that is expected to contain a JSON object.

    Returns:
        The decoded JSON object as a dict.

    Raises:
        json.JSONDecodeError: If no position in ``text`` starts a valid JSON
            object (this is a ``ValueError`` subclass).
    """
    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            # raw_decode parses one value beginning at `start` and ignores
            # whatever trails it.
            obj, _ = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            # Not a valid object here; move on to the next opening brace.
            start = text.find("{", start + 1)
            continue
        return obj
    raise json.JSONDecodeError("No JSON object found", text, 0)


In [5]:
# Pinecone client, authenticated with the key from PINECONE_API_KEY.
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

# A serverless spec needs a concrete region. Fall back to "us-east-1" when
# PINECONE_ENVIRONMENT is unset or empty rather than passing region=None.
pinecone_region = os.getenv("PINECONE_ENVIRONMENT") or "us-east-1"

# Create the index only when it is not already listed.
if INDEX_NAME not in pc.list_indexes().names():
    pc.create_index(
        name=INDEX_NAME,
        dimension=384,  # all-MiniLM-L6-v2
        metric="cosine",
        spec=ServerlessSpec(
            cloud="aws",
            region=pinecone_region,
        ),
    )


In [8]:
# Embedding model used both when ingesting chunks and when querying the store.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

def ingest_documents():
    """Load every .txt file in DATA_DIR, chunk it, and upload it to Pinecone.

    Files are read as UTF-8, split into chunks of 600 characters with an
    overlap of 50, embedded with the module-level ``embeddings`` model and
    written to the index named by ``INDEX_NAME``. Returns nothing.
    """
    # Collect the paths of all .txt files directly inside DATA_DIR.
    txt_paths = [
        os.path.join(DATA_DIR, name)
        for name in os.listdir(DATA_DIR)
        if name.endswith(".txt")
    ]

    docs = []
    for path in txt_paths:
        loader = TextLoader(path, encoding="utf-8")
        docs.extend(loader.load())

    splitter = RecursiveCharacterTextSplitter(chunk_size=600, chunk_overlap=50)
    chunks = splitter.split_documents(docs)

    PineconeVectorStore.from_documents(
        documents=chunks,
        embedding=embeddings,
        index_name=INDEX_NAME,
    )



In [9]:
# Ingest the documents only when the index reports zero stored vectors.
index = pc.Index(INDEX_NAME)
index_stats = index.describe_index_stats()
if index_stats["total_vector_count"] == 0:
    ingest_documents()


In [10]:
# LangChain vector store bound to the existing Pinecone index.
vectorstore = PineconeVectorStore(index_name=INDEX_NAME, embedding=embeddings)

# Retriever configured to request k=6 results per query.
retriever = vectorstore.as_retriever(search_kwargs=dict(k=6))


In [26]:
def safe_binary_grade(text: str) -> str:
    """Reduce a model reply to "yes" or "no".

    The reply is parsed with ``extract_json``; the keys "binary_score",
    "score", "answer" and "label" are checked in that order and the first
    one whose value normalises (stringified, stripped, lower-cased) to "yes"
    or "no" decides the result. Anything else, including a reply that cannot
    be parsed, yields the conservative default "no".
    """
    candidate_keys = ("binary_score", "score", "answer", "label")
    accepted = ("yes", "no")

    try:
        payload = extract_json(text)
    except Exception:
        return "no"

    for name in candidate_keys:
        if name not in payload:
            continue
        verdict = str(payload[name]).strip().lower()
        if verdict in accepted:
            return verdict

    return "no"


In [11]:
class BinaryGrade(BaseModel):
    """Pydantic schema for a yes/no grading verdict.

    NOTE(review): no other cell visible in this notebook references this
    class; the graders parse replies with safe_binary_grade instead.
    """
    # Only the literal strings "yes" and "no" are permitted.
    binary_score: Literal["yes", "no"]


class GraphState(TypedDict):
    """State dictionary used as the schema of the StateGraph workflow."""
    # Current question; rewrite_query replaces it with the rewritten text.
    question: str
    # Retrieved chunk texts (retrieve stores each document's page_content).
    documents: List[str]
    # Answer written by generate; retrieve and rewrite_query reset it to "".
    generation: str


In [19]:
def retrieve(state: GraphState):
    """Fetch documents for the current question.

    Returns a new state with the same question, the page content of every
    retrieved document, and an empty generation.
    """
    query = state["question"]
    hits = retriever.invoke(query)

    page_texts = []
    for hit in hits:
        page_texts.append(hit.page_content)

    return dict(question=query, documents=page_texts, generation="")


In [28]:
def grade_docs(state: GraphState):
    """Keep only the documents the model judges relevant to the question.

    Each document is graded with its own chat request. The prompt spells out
    the expected JSON shape ({"binary_score":"yes"|"no"}) so the reply uses a
    key that safe_binary_grade recognises; without that, a reply under an
    unexpected key would be read as "no" and the document dropped.

    Args:
        state: Graph state providing "question" and "documents".

    Returns:
        A copy of ``state`` whose "documents" holds only the entries graded
        "yes".
    """
    question = state["question"]
    filtered = []

    for d in state["documents"]:
        result = hf_chat([
            {"role": "system", "content": "Return ONLY JSON."},
            {"role": "user", "content":
                f"Question:\n{question}\n\n"
                f"Document:\n{d}\n\n"
                "Is the document relevant to the question? "
                "{\"binary_score\":\"yes\"|\"no\"}"
            },
        ])

        # Unparseable or ambiguous replies count as "no".
        if safe_binary_grade(result) == "yes":
            filtered.append(d)

    return {**state, "documents": filtered}


In [21]:
def generate(state: GraphState):
    """Answer the question using only the retained documents.

    The documents are joined into plain text separated by blank lines.
    Interpolating the list directly would put its Python repr (brackets,
    quotes, escaped newlines) into the prompt.

    Args:
        state: Graph state providing "question" and "documents".

    Returns:
        A copy of ``state`` with "generation" set to the model's answer.
    """
    context = "\n\n".join(state["documents"])

    answer = hf_chat([
        {"role": "system", "content": "Use ONLY the provided context."},
        {"role": "user", "content":
            f"Context:\n{context}\n\n"
            f"Question:\n{state['question']}"
        },
    ], temperature=0.2)

    return {**state, "generation": answer}


In [22]:
def rewrite_query(state: GraphState):
    """Ask the model for a retrieval-friendlier version of the question.

    The reply becomes the next retrieval query, so the prompt asks for the
    rewritten question alone and the reply is stripped of surrounding
    whitespace. If the reply is empty, the original question is kept so an
    empty query is never passed on.

    Args:
        state: Graph state providing "question".

    Returns:
        A fresh state with the (possibly rewritten) question, no documents
        and an empty generation.
    """
    original_q = state["question"]

    new_q = hf_chat([
        {"role": "system", "content":
            "Rewrite the question to improve retrieval. "
            "Return ONLY the rewritten question."
        },
        {"role": "user", "content": original_q},
    ])

    # Guard against a missing or whitespace-only reply.
    new_q = (new_q or "").strip() or original_q

    return {"question": new_q, "documents": [], "generation": ""}


In [29]:
def decide_docs(state):
    """Pick the next node: "generate" if any documents remain, else "rewrite"."""
    if len(state["documents"]) > 0:
        return "generate"
    return "rewrite"


def grade_answer(state):
    """Route on whether the generated answer is grounded in the documents.

    The documents are joined into plain text separated by blank lines so the
    grader does not see the Python repr of a list.

    Args:
        state: Graph state providing "documents" and "generation".

    Returns:
        ``END`` when the grader answers "yes", otherwise "rewrite".
    """
    documents_text = "\n\n".join(state["documents"])

    result = hf_chat([
        {"role": "system", "content": "Return ONLY JSON."},
        {"role": "user", "content":
            f"Documents:\n{documents_text}\n\n"
            f"Answer:\n{state['generation']}\n\n"
            "Is the answer grounded? {\"binary_score\":\"yes\"|\"no\"}"
        },
    ])

    # Anything other than a clear "yes" sends the flow back to rewriting.
    return END if safe_binary_grade(result) == "yes" else "rewrite"



In [30]:
# Assemble the graph: retrieve -> grade_docs -> (generate | rewrite),
# generate -> (END | rewrite), and rewrite -> retrieve.
workflow = StateGraph(GraphState)

for node_name, node_fn in (
    ("retrieve", retrieve),
    ("grade_docs", grade_docs),
    ("generate", generate),
    ("rewrite", rewrite_query),
):
    workflow.add_node(node_name, node_fn)

for edge_src, edge_dst in ((START, "retrieve"), ("retrieve", "grade_docs")):
    workflow.add_edge(edge_src, edge_dst)

# After grading: rewrite when nothing relevant is left, otherwise generate.
docs_routes = {"rewrite": "rewrite", "generate": "generate"}
workflow.add_conditional_edges("grade_docs", decide_docs, docs_routes)

# After generating: finish when the answer is grounded, otherwise rewrite.
answer_routes = {END: END, "rewrite": "rewrite"}
workflow.add_conditional_edges("generate", grade_answer, answer_routes)

workflow.add_edge("rewrite", "retrieve")

app = workflow.compile()


In [32]:
# Run the compiled graph on a sample question and print the final generation.
question = "What is lmkr?"
initial_state = {"question": question}
result = app.invoke(initial_state)

print("FINAL ANSWER:\n", result["generation"], sep="\n")


HfHubHTTPError: 402 Client Error: Payment Required for url: https://router.huggingface.co/featherless-ai/v1/chat/completions (Request ID: Root=1-69401a15-5696b0323137c1d438a0c436;483a5384-4a80-4dda-b3b7-3474002e24b1)

You have reached the free monthly usage limit for featherless-ai. Subscribe to PRO to get 20x more included usage, or add pre-paid credits to your account.