# 1. Packages

In [32]:
import os
from getpass import getpass
from operator import itemgetter
from typing import Dict, List, Optional

from langchain_core.documents import Document
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables import Runnable, RunnableParallel, RunnableLambda
from langchain_core.runnables import chain
from langchain_core.vectorstores import VectorStore
from langchain_mistralai import ChatMistralAI, MistralAIEmbeddings
from langchain_pinecone import PineconeVectorStore
from pinecone import Pinecone
from pinecone.data.index import Index

# 2. Setup

In [2]:
# Reuse the key from the environment when it is already set so that "Run All"
# is not blocked on a prompt; otherwise ask for it with a label that says
# which of the notebook's three keys is expected.
LANGCHAIN_API_KEY = os.environ.get("LANGCHAIN_API_KEY") or getpass("LangSmith API key: ")

 ········


In [3]:
# Turn on LangSmith tracing for the LangChain calls made in this notebook.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_API_KEY"] = LANGCHAIN_API_KEY
# The endpoint used to be bound to a Python variable only, which nothing in
# this notebook reads; export it as well so the setting can take effect.
LANGCHAIN_ENDPOINT = "https://api.smith.langchain.com"
os.environ["LANGCHAIN_ENDPOINT"] = LANGCHAIN_ENDPOINT

In [16]:
# Reuse the key from the environment when it is already set so that "Run All"
# is not blocked on a prompt; otherwise ask for it with an explicit label.
MISTRAL_API_KEY = os.environ.get("MISTRAL_API_KEY") or getpass("Mistral API key: ")

 ········


In [17]:
# Export the key: the MistralAIEmbeddings instances built later in this notebook
# are created without an explicit api_key (presumably they fall back to this
# environment variable — confirm against the langchain_mistralai docs).
os.environ["MISTRAL_API_KEY"] = MISTRAL_API_KEY

In [21]:
# Chat model shared by every chain defined below.
mistral = ChatMistralAI(model="open-mistral-nemo", api_key=MISTRAL_API_KEY)
# NOTE(review): the chain builders below create their own MistralAIEmbeddings,
# so this module-level instance looks unused in the visible cells — confirm
# before removing it.
embeddings = MistralAIEmbeddings(model="mistral-embed", api_key=MISTRAL_API_KEY)



In [22]:
# Reuse the key from the environment when it is already set so that "Run All"
# is not blocked on a prompt; otherwise ask for it with an explicit label.
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY") or getpass("Pinecone API key: ")

 ········


In [23]:
# Name of the Pinecone index opened by get_vector_db().
PINECONE_INDEX_NAME = "knowledgenest-dev"

In [24]:
from langchain_core.messages import AIMessage, HumanMessage

# 3. KN Setup

In [None]:
def get_vector_db():
    """Open the Pinecone index named by PINECONE_INDEX_NAME.

    A Pinecone client is built from PINECONE_API_KEY and the index handle it
    yields for that name is returned.
    """
    return Pinecone(api_key=PINECONE_API_KEY).Index(PINECONE_INDEX_NAME)

In [None]:
def create_retriever(
    vectorstore: VectorStore, filter: Optional[Dict] = None, k: int = 3
) -> Runnable:
    """Create and return a retriever runnable bound to ``vectorstore``.

    Args:
        vectorstore: Store queried through ``similarity_search_with_score``.
        filter: Metadata filter forwarded unchanged to the search. ``None`` is
            what every caller in this notebook passes, hence the default.
        k: Number of results requested per query. Defaults to 3, the value
            that used to be hard-coded.

    Returns:
        A runnable mapping a query string to a list of documents, each with
        its similarity score stored under ``metadata["score"]``. An empty
        list is returned when the search yields nothing.
    """

    @chain
    def retrieve(query: str) -> List[Document]:
        results = vectorstore.similarity_search_with_score(query, k=k, filter=filter)
        docs = []
        # Each hit is a (document, score) pair. The score is copied onto the
        # document so later steps can read it from the metadata.
        for doc, score in results or []:
            doc.metadata["score"] = score
            docs.append(doc)
        return docs

    return retrieve


def get_chain():
    """Assemble the full RAG chain: retrieve, build the prompt, generate.

    The chain takes ``{"messages": [...]}`` and produces a dict with the model
    answer under ``"output"`` and the deduplicated sources under ``"sources"``.
    """
    chat_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", SYSTEM_PROMPT),
            MessagesPlaceholder(variable_name="messages"),
        ]
    )
    # We always embed with mistral for index consistency
    vector_store = PineconeVectorStore(
        index=get_vector_db(),
        embedding=MistralAIEmbeddings(model="mistral-embed"),
    )
    retriever = create_retriever(vector_store, None)

    # Stage 1: search with the content of the last message, keep the messages.
    retrieval = dict(
        docs=parse_retriever_input | retriever,
        messages=itemgetter("messages"),
    )
    # Stage 2: turn the documents into the prompt context and the source list.
    prompt_inputs = RunnableParallel(
        context=itemgetter("docs") | RunnableLambda(format_docs),
        messages=itemgetter("messages"),
        sources=itemgetter("docs") | RunnableLambda(parse_sources),
    )
    # Stage 3: render the prompt while carrying the sources through.
    rendering = RunnableParallel(
        prompt=chat_prompt,
        sources=itemgetter("sources"),
    )
    # Stage 4: call the model on the rendered prompt and parse it to a string.
    generation = RunnableParallel(
        output=itemgetter("prompt") | mistral | StrOutputParser(),
        sources=itemgetter("sources"),
    )
    return retrieval | prompt_inputs | rendering | generation


def format_docs(docs):
    """Join the ``page_content`` of every document, separated by a blank line."""
    separator = "\n\n"
    contents = [document.page_content for document in docs]
    return separator.join(contents)


def parse_retriever_input(params: Dict):
    """Return the text of the last message in ``params["messages"]``.

    That text is what the chains use as the retrieval query.
    """
    latest_message = params["messages"][-1]
    return latest_message.content


def parse_sources(docs: List[Document]) -> List[Dict]:
    """Collapse retrieved documents into one entry per source.

    Documents are grouped by ``metadata["content_id"]``. Each entry holds that
    id, the ``type`` of the first document seen for it, and the highest
    ``score`` found among its documents. Entries keep first-appearance order.
    """
    best_by_id = {}
    for doc in docs:
        meta = doc.metadata
        content_id = meta["content_id"]
        entry = best_by_id.get(content_id)
        if entry is None:
            best_by_id[content_id] = {
                "id": content_id,
                "type": meta["type"],
                "score": meta["score"],
            }
            continue
        # We take the best score of each document
        if entry["score"] < meta["score"]:
            entry["score"] = meta["score"]
    return [*best_by_id.values()]

In [47]:
# System prompt of the RAG chains. `{context}` is a template variable: the
# chains fill it with the retrieved documents joined by format_docs().
# NOTE(review): the wording has typos ("politey", "you primarily based"); it is
# left untouched here because editing it changes what the model receives.
SYSTEM_PROMPT = """You are a useful assistant that answers politey to users questions. 
            Your answers are based on your general knowledge but 
            you primarily based on the below context when it is useful :\n\n{context}"""

In [34]:
# Despite its name, `llm` holds the whole RAG chain returned by get_chain(),
# not a bare chat model.
llm = get_chain()

In [35]:
# Smoke test: send a one-message conversation through the RAG chain.
llm.invoke(dict(messages=[HumanMessage("helo")]))

{'output': 'Hello! How can I assist you today?',
 'sources': [{'id': '1da83de6-94ce-4a9c-9d0b-7c70859c718a',
   'type': 'article',
   'score': 0.637567222},
  {'id': '5555acdb-aba5-4d5a-b8d3-e6a1ee24ffde',
   'type': 'article',
   'score': 0.636965513}]}

# 4. Evals

### A. Create Dataset

In [37]:
from langsmith import Client
from langsmith.evaluation import evaluate

In [38]:
# LangSmith client used below to create the evaluation dataset.
client = Client()
# Common prefix of the evaluation dataset names; a version suffix is appended
# where the dataset is created.
base_dataset_name = "kn-eval-perf"

In [39]:
# Test QA
inputs = [
    "What are the challenges when building a real time machine learning system",
    "What are the different components of the Feature Training Inference architecture ?",
    "How were machine learning systems built at the beginning ?",
    "What is the portfolio theory of the firm ?",
    "What is founder mode ?"
]

In [None]:
dataset_name = base_dataset_name + "-v2"
# This cell has to run because it defines `dataset_name`, but creating a
# dataset whose name is already taken fails. Reuse the existing dataset when
# there is one, so the notebook can be re-run from a fresh kernel without
# failing here or adding the examples a second time.
if client.has_dataset(dataset_name=dataset_name):
    dataset = client.read_dataset(dataset_name=dataset_name)
else:
    dataset = client.create_dataset(
        dataset_name=dataset_name,
        description="Input question of RAG KN",
    )
    client.create_examples(
        inputs=[{"question": q} for q in inputs],
        dataset_id=dataset.id,
    )

### B. Evaluate RAG pipeline

In [41]:
def predict_rag_answer(example: dict):
    """Evaluation target: answer ``example["question"]`` with the full RAG chain.

    Only the generated text is returned, under the ``"answer"`` key.
    """
    rag_chain = get_chain()
    conversation = [HumanMessage(example["question"])]
    result = rag_chain.invoke(dict(messages=conversation))
    return {"answer": result["output"]}

In [42]:
# Run the full RAG chain over the dataset under the "MistralNemoFullChain"
# experiment prefix; no evaluators are passed, so this only records the
# predictions (num_repetitions=3 presumably runs every example three times).
test_results = evaluate(
    predict_rag_answer,
    data=dataset_name,
    experiment_prefix="MistralNemoFullChain",
    num_repetitions=3,
)

View the evaluation results for experiment: 'MistralNemoFullChain-854505a2' at:
https://smith.langchain.com/o/a700c4b6-5caf-57dc-a929-900e043ce283/datasets/69a30af2-2443-4053-a943-de470933914e/compare?selectedSessions=0c52b2aa-17c7-4100-8944-62337614a754




0it [00:00, ?it/s]



### C. Evaluate Simpler chain

In [48]:
# System prompt of the baseline chain: same opening sentence as SYSTEM_PROMPT
# but with no `{context}` variable, since this chain does no retrieval.
SIMPLE_SYSTEM_PROMPT = """You are a useful assistant that answers politey to users questions."""

def get_simple_chain():
    """Build the baseline chain: system prompt + messages -> model -> string.

    No retrieval is involved; the chain expects ``{"messages": [...]}``.
    """
    baseline_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", SIMPLE_SYSTEM_PROMPT),
            MessagesPlaceholder(variable_name="messages"),
        ]
    )
    return baseline_prompt | mistral | StrOutputParser()

In [43]:
import warnings
# Silences every Python warning from this point on.
# NOTE(review): presumably added to quiet library warnings during the
# evaluation runs — it hides unrelated warnings too; consider narrowing it.
warnings.filterwarnings('ignore')

In [51]:
def predict_nemo_answer(example: dict):
    """Evaluation target: answer ``example["question"]`` with the baseline chain.

    The chain already yields a plain string, which is returned under ``"answer"``.
    """
    baseline_chain = get_simple_chain()
    conversation = [HumanMessage(example["question"])]
    answer_text = baseline_chain.invoke(dict(messages=conversation))
    return {"answer": answer_text}

In [52]:
# Manual check of the baseline target on the first evaluation question.
predict_nemo_answer(dict(question=inputs[0]))

{'answer': 'Building a real-time machine learning system presents several challenges. Here are some of the key ones, along with polite responses to each:\n\n1. **Latency and Speed**:\n   - *Challenge*: Real-time systems require immediate responses, often within milliseconds. This can be challenging, especially for complex models that might take seconds or even minutes to process data.\n   - *Polite response*: "I understand that speed is of the essence in real-time systems. We\'ll need to ensure our model is efficient and perhaps explore techniques like model pruning or hardware acceleration to meet the latency requirements."\n\n2. **Data Quality and Availability**:\n   - *Challenge*: Real-time systems often deal with streaming data, which can be noisy, incomplete, or biased. Ensuring the model receives high-quality, representative data can be difficult.\n   - *Polite response*: "I appreciate your concern about data quality. We\'ll need to implement robust data preprocessing and validat

In [53]:
# Same dataset, baseline chain (no retrieval), recorded under the
# "MistralNemoSolo" experiment prefix for comparison with the RAG runs.
# Note that `test_results` from the previous evaluation is overwritten.
test_results = evaluate(
    predict_nemo_answer,
    data=dataset_name,
    experiment_prefix="MistralNemoSolo",
    num_repetitions=3,
)

View the evaluation results for experiment: 'MistralNemoSolo-bcf1075c' at:
https://smith.langchain.com/o/a700c4b6-5caf-57dc-a929-900e043ce283/datasets/69a30af2-2443-4053-a943-de470933914e/compare?selectedSessions=18d0e55b-22b0-4e10-807f-6997cbc16851




0it [00:00, ?it/s]

### D. Evaluate changes in rag chain code

In [70]:
from langsmith.run_helpers import traceable

In [65]:
def get_chain_v2():
    """Build the RAG pipeline as one traced function instead of an LCEL graph.

    Returns:
        A runnable whose ``invoke({"messages": [...]})`` returns the tuple
        ``(sources, output)``: the deduplicated sources from
        ``parse_sources`` and the model answer as a string.
    """
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", SYSTEM_PROMPT),
            MessagesPlaceholder(variable_name="messages"),
        ]
    )
    index = get_vector_db()
    llm = mistral
    # We always embed with mistral for index consistency
    embeddings = MistralAIEmbeddings(model="mistral-embed")
    vector_store = PineconeVectorStore(index=index, embedding=embeddings)
    retriever = create_retriever(vector_store, None)
    retriever_chain = parse_retriever_input | retriever
    llm_chain = prompt | llm | StrOutputParser()

    @traceable()
    def kn_rag(params: dict):
        # Retrieve once, then derive both the source list and the prompt
        # context from the same documents.
        docs = retriever_chain.invoke(params)
        sources = parse_sources(docs)
        formatted_docs = format_docs(docs)
        output = llm_chain.invoke(dict(**params, context=formatted_docs))
        return sources, output

    # Callers in this notebook use `.invoke(...)`, which a traceable-decorated
    # plain function does not provide. Wrapping it exposes the Runnable
    # interface while keeping the traced function as the implementation.
    return RunnableLambda(kn_rag)

In [66]:
# Manual check of the v2 pipeline on the first evaluation question.
# `llm` is rebound here: it now holds the object returned by get_chain_v2().
llm = get_chain_v2()
response = llm.invoke(dict(messages=[HumanMessage(inputs[0])]))

In [67]:
response

([{'id': '5db351c7-984b-4c3c-bc90-6d3cc0013d63',
   'type': 'video',
   'score': 0.864119709},
  {'id': 'a0790e62-19d8-4341-8f2d-e46ee6934f31',
   'type': 'article',
   'score': 0.859660447}],
 'Based on the context provided, here are some key challenges when building a real-time machine learning system:\n\n1. **Data Streams and Ingestion:**\n   - Handling continuous, real-time data streams efficiently.\n   - Ingesting and preprocessing data in real-time, which can be challenging due to the volume, velocity, and variety of data.\n   - Ensuring data quality and consistency in real-time.\n\n2. **Model Serving:**\n   - Deploying and serving models in real-time with low latency.\n   - Scaling the model serving infrastructure to handle increased load.\n   - Ensuring high availability and fault tolerance of the model serving system.\n\n3. **Feature Store and Context:**\n   - Managing and serving features in real-time, especially when historical or contextual data is required.\n   - Ensuring 

In [68]:
def predict_clean_rag_answer(example: dict):
    """Evaluation target: answer ``example["question"]`` with the v2 pipeline.

    The pipeline returns ``(sources, output)``; only the output text is kept
    and returned under ``"answer"``.
    """
    pipeline = get_chain_v2()
    conversation = [HumanMessage(example["question"])]
    result = pipeline.invoke(dict(messages=conversation))
    answer_text = result[1]  # second element is the generated text
    return {"answer": answer_text}

In [69]:
# The recorded run of this cell lost predictions to Mistral "Requests rate
# limit exceeded" (HTTP 429) errors. Evaluate with a single worker so the
# examples are not sent to the API in parallel.
test_results = evaluate(
    predict_clean_rag_answer,
    data=dataset_name,
    experiment_prefix="MistralNemoRagClean",
    num_repetitions=3,
    max_concurrency=1,
)

View the evaluation results for experiment: 'MistralNemoRagClean-b783bd59' at:
https://smith.langchain.com/o/a700c4b6-5caf-57dc-a929-900e043ce283/datasets/69a30af2-2443-4053-a943-de470933914e/compare?selectedSessions=84d82ff6-299f-439f-8582-ffb3fd84a83c




0it [00:00, ?it/s]

Error running target function: Error response 429 while fetching https://api.mistral.ai/v1/chat/completions: {"message":"Requests rate limit exceeded"}
An error occurred with MistralAI: 'data'
Error running target function: 'data'


### E. Put the chain in a class

In [None]:
# (cell intentionally left without code: it used to contain a stray "}" that
# raised a SyntaxError when the notebook was run top to bottom)

In [82]:
class KNRag:
    """Retrieval-augmented chatbot over the index returned by get_vector_db().

    The retriever chain and the prompt/model chain are built once in the
    constructor; ``answer`` runs them for one conversation. All helper steps
    (query extraction, document formatting, source parsing) are methods of the
    class and are the ones actually used by it.
    """

    def __init__(self):
        # NOTE: per-user / per-provider configuration is not wired in yet; the
        # retriever is built without a metadata filter.
        self._retriever = self._init_retriever()
        self._llm = self._init_llm()

    @traceable()
    def answer(self, params: dict):
        """Answer the conversation in ``params["messages"]``.

        Returns:
            A dict with the deduplicated ``"sources"`` and the model
            ``"output"`` as a string.
        """
        docs = self._retriever.invoke(params)
        sources = self._parse_sources(docs)
        formatted_docs = self._format_docs(docs)
        output = self._llm.invoke(dict(**params, context=formatted_docs))
        return {
            "sources": sources,
            "output": output
        }

    def _init_retriever(self):
        """Build the chain mapping the input params to retrieved documents."""
        index = get_vector_db()
        # Same embedding model as the other chains in this notebook, so the
        # queries match what the index was built with.
        embeddings = MistralAIEmbeddings(model="mistral-embed")
        vector_store = PineconeVectorStore(index=index, embedding=embeddings)
        retriever = self._create_retriever(vector_store, None)

        # Extract the query from the params first, then search with it.
        return RunnableLambda(self._parse_retriever_input) | retriever

    def _init_llm(self):
        """Build the prompt -> model -> string chain used to generate answers."""
        prompt = ChatPromptTemplate.from_messages(
            [
                ("system", SYSTEM_PROMPT),
                MessagesPlaceholder(variable_name="messages"),
            ]
        )
        return prompt | mistral | StrOutputParser()

    def _format_docs(self, docs):
        """Join the documents' ``page_content``, separated by blank lines."""
        return "\n\n".join(doc.page_content for doc in docs)

    def _parse_retriever_input(self, params: Dict):
        """Return the text of the last message, used as the retrieval query."""
        return params["messages"][-1].content

    def _parse_sources(self, docs: List[Document]) -> List[Dict]:
        """Extract unique sources with useful information"""
        sources = {}
        for doc in docs:
            doc_id = doc.metadata["content_id"]
            if doc_id not in sources:
                sources[doc_id] = {
                    "id": doc_id,
                    "type": doc.metadata["type"],
                    "score": doc.metadata["score"],
                }
            elif sources[doc_id]["score"] < doc.metadata["score"]:
                # We take the best score of each document
                sources[doc_id]["score"] = doc.metadata["score"]
        return list(sources.values())

    def _create_retriever(
        self, vectorstore: VectorStore, filter: Optional[Dict] = None, k: int = 3
    ) -> Runnable:
        """Create and return a retriever runnable bound to ``vectorstore``.

        ``filter`` is forwarded unchanged to the similarity search and ``k`` is
        the number of results requested (3 was previously hard-coded).
        """

        @chain
        def retrieve(query: str) -> List[Document]:
            results = vectorstore.similarity_search_with_score(
                query, k=k, filter=filter
            )
            docs = []
            # Each hit is a (document, score) pair; keep the score on the
            # document so _parse_sources can read it from the metadata.
            for doc, score in results or []:
                doc.metadata["score"] = score
                docs.append(doc)
            return docs

        return retrieve

In [83]:
# Build the class-based RAG chatbot; its retriever and model chains are
# created in the constructor.
chatbot = KNRag()

In [84]:
# Manual check of the class-based chatbot on the first evaluation question.
response = chatbot.answer(dict(messages=[HumanMessage(inputs[0])]))

In [85]:
response

{'sources': [{'id': '5db351c7-984b-4c3c-bc90-6d3cc0013d63',
   'type': 'video',
   'score': 0.864119709},
  {'id': 'a0790e62-19d8-4341-8f2d-e46ee6934f31',
   'type': 'article',
   'score': 0.859660447}],
 'output': "Based on the provided context, here are the key challenges when building a real-time machine learning system:\n\n1. **Separation of Training and Serving**: Real-time systems require separate pipelines for offline training and online model serving. This separation can make it challenging to ensure consistent features between training and serving.\n\n2. **Feature Management**: In real-time systems, features are often computed using data provided in the request. This means the model has no access to historical or contextual data. Ensuring that the features used for training and serving are consistent can be complex. One solution is to version the feature creation source code and use the same version in both training and serving.\n\n3. **Latency and Throughput**: Real-time syst

In [92]:
def predict_class_rag_answer(example: dict):
    """Evaluation target: answer ``example["question"]`` with a fresh KNRag.

    Only the generated text is kept and returned under ``"answer"``.
    """
    chatbot = KNRag()
    conversation = [HumanMessage(example["question"])]
    result = chatbot.answer(dict(messages=conversation))
    answer_text = result["output"]  # drop the sources, keep the text
    return {"answer": answer_text}

In [93]:
# The recorded run of this cell lost most predictions to Mistral "Requests
# rate limit exceeded" (HTTP 429) errors. Evaluate with a single worker so the
# examples are not sent to the API in parallel.
test_results = evaluate(
    predict_class_rag_answer,
    data=dataset_name,
    experiment_prefix="MistralNemoRagClass",
    num_repetitions=3,
    max_concurrency=1,
)

View the evaluation results for experiment: 'MistralNemoRagClass-77d5617b' at:
https://smith.langchain.com/o/a700c4b6-5caf-57dc-a929-900e043ce283/datasets/69a30af2-2443-4053-a943-de470933914e/compare?selectedSessions=77ed51fe-f03f-4259-a4e5-fc070ca6850c




0it [00:00, ?it/s]

Error running target function: Error response 429 while fetching https://api.mistral.ai/v1/chat/completions: {"message":"Requests rate limit exceeded"}
Error running target function: Error response 429 while fetching https://api.mistral.ai/v1/chat/completions: {"message":"Requests rate limit exceeded"}
Error running target function: Error response 429 while fetching https://api.mistral.ai/v1/chat/completions: {"message":"Requests rate limit exceeded"}
An error occurred with MistralAI: 'data'
Error running target function: 'data'
An error occurred with MistralAI: 'data'
Error running target function: 'data'
An error occurred with MistralAI: 'data'
Error running target function: 'data'
An error occurred with MistralAI: 'data'
Error running target function: 'data'
