# Document Loading

## Set up dependencies

In [None]:
%pip install -q -U langchain langchain-community langchain-chroma langchain-openai faiss-cpu
%pip install -q -U langchain_experimental lastmile-eval "lastmile-eval[ui]"
%pip install -q -U python-dotenv
%pip install "tracing-auto-instrumentation[langchain]" --upgrade

## Set Up Environment

In [None]:
import dotenv
# Load settings via python-dotenv before any client is created.
# NOTE(review): presumably this supplies the API keys the OpenAI / LastMile
# clients below read from the environment -- confirm.
dotenv.load_dotenv()

## Load documents

In [None]:
from langchain_community.document_loaders import DirectoryLoader 
from langchain_community.document_loaders.text import TextLoader 
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_experimental.text_splitter import SemanticChunker
from langchain.vectorstores import Chroma 
from langchain_openai.embeddings import OpenAIEmbeddings

# from tracing_auto_instrumentation.langchain import LangChainInstrumentor

# # Create an instance of LangChainInstrumentor and instrument the code
# instrumentor = LangChainInstrumentor(project_name="RAG Eval Test")
# instrumentor.instrument()

# Build the vector index that retrieve_docs() queries through `vector_store`.
print("Loading documents...")
# Files matching "**/*.*" below this directory are loaded with TextLoader.
knowledgeDirectoryPath = "knowledge/structured"
loader = DirectoryLoader(knowledgeDirectoryPath, glob="**/*.*", loader_cls=TextLoader)
loaded_docs = loader.load()

# The same embedding model is handed to the chunker and to the vector store.
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

print("Splitting documents...")
# Fixed-size alternative to semantic chunking, kept for comparison:
#splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
splitter = SemanticChunker(embeddings)
chunks = splitter.split_documents(loaded_docs)

print("Creating embeddings...")
# NOTE(review): no persist directory is passed, so the index is presumably
# rebuilt on every run of this cell -- confirm.
vector_store = Chroma.from_documents(chunks, embeddings)

print("Done loading documents")


## Query documents

In [None]:
def retrieve_docs(query, k=5, max_score=1):
    """Retrieve de-duplicated context for *query* from the module-level vector store.

    Args:
        query: Text to search for.
        k: Number of candidates requested from the vector store.
        max_score: Exclusive upper bound on a hit's score; hits scoring at or
            above it are dropped as unrelated. The ascending sort and the
            ``<`` test treat lower scores as better matches (presumably a
            distance -- confirm against the vector store in use).

    Returns:
        A ``(context, unique_docs_and_score)`` tuple. ``context`` is the kept
        page contents joined by blank lines (``""`` when nothing qualifies);
        ``unique_docs_and_score`` holds the matching ``(document, score)``
        pairs, lowest score first.
    """
    # Get documents similar to the query together with their scores.
    docs_and_score = vector_store.similarity_search_with_score(query, k=k)

    # Remove duplicates and unrelated documents, visiting the best hits first
    # so that the first occurrence kept for a given text is the best-scoring one.
    unique_docs = []
    unique_docs_and_score = []
    seen = set()
    for hit in sorted(docs_and_score, key=lambda pair: pair[1]):
        document, score = hit
        content = document.page_content
        if score < max_score and content not in seen:
            seen.add(content)
            unique_docs.append(content)
            unique_docs_and_score.append(hit)

    context = "\n\n".join(unique_docs)

    return context, unique_docs_and_score

## Send query to retriever

In [None]:
# Run the retriever on a sample question.
context, results = retrieve_docs("I'm a new associate. What should I do to be successful?")
# Alternative query -- presumably outside the knowledge base, to exercise the score filter:
# context, results = retrieve_docs("What is the capital of Argentina?")

# Bare expression as the last statement so the notebook displays the (document, score) pairs.
results

## Send query to LLM

In [None]:
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import HumanMessage, SystemMessage
from lastmile_eval.rag.debugger.tracing import get_lastmile_tracer
from lastmile_eval.rag.debugger.common.types import RagFlowType
from lastmile_eval.rag.debugger.api import LastMileTracer

# Instruction fragments that get_prompt() concatenates into the system message.

# Added only when get_prompt() is called with scoped_answer=True.
SCOPED_PROMPT = """
If the answer to the user's question is not contained in the provided context, answer 🤷.
"""
# Alternative wording that also allows inference from the context (kept for reference):
# If the answer to the user question is not contained in the provided context and cannot be inferred from it, 
# answer 🤷.
# """

# Added only when get_prompt() is called with use_markdown=True.
USE_MARKDOWN_PROMPT = """
Use Markdown to format your response.
"""

# Always added by get_prompt().
TRANSPARENT_CONTEXT = """
Do not mention the context in your answer.
"""

# Project name handed to get_lastmile_tracer() below.
PROJECT_NAME = "RAG Test 4"

# Instantiate the LastMile tracer once, scoped to PROJECT_NAME and marked as a
# query flow. It must not be rebound afterwards: a second, unconfigured
# get_lastmile_tracer(...) call would silently drop the project name and flow
# type for everything decorated with / logged through `tracer`.
tracer: LastMileTracer = get_lastmile_tracer(
    tracer_name="my-tracer",
    project_name=PROJECT_NAME,
    rag_flow_type=RagFlowType.QUERY,
)

def get_prompt(instructions, scoped_answer, use_markdown, context):
    """Build the chat prompt: one system message followed by the conversation.

    The system message text is assembled in this order: ``instructions``,
    TRANSPARENT_CONTEXT, SCOPED_PROMPT (only when ``scoped_answer`` is truthy),
    USE_MARKDOWN_PROMPT (only when ``use_markdown`` is truthy), a "Context:"
    header and finally ``context``. The conversation itself is filled in at
    invoke time through the ``messages`` placeholder.
    """
    system_parts = [instructions, TRANSPARENT_CONTEXT]
    if scoped_answer:
        system_parts.append(SCOPED_PROMPT)
    if use_markdown:
        system_parts.append(USE_MARKDOWN_PROMPT)
    system_parts.extend(["\nContext:\n", context])

    system_message = SystemMessage(content="".join(system_parts))
    conversation_slot = MessagesPlaceholder(variable_name="messages")
    return ChatPromptTemplate.from_messages([system_message, conversation_slot])

@tracer.trace_function()
def get_response(query, modelfamily, model, instructions, scoped_answer, use_markdown, temperature):
    """Answer *query* using retrieved context and record the exchange in the tracer.

    Args:
        query: The user's question; also used as the retrieval query.
        modelfamily: Provider key understood by get_client() / get_metadata().
        model: Provider-specific model name.
        instructions: Free-form text placed at the start of the system message.
        scoped_answer: Passed to get_prompt(); adds SCOPED_PROMPT when truthy.
        use_markdown: Passed to get_prompt(); adds USE_MARKDOWN_PROMPT when truthy.
        temperature: Sampling temperature for the chat model.

    Returns:
        A dict with keys ``content`` (the model's reply), ``docs_with_scores``
        (the retrieved ``(document, score)`` pairs), ``metadata`` (model name
        and token usage from get_metadata()) and ``prompt`` (the prompt
        template that was invoked).
    """
    client = get_client(modelfamily, model, temperature)
    context, docs_with_scores = retrieve_docs(query)
    prompt = get_prompt(instructions, scoped_answer, use_markdown, context)

    chain = prompt | client
    response = chain.invoke({"messages": [HumanMessage(content=query)]})

    metadata = get_metadata(modelfamily, model, response)

    # get_prompt() puts the assembled SystemMessage first in the template, so
    # this is the exact system prompt the model received (not a placeholder).
    system_prompt = prompt.messages[0].content

    # Log query event to the trace
    tracer.add_query_event(
        query=query,
        llm_output=response.content,
        system_prompt=system_prompt,
        metadata={"llm_name": model, "temperature": temperature, "model_family": modelfamily},
    )

    return {
        "content": response.content,
        "docs_with_scores": docs_with_scores,
        "metadata": metadata,
        "prompt": prompt,
    }
    
def get_client(model_family, model, temperature):
    """Create the LangChain chat model for the requested provider.

    Args:
        model_family: ``"openai"`` or ``"anthropic"``.
        model: Provider-specific model name.
        temperature: Sampling temperature passed to the chat model.

    Returns:
        A ``ChatOpenAI`` or ``ChatAnthropic`` instance configured with
        ``model`` and ``temperature``.

    Raises:
        ValueError: If ``model_family`` is neither supported value. The
            message matches the one raised by get_metadata().
    """
    # Provider packages are imported inside their branch, so only the package
    # for the family actually requested has to be importable.
    if model_family == "openai":
        from langchain_openai import ChatOpenAI
        return ChatOpenAI(model=model, temperature=temperature)
    if model_family == "anthropic":
        from langchain_anthropic import ChatAnthropic
        return ChatAnthropic(model=model, temperature=temperature)
    raise ValueError(f"Model family {model_family} not recognized")

def get_metadata(model_family, model, response):
    """Summarise token usage for a model response.

    For ``"openai"`` the counts are read from ``response.usage_metadata``; for
    ``"anthropic"`` from ``response.response_metadata["usage"]``. Only the
    ``input_tokens`` and ``output_tokens`` entries are kept, and their sum is
    reported as ``total_tokens``.

    Returns:
        ``{"model": model, "input_tokens": ..., "output_tokens": ..., "total_tokens": ...}``

    Raises:
        ValueError: If ``model_family`` is not ``"openai"`` or ``"anthropic"``.
    """
    if model_family == "openai":
        raw_usage = response.usage_metadata
    elif model_family == "anthropic":
        raw_usage = response.response_metadata["usage"]
    else:
        raise ValueError(f"Model family {model_family} not recognized")

    wanted = ("input_tokens", "output_tokens")
    token_counts = {name: count for name, count in raw_usage.items() if name in wanted}

    summary = {"model": model}
    summary.update(token_counts)
    summary["total_tokens"] = token_counts["input_tokens"] + token_counts["output_tokens"]
    return summary


In [None]:
# End-to-end example: retrieve context for the question and ask the model.
# Arguments are positional, in the order of get_response()'s signature.
get_response(
    "I'm a new associate. What should I do to be successful?",                  # query
    "openai",                                                                   # modelfamily ("openai" or "anthropic")
    "gpt-3.5-turbo",                                                            # model
    "",                                                                         # instructions (none)
    True,                                                                       # scoped_answer
    True,                                                                       # use_markdown
    0                                                                           # temperature
    )