Assumes LangChain v0.3.4


In [1]:
# %pip install --upgrade --quiet langchain-openai
# %pip install langchain-qdrant
# %pip install streamlit

In [2]:
import streamlit as st

### LangSmith
Traces for this project are accessible [here](https://smith.langchain.com/o/3941ecea-6957-508c-9f4f-08ed62dc7d61/projects/p/0aea481f-080e-45eb-bae1-2ae8ee246bd9)

In [3]:
# These have to be set as environmental variables to be accessed behind the scenes
import os
from dotenv import load_dotenv, find_dotenv

# Look for a .env file and load whatever variables it defines.
env_path = find_dotenv()
load_dotenv(env_path)

# LangSmith settings are picked up from environment variables, so export them.
# The API key comes from Streamlit secrets; the tracing flag and project name
# are fixed here (alternatively they could be read from
# st.secrets["LANGCHAIN_TRACING_V2"] and st.secrets["LANGCHAIN_PROJECT"]).
os.environ.update(
    {
        "LANGCHAIN_API_KEY": st.secrets["LANGCHAIN_API_KEY"],
        "LANGCHAIN_TRACING_V2": "true",
        "LANGCHAIN_PROJECT": "ASK",
    }
)


In [4]:
# Read the OpenAI key from Streamlit secrets.
OPENAI_API_KEY = st.secrets["OPENAI_API_KEY"]
# Second name for the same value, kept so existing references keep working.
open_api_key = OPENAI_API_KEY

# OpenAIEmbeddings() and ChatOpenAI() are constructed later in this notebook
# without an explicit key, so they depend on the environment variable being
# present. setdefault leaves any value already provided by the shell or the
# .env file untouched.
os.environ.setdefault("OPENAI_API_KEY", OPENAI_API_KEY)

In [5]:
from qdrant_client import QdrantClient
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
# from langchain.vectorstores import Qdrant Deprecated
from langchain_qdrant import QdrantVectorStore
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import OpenAIEmbeddings

In [6]:
# Central settings shared by the embedding model, the retriever, and the chat model.
config = dict(
    # --- embeddings ---
    # Built without an explicit API-key argument (not needed as of LangChain 0.3.4).
    embedding=OpenAIEmbeddings(),
    embedding_dims=1536,
    # --- retrieval ---
    search_type="mmr",
    k=5,  # number of documents selected
    fetch_k=20,  # fetch 20 candidate docs, then select k (5) of them
    lambda_mult=0.7,  # 0 = max diversity, 1 = min diversity; default is 0.5
    score_threshold=0.5,
    # --- chat model ---
    model="gpt-3.5-turbo-16k",
    temperature=0.7,
    chain_type="stuff",  # a LangChain parameter
)

In [7]:
from langchain_openai import ChatOpenAI

# Defined at notebook level (not inside a function) so later cells can reuse it.
llm = ChatOpenAI(
    model=config["model"],
    temperature=config["temperature"],
)
query = []

In [8]:
qdrant_collection_name = "ASK_vectorstore"
qdrant_path = "/tmp/local_qdrant"


def get_retriever():
    """Build a document retriever over the Qdrant collection.

    Connects to the cloud Qdrant instance using the URL and API key stored in
    Streamlit secrets, wraps the collection named by ``qdrant_collection_name``
    in a ``QdrantVectorStore`` with the embedding from ``config``, and exposes
    it as a retriever using the search settings in ``config``.

    Every call creates a new client and vector store; nothing is cached here.

    Returns:
        The retriever produced by ``QdrantVectorStore.as_retriever``.
    """
    # Cloud instance. For a local instance use instead:
    #   QdrantClient(path="/tmp/local_qdrant")  # /private/tmp/local_qdrant
    qdrant_client = QdrantClient(
        url=st.secrets["QDRANT_URL"],
        prefer_grpc=True,
        api_key=st.secrets["QDRANT_API_KEY"],
    )

    # QdrantVectorStore replaces the deprecated Qdrant class; note that the
    # keyword argument is the singular `embedding`.
    vector_store = QdrantVectorStore(
        client=qdrant_client,
        collection_name=qdrant_collection_name,
        embedding=config["embedding"],
    )

    search_options = {
        "k": config["k"],
        "fetch_k": config["fetch_k"],
        "lambda_mult": config["lambda_mult"],
        "filter": None,  # metadata filter; None means no filtering
    }
    return vector_store.as_retriever(
        search_type=config["search_type"],
        search_kwargs=search_options,
    )

In [9]:
# Test that the retriever initializes; the resulting `retriever` is reused by the chains below.
retriever = get_retriever()

In [10]:
# 2. Incorporate the retriever into a question-answering chain.
# System instructions for the model; `{context}` is the placeholder that
# receives the retrieved documents when the prompt is formatted.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer, say that you don't know. "
    "Use three sentences maximum and keep the answer concise."
    "\n\n{context}"
)

# Chat prompt: the system message carries the instructions plus the retrieved
# context; the human message carries the user's question via `{input}`.
chat_messages = [
    ("system", system_prompt),
    ("human", "{input}"),
]
prompt = ChatPromptTemplate.from_messages(chat_messages)

In [11]:
# Chain that stuffs the retrieved documents into the prompt and calls the model.
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)
# Full RAG chain: retrieve documents for the input, then answer from them.
rag_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=question_answer_chain,
)

### Run using the built-in `create_retrieval_chain`, which returns sources by default

Note that `result` is a dict with keys `"input"`, `"context"`, and `"answer"`:


In [12]:
# Run the retrieval chain on a sample question.
result = rag_chain.invoke({"input": "What is the Auxiliary?"})

# Bare expression so the notebook displays the whole result dict.
result

{'input': 'What is the Auxiliary?',
 'context': [Document(metadata={'page': 114, 'source': 'References/Auxiliary Manual CIM_16790_1G.pdf', '_id': '89ef5ec1-475c-45fb-bba6-5f7b0dcac8c3', '_collection_name': 'ASK_vectorstore'}, page_content='COMDTINST M16790.1G \n \n \n \n \n \n \nSection A. Eligibility \nIntroduction The Auxiliary is an organiza tion of uniformed volunteers.  Pursuant to \nprovisions of this chapter, m embership is open to any citizen of the U.S. and \nits territories and possessions who is seventeen years of age or older. \nMembership is predicated on successful completion of the Auxiliary \nenrollment application (including the passing of the New Member Exam), \nand receipt of  a favorable (FAV) Personnel Secu rity Investigation (PSI) in \naccordance with the Personnel Security and Suitability Program Manual, \nCOMDTINST M5520.12 (series), and pa ragraph C.3 of this chapter . \nIndividuals who hold dual citizenship w ith the U.S. and another country may \nbecome membe

### Custom LCEL implementation

This constructs a chain similar to those built by `create_retrieval_chain` above.

It works by building up a dict with the input query,
then adding the retrieved docs under the `"context"` key.
Both the query and the context are then fed into a RAG chain, and the result is added to the dict.


In [23]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough


def format_docs(docs):
    """Join the ``page_content`` of each document into a single string.

    Consecutive documents are separated by a blank line.
    """
    contents = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(contents)


# Mapping applied to the incoming dict (keys 'input' and 'context') to produce
# the variables the prompt needs.
prompt_inputs = {
    "input": lambda state: state["input"],  # the user's query
    "context": lambda state: format_docs(state["context"]),  # docs joined into one string
}

# Render the prompt from those variables, call the model, and coerce the
# reply to a plain string.
rag_chain_from_docs = prompt_inputs | prompt | llm | StrOutputParser()

# Retrieval step: pull the query string out of the input dict and hand it to the retriever.
retrieve_docs = (lambda state: state["input"]) | retriever

# Two successive `.assign` steps extend the input dict: the first adds
# "context" (the retrieved docs), the second adds "answer". Each assigned
# Runnable sees all keys present in the dict at that point.
with_context = RunnablePassthrough.assign(context=retrieve_docs)
chain = with_context.assign(answer=rag_chain_from_docs)

chain.invoke({"input": "What is Task Decomposition"})

{'input': 'What is Task Decomposition',
 'context': [Document(metadata={'page': 15, 'source': 'References/Gold Side/Auxiliary Boat Crew Qualification Handbook- ABQH 16115.52B.pdf', '_id': '6b607953-80a5-4b67-9e78-45524c598f31', '_collection_name': 'ASK_vectorstore'}, page_content=' Part 1 –  Introduction to Auxiliary Boat Crew Qualification System  \n  Chapter 4 – Task Designations  \n  \n  \n1-5 \n \nCHAPTER 4  \nTask Designations  \n \n \nA.1. Task Designation \nComponents  A task designation is comprised of three elements followed by the word \n“AUX ”. The three elements of a task designation are:  \n(01) Qualification  \n(02) Division Designation Number  \n(03) Task Designation Number  \nA.2. Task Structure  Below is an  example : \nBCM -07-05-AUX  \n     Indicated that the task must be accomplished on an Auxiliary \nfacility .  \n   Task designation number.  The task is a knowledge or skill objective to be performed.   \n  Division designation number   \n Qualification  designatio

### Include the sources in the model's response


Up to this point, we've simply propagated the documents returned from the retrieval step through to the final response. But this may not illustrate what subset of the retrieved information the model relied on when generating its answer. Below, we show how to structure sources into the model response, allowing the model to report what specific context it relied on for its answer.

Because the above LCEL implementation is composed of Runnable primitives, it is straightforward to extend. Below, we make a simple change:

We use the model's tool-calling features to generate structured output, consisting of an answer and list of sources. The schema for the response is represented in the `AnswerWithSources` TypedDict, below.
We remove the `StrOutputParser()`, as we expect `dict` output in this scenario.


In [24]:
from typing import List

from langchain_core.runnables import RunnablePassthrough
from typing_extensions import Annotated, TypedDict


# Desired schema for response; passed to `llm.with_structured_output` below.
# NOTE(review): the "author + year" wording in the `sources` description does not
# match this corpus: the retrieved documents carry 'source' and 'page' metadata,
# and format_docs forwards only page_content to the model. Confirm whether the
# description (and the context formatting) should be adapted.
class AnswerWithSources(TypedDict):
    """An answer to the question, with sources."""

    # The model's answer text.
    answer: str
    # Source labels reported by the model. The extra Annotated elements are
    # presumably read as (default, description) by the structured-output
    # machinery -- TODO confirm against the LangChain docs.
    sources: Annotated[
        List[str],
        ...,
        "List of sources (author + year) used to answer the question",
    ]


# Structured-output variant of rag_chain_from_docs. Compared with the string
# version it:
# - wraps the LLM with `.with_structured_output(AnswerWithSources)`;
# - has no output parser at the end.
structured_prompt_inputs = {
    "input": lambda state: state["input"],
    "context": lambda state: format_docs(state["context"]),
}
structured_llm = llm.with_structured_output(AnswerWithSources)

rag_chain_from_docs = structured_prompt_inputs | prompt | structured_llm

# Retrieval step: feed the query string from the input dict to the retriever.
retrieve_docs = (lambda state: state["input"]) | retriever

# Add "context" (retrieved docs) first, then "answer" (structured model output).
with_context = RunnablePassthrough.assign(context=retrieve_docs)
chain = with_context.assign(answer=rag_chain_from_docs)

question = {"input": "What are the requirements to run for FC?"}
response = chain.invoke(question)

This outputs the model's response as well as the subset of retrieved information that it used to infer its response.

Note that the `answer` element in the `response` dictionary is itself a dictionary containing `answer` and `sources` keys.


In [25]:
import json

# Pretty-print the structured answer (a dict with "answer" and "sources") as indented JSON.
print(json.dumps(response["answer"], indent=2))

{
  "answer": "To run for FC (Flotilla Commander), the requirements include: having a Favorable Operational Support Personnel Security Investigation (PSI) determination, completing one year as a member of the Auxiliary, successfully completing a relevant course (Administrative Procedures Course, Flotilla Leadership Course, or Basic Qualification II Course), being a member of the flotilla in which they seek elected office, and meeting certain eligibility criteria such as obtaining an Auxiliary qualification, serving as a staff officer or elected officer for two years, or having 25 hours of documented activity in Auxiliary programs or mission areas.",
  "sources": [
    "1-8 a. (CG Auxiliary Manual)"
  ]
}


### The response object also contains the original query, all the retrieved docs, the LLM response, and the sources used by the model to generate its answer, so we can also list the titles of the retrieved documents and the source page content


In [26]:
# Fields available on each retrieved document:
#     item.page_content
#     item.metadata['source']
#     item.metadata['page']
print("Sources:")
for item in response["context"]:
    meta = item.metadata
    # The extra "\n" leaves a blank line between entries.
    print(f"{meta['source']} page {meta['page']}" + "\n")

Sources:
References/Gold Side/Flotilla_Procedures_Guide_FINAL_ESIGNED_23MAR23.pdf page 172

References/Gold Side/Flotilla_Procedures_Guide_FINAL_ESIGNED_23MAR23.pdf page 21

References/Gold Side/Flotilla_Procedures_Guide_FINAL_ESIGNED_23MAR23.pdf page 28

References/Gold Side/Flotilla_Procedures_Guide_FINAL_ESIGNED_23MAR23.pdf page 80

References/Gold Side/Flotilla_Procedures_Guide_FINAL_ESIGNED_23MAR23.pdf page 22



### This one is formatted in the same way as the short source list in ASK


In [None]:
# Build the short source list: one markdown entry "*<title>*, page <n>" per
# retrieved document.
markdown_list = []

for doc in response['context']:
    source = doc.metadata['source']
    # File name without its directories or final extension. rsplit with
    # maxsplit=1 removes only the last ".ext", so names that contain dots
    # (e.g. "... ABQH 16115.52B.pdf") are not cut off at the first dot.
    short_source = source.split('/')[-1].rsplit('.', 1)[0]
    page = doc.metadata['page']
    markdown_list.append(f"*{short_source}*, page {page}\n")

# Each entry already ends with a newline, so joining on '\n' leaves a blank
# line between entries.
short_source_list = '\n'.join(markdown_list)
print(short_source_list)

### This one is formatted in the same way as the long source list in ASK


In [None]:
# Build the long source list: one numbered markdown entry per retrieved
# document, containing the title, the page, and the full page content.
markdown_list = []

for i, doc in enumerate(response['context'], start=1):
    page_content = doc.page_content
    source = doc.metadata['source']
    # File name without its directories or final extension. rsplit with
    # maxsplit=1 removes only the last ".ext", so names that contain dots
    # (e.g. "... ABQH 16115.52B.pdf") are not cut off at the first dot.
    short_source = source.split('/')[-1].rsplit('.', 1)[0]
    page = doc.metadata['page']
    markdown_list.append(
        f"**Reference {i}:**    *{short_source}*, page {page}   {page_content}\n")

# Each entry already ends with a newline, so joining on '\n' leaves a blank
# line between entries.
long_source_list = '\n'.join(markdown_list)
print(long_source_list)

### For reference, here's the full response object. You can see it contains the original query, all the retrieved docs, the LLM response, and the sources used by the model to generate its answer.


In [None]:
import json

# Dump the whole response; default=str makes json fall back to str() for any
# value it cannot serialize natively (e.g. the retrieved document objects).
print(json.dumps(response, indent=2, default=str))