# Retrieval Augmented Generation for USCG Auxiliary

The goal of this project is to create a PDF querying system that enables a user to perform complex searches on a collection of USCG Auxiliary reference documents and obtain specific and accurate data back using only the documents provided. It leverages LangChain, a powerful language processing tool, to extract information from PDF documents.


## 0. Installs and Imports


In [None]:
# %pip install --upgrade pip
# %pip list # See what's installed and versions


# %pip install --upgrade langchain
# %pip install --upgrade docarray
# %pip install python-dotenv
# %pip install --upgrade wandb
# %pip install qdrant-client # applies to all qdrant implementations
# %pip install pypdf
# %pip install git+https://github.com/pikepdf/pikepdf.git#egg=pikepdf  (this requires python>=3.9)

In [1]:
import os
from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv())

True

## 1. Set the model parameters


In [2]:
from langchain.embeddings import OpenAIEmbeddings

# Central settings read by the chunking, summary, retrieval and generation
# cells of this notebook.
config = {
    # NOTE(review): the chunking cell builds a RecursiveCharacterTextSplitter,
    # so this label (echoed by print_document_load_summary) does not match the
    # splitter actually used -- confirm which one is intended.
    "splitter_type": "CharacterTextSplitter",
    # Sizes are measured with length_function below, i.e. in characters.
    "chunk_size": 2000,
    "chunk_overlap": 200,
    "length_function": len,
    "separators": ["}"],  # alternative tried: [" ", ",", "\n"]
    # LangChain interface to the embedding model (instantiated here, once).
    "embedding": OpenAIEmbeddings(),
    # Used for the Qdrant memory estimate; presumably the vector size produced
    # by the embedding model above -- TODO confirm if the model changes.
    "embedding_dims": 1536,
    # NOTE(review): the retriever cells hard-code their own search_type and do
    # not read this key.
    "search_type": "mmr",
    'fetch_k': 20,   # number of documents to pass to the search alg (e.g., mmr)
    "k": 5,  # number of documents from fetch to pass to the LLM for inference
    'lambda_mult': .7,    # 0 = max diversity, 1 = max relevance. default is 0.5
    "score_threshold": 0.5,  # for similarity score
    "model": "gpt-3.5-turbo-16k",  # gpt-4, gpt-3.5-turbo-16k
    "temperature": 0.7,
    "chain_type": "stuff",
}

OPTIONAL: Langchain debugging

In [None]:
from langchain.globals import set_debug

set_debug(False)

In [3]:
qdrant_collection_name = "ASK_vectorstore"
# Only required for local instance (actual location is MacHD: private tmp local_qdrant)
qdrant_path = "/Users/drew_wilkins/Drews_Files/Drew/Python/VSCode/ASK/data/qdrant"
# qdrant_path = "/tmp/local_qdrant"

## 3. Chunk 'n' Load


In [13]:
import os
import pypdf
from langchain.document_loaders import PyPDFLoader


def extract_metadata_from_pdfs(path_to_ingest_files):
    """Collect the page documents of one PDF or of every PDF under a folder.

    Args:
        path_to_ingest_files: Path to a directory (walked recursively) or to a
            single ``.pdf`` file. The extension check is case-insensitive.

    Returns:
        The list of page documents gathered by ``process_pdf``. The list is
        empty when the path is neither a directory nor a PDF file; in that
        case an error line is printed instead of raising.
    """
    pages = []
    file_list = []
    total_size = 0

    # Directory: descend into every sub-folder and hand each PDF to process_pdf.
    if os.path.isdir(path_to_ingest_files):
        print("Loading PDFs from directory...")
        for root, _subdirs, names in os.walk(path_to_ingest_files):
            for name in names:
                if not name.lower().endswith('.pdf'):
                    continue
                process_pdf(os.path.join(root, name),
                            file_list, pages, total_size)
        return pages

    # Single file: only accepted when it carries a .pdf extension.
    if os.path.isfile(path_to_ingest_files) and path_to_ingest_files.lower().endswith('.pdf'):
        print("Loading a single PDF file...")
        process_pdf(path_to_ingest_files, file_list, pages, total_size)
        return pages

    print(
        f"Error: The path '{path_to_ingest_files}' is not a valid directory or PDF file!")
    return pages


def process_pdf(pdf_path, file_list, pages, total_size):
    """Load one PDF, tag every page with the file's metadata and collect it.

    Args:
        pdf_path: Path of the PDF to load.
        file_list: List that receives the file's base name once the PDF has
            produced at least one page (mutated in place).
        pages: List that receives one document per page (mutated in place).
        total_size: Kept for backward compatibility only. It is an int, so
            adding to it here could never update the caller's value; use
            ``os.path.getsize`` on the entries of ``file_list`` if a total is
            needed.

    Returns:
        None. A missing file is reported with a printed message rather than
        an exception.
    """
    file_name = os.path.basename(pdf_path)

    try:
        documents = PyPDFLoader(pdf_path).load()
        if not documents:
            return

        # The document-information dictionary belongs to the file, not to a
        # page, so read it once instead of re-parsing the PDF for every page.
        with open(documents[0].metadata["source"], "rb") as pdf_file_obj:
            pdf_metadata = pypdf.PdfReader(pdf_file_obj).metadata
            # pypdf returns None when the PDF has no information dictionary.
            # Values are looked up through __getitem__ while the file is
            # still open, exactly as before.
            file_metadata = (
                {key: pdf_metadata[key] for key in pdf_metadata.keys()}
                if pdf_metadata else {}
            )
    except FileNotFoundError:
        # Return here: the success message below must not run (previously this
        # path hit an unbound local and raised UnboundLocalError).
        print(f"Error: Could not find {pdf_path}")
        return

    for doc in documents:
        doc.metadata.update(file_metadata)
        pages.append(doc)

    file_list.append(file_name)
    print(f"Processed {file_name}")


path_to_ingest_files = "/Users/drew_wilkins/Drews_Files/Drew/Python/VSCode/ASK/data/original_library_documents/CG_Auxiliary-specific"
# path_to_ingest_files = "/Users/drew_wilkins/Drews_Files/Drew/Python/VSCode/ASK/data/PDF_metadata_complete"
# path_to_ingest_files = "/Users/drew_wilkins/Drews_Files/Drew/Python/VSCode/ASK/data/PDF_metadata_complete/test"
# path_to_ingest_files = "/Users/drew_wilkins/Drews_Files/Drew/Python/VSCode/ASK/diagnostics/"
pages = extract_metadata_from_pdfs(path_to_ingest_files)
if pages:
    last_page = pages[-1]
else:
    print("No pages were processed.")

Loading PDFs from directory...
Processed Auxiliary_Division_Procedures_Guide_COMDTPUB P16791.3_reprints1and2_2017-07-02.pdf
Processed 13_ENC_13_AUX_COVID19-RECONSTITUTE-GUIDE-V5-20JAN2023.pdf
Processed Auxiliary_PV_Manual 169796.3D_2020.pdf
Processed Auxiliary_Operations_Process_Guide_Volume_I-General_Surface_16798.31A.pdf


#### Create chunks

In [14]:
from langchain.text_splitter import CharacterTextSplitter
from langchain.text_splitter import RecursiveCharacterTextSplitter
# import pdf_concatter as concat


# chunks at the page break
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=config["chunk_size"],
    chunk_overlap=config["chunk_overlap"],
    length_function=config["length_function"],
    separators=config["separators"]
)


# concat.pages_to_page(pages) #concatenates all the pages of the pdf into one
chunks = text_splitter.split_documents(pages)
'''"chunks" is a list of objects of the class langchain.schema.document.Document'''
chunks[0]

Document(page_content='AUXILIARY \nDIVISION PROCEDURES GUIDE \n           \n   \nPUBLISHED FOR INSTRUCTIONAL PURPOSES \n \nCOMDTPUB P16791.3 \n \nREPRINT  INCLUDES  CHANGES  1  &  2', metadata={'source': '/Users/drew_wilkins/Drews_Files/Drew/Python/VSCode/ASK/data/original_library_documents/CG_Auxiliary-specific/Auxiliary_Division_Procedures_Guide_COMDTPUB P16791.3_reprints1and2_2017-07-02.pdf', 'page': 0, '/ModDate': "D:20060330120708-06'00'", '/CreationDate': "D:20060329112532-06'00'", '/Creator': 'Canon ', '/Producer': ' '})

In [15]:
def print_document_load_summary():
    """Print a report on the PDF load and chunking held in the notebook globals.

    Reads ``config``, ``chunks``, ``pages`` and ``path_to_ingest_files`` from
    the notebook namespace; takes no arguments and returns nothing.
    """
    from pympler import asizeof
    import tiktoken

    bytes_per_mb = 1024 * 1024

    tokenizer = tiktoken.encoding_for_model(config["model"])
    # Token count of the string representation of the whole chunk list.
    token_count = len(tokenizer.encode(str(chunks)))
    chunk_count = len(chunks)

    # Qdrant's formula is
    # memory_size in bytes = number_of_vectors * vector_dimension * 4 bytes * 1.5
    qdrant_bytes = chunk_count * config["embedding_dims"] * 4 * 1.5

    print(f"""
        Target folder: {path_to_ingest_files}
        Pages processed: {len(pages)}
        Text splitter: {config["splitter_type"]}
        Chunk size: {config["chunk_size"]} characters
        Chunk overlap: {config["chunk_overlap"]} characters
        Chunks (vectors) created: {chunk_count} 
        Dictionary size: {asizeof.asizeof(pages) / bytes_per_mb:.2f} MB
        Vectorstore tokens: {token_count}
        Estimated memory size (Qdrant): {qdrant_bytes / bytes_per_mb:.2f} MB
    """)

    # TODO: these values now live inside a function, so they are not
    # accessible from here:
    #     Document(s) loaded: {len(file_list)}
    #     Load size: {total_size / (1024 * 1024):.2f} MB


print_document_load_summary()


        Target folder: /Users/drew_wilkins/Drews_Files/Drew/Python/VSCode/ASK/data/original_library_documents/CG_Auxiliary-specific
        Pages processed: 388
        Text splitter: CharacterTextSplitter
        Chunk size: 2000 characters
        Chunk overlap: 200 characters
        Chunks (vectors) created: 384 
        Dictionary size: 4.43 MB
        Vectorstore tokens: 265701
        Estimated memory size (Qdrant): 3.38 MB
    


## 4. OPTIONAL: Create NEW vector store and add documents into it


#### Combo Create + Add Docs

In [9]:
from qdrant_client import QdrantClient

client = QdrantClient()

In [10]:
from langchain.vectorstores.qdrant import Qdrant

qdrant = Qdrant(client=client,
                collection_name=qdrant_collection_name,
                # embedding here is LC interface to the embedding model
                embeddings=config["embedding"],
                )

In [11]:
qdrant

<langchain.vectorstores.qdrant.Qdrant at 0x1793b4760>

In [16]:
qdrant.from_documents(
    chunks,
    embedding=config["embedding"],  # yes this is required here too
    # path=qdrant_path,  # Only required for local instance
    collection_name=qdrant_collection_name,  # yes this is required here too
    url=os.environ.get("QDRANT_URL"),
    api_key=os.environ.get("QDRANT_API_KEY"),  # Only required for Qdrant Cloud
    force_recreate=False,  # don't use if db doesn't already exist
)

<langchain.vectorstores.qdrant.Qdrant at 0x2bb36d3a0>

In [None]:
print(client.get_collections())
print(
    f"""number of points in collection {client.count(collection_name=qdrant_collection_name,)}""")

In [None]:
from qdrant_client import QdrantClient
from langchain.vectorstores.qdrant import Qdrant


def create_localdb_and_add_docs():
    """Create the local (on-disk) vector store and load ``chunks`` into it.

    Intended for the first load only. ``Qdrant.from_documents`` is a
    constructor-style method that opens its own client on ``qdrant_path``, so
    no separate client or ``Qdrant`` instance is needed beforehand. The client
    it opened is closed before returning so that the local database is
    released and a later cell can open it again.

    Reads ``chunks``, ``config``, ``qdrant_path`` and
    ``qdrant_collection_name`` from the notebook namespace. Returns None.
    """
    vector_store = Qdrant.from_documents(
        chunks,
        embedding=config["embedding"],
        path=qdrant_path,  # Only required for local instance
        collection_name=qdrant_collection_name,
        # url=os.environ.get("QDRANT_URL"),
        # Only required for Qdrant Cloud
        # api_key=os.environ.get("QDRANT_API_KEY"),
        force_recreate=False,  # don't use if db doesn't already exist
    )
    # Release the database from this process, as the docstring promises
    # (same clean-up as add_docs_to_existingdb_with_delay).
    vector_store.client.close()


check_me = create_localdb_and_add_docs()

#### Create new Qdrant DB / Collection. 
#### <span style="color:red">WARNING: This will overwrite existing one</span>

In [None]:
# this may not work

from qdrant_client import QdrantClient
from qdrant_client.http import models


# Entry point to the Qdrant service. `path` selects the on-disk store and is
# only required for a local instance.
client = QdrantClient(path=qdrant_path)

client.create_collection(
    collection_name=qdrant_collection_name,
    vectors_config=models.VectorParams(
        # Take the vector size from config so it cannot drift from the value
        # used elsewhere in the notebook (previously hard-coded as 1536).
        size=config["embedding_dims"], distance=models.Distance.COSINE)
)
# You may need to delete the lock file to access this afterwards

#### Add Documents with Timer

In [None]:
import time


def add_docs_to_existingdb_with_delay(batch_size, delay):
    """Add ``chunks`` to the existing local collection in throttled batches.

    The chunks are embedded and uploaded ``batch_size`` at a time with a pause
    of ``delay`` seconds between batches, to stay under the embedding API's
    rate limit. The local database is always released afterwards, even when a
    batch fails, so that other cells can open it.

    Args:
        batch_size: Number of chunks sent per ``add_documents`` call.
        delay: Seconds to wait between two consecutive batches.

    Reads ``chunks``, ``config``, ``qdrant_path`` and
    ``qdrant_collection_name`` from the notebook namespace. Returns None.
    """
    from qdrant_client import QdrantClient
    from langchain.vectorstores import Qdrant

    # Entry point to the on-disk Qdrant store (local instance only).
    client = QdrantClient(path=qdrant_path)
    try:
        # LangChain "vector store" object wrapping the client; `embeddings` is
        # the LangChain interface to the embedding model.
        qdrant = Qdrant(client=client,
                        collection_name=qdrant_collection_name,
                        embeddings=config["embedding"],
                        )

        total = len(chunks)
        for start in range(0, total, batch_size):
            qdrant.add_documents(documents=chunks[start:start + batch_size])
            # No point in waiting once the final batch has been sent.
            if start + batch_size < total:
                time.sleep(delay)
    finally:
        client.close()    # Release the database from this process


add_docs_to_existingdb_with_delay(1700, 45)

In [None]:
print(client.get_collections())

print(
    f"""number of points in collection {client.count(collection_name=qdrant_collection_name,)}""")

## 4. Connect to Vector Store


#### Qdrant Cloud


In [4]:
# Creates an instance of Qdrant Client, which is an entrypoint to communicate with the Qdrant service

from qdrant_client import QdrantClient
from langchain.vectorstores import Qdrant


if 'client' not in globals():
    client = QdrantClient(url=os.environ.get("QDRANT_URL"),
                          api_key=os.environ.get("QDRANT_API_KEY"))
else:
    print(f"Client already exists at {client}")
client.get_collections()

CollectionsResponse(collections=[CollectionDescription(name='ASK_vectorstore')])

#### or Qdrant Local


In [None]:
# Creates an instance of Qdrant Client, which is an entrypoint to communicate with the Qdrant service. Running this places a lock file in the qdrant directory

from qdrant_client import QdrantClient
from langchain.vectorstores.qdrant import Qdrant
import psutil

if 'client' not in globals():
    # Only required for a local instance
    client = QdrantClient(path=qdrant_path)
else:
    print(f"Client already exists at {client}")
client.get_collections()

In [26]:
from qdrant_client.local.qdrant_local import QdrantLocal
from qdrant_client.qdrant_remote import QdrantRemote

try:
    # Check if the client is running locally or via a URL
    if isinstance(client._client, QdrantLocal):
        print("The client is running locally.")
    elif isinstance(client._client, QdrantRemote):
        print("The client is running via a URL.")
    else:
        # This else block handles cases where client._client is neither QdrantLocal nor QdrantRemote
        print("Unable to determine the running mode of the Qdrant client.")
except Exception as e:
    # This block catches any other exceptions that might occur
    print("Unable to determine the running mode of the Qdrant client. Error: ", str(e))

The client is running via a URL.


In [5]:
# Creates a LangChain "vector store" object with entrypoint to your DB within it
qdrant = Qdrant(
    client=client,
    collection_name=qdrant_collection_name,
    # embedding here is a LC interface to the embedding model,
    embeddings=config["embedding"],
)

## 5. Initialize a Document Retriever


#### Define a Retriever

In [22]:
# Initializes a VectorStoreRetriever called retriever from the LC qdrant vector store object

# Option 1 using MMR search
retriever = qdrant.as_retriever(
    search_type="mmr",
    search_kwargs={'k': config["k"], "fetch_k": config["fetch_k"],
                   "lambda_mult": config["lambda_mult"]},
)

In [None]:
# Option 2 using k-NN similarity search
retriever = qdrant.as_retriever(
    search_type="similarity",
    # search_kwargs={'k': config["k"]}  # k specify number of nearest neighbors
    search_kwargs={'score_threshold': config["score_threshold"]}
)

OPTIONAL: Test the retriever is functioning

In [24]:
from IPython.display import Markdown
import re

retrieved_docs = retriever.get_relevant_documents(
    "ALAUX/002_23_2023_NATIONAL_WORKSHOPS")


# Fallback pattern: captures the text between the braces of "metadata={...}"
# in a document's string form. It is non-greedy, so it only copes with flat
# metadata (no nested "}" inside the values).
metadata_pattern = re.compile(r"metadata=\{(.*?)\}")


def extract_metadata(doc_list):
    """Return the metadata dictionaries of the given documents.

    Args:
        doc_list: Iterable of documents. Objects exposing a ``metadata`` dict
            are read directly. Anything else is converted to a string and
            scanned for ``metadata={...}`` sections.

    Returns:
        A list of metadata dicts: one per object with a ``metadata``
        attribute, and one per match for string input.

    Raises:
        ValueError, SyntaxError: If a scraped ``metadata={...}`` section is
            not a valid Python literal.
    """
    import ast

    metadata_list = []
    for doc in doc_list:
        doc_metadata = getattr(doc, "metadata", None)
        if isinstance(doc_metadata, dict):
            # Read the attribute instead of round-tripping through repr():
            # exact, and immune to braces or quotes inside the values.
            metadata_list.append(dict(doc_metadata))
            continue

        text = doc if isinstance(doc, str) else str(doc)
        for match in metadata_pattern.findall(text):
            # literal_eval only accepts literals, so text scraped from a
            # document can never execute code (the previous eval() could).
            metadata_list.append(ast.literal_eval('{' + match + '}'))
    return metadata_list


# Extracting metadata
metadata_list = extract_metadata(retrieved_docs)

# Print each metadata dictionary as a Markdown list item


def display_selected_metadata_as_markdown(metadata_list):
    """Show title, source and page of each metadata dict as Markdown lines.

    Args:
        metadata_list: Iterable of metadata dictionaries. Missing '/Title',
            'source' or 'page' keys are shown as 'No Title', 'No Source' and
            'No Page' respectively.
    """
    # The two trailing spaces before the newline force a Markdown line break.
    rendered_lines = [
        "Title: {}, Source: {}, Page: {}  \n".format(
            entry.get('/Title', 'No Title'),
            entry.get('source', 'No Source'),
            entry.get('page', 'No Page'),
        )
        for entry in metadata_list
    ]
    display(Markdown("".join(rendered_lines)))


# Assuming metadata_list is your list of metadata dictionaries
display_selected_metadata_as_markdown(metadata_list)

Title: , Source: /Users/drew_wilkins/Drews_Files/Drew/Python/VSCode/ASK/data/2023-ALAUX/002_23_2023_NATIONAL_WORKSHOPS.pdf, Page: 0  
Title: No Title, Source: For_injestion/AUXCA SANITATION WORKSHOP MARCH 2022.pdf, Page: 13  
Title: No Title, Source: For_injestion/IT Instructor WORKSHOP 2023 Jan 2023 Final.pdf, Page: 0  
Title: No Title, Source: References/National Directorates/National Leadership Documents/Auxiliary_National_Staff_Guide-November2022.pdf, Page: 0  
Title: No Title, Source: For_injestion/2023_VE_workshop_Dec_4_22.pdf, Page: 0  


## Set up pre-retrieval reasoning step

In [15]:
import pandas as pd
import json


def retrieval_context_excel_to_dict(file_path):
    """Read an Excel workbook into a dictionary of per-worksheet dictionaries.

    Args:
        file_path: Path of the workbook.

    Returns:
        A dict keyed by worksheet name. Each value maps the worksheet's first
        column (keys) to its second column (values). Worksheets with fewer
        than two columns are reported with a printed message and left out.
    """
    sheets = {}  # do not name this `dict`: that would shadow the builtin

    # The context manager closes the workbook handle even if a sheet fails.
    with pd.ExcelFile(file_path) as xls:
        for sheet_name in xls.sheet_names:
            df = pd.read_excel(xls, sheet_name=sheet_name)
            if df.shape[1] < 2:
                print(f"The sheet '{sheet_name}' does not have enough columns.")
                continue
            sheets[sheet_name] = pd.Series(
                df.iloc[:, 1].values, index=df.iloc[:, 0]).to_dict()
    return sheets

In [18]:
import os
import openai

openai.api_key = os.environ['OPENAI_API_KEY']
# openai.api_key = st.secrets["OPENAI_API_KEY"] #Use this version for streamlit


def query_maker(user_question):
    """Rewrite a user question with acronym expansions and term context.

    Loads the "acronyms" and "terms" worksheets from
    '../config/retrieval_context.xlsx', sends them with the question to the
    chat model named in ``config`` and returns the model's rewritten question.

    Args:
        user_question: The question as typed by the user.

    Returns:
        The rewritten question text, or None when the API returns no choices.
    """
    retrieval_context_dict = retrieval_context_excel_to_dict(
        '../config/retrieval_context.xlsx')
    # A missing worksheet becomes an empty mapping instead of JSON "null".
    acronyms_json = json.dumps(
        retrieval_context_dict.get("acronyms") or {}, indent=4)
    terms_json = json.dumps(
        retrieval_context_dict.get("terms") or {}, indent=4)

    # The list names used here must match the labels in user_message below.
    system_message = """
    Your task is to modify the user's question based on two lists: 'acronyms_json' and 'terms_json'. Each list contains terms and their associated additional information. Follow these instructions:

    - Review the user's question and identify if any terms from 'acronyms_json' or 'terms_json' appear in it.
    - If a term from 'acronyms_json' appears in the question, replace the term with the associated additional information.
    - If a term from 'terms_json' appears in the question, append its associated additional information to the end of the question.
    - Do not remove or alter any other part of the original question.
    - Do not provide an answer to the question.
    - If no terms from either list are found in the question, leave the question as is.
    - Respond with the modified question only: no labels, prefixes or explanations.

    Example:
    - Question: How do I get a VE certification?
    - Your response: How do I get a vessel examiner certification? Certification includes information about initial qualification.

    - Question: What are the requirements for pilot training?
    - Your response: What are the requirements for pilot training? Pilot is a position in the aviation program.
    """

    user_message = f"User question: {user_question}```acronyms_json: {acronyms_json}\n\nterms_json: {terms_json}```"

    messages = [
        {'role': 'system', 'content': system_message},
        {'role': 'user', 'content': user_message},
    ]

    response = openai.ChatCompletion.create(
        model=config["model"],
        messages=messages,
        temperature=config["temperature"],
        max_tokens=2000,
    )

    return response.choices[0].message['content'] if response.choices else None

## 6. Initialize a Response Generator


#### Option 1: Simple Generator

In [None]:
# Does QA on the vector store
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI

# keep outside the function so it's accessible elsewhere in this notebook
llm = ChatOpenAI(model=config["model"], temperature=config["temperature"])

rag = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type=config["chain_type"],
    retriever=retriever,
    # chain_type_kwargs={"prompt": prompt},# This is how you specify a custom prompt
    # callbacks=[tracer], #this is for wandb
    return_source_documents=True,
)
rag

In [None]:
from IPython.display import display, Markdown

query = "what are the currency maintenance requirements for copilot?"


response = rag({"query": query})


print()
display(Markdown(f"### **Question:**"))
display(Markdown(query))
display(Markdown(f"### **Response:**"))
display(Markdown(f"> <br>{response['result']}<br><br>"))

#### Option 2: Generator with a custom prompt

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA, StuffDocumentsChain, LLMChain
from langchain.prompts import PromptTemplate, ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate

'''The default prompt is:
Use the following pieces of context to answer the users question. \nIf you don't know the answer, just say I don't know, don't try to make up an answer.\n----------------\n{context}
'''


# System prompt for the custom-prompt generator: the default "answer from the
# context" instruction plus project-specific guidance. {context} is filled
# with the retrieved documents.
system_message_prompt_template = SystemMessagePromptTemplate(
    prompt=PromptTemplate(
        input_variables=['context'],
        template="Use the following pieces of context to answer the users question. INCLUDE ALL OF THE DETAILS YOU CAN IN YOUR RESPONSE, INCLUDING REQUIREMENTS AND REGULATIONS. Include Auxiliary Core Training (AUXCT) in your response for any question regarding certifications or officer positions.  \nIf you don't know the answer, just say I don't know, don't try to make up an answer. \n----------------\n{context}"
    )
)


# Does QA on the vector store

llm = ChatOpenAI(model=config["model"], temperature=config["temperature"])

'''Initializes a simple LLMChain chain: a prompt and a model
    In this case, the prompt is ChatPromptTemplate (could have used PromptTemplate)
    comprised of the system and human prompts and the model is LLM (could have used ChatModels)'''
llm_chain = LLMChain(
    prompt=ChatPromptTemplate(
        input_variables=['context', 'question'],
        messages=[
            system_message_prompt_template,
            HumanMessagePromptTemplate(
                prompt=PromptTemplate(
                    input_variables=['question'],
                    template='{question}'
                )
            )
        ]
    ),
    llm=llm,
)


rag = RetrievalQA(
    combine_documents_chain=StuffDocumentsChain(
        llm_chain=llm_chain, document_variable_name='context'),
    return_source_documents=True,
    retriever=retriever
)

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA, StuffDocumentsChain, LLMChain
from langchain.prompts import load_prompt

# since this uses ChatPromptTemplate, the _type field in the JSON file is set to "prompt".
prompt_template = load_prompt("prompt_template_aux.json")

llm = ChatOpenAI(model=config["model"], temperature=config["temperature"])

llm_chain = LLMChain(
    prompt=prompt_template,
    llm=llm,
)


rag = RetrievalQA(
    combine_documents_chain=StuffDocumentsChain(
        llm_chain=llm_chain, document_variable_name='context'),
    return_source_documents=True,
    retriever=retriever
)

In [20]:
user_question = "what is required to stay current as a BCCOX?"
query = query_maker(user_question)
print(query)

Modified user question: what is required to stay current as a Boat Crew Coxswain?

Explanation: To stay current as a Boat Crew Coxswain, you need to fulfill the requirements for boat crew currency. See ALAUX 048/22, Calendar Year (CY) 2023 Annual Currency Maintenance Requirement Tracking for Crewmember, Coxswain, PWC Operator, and Nighttime Certification, ALAUX 002/23  2023 National Workshops, CG-BSX Policy Letter 19-02  CHANGES TO AUXILIARY INCIDENT COMMAND SYSTEM (ICS) CORE TRAINING. Coxswain is a position in the Auxiliary Boat Crew program, and the requirements that apply to all boat crewmembers also apply to coxswains.







## 6. Search the index and display results


In [19]:
# query = "what is required to stay current as a boat crewmember?"
response = rag(query)

NameError: name 'rag' is not defined

In [None]:
from IPython.display import display, Markdown


def create_short_source_list(response):
    '''Build a Markdown list of the sources, without their text.

    Args:
        response: Dictionary with the keys 'query', 'result' and
            'source_documents'; only 'source_documents' is read. Each entry
            exposes a ``metadata`` dict with 'source' (a path) and 'page'.

    Returns:
        One "*<file name without extension>*, page <n><br>" line per source
        document, joined into a single string.
    '''
    import os

    markdown_list = []

    for doc in response['source_documents']:
        file_name = doc.metadata['source'].split('/')[-1]
        # Strip only the extension: cutting at the first dot would truncate
        # names such as "..._COMDTPUB P16791.3_...pdf".
        short_source = os.path.splitext(file_name)[0]
        page = doc.metadata['page']
        markdown_list.append(f"*{short_source}*, page {page}<br>\n")

    return '\n'.join(markdown_list)


short_source_list = create_short_source_list(response)

# display list
print("")
display(Markdown(f"### **Question:**"))
display(Markdown(f"> <br>{response['query']}<br><br>"))
display(Markdown(f"### **Response:**"))
display(Markdown(f"> <br>{response['result']}<br><br>"))
display(Markdown(f"#### **Source Documents:**"))
display(Markdown(short_source_list))

In [None]:
from IPython.display import display, Markdown


def create_long_source_list(response):
    '''Build a Markdown list of the sources together with their full text.

    Args:
        response: Dictionary with the keys 'query', 'result' and
            'source_documents'; only 'source_documents' is read. Each entry
            exposes ``page_content`` and a ``metadata`` dict with 'source'
            (a path) and 'page'.

    Returns:
        One numbered "**Reference i:**" entry per source document (file name
        without extension, page, then the page text), joined into a single
        string.
    '''
    import os

    markdown_list = []

    for i, doc in enumerate(response['source_documents'], start=1):
        page_content = doc.page_content
        file_name = doc.metadata['source'].split('/')[-1]
        # Strip only the extension: cutting at the first dot would truncate
        # names such as "..._COMDTPUB P16791.3_...pdf".
        short_source = os.path.splitext(file_name)[0]
        page = doc.metadata['page']
        markdown_list.append(
            f"**Reference {i}:**    *{short_source}*, page {page}<br>  {page_content}\n")

    return '\n'.join(markdown_list)


long_source_list = create_long_source_list(response)


# display list
display(Markdown("---"))
display(Markdown(f"#### **Full Source References:**"))
display(Markdown(long_source_list))
display(Markdown("---"))
display(Markdown("**Disclaimer:** This service only contains national documents. It is for informational use only and is not intended as a substitute for official policy.*"))

## 7. Evaluate the model's performance


In [None]:
# %pip install --upgrade tiktoken

In [None]:
#  Get wandb Metrics-- WORKS ONLY IN JUPYTER NOTEBOOKS

# %wandb wks_consulting/ChatUSCG_notebook # Display a project workspace

# %wandb wks_consulting/ChatUSCG_notebook/runs/RUN_ID  # Display a single run

# %wandb wks_consulting/ChatUSCG_notebook/sweeps/SWEEP_ID # Display a sweep

# %wandb wks_consulting/ChatUSCG_notebook/reports/REPORT_ID # Display a report

# %wandb wks_consulting/ChatUSCG_notebook -h 2048 # Specify the height of embedded iframe

In [None]:
import tiktoken

encoding = tiktoken.encoding_for_model(config["model"])
query_tokens = encoding.encode(response['query'])
query_length = len(query_tokens)
source_tokens = encoding.encode(str(response['source_documents']))
source_length = len(source_tokens)
result_tokens = encoding.encode(response['result'])
result_length = len(result_tokens)
tokens = encoding.encode(str(response))
tot_length = len(tokens)


print(f"""
    Encoding: {encoding}

    {query_length} query
    {source_length} source
    {result_length} result
    {tot_length} Total tokens used

    GPT-3.5-turbo supports a context window of 4096 tokens
    GPT-3.5-turbo-16k supports a context window of 16,385 tokens
    GPT-4 supports a context window of 8192 tokens
    GPT-4-32k supports a context window of 32,768 tokens
""")

In [None]:
import tiktoken


def count_tokens(response):
    '''Count the tokens in each part of a RAG response.

    Args:
        response: Mapping with the keys 'query', 'source_documents' and
            'result'.

    Returns:
        A 4-tuple ``(query, source, result, total)`` of token counts under the
        encoding of ``config["model"]``. The source and total counts are taken
        over the string form of the source documents and of the whole
        response respectively.
    '''
    encoder = tiktoken.encoding_for_model(config["model"])

    def token_len(text):
        return len(encoder.encode(text))

    return (
        token_len(response['query']),
        token_len(str(response['source_documents'])),
        token_len(response['result']),
        token_len(str(response)),
    )


# Usage:
response = {
    'query': "your_query_here",
    'source_documents': "your_source_documents_here",
    'result': "your_result_here"
}

query_len, source_len, result_len, total_len = count_tokens(response)

'''# use this one in python script
wandb.log({"tokens_used": tot_length, "16k context window": "4096 tokens"})
wandb.finish()  # this is only needed for the juypter notebook'''

In [None]:
print(f"""
    Encoding: {encoding}

    {query_length} query
    {source_length} source
    {result_length} result
    {tot_length} Total tokens used

    GPT-3.5-turbo supports a context window of 4096 tokens
""")

Write response to a pickle file, overwriting existing pickle file

In [None]:
# Save the response (a python dictionary) to a file
import pickle

with open("dummy_response.pkl", "wb") as file:
    pickle.dump(response, file)