In [1]:
# Helper function for printing docs
import pandas as pd
from langchain.document_loaders import DirectoryLoader
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_ollama import ChatOllama
from langchain_ollama import OllamaEmbeddings
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import CharacterTextSplitter
def pretty_print_docs(docs):
    """Print the text of each document, numbered from 1.

    Each entry is rendered as ``Document <n>:`` followed by a blank line and the
    document's ``page_content``; consecutive entries are separated by a line of
    100 dashes.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.
    """
    divider = f"\n{'-' * 100}\n"
    sections = []
    for number, doc in enumerate(docs, start=1):
        sections.append(f"Document {number}:\n\n" + doc.page_content)
    print(divider.join(sections))

In [2]:


# --- Configuration -----------------------------------------------------------
# NOTE: this is an absolute, machine-specific path; change it before running
# the notebook on another machine.
F16_MANUAL_PATH = r"C:\Users\admin\Documents\testtxtdir\rawtxtdir\F16_flight_manual.txt"
CHUNK_SIZE = 1000      # passed to RecursiveCharacterTextSplitter(chunk_size=...)
CHUNK_OVERLAP = 15     # passed to RecursiveCharacterTextSplitter(chunk_overlap=...)
EMBEDDING_MODEL = "nomic-embed-text"   # Ollama embedding model name
CHAT_MODEL = "llama3.1:8b"             # Ollama chat model name

# --- Load and split the source document --------------------------------------
# A single text file is loaded here; DirectoryLoader (imported above) can be
# used instead to ingest a whole directory.
data = TextLoader(F16_MANUAL_PATH).load()
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP
)
all_splits = text_splitter.split_documents(data)

# --- Embed the chunks and index them in Chroma -------------------------------
model = OllamaEmbeddings(model=EMBEDDING_MODEL)
vectorstore = Chroma.from_documents(documents=all_splits, embedding=model)

# --- Chat model used for answering (and for context compression in RAG) ------
llm = ChatOllama(model=CHAT_MODEL)

In [3]:
def RAG(user_prompt, llm, vectorstore, top_k_hits = 3, stream = False, source_summaries = False):
    """Answer a question from retrieved context and report the sources used.

    Up to ``top_k_hits`` chunks are fetched from ``vectorstore`` and passed
    through an ``LLMChainExtractor`` built on ``llm``. The surviving documents
    are used both as the prompt context for the answer and as the rows of the
    returned source table, so the table always describes the context the answer
    was generated from. Retrieval is performed once per call.

    Args:
        user_prompt: The question to answer.
        llm: Chat model used for context compression, answering and (optionally)
            per-source summaries. Its ``invoke`` result must expose ``.content``.
        vectorstore: Vector store exposing ``as_retriever``.
        top_k_hits: Number of chunks requested from the vector store before
            compression.
        stream: If True, print the answer chunk by chunk as it is generated and
            return an empty string as the answer.
        source_summaries: If True, add a ``"short summary"`` column containing a
            one-sentence LLM summary of each source document.

    Returns:
        tuple: ``(answer, source_df)``. ``answer`` is the generated text, ``''``
        when ``stream`` is True, or ``"No relevant documents found"`` when
        nothing was retrieved. ``source_df`` is a DataFrame with columns
        ``"source"`` and ``"content"`` (plus ``"short summary"`` when requested);
        it has no rows when nothing was retrieved.
    """
    def format_docs(docs):
        return "\n\n".join(doc.page_content for doc in docs)

    # For conciseness add: Use three sentences maximum and keep the answer concise.
    RAG_TEMPLATE = """
    You are an assistant for question-answering tasks. 
    Use the following pieces of retrieved context to answer the question. 
    If you don't know the answer, just say that you don't know. 
    Keep it brief. 
    
    <context>
    {context}
    </context>
    
    Answer the following question:
    
    {question}"""

    rag_prompt = ChatPromptTemplate.from_template(RAG_TEMPLATE)

    # Honour top_k_hits: without search_kwargs the retriever uses its own default k.
    retriever = vectorstore.as_retriever(search_kwargs={"k": top_k_hits})
    compressor = LLMChainExtractor.from_llm(llm)
    compression_retriever = ContextualCompressionRetriever(
        base_compressor=compressor, base_retriever=retriever
    )

    question = user_prompt
    columns = ["source", "content"]
    if source_summaries:
        columns.append("short summary")

    # Retrieve once; the same documents feed the prompt and the source table.
    docs = compression_retriever.invoke(f"{question}")
    if not docs:
        return "No relevant documents found", pd.DataFrame(columns=columns)

    source_table = {
        # .get avoids a KeyError for documents that carry no "source" metadata.
        "source": [doc.metadata.get("source") for doc in docs],
        "content": [doc.page_content for doc in docs],
    }
    if source_summaries:
        source_table["short summary"] = [
            llm.invoke(f'summarize this in one sentence. <{doc.page_content}> ').content
            for doc in docs
        ]
    source_df = pd.DataFrame(source_table, columns=columns)

    qa_chain = rag_prompt | llm | StrOutputParser()
    chain_input = {"context": format_docs(docs), "question": question}

    if stream:
        for chunk in qa_chain.stream(chain_input):
            print(chunk, end="", flush=True)
        # The answer has already been printed; callers get an empty string.
        return '', source_df

    result = qa_chain.invoke(chain_input)
    return result, source_df

In [4]:
# Example query against the indexed manual.
user_prompt = "Tell me about the air refueling process"

# With stream=True, RAG prints the answer as it is generated and returns ''
# as `result`, so the print(result) below only emits an empty line.
result, sources_df = RAG(
    user_prompt,
    llm,
    vectorstore,
    top_k_hits=3,
    stream=True,
    source_summaries=False,
)

print(result)
print('\nSource information:')
# Bare last expression so the notebook renders the DataFrame.
sources_df

According to the context, the air refueling process involves the following steps:

1. The leader of the receiving aircraft will proceed to the pre-contact position.
2. Each subsequent receiver will visually clear and move from their observation position to the pre-contact position, following a designated refueling sequence.
3. The tanker boom operator controls the boom while the fuel transfer is controlled by the tanker crew from the pilots' compartment.
4. The receiving aircraft will refuel in fingertip formation, with each aircraft maintaining a safe distance from the tanker and other receivers.
5. When all fighters have refueled, the flight may depart the tanker or remain in echelon formation on the tanker's left wing to reverse the Quick Flow procedures.
6. In case of a breakaway, the On-Deck receiver follows the receiver that was on the boom, while any receivers on the wing will remain with the tanker.

The context also mentions specific requirements and procedures for air refueli

Unnamed: 0,source,content
0,C:\Users\admin\Documents\testtxtdir\rawtxtdir\...,SPECIAL AIR REFUELING PROCEDURES\nThe tanker b...
1,C:\Users\admin\Documents\testtxtdir\rawtxtdir\...,REMAIN IN TRAIL POSITION UNTIL THEY ARE CLEARE...
2,C:\Users\admin\Documents\testtxtdir\rawtxtdir\...,REFUELING TRANSFER RATE\nThe air refueling tra...
