In [1]:
from langchain_community.chat_models import ChatOllama
from langchain_chroma import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_core.runnables import RunnableLambda
from bs4 import BeautifulSoup
import re

In [3]:
# Embedding model used to vectorise both the ingested documents and the queries.
model_name = 'dunzhang/stella_en_400M_v5'
# NOTE(review): the device is hard-coded to 'cuda'; presumably this cell fails on a
# machine without a GPU — confirm, or select the device at runtime.
# NOTE(review): "trust_remote_code": True allows Python code shipped with the model
# repository to run locally; keep it only for repositories you trust.
model_kwargs = {'device': 'cuda', "trust_remote_code": True}

embedding_model = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
)

  @torch.library.impl_abstract("xformers_flash::flash_fwd")
  @torch.library.impl_abstract("xformers_flash::flash_bwd")
Some weights of the model checkpoint at dunzhang/stella_en_400M_v5 were not used when initializing NewModel: ['new.pooler.dense.bias', 'new.pooler.dense.weight']
- This IS expected if you are initializing NewModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing NewModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [9]:
# Chroma collection "its_faq" persisted under ./db. Documents and queries are
# embedded with `embedding_model`, and the collection is configured for cosine
# distance through the "hnsw:space" metadata entry.
vector_store = Chroma(
    collection_name="its_faq",
    persist_directory="./db",
    embedding_function=embedding_model,
    collection_metadata={"hnsw:space": "cosine"}
)

In [10]:
# Empties the "its_faq" collection so the ingestion cells below start from a clean store.
# NOTE(review): this discards whatever was previously persisted in ./db for this
# collection — skip this cell when the existing vectors should be reused.
vector_store.reset_collection()

The splitter and embeddings seem to work fine. To cut down the chunk size required, consider trying semantic chunking (https://python.langchain.com/v0.1/docs/modules/data_connection/document_transformers/semantic-chunker/) or AI21SemanticTextSplitter with merging (https://python.langchain.com/v0.1/docs/integrations/document_transformers/ai21_semantic_text_splitter/#splitting-text-by-semantic-meaning-with-merge).

In [11]:
from langchain_text_splitters import CharacterTextSplitter

# Splitter shared by both ingestion cells: text is cut on newline characters and
# the pieces are merged into chunks of at most `chunk_size`, with `chunk_overlap`
# carried over between neighbouring chunks.
# NOTE(review): sizes are presumably measured in characters (the splitter's
# default length function), not tokens — confirm against the embedding model's limit.
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=8000,
    chunk_overlap=100
)

In [12]:
from loaders.HTMLDirectory import HTMLDirectoryLoader
from html2text import HTML2Text

# Single HTML-to-text converter instance reused by faq_html_parser for every page.
h = HTML2Text()
# Images are left out of the converted text.
h.ignore_images = True

def faq_html_parser(html):
    """Extract the question and answer of one FAQ page as compact text.

    The page is expected to hold the question in the element with id
    ``kb_article_question`` and the answer in the element with id
    ``kb_article_text``. Both fragments are converted with the shared
    ``h`` (HTML2Text) converter and joined as ``question\\nanswer``.

    Args:
        html: Raw HTML markup of a single FAQ article.

    Returns:
        The converted text with every run of consecutive newlines collapsed
        to a single newline, or ``None`` when either element is missing.
    """
    # Name the parser explicitly: without it BeautifulSoup picks whichever
    # parser happens to be installed (and warns), so the same page could be
    # parsed differently on different machines.
    soup = BeautifulSoup(html, "html.parser")
    question = soup.find(id="kb_article_question")
    answer = soup.find(id="kb_article_text")

    # find() returns None when no element carries the requested id.
    if question is None or answer is None:
        return None

    question = h.handle(str(question))
    answer = h.handle(str(answer))

    qa = f"{question}\n{answer}"
    # Collapse blank lines so chunks are not padded with empty separators.
    removed_repeating_newlines = re.sub(r'\n{2,}', '\n', qa)

    return removed_repeating_newlines

# Ingest the archived FAQ pages: load -> split -> add to the vector store.
# HTMLDirectoryLoader is a project-local loader; presumably it applies
# faq_html_parser to each HTML file in the directory — verify in loaders/HTMLDirectory.
faq_html_loader = HTMLDirectoryLoader("../web-scraper/faq-archive", faq_html_parser)
# Materialise the lazy loader so the documents are held in a list.
faq_documents = list(faq_html_loader.lazy_load())
faq_split_documents = text_splitter.split_documents(faq_documents)
# As the cell's last expression, the list of ids returned for the stored chunks is displayed.
vector_store.add_documents(faq_split_documents)

['56127365-796b-4d82-84af-3c92efa4610e',
 '108aabcb-d801-4ce2-80b9-3dd20c2c7368',
 '71babe9e-9d4c-498f-be71-21188c893beb',
 '0dc707e2-fd1c-4f49-8f3e-7e7cfbd27829',
 '8c47674a-fd8a-43f5-9e8b-a0f30be6c89f',
 '0a77aadb-d876-42fb-a890-635428497cf6',
 '8a60f018-5662-48c5-acc6-502bd445e196',
 '9f31160e-d0ce-4f51-8e62-6609b37866c1',
 'd216f016-f10c-4b8c-9fd8-f3f228aef8e4',
 '9e9093ed-e927-46d3-a9a9-59eee1845628',
 'f46618c0-364c-40be-b75a-e57f935c2e06',
 '27efdff0-3a76-46bc-9c6d-eb6cb89da4c2',
 '9d364c1c-44e4-4c22-892e-d0e0e8576ba5',
 '8ea82733-9174-411d-869a-26e9f7829839',
 '01231c33-7bf6-4dce-a489-7b15dbc79cca',
 'ceadc8a9-f5d5-4d4d-891c-cd1646d5616a',
 '1bf95130-9bae-47f2-871c-c57a01e11c9e',
 '1b0536ba-8391-4e01-b6b3-bf12f054fade',
 'bedac155-efe8-4322-a7da-f58e8c61ddee',
 '55bf2546-c40f-436a-ba3f-d5c2a25df347',
 '9c58695f-dfc9-4616-ab1b-4bb111719f6a',
 'cee273b2-99e7-46f2-811a-d07a26161bd4',
 'cabdc512-1d7a-4e3e-9bf7-8c852e5bc326',
 '02b1e815-3c19-400e-929e-df0966e3c468',
 'b20de5ee-b3cb-

In [13]:
# Reports the number of FAQ chunks produced by the splitter in this run
# (the length of the list that was added, not a count queried from the store).
print(f"{len(faq_split_documents)} faq docs in vector store")

568 faq docs in vector store


In [14]:
# Quick manual check that retrieval returns relevant FAQ chunks for a known topic.
vector_store.similarity_search("duo mobile")

[Document(metadata={'source': 'https://www.hawaii.edu/askus/1859'}, page_content='##  Duo Mobile app 4.0 update\n  *[UH]: University of Hawaii\n### Overview\nThe Duo Mobile 4.0 version of the app is a significant update to the user\ninterface. This update will be released on the following schedule:\n  * For iOS: October 11 through 18, 2021\n  * For Android: October 11 through 15, 2021\nDuo Mobile 4.0 requires\n  * iOS 13 and up\n  * Android 8 and up\nOlder versions of the Duo Mobile app will continue to work.\n### Summary of changes\nThe following are some of the changes to the Duo Mobile app in version 4.0.\n**New Duo Approve screen**\nIn versions prior to 4.0, the _Approve_ button was on the left and the _Deny_\nbutton was on the right. In Duo Mobile 4.0, the _Approve_ button has been\nmoved to the **right** , and the _Deny_ button moved to the **left**. Duo made\nthis change to follow the industry standard of placing positive actions on the\nright (move forward in the flow of action

In [9]:
from loaders.JSONFile import JSONFileLoader

def json_parser(d):
    """Convert one scraped-URL record into document fields.

    Args:
        d: Mapping that provides the page text under ``"extracted"`` and the
            page address under ``"url"``.

    Returns:
        A dict with the text under ``"page_content"`` and a ``"metadata"``
        dict whose ``"source"`` entry is the page address.

    Raises:
        KeyError: If ``"extracted"`` or ``"url"`` is absent from ``d``.
    """
    text = d["extracted"]
    origin = d["url"]
    return dict(page_content=text, metadata=dict(source=origin))

# Ingest the scraped site pages stored in urls.json: load -> split -> add to the vector store.
# JSONFileLoader is a project-local loader; presumably it applies json_parser to
# each record of the file — verify in loaders/JSONFile.
json_file_loader = JSONFileLoader("../web-scraper/data/urls.json", json_parser)
# Unlike the FAQ cell, the lazy_load() result is handed to the splitter directly
# rather than being wrapped in list() first, so it can only be consumed once.
json_documents = json_file_loader.lazy_load()
json_split_documents = text_splitter.split_documents(json_documents)
# As the cell's last expression, the list of ids returned for the stored chunks is displayed.
vector_store.add_documents(json_split_documents)

['141fa506-cea4-4a97-8390-c503335d3b17',
 '879d97ae-db2b-4889-94dd-986bb9e0791d',
 '2338fdb3-3de5-4b55-9a50-9a72291ef6b1',
 '58334ad3-8ecd-4b9b-89e4-9906f3692d9d',
 '7ab91c93-2319-4ace-b6d2-40169ae75b4d',
 '385d41ad-3e5f-4d8a-96fb-611741f7a123',
 '0d58fb82-acf2-4521-88f8-01f9e6ab9806',
 'a61a9ddf-1a60-45b2-b821-35db4c9ee645',
 'ae5abb7d-bb00-4f8f-9606-03e8e857d41a',
 '11c7f867-7927-407f-ad8d-e0a5098a294d',
 'c3745539-e987-4732-9bc7-b022664ffef5',
 'dc144c86-e8a4-4a26-bb4b-4efb06859632',
 'c752b43e-e8f4-4ef2-89b5-ff6cf237ed9f',
 '0c5bd09b-ec47-4774-b61d-41213e18c3ae',
 'aaa0dc8f-d577-4c88-b9c4-ab86bfa97d5c',
 'c1455dbf-59ab-40fa-9813-1b9c83830989',
 'b8e67c9c-6d36-4803-8a8d-1f5d7ca07d78',
 '975fd365-7002-40c1-ae4b-36cd065ec8f5',
 'b531936c-3a3c-4fb1-ac39-04a9f488220a',
 '6269abdf-3ac5-455c-9954-86ca7924c264',
 '1044431a-cdab-4239-9b13-ec303402e0ff',
 'faf68217-7abb-465e-a5cd-473f6bb3ce88',
 '80adaa74-da35-4883-8d66-ac51d4198d39',
 '4eee73c3-b6e1-46a1-9518-7f247560126c',
 'fd7237ca-dcca-

In [10]:
# Reports the number of JSON-derived chunks produced by the splitter in this run
# (the length of the list that was added, not a count queried from the store).
print(f"{len(json_split_documents)} json docs in vector store")

773 json docs in vector store


In [15]:
# Ollama models tried so far: gemma2, gemma2:27b, mistral-nemo.
# TODO: after moving to a larger (xl) instance, pull and try 'command-r', which is
# notable for RAG and tool use.
model = "mistral-nemo"

# temperature=0 is used for the chat model that drives every chain in this notebook.
llm = ChatOllama(model=model, temperature=0)

In [16]:
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import MessagesPlaceholder
from langchain_core.prompts import ChatPromptTemplate, FewShotChatMessagePromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain.retrievers import EnsembleRetriever
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor

from langchain.globals import set_verbose
from langchain.callbacks.tracers import ConsoleCallbackHandler
# Switch on LangChain's global verbose flag while developing the chains.
set_verbose(True)


# Base retriever over the vector store; 'k': 2 limits each query to two results.
retriever = vector_store.as_retriever(
    search_kwargs={'k': 2}
)

# Ensemble wrapper that currently contains only the vector-store retriever.
combined_retriever = EnsembleRetriever(retrievers=[retriever, ])

# LLM-based extractor wrapped around the ensemble retriever.
# NOTE(review): compression_retriever is not referenced by the chains built later
# in this notebook (they use combined_retriever) — confirm whether it should be.
compressor = LLMChainExtractor.from_llm(llm)
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor, base_retriever=combined_retriever
)

# System instructions for the question-rewriting step: turn the latest user
# message into a standalone question, without answering it.
contextualize_q_system_prompt = (
    "Given a chat history and the latest user question "
    "which might reference context in the chat history, "
    "formulate a standalone question which can be understood "
    "without the chat history. Do NOT answer the question. "
    "just reformulate it if needed and otherwise return it as is. "
    "if there is no chat history, return the input as is. "
    "if the input is a greeting, return the input as is. "
)

# Prompt layout: system instructions, then the prior messages, then the new input.
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

# Retriever that uses the LLM and the prompt above together with combined_retriever.
history_aware_retriever = create_history_aware_retriever(
    llm, combined_retriever, contextualize_q_prompt
)

# System instructions for the answering step. Adjacent string literals are
# concatenated verbatim, so every instruction ends with an explicit "\n";
# previously two of them ran straight into the next sentence
# ("...UH Manoa.Answer..." and "...text provided.if the user...").
system_prompt = (
    "Your name is Hoku. You are an assistant for answering questions about UH Manoa.\n"
    "Answer the question given ONLY the provided context.\n"
    "If the answer DOES NOT appear in the context, say 'I'm sorry I don't know the answer to that'.\n"
    "Keep your answer concise, informative and with a conversational tone.\n"
    "DO NOT mention the context or the text provided.\n"
    "if the user greets you, greet them back nicely"
)

# Answering prompt: system instructions, prior messages, then a human turn that
# carries the retrieved context followed by the question.
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "context:{context}\n\nquestion: {input}"),
    ]
)

# Chain that fills {context} with the retrieved documents and asks the LLM.
question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
# Full RAG chain: history-aware retrieval feeding the answering chain.
rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)

# In-memory chat histories, keyed by session id.
store = {}

def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the chat history for ``session_id``, creating it on first use.

    Histories live in the module-level ``store`` dict, so they last only as
    long as that dict is kept.
    """
    try:
        return store[session_id]
    except KeyError:
        # First message of this session: start with an empty history.
        history = store[session_id] = ChatMessageHistory()
        return history

# Labelled examples for the "does this input need source links?" classifier:
# small talk maps to "no", factual questions map to "yes".
sources_examples = [
    {"input": "Hi Hoku!", "output": "no"},
    {"input": "How are you?", "output": "no"},
    {"input": "What is duo mobile used for?", "output": "yes"},
    {"input": "what specs should i have for a mac laptop?", "output": "yes"},
    {"input": "Thank you!", "output": "no"},
]

# Each example is rendered as a human message followed by the AI's label.
example_prompt = ChatPromptTemplate.from_messages(
    [
        ("human", "{input}"),
        ("ai", "{output}"),
    ]
)

few_shot_prompt = FewShotChatMessagePromptTemplate(
    example_prompt=example_prompt,
    examples=sources_examples,
    input_variables=["input"]
)

# Classifier prompt: task description, the few-shot examples, then the input to classify.
final_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "Your job is to classify a user input as needing sources 'yes' or not needing sources 'no'."),
        few_shot_prompt,
        ("human", "{input}"),
    ]
)

def requires_source(inp: dict):
    """Ask the LLM whether the given input should be answered with source links.

    Args:
        inp: Variables for ``final_prompt``; the prompt uses ``{input}``.

    Returns:
        ``True`` when the lower-cased model reply contains ``"yes"``,
        otherwise ``False``.
    """
    classifier = final_prompt | llm
    print(f"Final prompt: {final_prompt}")
    verdict = classifier.invoke(inp).content
    return verdict.lower().find("yes") != -1


def add_sources_to_response_if_needed(inp: dict) -> dict:
    """Append the source links of the retrieved documents to the answer.

    Args:
        inp: Output of the RAG chain. Reads ``"input"`` (the user message),
            ``"answer"`` (the generated reply) and ``"context"`` (the retrieved
            documents, each with a ``metadata`` dict).

    Returns:
        The same dict. When ``requires_source`` says the input needs sources
        and at least one document has a ``"source"``, ``inp["answer"]`` is
        extended in place with a footer that lists each distinct source once.
        Otherwise ``inp`` is returned unchanged.
    """
    if not requires_source({"input": inp["input"]}):
        print("No sources needed for this response")
        return inp

    # dict.fromkeys de-duplicates while keeping the order of the retrieved
    # documents; the previous list(set(...)) produced an arbitrary link order.
    # .get() tolerates documents without a "source" and a missing "context".
    sources = dict.fromkeys(
        doc.metadata.get("source") for doc in inp.get("context", [])
    )
    sources.pop(None, None)

    # Nothing to link to: leave the answer as it is instead of adding an empty footer.
    if not sources:
        return inp

    sources_text = "\n".join(sources)
    inp["answer"] = f"{inp['answer'].strip()}\n\nFor more information, check out these links\n{sources_text}"
    return inp

# Wrap the RAG chain with per-session message history: the user message is read
# from "input", prior messages are supplied as "chat_history", and the reply is
# taken from "answer". Histories are obtained through get_session_history.
conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)

# Final pipeline: the chain's output dict is passed through
# add_sources_to_response_if_needed as a post-processing step.
conversational_rag_chain_with_sources = conversational_rag_chain | add_sources_to_response_if_needed

In [17]:
# Rebind the history store to an empty dict to start fresh conversations;
# get_session_history looks `store` up at call time, so it sees the new dict.
store = {}

In [19]:
# Interactive test cell: read one question, run the full chain, print the answer
# and a preview of every retrieved document.
user_input = input()
# Empty input skips the chain call entirely.
if user_input:
    answer = conversational_rag_chain_with_sources.invoke(
        {"input": user_input},
        config={
            # Fixed session id, so every run of this cell continues the same conversation.
            "configurable": {"session_id": "1"},
            # Uncomment to trace each chain step on the console:
            # 'callbacks': [ConsoleCallbackHandler()]
        },
    )
    print("="*30)
    print("Answer")
    print("="*30)
    print(answer["answer"])
    print("="*30)
    print("Sources used for answer:")
    # "context" holds the retrieved documents; default to none if the key is absent.
    for idx, doc in enumerate(answer.get("context", [])):
        print(f"Doc {idx + 1}:")
        # Only the first 100 characters are shown to keep the output short.
        print(f"Content: {doc.page_content[:100]}...")
        print(f"Source: {doc.metadata.get('source', 'N/A')}")
        print()

Final prompt: input_variables=['input'] messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template="Your job is to classify a user input as needing sources 'yes' or not needing sources 'no'.")), FewShotChatMessagePromptTemplate(examples=[{'input': 'Hi Hoku!', 'output': 'no'}, {'input': 'How are you?', 'output': 'no'}, {'input': 'What is duo mobile used for?', 'output': 'yes'}, {'input': 'what specs should i have for a mac laptop?', 'output': 'yes'}, {'input': 'Thank you!', 'output': 'no'}], input_variables=['input'], example_prompt=ChatPromptTemplate(input_variables=['input', 'output'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], template='{input}')), AIMessagePromptTemplate(prompt=PromptTemplate(input_variables=['output'], template='{output}'))])), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], template='{input}'))]
Answer
Duo Mobile is a software application developed by Duo Security 