In [1]:
from langchain_community.document_loaders import DirectoryLoader, UnstructuredMarkdownLoader
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.chat_models.ollama import ChatOllama
from langchain_objectbox.vectorstores import ObjectBox
from langchain.prompts import ChatPromptTemplate
from objectbox import Store
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document
from langchain_community.vectorstores import FAISS

import os
import shutil

In [2]:
# Embedding model (served by Ollama) used to turn text into vectors for retrieval.
EMBEDDING = OllamaEmbeddings(model="mxbai-embed-large")

# Prompt skeleton: `{context}` receives the retrieved passages and `{question}`
# the user's query. The model is told to answer from the context only.
PROMPT_TEMPLATE = """
Answer the question based only on the following context:

{context}

---

Answer the question based on the above context: {question}
"""

# Chat model that writes the final answer. It only consumes the finished prompt;
# embeddings are used on the retrieval side, so no `embedding=` argument is
# passed here (ChatOllama does not declare such a parameter).
CHAT_MODEL = ChatOllama(model='llama3.1')


In [3]:
# Vector store handle used by the retrieval helpers in this notebook.
# NOTE(review): `embedding_dimensions` presumably has to equal the vector length
# produced by EMBEDDING (1024 assumed for "mxbai-embed-large") — confirm when
# swapping the embedding model.
objectbox = ObjectBox(embedding=EMBEDDING, embedding_dimensions=1024)

In [4]:
def vector_search_db(query_text, k=6):
    """Retrieve the stored documents most similar to ``query_text``.

    The query is embedded with the module-level ``EMBEDDING`` model and the
    resulting vector is handed to the ``objectbox`` vector store.

    Args:
        query_text: Natural-language question to search for.
        k: Maximum number of documents to request from the store. Defaults to
            6, the value that used to be hard-coded here.

    Returns:
        Whatever ``objectbox.similarity_search_by_vector`` returns for the
        query embedding (the rest of this notebook treats it as a list of
        documents exposing ``page_content`` and ``metadata``).
    """
    query_embedding = EMBEDDING.embed_query(query_text)
    return objectbox.similarity_search_by_vector(query_embedding, k=k)

def generate_prompt(db_results, query_text):
    """Build the LLM prompt from retrieved documents and the user's question.

    Args:
        db_results: Iterable of retrieved documents; each must expose a
            ``page_content`` string.
        query_text: The question substituted for ``{question}``.

    Returns:
        The result of formatting ``PROMPT_TEMPLATE`` through
        ``ChatPromptTemplate`` with the joined context and the question.
    """
    # Passages are separated by a horizontal-rule style divider so the model
    # can tell where one retrieved chunk ends and the next begins.
    separator = "\n\n---\n\n"
    joined_context = separator.join(doc.page_content for doc in db_results)
    return ChatPromptTemplate.from_template(PROMPT_TEMPLATE).format(
        context=joined_context,
        question=query_text,
    )

def generate_response(prompt):
    """Send ``prompt`` to the chat model and return the reply text.

    Uses ``invoke`` rather than the deprecated ``predict`` helper (which
    triggers a deprecation warning at call time). ``invoke`` returns a message
    object, so its ``content`` is unwrapped to keep returning plain text as
    before.

    Args:
        prompt: Fully formatted prompt string.

    Returns:
        The text content of the model's reply.
    """
    return CHAT_MODEL.invoke(prompt).content

def format_response(response_text, db_results):
    """Format the model's answer together with the sources it was built from.

    Args:
        response_text: Text returned by the chat model.
        db_results: Retrieved documents; each must expose a ``metadata`` dict.
            Documents without a ``"source"`` entry are skipped.

    Returns:
        A string of the form ``"Response: <text>\\nSources: [<labels>]"``.
        Each label is the source path with its first path component and its
        final ``.extension`` removed.
    """
    raw_sources = (doc.metadata.get("source") for doc in db_results)
    # dict.fromkeys de-duplicates while keeping first-seen (retrieval) order;
    # a plain set would make the printed order vary between runs.
    unique_sources = dict.fromkeys(src for src in raw_sources if src is not None)
    sources = [src.split("/", 1)[-1].rsplit(".", 1)[0] for src in unique_sources]
    return f"Response: {response_text}\nSources: {sources}"

In [5]:
# Disabled alternative: the same question answered through the retriever
# interface instead of `vector_search_db`. Kept for reference only.
# NOTE(review): the recorded output for this cell reports 4 results even though
# k=6 is passed to `invoke`; presumably the retriever applies its own
# configured k rather than this keyword — confirm before re-enabling.
# retriever = objectbox.as_retriever()
# query = "What information does form n400 require?"
# db_results = retriever.invoke(query, k=6)
# print(len(db_results), "results found.")
# prompt = generate_prompt(db_results, query)
# response = generate_response(prompt)
# formatted_response = format_response(response, db_results)
# print(formatted_response)

4 results found.


  warn_deprecated(


KeyboardInterrupt: 

In [None]:
# End-to-end run for one sample question: retrieve -> build prompt -> ask the
# model -> print the answer with its sources.
query = "What information does form n400 require?"
db_results = vector_search_db(query)
if db_results:
    prompt = generate_prompt(db_results, query)
    response = generate_response(prompt)
    print(format_response(response, db_results))
else:
    # Without this branch an empty retrieval leaves the cell output blank,
    # which is indistinguishable from the cell never having been run.
    print(f"No matching documents found for query: {query!r}")

Response: Based on the provided context, form N-400 requires the following information:

1. Information About Your Eligibility (Part 1):
	* Reason for Filing
	* Basis of your eligibility (select one box)
2. Contact and Declaration:
	* Preparer's Full Name
	* Preparer's Given Name (First Name)
	* Preparer's Family Name (Last Name)
	* Preparer's Business or Organization Name (if any)
	* Mailing Address, including Street Number and Name, Apt., Flr., Ste., City or Town, State, Postal Code, Province or Region, Country, ZIP Code
3. General Information:
	* A-Number
4. Additional Information (Part 14):
	* Any additional information that may not fit within the provided space, which can be provided on a separate sheet of paper if necessary.

Note that form N-400 is for Application for Naturalization, and the required information includes details about the applicant's eligibility, contact information, and any additional relevant data.
Sources: ['n-400/n-400', 'n-336/n-336', 'n-400/n-400instr']
