In [1]:
import os
from langchain.llms import HuggingFaceHub
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

In [5]:
#Store in vector DB
PERSIST_DIR = "./db"

In [6]:
#Create embeddings
embeddings = HuggingFaceBgeEmbeddings(
    model_name="BAAI/bge-large-en",
    query_instruction="Represent the query for retrieval:",
    model_kwargs={"token": os.getenv("HUGGINGFACEHUB_API_TOKEN")}
)

In [None]:
if os.path.exists(os.path.join(PERSIST_DIR, "index")):
    print("🔁 Loading existing vector DB...")
    vectordb = Chroma(persist_directory=PERSIST_DIR, embedding_function=embeddings)
else:
    #Read PDF
    pdf_loader = PyPDFLoader("DDD_Reference_2015-03.pdf")
    pdf_documents = pdf_loader.load()

    # Split PDF textx into chunks
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = splitter.split_documents(pdf_documents)
   

    vectordb = Chroma.from_documents(chunks, embeddings, persist_directory="./db")
    vectordb.persist()

In [8]:
#load the vectordb and set up retrieval
retriever = vectordb.as_retriever(search_kwargs={"k" : 3})

In [9]:
#initialize LLM model
llm = HuggingFaceHub(
    huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN"),
    repo_id="tiiuae/falcon-7b-instruct",
    model_kwargs={"temperature": 0.9, "max_new_tokens": 100}
)

  llm = HuggingFaceHub(


In [12]:
qa_chain = RetrievalQA.from_chain_type(

    llm=llm,
    chain_type = "stuff",
    retriever = retriever,
    return_source_documents = True
)

In [23]:
query = "what is bounded context? Explain me in 200 words"
result = qa_chain(query)
print(result)


  result = qa_chain(query)


{'query': 'what is bounded context? Explain me in 200 words', 'result': "Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n\nI.\t\r \xa0Putting\t\r \xa0the\t\r \xa0Model\t\r \xa0to\t\r \xa0Work\t\r \xa0.........................................................................\t\r \xa01\t\r \xa0Bounded\t\r \xa0Context\t\r \xa0....................................................................................................................\t\r \xa02\t\r \xa0Ubiquitous\t\r \xa0Language\t\r \xa0...............................................................................................................\t\r \xa03\t\r \xa0Continuous\t\r \xa0Integration\n\nI.\t\r \xa0Putting\t\r \xa0the\t\r \xa0Model\t\r \xa0to\t\r \xa0Work\t\r \xa0.........................................................................\t\r \xa01\t\r \xa0Bounded\t\r \xa0Context\t\r \xa0........................

In [25]:
result = qa_chain.invoke("what is bounded context?")
print(result)



{'query': 'what is bounded context?', 'result': "Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n\nI.\t\r \xa0Putting\t\r \xa0the\t\r \xa0Model\t\r \xa0to\t\r \xa0Work\t\r \xa0.........................................................................\t\r \xa01\t\r \xa0Bounded\t\r \xa0Context\t\r \xa0....................................................................................................................\t\r \xa02\t\r \xa0Ubiquitous\t\r \xa0Language\t\r \xa0...............................................................................................................\t\r \xa03\t\r \xa0Continuous\t\r \xa0Integration\n\nI.\t\r \xa0Putting\t\r \xa0the\t\r \xa0Model\t\r \xa0to\t\r \xa0Work\t\r \xa0.........................................................................\t\r \xa01\t\r \xa0Bounded\t\r \xa0Context\t\r \xa0................................................

In [26]:
print("\nAnswer:")
print(result["result"])


Answer:
Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

  Integration....................................................................................................	....	

  Integration....................................................................................................	....	

  Mechanisms..........................................................................................................	

Question: what is bounded context?
Helpful Answer:

Bounded context refers to a specific area or range of an enterprise's business model where stakeholders understand the core values, the principles, the and requirements of the business process. It helps to establish common terminology, language, and processes across organizations to support efficiency, coordination, and collaboration. Examples of bounded context can include the technical architecture, industr

In [27]:
print("\nSources:")
for doc in result["source_documents"]:
    print(doc.metadata["source"])


Sources:
DDD_Reference_2015-03.pdf
DDD_Reference_2015-03.pdf
DDD_Reference_2015-03.pdf
