In [1]:
import os
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai.embeddings import AzureOpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_openai.chat_models import AzureChatOpenAI
from langchain.chains.question_answering import load_qa_chain
from dotenv import load_dotenv

load_dotenv()

True

In [2]:
# Azure OpenAI key taken from the process environment (load_dotenv() was
# called in the import cell). The value is None when the variable is not set.
AZURE_OPENAI_API_KEY = os.environ.get('AZURE_OPENAI_API_KEY')

In [3]:
# Path of the PDF to index (the variable keeps its historical name
# `directory` because later cells refer to it).
# The location can be overridden with the CANADA_PDF_PATH environment
# variable (for example from the .env file) so the notebook is not tied to
# one machine's drive layout; the original path stays the default.
directory = os.getenv(
    "CANADA_PDF_PATH",
    "E:\\2024_GenAI\\LANGCHAIN\\Langchain_Chroma\\data\\Canada.pdf",
)

def load_docs(directory):
    """Load a PDF through ``PyPDFLoader`` and return what the loader yields.

    Args:
        directory: Path handed unchanged to ``PyPDFLoader``. Despite the
            name, this notebook passes the path of a single PDF file.

    Returns:
        The result of ``PyPDFLoader(directory).load()``.
    """
    return PyPDFLoader(directory).load()

# Load the PDF, then display how many documents the loader returned.
documents = load_docs(directory=directory)
len(documents)

65

In [4]:
def split_docs(documents, chunk_size=500, chunk_overlap=50):
    """Split documents into chunks with ``RecursiveCharacterTextSplitter``.

    Args:
        documents: Documents forwarded to ``split_documents``.
        chunk_size: ``chunk_size`` given to the splitter (default 500).
        chunk_overlap: ``chunk_overlap`` given to the splitter (default 50).

    Returns:
        The result of the splitter's ``split_documents(documents)`` call.
    """
    splitter_options = {"chunk_size": chunk_size, "chunk_overlap": chunk_overlap}
    return RecursiveCharacterTextSplitter(**splitter_options).split_documents(documents)

# Chunk the loaded documents with the default size/overlap, then display the
# number of chunks produced.
docs = split_docs(documents=documents)
len(docs)

517

In [5]:
# Azure OpenAI embedding client; it is passed to Chroma both when the index
# is built and when the persisted store is reopened.
embedding = AzureOpenAIEmbeddings(
    azure_endpoint='https://cb-att-openai-instance.openai.azure.com/',
    azure_deployment='vector-search-instance2',
    api_version='2024-02-15-preview',
    api_key=AZURE_OPENAI_API_KEY,
)

In [6]:
persist_directory = 'chroma_db'

# Give every chunk a deterministic id built from its source, page and
# position. Without explicit ids the store assigns fresh random ones, so each
# re-run of this cell would append the same chunks again to the persisted
# collection and retrieval would start returning duplicates. With stable ids
# a re-run overwrites the existing entries instead.
# NOTE(review): entries written by earlier runs under random ids are not
# removed by this; delete the `chroma_db` directory once to start clean.
chunk_ids = [
    f"{chunk.metadata.get('source')}-{chunk.metadata.get('page')}-{position}"
    for position, chunk in enumerate(docs)
]

vectordb = Chroma.from_documents(
    documents=docs,
    embedding=embedding,
    ids=chunk_ids,
    persist_directory=persist_directory,
)

In [7]:
# Explicitly ask the vector store (created with `persist_directory`) to persist.
# NOTE(review): newer Chroma releases reportedly persist automatically and
# deprecate this call — confirm against the installed version before removing.
vectordb.persist()

In [8]:
# Azure OpenAI chat model that answers the questions; it uses the same
# endpoint and API version as the embedding client but a chat deployment.
llm = AzureChatOpenAI(
    azure_endpoint='https://cb-att-openai-instance.openai.azure.com/',
    azure_deployment='atttestgpt35turbo',
    api_version='2024-02-15-preview',
    api_key=AZURE_OPENAI_API_KEY,
)

In [9]:
# Open the store from `persist_directory`, using the same embedding client
# for query embedding.
chroma_db = Chroma(
    embedding_function=embedding,
    persist_directory=persist_directory,
)

In [11]:
# Retrieval sanity check: display the two chunks closest to the query.
chroma_db.similarity_search(query="what is the population in canada", k=2)

[Document(page_content="has also produc ed one of the world's most successful and widely\nused soundi ng rockets, the Black Brant.[322]\nThe 2021 Canadian census enumerated a total\npopul ation of 36,991,981, an increase of around 5.2\npercent over the 2016 figure.[324] It is estimated that\nCanada's popul ation surpassed 40,000,000 in 2023.[325]\nThe main drivers of popul ation growth are immigration\nand, to a lesser extent, natural growth.[326] Canada has\none of the highest per-capita immigration rates in the", metadata={'page': 16, 'source': 'E:\\2024_GenAI\\LANGCHAIN\\Langchain_Chroma\\data\\Canada.pdf'}),
 Document(page_content="tion_and_demography/40-million). Statistics Canada. June 16, 2023. Retrieved\nSeptember 7, 2023.\n326. Edmonston, Barry; Fong, Eric (2011). The Changing Canadian Population (https://books.goo\ngle.com/books?id=VVYOgvFPvBEC&pg=PA181). McGill-Queen's University Press. p. 181.\nISBN 978-0-7735-3793-4.\n327. Zimmerman, Karla (2008). Canada (https://books.goo

In [14]:
# Question-answering chain of type "stuff"; verbose=True makes it print the
# formatted prompt when it runs.
chain = load_qa_chain(
    llm=llm,
    chain_type="stuff",
    verbose=True,
)

In [15]:
question = "what is the population in canada?"
# Retrieve the three chunks most similar to the question to use as context.
matching_result = chroma_db.similarity_search(question, k=3)

# `Chain.run` is deprecated in the LangChain versions that ship the
# `langchain_openai` / `langchain_community` packages imported above.
# `invoke` takes all inputs as one mapping and returns a mapping; the
# "stuff" documents chain stores its answer under "output_text".
response = chain.invoke({"input_documents": matching_result, "question": question})
result = response["output_text"]
result



[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
has also produc ed one of the world's most successful and widely
used soundi ng rockets, the Black Brant.[322]
The 2021 Canadian census enumerated a total
popul ation of 36,991,981, an increase of around 5.2
percent over the 2016 figure.[324] It is estimated that
Canada's popul ation surpassed 40,000,000 in 2023.[325]
The main drivers of popul ation growth are immigration
and, to a lesser extent, natural growth.[326] Canada has
one of the highest per-capita immigration rates in the

tion_and_demography/40-million). Statistics Canada. June 16, 2023. Retrieved
September 7, 2023.
326. Edmonston, Barry; Fong, Eric (2011). The Changing Canadian Population (https://books.goo
gl

"According to the latest data available, the population of Canada was 36,991,981 in 2021. It is estimated that Canada's population surpassed 40,000,000 in 2023."