In [None]:
import os
import glob
from langchain_community.document_loaders import DirectoryLoader
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_community.llms import Ollama
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
from langchain_community.document_loaders import PyPDFLoader

In [130]:
# Directory containing the standards documents; it is a relative path, so it
# resolves against the kernel's current working directory.
standards_directory = "./standards/"

# 1. Load the documents
def load_standards(directory, glob_pattern="**/*.pdf"):
    """Load the PDF files found under ``directory`` as LangChain documents.

    Args:
        directory: Root folder to search.
        glob_pattern: Glob, relative to ``directory``, selecting the files to
            load. The default picks up every ``.pdf`` file at any depth.

    Returns:
        Whatever ``DirectoryLoader.load()`` returns for the matched files
        (each file is parsed with ``PyPDFLoader``).
    """
    # The dot in the default pattern matters: "**/*pdf" would also match any
    # file whose name merely ends in the letters "pdf".
    loader = DirectoryLoader(
        path=directory,
        glob=glob_pattern,
        loader_cls=PyPDFLoader,
    )
    return loader.load()

# Load the standards once; `documents` is what the embedding step consumes.
documents = load_standards(standards_directory)

In [None]:
# 3. Embed the chunks and store them in the vector DB
def embed_chunks(chunks):
    """Index ``chunks`` in a Chroma vector store.

    The documents are embedded with the Ollama ``nomic-embed-text`` model and
    the resulting Chroma store is returned to the caller.
    """
    return Chroma.from_documents(
        chunks,
        OllamaEmbeddings(model="nomic-embed-text"),
    )

# The loaded documents are embedded as-is: no separate splitting step ("2.")
# is visible in this notebook. NOTE(review): confirm that is intentional.
vector_store = embed_chunks(documents)

In [None]:
# Query text; it is passed to the retrieval chain under the "input" key.
question = "what is federal and state restriction on use of lead content in children items"

In [None]:
# Ollama-served model that writes the final answer.
llm = Ollama(model="llama3.1")

## Prompt construction
# The grounding instruction is sent as a real system message rather than as
# pseudo "<System>" tags inside the user turn. The retrieved documents are
# substituted for {context} and the user's query for {input}.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "Disregard any previous information and provide an answer solely "
            "based on the information present in the context.",
        ),
        (
            "human",
            "<context>\n"
            "{context}\n"
            "</context>\n"
            "\n"
            "Answer the question below. Include all specific information "
            "and keep it brief.\n"
            "\n"
            "Question: {input}",
        ),
    ]
)

## Retrieve context from vector store
# docs_chain fills {context} with the documents it is given and calls the LLM;
# retrieval_chain fetches those documents from the vector store for each
# "input" and hands them to docs_chain.
docs_chain = create_stuff_documents_chain(llm, prompt)
retriever = vector_store.as_retriever()
retrieval_chain = create_retrieval_chain(retriever, docs_chain)


In [None]:
# Run retrieval + generation for the question; the result is indexed by key
# (its "answer" entry is displayed in the next cell).
answer = retrieval_chain.invoke({"input":question})

In [None]:
# Bare last expression so the notebook renders the chain's "answer" entry.
answer['answer']