In [1]:
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings, HuggingFaceInstructEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import OpenAI, HuggingFaceHub
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.memory import ConversationBufferMemory
from langchain.agents import AgentType, Tool, initialize_agent, load_tools
from langchain.agents.react.base import DocstoreExplorer
import os
from dotenv import load_dotenv

# Load environment variables from the .env file so the API token never has to
# be hardcoded in the notebook.
load_dotenv()

# Retrieve the Hugging Face Hub API token. Indexing os.environ (rather than
# using .get) makes this cell fail right away with a KeyError when the
# variable is unset.
HUGGINGFACEHUB_API_TOKEN = os.environ["HUGGINGFACEHUB_API_TOKEN"]

In [2]:
# Initialize the HuggingFaceHub LLM that the chains and agents below share.
# The token read from the environment above is passed explicitly.
# NOTE(review): "max_length" is forwarded as-is through model_kwargs; for a
# text-generation model the hosted API may expect "max_new_tokens" instead —
# confirm against the inference API documentation.
llm = HuggingFaceHub(
    repo_id="HuggingFaceH4/zephyr-7b-beta",
    model_kwargs={"temperature": 0.2, "max_length": 256},
    huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN
)



In [3]:
def load_and_process_documents(directory, glob="./*.pdf", chunk_size=1000, chunk_overlap=200):
    """Load PDF files from a directory and split them into overlapping chunks.

    Args:
        directory: Folder handed to DirectoryLoader as the place to look for files.
        glob: File pattern handed to DirectoryLoader. Defaults to "./*.pdf".
        chunk_size: Chunk size handed to RecursiveCharacterTextSplitter.
        chunk_overlap: Overlap between consecutive chunks, handed to
            RecursiveCharacterTextSplitter.

    Returns:
        The chunks produced by splitting every loaded document.
    """
    # Each matching file is parsed with PyPDFLoader
    loader = DirectoryLoader(directory, glob=glob, loader_cls=PyPDFLoader)
    documents = loader.load()
    # Split the loaded documents into chunks
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    return text_splitter.split_documents(documents)
# Load and chunk the PDFs found under ./data/; `texts` feeds the vector store below.
texts = load_and_process_documents('./data/')

In [4]:
# Embed the chunks and keep them in a Chroma index persisted under `persist_directory`.
persist_directory = 'db'
embedding = HuggingFaceInstructEmbeddings(model_name="WhereIsAI/UAE-Large-V1", model_kwargs={'device': 'cpu'})

# Chroma.from_documents adds to whatever collection already lives in
# persist_directory, so re-running this cell would index every chunk again and
# the k=2 retriever could hand back the same chunk twice. Reuse the on-disk
# index when one exists; delete the 'db' directory to force a rebuild after
# the PDFs or the embedding model change.
if os.path.isdir(persist_directory) and os.listdir(persist_directory):
    # Load the persisted database from disk
    vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding)
else:
    vectordb = Chroma.from_documents(documents=texts, embedding=embedding, persist_directory=persist_directory)
    # Persist the freshly built database to disk so later runs can reload it
    vectordb.persist()

# Create the retriever (top 2 chunks per query)
retriever = vectordb.as_retriever(search_kwargs={"k": 2})

# Create the question-answering chain; source documents are returned so the
# answer can be printed together with the files it came from
qa_chain = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever, return_source_documents=True)

No sentence-transformers model found with name /Users/hytung/.cache/torch/sentence_transformers/WhereIsAI_UAE-Large-V1. Creating a new one with MEAN pooling.


In [5]:
def process_llm_response(llm_response):
    """Print the chain's answer followed by the source path of each cited document.

    Args:
        llm_response: Mapping with a 'result' entry (the answer) and a
            'source_documents' entry (objects whose ``metadata`` has a 'source' key).
    """
    answer = llm_response['result']
    print(answer)
    print('\n\nSources:')
    # Lazy generator: each path is looked up right before it is printed
    cited_paths = (doc.metadata['source'] for doc in llm_response["source_documents"])
    for path in cited_paths:
        print(path)

In [6]:
# Ask one question through the retrieval QA chain, then print the answer
# together with the source files of the retrieved chunks.
user_input='What is the result of this study?'
llm_response = qa_chain(user_input)
process_llm_response(llm_response)

 The study used Bayesian estimation in DSEM to analyze data from a 6-day experience sampling study. The study found that individuals who reported higher levels of self-compassion also reported lower levels of stress and anxiety. The study also found that individuals who reported higher levels of self-compassion were more likely to engage in self-care behaviors, such as exercise and healthy eating. The study suggests that promoting self-compassion may be an effective strategy for improving mental health and well-being.


Sources:
data/s41598-023-47912-0.pdf
data/s41598-023-47912-0.pdf


## Agent Test

In [10]:
# Second retrieval QA chain for the agent experiments: same LLM and retriever
# as `qa_chain`, but configured not to return the source documents.
qa_chain_agent = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=False
)

In [11]:
# Zero-shot ReAct agent whose only tool is the built-in "llm-math" tool.
tools = load_tools(["llm-math"], llm=llm)
agent = initialize_agent(
    tools,
    llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
)

In [13]:
# Reuse the `qa_chain_agent` built in the earlier cell instead of constructing
# an equivalent chain a second time; `.run` returns just the answer string.
qa_chain_agent.run('What is the result of this study?')

' The study used Bayesian estimation in DSEM to analyze data from a 6-day experience sampling study. The study found that individuals who reported higher levels of self-compassion also reported lower levels of stress and anxiety. The study also found that individuals who reported higher levels of self-compassion were more likely to engage in self-care behaviors, such as exercise and healthy eating. The study suggests that promoting self-compassion may be an effective strategy for improving mental health and well-being.'

In [15]:
# Run the question through the current `agent`. The recorded output below
# shows this call failing with an inference API error.
agent.run("What is the result of this study?")



[1m> Entering new AgentExecutor chain...[0m


ValueError: Error raised by inference API: Internal Server Error

In [14]:
class _RetrieverDocstore:
    """Adapter giving the vector-store retriever the `search` method DocstoreExplorer calls.

    DocstoreExplorer delegates to ``docstore.search(term)``. A RetrievalQA chain
    has no such method, so wrapping the chain directly makes the Search tool
    fail as soon as the agent uses it.
    """

    def __init__(self, retriever):
        self._retriever = retriever

    def search(self, search):
        """Return the best-matching document for `search`, or a message string if none."""
        matches = self._retriever.get_relevant_documents(search)
        return matches[0] if matches else f"No results found for: {search}"


docstore = DocstoreExplorer(_RetrieverDocstore(retriever))
# The REACT_DOCSTORE agent is given exactly these two tools, "Search" and "Lookup".
tools = [
    Tool(
        name="Search",
        func=docstore.search,
        description="useful for when you need to ask with search",
    ),
    Tool(
        name="Lookup",
        func=docstore.lookup,
        description="useful for when you need to ask with lookup",
    ),
]
agent = initialize_agent(tools, llm, agent=AgentType.REACT_DOCSTORE, verbose=True)
agent.run("What is the result of this study?")



[1m> Entering new AgentExecutor chain...[0m


ValueError: Error raised by inference API: Internal Server Error

In [16]:
from langchain.agents import AgentType, Tool, initialize_agent
# Expose the retrieval QA chain to the agent as a tool.
# A tool must hand the agent a plain-string observation. Passing the chain
# object itself would call it and return its whole output dict, so bind
# `.run`, which returns only the answer text.
tools = [
    Tool(
        name="ReLookup",
        func=qa_chain_agent.run,
        description="useful for when you need to lookup the answer from the database again"
    )
]

# Use the AgentType enum, as the earlier zero-shot agent cell does
agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)
agent.run("What is the result of this study?")



[1m> Entering new AgentExecutor chain...[0m
