In [2]:
# imports
import os

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.prompts.chat import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_huggingface import HuggingFaceEndpoint

In [3]:
# Read the D&D Basic Rules handbook PDF into LangChain documents
pdf_path = r'C:\Users\connor\PycharmProjects\llm\data\DnD_BasicRules_2018.pdf'
loader = PyPDFLoader(pdf_path)
dnd_doc = loader.load()

In [4]:
# split text into chunks
# NOTE(review): 24-character chunks with a 10-character overlap are very small
# for retrieval; confirm these values are intended before relying on answers.
chunk_size = 24
chunk_overlap = 10

# Create an instance of the recursive splitter class.
# The sizes come from the variables above so they only need changing in one place.
splitter = RecursiveCharacterTextSplitter(
    separators=['\n', '.', ' ', ""],
    chunk_size=chunk_size,
    chunk_overlap=chunk_overlap
)

# Split the loaded handbook documents into chunks
docs = splitter.split_documents(dnd_doc)

In [5]:
# Keep only the first chunks so a single Chroma insert stays within its
# max batch size (5460 was the previous cap). Fix batching later.
max_docs = 100
docs = docs[:max_docs]

In [None]:
# Embed the documents in a persistent Chroma vector database
model_id = "sentence-transformers/all-MiniLM-L6-v2"
# update to gpu later. It's involved
model_kwargs = {'device':'cpu'}
embedding_function = HuggingFaceEmbeddings(model_name=model_id, model_kwargs=model_kwargs)
data_path = r'C:\Users\connor\PycharmProjects\llm\data'

# Passing persist_directory makes Chroma write the collection to disk on its own.
# The langchain_chroma wrapper has no persist() method, so no explicit call is made
# (calling it raises AttributeError).
# NOTE(review): re-running this cell presumably adds the same chunks to the
# existing on-disk collection again; confirm, and clear the directory first if so.
vectorstore = Chroma.from_documents(
    docs,
    embedding=embedding_function,
    persist_directory=data_path
)



In [6]:
# Re-open the Chroma collection stored on disk, using the same embedding model
# that was used to build it, and expose it as a retriever for the RAG chain.
model_id = "sentence-transformers/all-MiniLM-L6-v2"
data_path = r'C:\Users\connor\PycharmProjects\llm\data'

embedding_function = HuggingFaceEmbeddings(model_name=model_id)
vectordb = Chroma(
    embedding_function=embedding_function,
    persist_directory=data_path,
)

retriever = vectordb.as_retriever()

  from tqdm.autonotebook import tqdm, trange


In [7]:
# Prompt text with {context} and {question} placeholders that the chain fills in
message = """
Answer the following question using the context provided:

Context:
{context}

Question:
{question}

Answer:
"""

# Wrap the text as a single human-role message in a chat prompt template
chat_messages = [("human", message)]
prompt_template = ChatPromptTemplate.from_messages(chat_messages)

In [8]:
# hf token: read it from the environment so the secret never lives in the notebook.
# The token that was previously hardcoded here should be treated as leaked and revoked.
huggingfacehub_api_token = os.environ.get("HUGGINGFACEHUB_API_TOKEN")
if not huggingfacehub_api_token:
    # Fail early with a clear message instead of an opaque auth error at call time
    raise RuntimeError(
        "Set the HUGGINGFACEHUB_API_TOKEN environment variable before running this cell."
    )

# Hosted inference endpoint for the Falcon 7B instruct model
llm = HuggingFaceEndpoint(repo_id='tiiuae/falcon-7b-instruct', huggingfacehub_api_token=huggingfacehub_api_token)

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: fineGrained).
Your token has been saved to C:\Users\connor\.cache\huggingface\token
Login successful


In [9]:
# rag chain: the retriever supplies "context", the raw input is passed through
# as "question", the prompt template is filled, and the result goes to the LLM
chain_inputs = {"context": retriever, "question": RunnablePassthrough()}
rag_chain = chain_inputs | prompt_template | llm

In [13]:
# Ask the chain a single question and show the model's answer
question = "How long is the document attached to this rag?"
response = rag_chain.invoke(question)
print(response)

The document is attached to the rag for at least 6 hours.
