# Settings

In [3]:
# Start a detached Redis Stack server container named "redis-vector" and
# publish its port 6379 on the host; the rest of the notebook talks to it.
# NOTE(review): the image tag is `latest` (unpinned) — consider pinning a
# specific version for reproducible runs.
!docker run -d -p 6379:6379 --name redis-vector redis/redis-stack-server:latest


93002683c24738fa6082e505b311b26ed1b5ac7e60c66f8852fe65d0b565cea9


In [6]:
import os

if not os.path.isdir("../.env"):
    !echo "REDIS_URL=redis://localhost:6379" > "../.env"


In [7]:
from dotenv import load_dotenv, find_dotenv

# Locate the .env file first, then load it; the return value is bound to `_`
# so the cell produces no output.
dotenv_path = find_dotenv()
_ = load_dotenv(dotenv_path)


# Data Loading

In [8]:
import pathlib

# Directory holding the sample documents, relative to the notebook location.
dir_data = pathlib.Path("..") / "data" / "docs_sample"


In [9]:
from langchain.document_loaders import DirectoryLoader

# Loader over the sample-data directory, with a progress bar while loading.
document_loader = DirectoryLoader(path=dir_data, show_progress=True)


In [10]:
# Load the documents, then display how many were read together with
# (at most) the first ten of them for a quick inspection.
documents = document_loader.load()
len(documents), documents[:10]


100%|██████████| 3/3 [00:06<00:00,  2.01s/it]


(3,
 [Document(page_content='arguslweruna is the king. the king is loved by its people.', metadata={'source': '../data/docs_sample/doc1.txt'}),
  Document(page_content='4831asx is the newest eye glasses in the shop and it can fire lasers.', metadata={'source': '../data/docs_sample/doc3.txt'}),
  Document(page_content='near the city of ag45i4nt there is a bog. all bogs are wet. but the one near this city is dry.', metadata={'source': '../data/docs_sample/doc2.txt'})])

# Data Chunking

In [11]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the loaded documents into chunks of up to 100 characters with a
# 75-character overlap, then display the chunk count and the first ten chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=75,
    chunk_size=100,
)
document_chunks = text_splitter.split_documents(documents)
len(document_chunks), document_chunks[:10]


(3,
 [Document(page_content='arguslweruna is the king. the king is loved by its people.', metadata={'source': '../data/docs_sample/doc1.txt'}),
  Document(page_content='4831asx is the newest eye glasses in the shop and it can fire lasers.', metadata={'source': '../data/docs_sample/doc3.txt'}),
  Document(page_content='near the city of ag45i4nt there is a bog. all bogs are wet. but the one near this city is dry.', metadata={'source': '../data/docs_sample/doc2.txt'})])

# Vector Store

In [12]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores.redis import Redis

# Embed the chunks with a sentence-embedding model and index them in Redis.
# No connection URL is passed here — presumably it is read from the REDIS_URL
# environment variable loaded from .env; verify against the Redis store docs.
embeddings = HuggingFaceEmbeddings(model_name="multi-qa-MiniLM-L6-cos-v1")
vector_store = Redis.from_documents(
    documents=document_chunks,
    embedding=embeddings,
)
vector_store


  from .autonotebook import tqdm as notebook_tqdm


<langchain.vectorstores.redis.base.Redis at 0x12a405db0>

In [13]:
# Sanity check with a question unrelated to the sample documents.
vector_store.similarity_search_with_score(
    query="What is the greatest ocean in the world?"
)


[(Document(page_content='arguslweruna is the king. the king is loved by its people.', metadata={'id': 'doc:f150fa9bca9d4174921ff43c1ff3cb1c:6e2b57d4cb824a5392cb0c47539ba33f', 'source': '../data/docs_sample/doc1.txt'}),
  0.7937),
 (Document(page_content='near the city of ag45i4nt there is a bog. all bogs are wet. but the one near this city is dry.', metadata={'id': 'doc:f150fa9bca9d4174921ff43c1ff3cb1c:0d0f9fc831874087a97ec27f79dded11', 'source': '../data/docs_sample/doc2.txt'}),
  0.8336),
 (Document(page_content='4831asx is the newest eye glasses in the shop and it can fire lasers.', metadata={'id': 'doc:f150fa9bca9d4174921ff43c1ff3cb1c:9f47dc4aa3a04995afbcfc813edb0a24', 'source': '../data/docs_sample/doc3.txt'}),
  0.9188)]

In [14]:
# Sanity check with a question that one of the sample documents answers.
vector_store.similarity_search_with_score(
    query="Where is the dry bog?"
)


[(Document(page_content='near the city of ag45i4nt there is a bog. all bogs are wet. but the one near this city is dry.', metadata={'id': 'doc:f150fa9bca9d4174921ff43c1ff3cb1c:0d0f9fc831874087a97ec27f79dded11', 'source': '../data/docs_sample/doc2.txt'}),
  0.2019),
 (Document(page_content='4831asx is the newest eye glasses in the shop and it can fire lasers.', metadata={'id': 'doc:f150fa9bca9d4174921ff43c1ff3cb1c:9f47dc4aa3a04995afbcfc813edb0a24', 'source': '../data/docs_sample/doc3.txt'}),
  1.0024),
 (Document(page_content='arguslweruna is the king. the king is loved by its people.', metadata={'id': 'doc:f150fa9bca9d4174921ff43c1ff3cb1c:6e2b57d4cb824a5392cb0c47539ba33f', 'source': '../data/docs_sample/doc1.txt'}),
  1.0448)]

# LLM

In [15]:
from langchain.llms.huggingface_pipeline import HuggingFacePipeline

# Local Hugging Face text2text-generation pipeline using the
# google/flan-t5-small checkpoint; generation settings are kept in one dict.
generation_settings = {"temperature": 0.01, "max_length": 128, "do_sample": True}
llm = HuggingFacePipeline.from_model_id(
    model_id="google/flan-t5-small",
    task="text2text-generation",
    model_kwargs=generation_settings,
)
llm


HuggingFacePipeline(pipeline=<transformers.pipelines.text2text_generation.Text2TextGenerationPipeline object at 0x108263040>, model_id='google/flan-t5-small', model_kwargs={'temperature': 0.01, 'max_length': 128, 'do_sample': True}, pipeline_kwargs={})

# QA Chain

In [16]:
from langchain import hub

# Pull the shared RAG prompt from the LangChain hub and display it.
prompt_ref = "rlm/rag-prompt"
qa_rag_prompt = hub.pull(prompt_ref)
qa_rag_prompt


ChatPromptTemplate(input_variables=['context', 'question'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"))])

In [17]:
from langchain.chains import RetrievalQA


# Retriever over the vector store, using the "similarity_score_threshold"
# search type with a score threshold of 0.5.
retriever = vector_store.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"score_threshold": 0.5},
)

# Retrieval QA chain: the retrieved documents fill the RAG prompt, and the
# source documents are returned alongside the answer.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type_kwargs={"prompt": qa_rag_prompt},
    return_source_documents=True,
)
qa


RetrievalQA(combine_documents_chain=StuffDocumentsChain(llm_chain=LLMChain(prompt=ChatPromptTemplate(input_variables=['context', 'question'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"))]), llm=HuggingFacePipeline(pipeline=<transformers.pipelines.text2text_generation.Text2TextGenerationPipeline object at 0x108263040>, model_id='google/flan-t5-small', model_kwargs={'temperature': 0.01, 'max_length': 128, 'do_sample': True}, pipeline_kwargs={})), document_variable_name='context'), return_source_documents=True, retriever=RedisVectorStoreRetriever(tags=['Redis', 'HuggingFaceEmbeddings'], vectorstore=<langchain.vectorstores.redis.base.Red

# Question Answering

In [18]:
def ask(question: str) -> tuple:
    """Answer a question with the retrieval QA chain.

    Args:
        question: Natural-language question, forwarded to the chain as its
            ``"query"`` input.

    Returns:
        A 2-tuple ``(answer, source_documents)`` taken from the chain
        output's ``"result"`` and ``"source_documents"`` entries.
    """
    # `qa` is the module-level RetrievalQA chain built in an earlier cell.
    result = qa({"query": question})
    return result["result"], result["source_documents"]


In [19]:
# Question answered by doc1 (who loves the king).
ask(question="Who loves arguslweruna?")




('people',
 [Document(page_content='arguslweruna is the king. the king is loved by its people.', metadata={'id': 'doc:f150fa9bca9d4174921ff43c1ff3cb1c:6e2b57d4cb824a5392cb0c47539ba33f', 'source': '../data/docs_sample/doc1.txt'})])

In [20]:
# Question answered by doc1 (the role of arguslweruna).
ask(question="What's arguslweruna role?")




('king',
 [Document(page_content='arguslweruna is the king. the king is loved by its people.', metadata={'id': 'doc:f150fa9bca9d4174921ff43c1ff3cb1c:6e2b57d4cb824a5392cb0c47539ba33f', 'source': '../data/docs_sample/doc1.txt'})])

In [21]:
# Question answered by doc2 (the bog near the city).
ask(question="What is the bog near ag45i4nt like?")




('dry',
 [Document(page_content='near the city of ag45i4nt there is a bog. all bogs are wet. but the one near this city is dry.', metadata={'id': 'doc:f150fa9bca9d4174921ff43c1ff3cb1c:0d0f9fc831874087a97ec27f79dded11', 'source': '../data/docs_sample/doc2.txt'})])

In [22]:
# Question answered by doc3 (what the glasses can do).
ask(question="What is 4831asx capable of?")




('fire lasers',
 [Document(page_content='4831asx is the newest eye glasses in the shop and it can fire lasers.', metadata={'id': 'doc:f150fa9bca9d4174921ff43c1ff3cb1c:9f47dc4aa3a04995afbcfc813edb0a24', 'source': '../data/docs_sample/doc3.txt'})])

# Teardown

In [2]:
# Force-remove the "redis-vector" container started in the Settings section.
# NOTE(review): presumably the indexed data is discarded with the container,
# since no volume is mounted in the `docker run` command — confirm.
!docker rm -f redis-vector


redis-vector
