In [1]:
import os
from langchain_community.document_loaders import DirectoryLoader
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.llms import Ollama
from langchain_community.vectorstores import Chroma
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain

In [2]:
data_dir = './data/'
persist_dir = './persist/'
llm = Ollama(model="mistral")
embedding = OllamaEmbeddings(model="mistral")

In [3]:
def load_documents(data_dir):
    loader = DirectoryLoader('./data/',glob="./*.txt",loader_cls=TextLoader)
    docs = loader.load()
    return docs

In [4]:
def chunk_documents(docs):
    ts = CharacterTextSplitter(chunk_size=1000,chunk_overlap=200)
    chunks = ts.split_documents(docs)
    return chunks

In [5]:
def vec_store(chunks):
    vector_store = Chroma.from_documents(documents=chunks,embedding=embedding,persist_directory=persist_dir)
    return vector_store

In [6]:
docs = load_documents(data_dir)

In [7]:
chunks = chunk_documents(docs)

Created a chunk of size 4264, which is longer than the specified 1000
Created a chunk of size 4989, which is longer than the specified 1000
Created a chunk of size 4261, which is longer than the specified 1000
Created a chunk of size 2521, which is longer than the specified 1000
Created a chunk of size 4305, which is longer than the specified 1000
Created a chunk of size 3889, which is longer than the specified 1000
Created a chunk of size 3896, which is longer than the specified 1000
Created a chunk of size 3636, which is longer than the specified 1000
Created a chunk of size 3939, which is longer than the specified 1000
Created a chunk of size 5059, which is longer than the specified 1000
Created a chunk of size 4243, which is longer than the specified 1000
Created a chunk of size 4012, which is longer than the specified 1000
Created a chunk of size 2637, which is longer than the specified 1000
Created a chunk of size 4280, which is longer than the specified 1000
Created a chunk of s

In [8]:
# vector_store = vec_store(chunks)

In [9]:
# vector_store.persist()

In [10]:
vectordb = Chroma(persist_directory=persist_dir, embedding_function=embedding)

In [11]:
retriever = vectordb.as_retriever(search_type="similarity",search_kwargs={"k":6})

In [12]:
prompt = ChatPromptTemplate.from_template("""Answer the following question based only on the provided context:

<context>
{context}
</context>

Question: {input}""")


In [13]:
document_chain = create_stuff_documents_chain(llm, prompt)

In [14]:
retrieval_chain = create_retrieval_chain(retriever, document_chain)

In [24]:
response = retrieval_chain.invoke({"input": "Who is blind?"})

In [25]:
response["answer"]

' Tiffany and some members of the rural community that Jyotirgamaya Foundation engages with are blind.'