In [1]:
import os
import requests
from dotenv import load_dotenv
from operator import itemgetter

from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_community.llms import Ollama

from langchain_community.document_loaders import DirectoryLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import OllamaEmbeddings
from langchain_core.runnables import RunnablePassthrough, RunnableLambda

# Pull settings from a local .env file (if present) into the process environment.
load_dotenv()

# Base URL of the Ollama server. Falls back to Ollama's default local address
# when OLLAMA_URL is unset, so later cells do not end up requesting
# "None/api/tags". A trailing slash is dropped because the URL is joined with
# "/api/..." paths further down.
OLLAMA_URL = os.getenv("OLLAMA_URL", "http://localhost:11434").rstrip("/")

In [None]:
# List the models available on the Ollama server (one name per line).
# A timeout keeps the cell from hanging forever when the server is unreachable,
# and raise_for_status() surfaces HTTP errors here instead of as a confusing
# KeyError on the 'models' lookup below.
response = requests.get(f"{OLLAMA_URL}/api/tags", timeout=10)
response.raise_for_status()
payload = response.json()

model_names = "\n".join(model["name"] for model in payload["models"]).rstrip("\n")
print(model_names)

In [2]:
# Settings for the text-generation model served at OLLAMA_URL.
llm_settings = {
    "model": "mistral:7b",
    "base_url": OLLAMA_URL,
    "temperature": 0.5,
}
llm = Ollama(**llm_settings)

In [3]:
# Prompt skeleton with three placeholders: {context}, {question} and {language}.
# Written as one literal per output line so every newline is explicit.
template = (
    "Use the following context strictly to formulate your answers:\n"
    "Context:\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
    "\n"
    "Make sure to answer this briefly. Answer strictly in {language} only.\n"
    "Answer: "
)

# Parser applied to the model output at the end of the chain.
output_parser = StrOutputParser()

# Prompt object built from the `template` string.
prompt = PromptTemplate.from_template(template)

In [64]:
# Load every .txt file under ../documents and split it into overlapping chunks.
loader = DirectoryLoader('../documents', glob="**/*.txt", show_progress=True, loader_cls=TextLoader)
documents = loader.load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = text_splitter.split_documents(documents)

# Embedding model used both to build the index and to embed queries.
embedding = OllamaEmbeddings(
    model="mistral:7b",
    base_url=OLLAMA_URL,
)

DB_PATH = "../.data/faiss_index"
# L2 normalisation must be requested on BOTH the build and the load path:
# the flag is a constructor option of the vector store, so a reloaded store
# would otherwise compare un-normalised query vectors against the normalised
# vectors saved in the index.
# NOTE(review): assumes any index already on disk was built with normalize_L2=True
# (which is what the build branch below does) — delete DB_PATH to rebuild if unsure.
NORMALIZE_L2 = True

if os.path.isdir(DB_PATH):
    print("Loading existing FAISS index")
    # The saved index is reused as-is; remove DB_PATH to re-index after the
    # documents change.
    # SECURITY: load_local unpickles data from disk, which is why the explicit
    # opt-in flag is required. Only load an index this notebook produced itself.
    vectorstore = FAISS.load_local(
        DB_PATH,
        embeddings=embedding,
        allow_dangerous_deserialization=True,
        normalize_L2=NORMALIZE_L2,
    )
else:
    print("Creating new FAISS index")
    vectorstore = FAISS.from_documents(documents=chunks, embedding=embedding, normalize_L2=NORMALIZE_L2)
    # Save to the same location the load branch checks.
    vectorstore.save_local(DB_PATH)

retriever = vectorstore.as_retriever()

100%|██████████| 2/2 [00:00<00:00, 666.71it/s]

Loading existing FAISS index





In [62]:
# Show each stored entry's id next to the file it came from.
# Reads the docstore's private `_dict` mapping directly (inspection only).
for doc_id, stored_doc in vectorstore.docstore._dict.items():
    source_path = stored_doc.metadata['source']
    print(doc_id, source_path)

76be3be5-e9f1-49b5-814c-a8e4cf7e15db ..\documents\info.txt
e8bcdc34-ea85-493b-b56e-a5346d90f1df ..\documents\other.txt


In [5]:
def format_docs(docs):
    """Render documents as one text block for the prompt context.

    Each document becomes a "[Document = <source>]" header line followed by
    its page content; documents are separated by a blank line.

    Args:
        docs: iterable of objects exposing `metadata['source']` and
            `page_content`.

    Returns:
        The concatenated string (empty when `docs` is empty).
    """
    rendered = []
    for document in docs:
        header = f"[Document = {document.metadata['source']}]"
        rendered.append(f"{header}\n{document.page_content}")
    return "\n\n".join(rendered)

def debug(inputs):
    """Print a value flowing through the chain and pass it on unchanged.

    Prints `inputs.text` when the object has a `text` attribute, otherwise
    the object itself. Only a missing attribute triggers the fallback; any
    other exception (including KeyboardInterrupt) propagates instead of being
    silently swallowed.

    Args:
        inputs: any value passed between chain steps.

    Returns:
        `inputs`, untouched, so the function can sit in the middle of a chain.
    """
    shown = getattr(inputs, "text", inputs)
    print(f"DEBUG\n-----\n{shown}\n")
    return inputs

# Documents seen by the most recent save_sources() call.
sources = []
def save_sources(docs):
    """Remember the given documents in the module-level `sources` list.

    The list is updated in place (rather than rebinding the name) so that the
    global `sources` really changes; a plain `sources = docs` inside the
    function would only create a local variable.

    Args:
        docs: iterable of documents passing through the chain.

    Returns:
        `docs`, unchanged, so the function can be used as a chain step.
    """
    sources[:] = docs
    return docs

In [6]:
# Map the incoming request dict onto the three variables the prompt expects.
# "context" is built by retrieving documents for the question, passing them
# through save_sources, and rendering them with format_docs.
question_of = itemgetter("question")
prompt_inputs = {
    "context": question_of | retriever | save_sources | format_docs,
    "question": question_of,
    "language": itemgetter("language"),
}

# To inspect intermediate values, insert `| RunnableLambda(debug)` right
# before or after `prompt`.
# NOTE(review): add_sources is not defined in any cell shown in this notebook;
# it must already exist in the kernel for this cell to run — confirm where it
# is supposed to come from.
chain = prompt_inputs | prompt | llm | output_parser | add_sources

# Run the chain once with a sample question and show the answer.
request = {
    "question": "What is current value of Bitcoin?",
    "language": "English",
}
output = chain.invoke(request)
print(output)

 The current value of Bitcoin, as mentioned in the given context, is 12 USD (Reference Document = ..\documents\other.txt).


In [8]:
# Retrieval-only pipeline: fetch documents for a query and render them as text,
# useful for checking what context the LLM would receive.
rag = retriever | format_docs
retrieved_context = rag.invoke("Bitcoin")
print(retrieved_context)

[Document = ..\documents\other.txt]
Bitcoin's current value is 12 USD.
Ethereum's current value is 5.5 INR.

[Document = ..\documents\info.txt]
My salary is 15000 SEK in the year 2029.
