In [1]:
from flask import Flask, request, jsonify

# RAG Creation
from langchain_community.embeddings import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma

# LLM Inference
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_community.chat_models import ChatOllama
from langchain_core.runnables import RunnablePassthrough
from langchain.retrievers.multi_query import MultiQueryRetriever

from pathlib import Path
from pydantic import BaseModel

In [12]:
# Location of the plain-text document to index.
# NOTE(review): this is an absolute, machine-specific path; point it at a
# configurable data directory before sharing the notebook.
request_data = "C:/Users/jacwallace/Repos/TPRM-Accelerator/training_data/harry_potter.txt"

# Extract data from request
filePath = request_data


# Load document txt.
# The encoding is passed explicitly so the result does not depend on the
# platform's locale default.
# NOTE(review): assumes the file is UTF-8 encoded - confirm for this corpus.
with open(filePath, encoding="utf-8") as f:
    data = f.read()

In [14]:
# Splitter settings collected in one mapping so they are easy to scan and tune.
_splitter_options = {
    "chunk_size": 7500,            # upper bound per chunk, measured by length_function
    "chunk_overlap": 100,          # amount shared between neighbouring chunks
    "length_function": len,        # chunk length is counted with len()
    "is_separator_regex": False,   # separators are not interpreted as regex patterns
}

text_splitter = RecursiveCharacterTextSplitter(**_splitter_options)

In [15]:

# Split the raw text into Document chunks.
# create_documents() already runs the splitter over every input string, so the
# former follow-up split_documents(texts) call only re-split chunks that were
# already within chunk_size; that redundant second pass has been removed.
texts = text_splitter.create_documents([data])
chunks = texts  # both names are kept so any later cell using either still works

# Fail early with a clear message instead of an opaque error from the vector store.
if not chunks:
    raise ValueError("No text chunks were produced; the source document appears to be empty.")

# Add to vector database.
# Every chunk is embedded with the Ollama "nomic-embed-text" model; the recorded
# run took about eight minutes for 61 chunks, so avoid re-running this cell
# unnecessarily.
vector_db = Chroma.from_documents(
    documents=chunks,
    embedding=OllamaEmbeddings(model="nomic-embed-text", show_progress=True),
    collection_name="local-rag",
)

OllamaEmbeddings: 100%|██████████| 61/61 [08:02<00:00,  7.90s/it]


In [19]:
# Extract data from request
question = "Who is harry potter?"

# LLM from Ollama
local_model = "llama2"
llm = ChatOllama(model=local_model)

# Prompt that asks the LLM to rephrase the user question five ways; the
# MultiQueryRetriever below uses it to generate its alternative queries.
# (Fixed typo in the instruction text: "respectives" -> "perspectives".)
QUERY_PROMPT = PromptTemplate(
    input_variables=["question"],
    template="""You are an AI language model assistant. Your task is to generate five different versions of 
the given user question to retrieve relevant documents from vector database. By generating multiple perspectives 
on the user question, your goal is to help the user overcome some of the limitations of the distance-based 
similarity search. Provide these alternative questions separated by newlines.
Original question: {question}""",
)

# Retriever built from the vector store's default retriever, the chat model and
# the query-rewriting prompt defined above.
retriever = MultiQueryRetriever.from_llm(
    vector_db.as_retriever(),
    llm,
    prompt=QUERY_PROMPT,
)

In [20]:


# RAG Prompt
template = """Answer the question based ONLY on the following context:
{context}
Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)


def format_docs(docs):
    """Join the text of the retrieved documents into a single context string.

    Without this step the retriever's list of Document objects is interpolated
    into the prompt as a Python list repr (metadata and escaped newlines
    included) rather than as readable text.

    Args:
        docs: iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# Pipeline: retrieve and format the context while passing the question through
# unchanged, fill the prompt, call the LLM, and parse the reply to a plain string.
chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [21]:
# Run the RAG chain on the question and print the generated answer.
rag_response = chain.invoke(question)
print(rag_response)

OllamaEmbeddings: 100%|██████████| 1/1 [00:02<00:00,  2.93s/it]
OllamaEmbeddings: 100%|██████████| 1/1 [00:02<00:00,  2.16s/it]
OllamaEmbeddings: 100%|██████████| 1/1 [00:02<00:00,  2.16s/it]
OllamaEmbeddings: 100%|██████████| 1/1 [00:02<00:00,  2.16s/it]
OllamaEmbeddings: 100%|██████████| 1/1 [00:02<00:00,  2.17s/it]


In the given text, Harry Potter is a young boy who discovers that he is a wizard and has been accepted into Hogwarts School of Witchcraft and Wizardry. He is the son of Lily and James Potter, who were killed by the dark wizard Lord Voldemort, also known as the "Dark Lord." Harry's uncle, Vernon Dursley, had taken him in after his parents' death but treated him poorly and tried to keep him from discovering his true identity as a wizard. However, Harry discovers that he is a wizard through a magical letter from Hogwarts and meets Hagrid, a half-giant who works at Hogwarts and helps him on his journey.
