In [1]:
from flask import Flask, request, jsonify

# RAG Creation
from langchain_community.embeddings import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma

# LLM Inference
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_community.chat_models import ChatOllama
from langchain_core.runnables import RunnablePassthrough
from langchain.retrievers.multi_query import MultiQueryRetriever

from pathlib import Path
from pydantic import BaseModel

In [2]:
# Location of the transcript to index.
# NOTE(review): this is an absolute, machine-specific path; consider making it
# relative to a configurable data directory before sharing the notebook.
request_data = "C:/Users/jacwallace/Repos/TPRM-Accelerator/training_data/office_s1_e4.txt"

# Extract data from request
filePath = request_data

# Load document txt.
# The encoding is passed explicitly so decoding does not depend on the platform
# default (e.g. cp1252 on Windows). Assumes the transcript is UTF-8 — TODO confirm.
data = Path(filePath).read_text(encoding="utf-8")

In [3]:
# Splitter settings gathered in one place so they are easy to tune.
# Sizes are counted with len(), i.e. in characters of the input string.
splitter_options = dict(
    chunk_size=7500,
    chunk_overlap=100,
    length_function=len,
    is_separator_regex=False,
)

# Splitter used to cut the loaded transcript into overlapping chunks.
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

In [4]:

# Split the raw transcript into chunk Documents.
# create_documents() already runs the splitter over the text, so a second
# split_documents() pass over its result is redundant and has been removed.
chunks = text_splitter.create_documents([data])
texts = chunks  # alias kept so code that still refers to `texts` keeps working

# Add to vector database: embed every chunk with the local Ollama embedding
# model and store the vectors in the "local-rag" Chroma collection.
# NOTE(review): re-running this cell in the same kernel may append the same
# chunks to the existing collection again — confirm and clear it if so.
vector_db = Chroma.from_documents(
    documents=chunks,
    embedding=OllamaEmbeddings(model="nomic-embed-text", show_progress=True),
    collection_name="local-rag",
)

OllamaEmbeddings: 100%|██████████| 4/4 [00:24<00:00,  6.09s/it]


In [5]:
# Extract data from request
question = "What is the alliance they speak of in this episode?"

# LLM from Ollama
local_model = "llama2"
llm = ChatOllama(model=local_model)

# Prompt asking the LLM to rephrase the user question five ways; the
# multi-query retriever runs a vector search for each generated variant.
# Wording fixed: "respectives" -> "perspectives", "from vector database" ->
# "from a vector database", so the instruction sent to the model is clear.
QUERY_PROMPT = PromptTemplate(
    input_variables=["question"],
    template="""You are an AI language model assistant. Your task is to generate five different versions of 
the given user question to retrieve relevant documents from a vector database. By generating multiple perspectives 
on the user question, your goal is to help the user overcome some of the limitations of the distance-based 
similarity search. Provide these alternative questions separated by newlines.
Original question: {question}""",
)

# Retriever that expands the question with the LLM before querying Chroma.
retriever = MultiQueryRetriever.from_llm(
    vector_db.as_retriever(),
    llm,
    prompt=QUERY_PROMPT,
)

In [6]:


# RAG Prompt
template = """Answer the question based ONLY on the following context:
{context}
Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)


def format_docs(docs):
    """Join the text of the retrieved documents into one plain-text block.

    Without this step the list of Document objects is interpolated into the
    prompt via its Python repr (wrapper text, metadata, escaped newlines)
    instead of the readable transcript text.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# RAG pipeline: retrieve context for the question, fill the prompt, call the
# LLM and return its reply as a plain string.
chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [7]:
# Run the full RAG chain on the question and show the model's answer.
rag_response = chain.invoke(question)
print(rag_response)

OllamaEmbeddings: 100%|██████████| 1/1 [00:02<00:00,  2.91s/it]
OllamaEmbeddings: 100%|██████████| 1/1 [00:02<00:00,  2.09s/it]
OllamaEmbeddings: 100%|██████████| 1/1 [00:02<00:00,  2.13s/it]
OllamaEmbeddings: 100%|██████████| 1/1 [00:02<00:00,  2.13s/it]
OllamaEmbeddings: 100%|██████████| 1/1 [00:02<00:00,  2.14s/it]
OllamaEmbeddings: 100%|██████████| 1/1 [00:02<00:00,  2.14s/it]
OllamaEmbeddings: 100%|██████████| 1/1 [00:02<00:00,  2.12s/it]


In this episode of The Office, the alliance being referred to is the surprise party for Meredith's birthday. Michael has planned a surprise party for Meredith, but Jim and Dwight are also involved in the plan. They want to keep the party a secret from Meredith until the big reveal, so they tape the lid of the box containing the cake shut and try to quiet Jim when he tries to tell Meredith about it. The alliance is the group of people working together to pull off the surprise party without Meredith knowing about it.
