In [2]:
#load
from langchain_community.document_loaders import ArxivLoader

# Identifier handed to the arXiv loader as its query string.
ARXIV_PAPER_ID = "2405.17147"

# Build the loader first, then materialise its documents into `docs`.
loader = ArxivLoader(query=ARXIV_PAPER_ID)
docs = loader.load()

In [4]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Cut the loaded documents into chunks of at most 1000 characters, with a
# 100-character overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100,
)
chunk_data = text_splitter.split_documents(docs)

# Last expression of the cell: shows how many chunks were produced.
len(chunk_data)

32

In [5]:
#embeddings
from langchain_openai import OpenAIEmbeddings
import os
from dotenv import load_dotenv
# Load environment variables via python-dotenv before reading the keys below.
load_dotenv()

# Both lookups yield None when the variable is not set.
openai_api_key = os.environ.get("OPENAI_API_KEY")
pinecone_api_key = os.environ.get("PINECONE_API_KEY")

# Embedding client shared by the indexing and retrieval cells.
# NOTE(review): no model is named here, so the library default is used —
# confirm its output size matches the index dimension configured later.
embeddings = OpenAIEmbeddings(api_key=openai_api_key)

In [6]:
#create index
from pinecone import Pinecone, ServerlessSpec

# Pinecone client and the name of the index this notebook reads and writes.
pc = Pinecone(api_key=pinecone_api_key)
index_name = "rag-app"

# Only create the index when it is not already listed, so re-running the cell
# does not attempt a second creation.
existing_index_names = pc.list_indexes().names()
if index_name not in existing_index_names:
    serverless_spec = ServerlessSpec(cloud="aws", region="us-east-1")
    pc.create_index(
        name=index_name,
        # presumably the output size of the embedding model in use — confirm
        dimension=1536,
        metric="cosine",
        spec=serverless_spec,
    )

In [7]:
#Embed and store
import hashlib

from langchain_pinecone import PineconeVectorStore

# Give every chunk a deterministic ID derived from its position and text.
# Without explicit IDs the store assigns fresh ones on every call, so each
# re-run of this cell would add another full copy of the paper to the index
# and skew top-k retrieval towards duplicates. With stable IDs a re-run
# overwrites the same records instead.
chunk_ids = [
    hashlib.sha256(f"{position}:{chunk.page_content}".encode("utf-8")).hexdigest()
    for position, chunk in enumerate(chunk_data)
]

# Embed the chunks and upsert them into the index; the returned store is what
# the retrieval cells query.
vector_store = PineconeVectorStore.from_documents(
    chunk_data,
    embeddings,
    index_name=index_name,
    ids=chunk_ids,
)

In [None]:
# Sample question reused by the later inspection and chain cells.
query = "What metrics are used to evaluate the quality of experience (QoE) for users of large language model (LLM) service"

# Retriever over the vector store that returns the 3 best-matching chunks.
retriever = vector_store.as_retriever(search_kwargs={"k": 3})

# `get_relevant_documents` is deprecated in LangChain; `invoke` is the
# supported entry point and is what the rest of this notebook already uses.
retriever.invoke(query)

In [None]:
def format_docs(docs):
    """Concatenate the text of several documents into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The ``page_content`` values in input order, separated by a blank
        line; an empty string when ``docs`` is empty.
    """
    separator = "\n\n"
    page_texts = [document.page_content for document in docs]
    return separator.join(page_texts)

# Fetch the chunks for the sample query and print them as a single block of
# text, i.e. exactly what will be substituted for the prompt's context.
retrieved_docs = retriever.invoke(query)
context_preview = format_docs(retrieved_docs)
print(context_preview)

In [9]:
# Prompt text with two placeholders, {question} and {context}. It is filled
# both directly with str.format (for inspection) and through PromptTemplate
# when the chain is assembled, so the placeholder names must stay in sync
# with the keys supplied there.
template = """You are an expert LLM assistant specialized in answering questions related to large language models (LLMs). Use the provided information and your knowledge to respond accurately and clearly to each question. 

Guidelines:
1. Provide concise and informative answers.
2. If the question is beyond the scope of your knowledge or the provided information, state, "I don't know."
3. Use examples where applicable to illustrate your answers.
4. Maintain a professional and helpful tone.

Question: {question}

Context:{context}

Answer:
"""


In [None]:
# Fill the template by hand so the exact prompt text for the sample query can
# be inspected before wiring up the chain.
retrieved_context = format_docs(retrieved_docs)
prompt = template.format(context=retrieved_context, question=query)
print(prompt)

In [12]:
from langchain_openai import OpenAI
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

# Language model that produces the final answer text.
llm = OpenAI(api_key=openai_api_key)

# Wrap the raw template string so the chain can fill its placeholders.
custom_rag_template = PromptTemplate.from_template(template)

# The chain input (the question string) feeds both branches: one retrieves
# documents and flattens them with format_docs, the other forwards the
# question unchanged.
rag_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}

# inputs -> prompt -> model -> plain string
custom_rag_chain = rag_inputs | custom_rag_template | llm | StrOutputParser()

In [None]:
# Run the assembled chain on the sample query; as the cell's last expression,
# the returned value is displayed as the cell output.
custom_rag_chain.invoke(query)

In [17]:
import gradio as gr

def llm_response(query, memory=None):
    """Answer a single chat message using the RAG chain.

    Args:
        query: The message to answer.
        memory: Ignored by this function; accepted so that a caller may pass
            a second positional argument.

    Returns:
        The result of ``custom_rag_chain.invoke(query)``.
    """
    answer = custom_rag_chain.invoke(query)
    return answer

# Example questions offered in the chat UI.
example_questions = [
    "How does the introduction of GPT-4o by OpenAI, represent a pivotal advancement in the evolution of large language models, and what potential applications could arise from these expanded functionalities?",
    "What are the primary advantages and challenges of deploying LLMs using RAG compared to fine-tuning, particularly in terms of handling specialized knowledge and reducing hallucinations?",
]

# Widgets for the conversation pane and the input box.
chat_pane = gr.Chatbot(height=300)
query_box = gr.Textbox(placeholder="Enter query here", scale=5)

# Chat front end that routes every submitted message through llm_response.
# NOTE(review): the *_btn keyword arguments are version-sensitive in Gradio —
# confirm they are accepted by the installed release.
rag_demo = gr.ChatInterface(
    llm_response,
    title="RAG demo",
    chatbot=chat_pane,
    textbox=query_box,
    examples=example_questions,
    retry_btn=gr.Button("Retry"),
    clear_btn=gr.Button("Clear"),
    undo_btn=gr.Button("Undo"),
    submit_btn=gr.Button("Submit"),
)

In [None]:
# Start the Gradio app for the chat interface built in the previous cell.
rag_demo.launch()