In [2]:
#Load
from langchain_community.document_loaders import ArxivLoader

# Look up "2405.17147" on arXiv and load whatever documents the query returns.
loader = ArxivLoader(query="2405.17147")
docs = loader.load()

# Show how many documents came back.
len(docs)

1

In [3]:
#Chunk data
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Splitter configured with chunk_size=1000 and chunk_overlap=100.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100,
)

# Break the loaded documents into chunks and show how many were produced.
chunk_data = text_splitter.split_documents(docs)
len(chunk_data)

32

In [4]:
from langchain_openai import OpenAIEmbeddings
import os
from dotenv import load_dotenv
# Load settings via python-dotenv before reading the API key from the environment.
load_dotenv()

# Evaluates to None when OPENAI_API_KEY is not set.
openai_api_key = os.environ.get("OPENAI_API_KEY")

# Embedding client used later to build the vector store; the cell displays its repr.
embeddings = OpenAIEmbeddings(api_key=openai_api_key)
embeddings

OpenAIEmbeddings(client=<openai.resources.embeddings.Embeddings object at 0x000001923BAD6B70>, async_client=<openai.resources.embeddings.AsyncEmbeddings object at 0x000001923BAD40B0>, model='text-embedding-ada-002', dimensions=None, deployment='text-embedding-ada-002', openai_api_version='', openai_api_base=None, openai_api_type='', openai_proxy='', embedding_ctx_length=8191, openai_api_key=SecretStr('**********'), openai_organization=None, allowed_special=None, disallowed_special=None, chunk_size=1000, max_retries=2, request_timeout=None, headers=None, tiktoken_enabled=True, tiktoken_model_name=None, show_progress_bar=False, model_kwargs={}, skip_empty=False, default_headers=None, default_query=None, retry_min_seconds=4, retry_max_seconds=20, http_client=None, http_async_client=None, check_embedding_ctx_length=True)

In [6]:
#Create Pinecone index

from pinecone import Pinecone, ServerlessSpec

# Name of the Pinecone index shared by the cells that follow.
index_name = "pinecone-vector"

# Pinecone client; the key is read from the PINECONE_API_KEY environment variable.
pc = Pinecone(api_key=os.environ.get("PINECONE_API_KEY"))


In [8]:
# Only create the index when no existing index already carries this name.
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        # NOTE(review): presumably has to equal the embedding vector length — confirm for the model in use.
        dimension=1536,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

In [9]:
#Create vector store 
from langchain_pinecone import PineconeVectorStore

# Build the vector store from the chunks, using `embeddings` and the index named by `index_name`.
# NOTE(review): re-running this cell presumably uploads the same chunks again — confirm
# whether duplicates accumulate in the index.
vector_store = PineconeVectorStore.from_documents(chunk_data, embeddings, index_name=index_name)

In [10]:
# Sample question used to check what the retriever returns.
query = "What metrics are used to evaluate the quality of experience (QoE) for users of large language model (LLM) service"

# Retriever over the vector store, configured with k=3 in its search kwargs.
retriever = vector_store.as_retriever(search_kwargs={"k":3})

# `invoke` replaces the deprecated `get_relevant_documents`, which emitted a
# deprecation warning; it is also the call the later cells of this notebook use.
retriever.invoke(query)

  warn_deprecated(


[Document(page_content='from the quality of experience (QoE)’s perspective of end users.\nLastly, we envisioned the future hybrid architecture of LLM\nprocessing and its corresponding sustainability concerns, partic-\nularly in the environmental carbon footprint impact. Through\nthese discussions, we provided a comprehensive overview of\nthe operational and strategic considerations essential for the\nresponsible development and deployment of LLMs.\nI. UBIQUITOUS LLMS\nThe recent unveiling of GPT-4o by OpenAI on May 13,\n2024 marks a pivotal moment in the evolution of large\nlanguage models (LLMs) [1]. This groundbreaking model,\naptly named with “o” signifying “omni” for its comprehensive\ncapabilities, transcends the limitations of its predecessors by\nincorporating multi modality. This signifies a significant step\ntowards achieving more natural and intuitive human-computer\ninteraction.\nThe emergence of LLMs started from the launch of Chat-\nGPT in November 2022 after two months of

In [None]:
#format docs
def format_docs(docs):
    """Concatenate the ``page_content`` of every document into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The contents joined with two newline characters between consecutive
        documents; an empty string when ``docs`` yields nothing.
    """
    separator = "\n\n"
    contents = [doc.page_content for doc in docs]
    return separator.join(contents)

# Retrieve the documents for `query` and print them as one formatted string.
retrieved_docs = retriever.invoke(query)
print(format_docs(retrieved_docs))

In [39]:
# Prompt text for the RAG chain; `{context}` and `{question}` are substituted at call time.
# The literal opens with exactly three quotes: a fourth quote would become the first
# character of the prompt itself.
template = """You are an expert LLM assistant specialized in answering questions related to large language models (LLMs). Use the provided information and your knowledge to respond accurately and clearly to each question. 

Guidelines:
1. Provide concise and informative answers.
2. If the question is beyond the scope of your knowledge or the provided information, state, "I don't know."
3. Use examples where applicable to illustrate your answers.
4. Maintain a professional and helpful tone.

Context: {context}
Question: {question}
Answer:
"""

In [None]:
# Hand-written question/context pair for previewing the filled-in prompt.
question = (
    "What are the primary advantages of using "
    "Retrieval-Augmented Generation (RAG)?"
)
context = (
    "RAG combines retrieval mechanisms with generative models to provide "
    "accurate and relevant responses by fetching information from external "
    "sources and generating coherent answers."
)

# Substitute both placeholders and print the resulting prompt text.
prompt = template.format(question=question, context=context)
print(prompt)

In [40]:
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI

# Chat model client, authenticated with the same key as the embeddings.
llm = ChatOpenAI(api_key=openai_api_key)

# Prompt object built from the raw template string.
custom_rag_template = PromptTemplate.from_template(template)

# The chain fills "context" from the retriever output passed through format_docs,
# fills "question" via RunnablePassthrough, then pipes the prompt to the model
# and the model output to the string output parser.
rag_chain = (
    {
        "context": retriever | format_docs,
        "question": RunnablePassthrough(),
    }
    | custom_rag_template
    | llm
    | StrOutputParser()
)

In [41]:
# Run a question through the full chain; the cell displays the returned answer.
query = "what potential applications could arise from gpt-4o expanded functionalities?"
rag_chain.invoke(query)

"With the expanded functionalities of GPT-4o, potential applications could include:\n\n1. Enhanced virtual assistants: GPT-4o's multi-modality capabilities could improve the user experience of virtual assistants by enabling them to generate not only text-based responses but also images, videos, audio, and more in a more natural and intuitive manner.\n\n2. Content creation: GPT-4o could be used to generate diverse types of content, such as creating image captions, summarizing videos, or even generating audio content based on text descriptions. This could streamline content creation processes for various industries like media, marketing, and entertainment.\n\n3. Accessibility tools: The text-to-audio/sound functionality of GPT-4o could be leveraged to create human-like speech or various environmental sounds from text descriptions, making it a valuable tool for enhancing accessibility for individuals with visual impairments or other disabilities.\n\n4. Storytelling technologies: GPT-4o's 

In [42]:
#Gradio
import gradio as gr

def llm_response(query, context=None):
    """Answer ``query`` by running it through ``rag_chain``.

    Args:
        query: Value forwarded unchanged to ``rag_chain.invoke``.
        context: Accepted but never read by this function.
            NOTE(review): ``gr.ChatInterface`` presumably passes the chat
            history in this position — confirm.

    Returns:
        Whatever ``rag_chain.invoke(query)`` returns.
    """
    answer = rag_chain.invoke(query)
    return answer

# Chat UI that routes every submitted message to `llm_response`.
# NOTE(review): the retry_btn / undo_btn / clear_btn keywords may not be accepted by
# every Gradio release — confirm against the installed version.
rag_demo = gr.ChatInterface(
    llm_response,
    chatbot=gr.Chatbot(height=300),
    textbox=gr.Textbox(placeholder="Enter query here", container=False, scale=5),
    title="RAG Demo",
    # Sample questions offered in the UI; example caching is turned off below.
    examples=[
        "How does the introduction of GPT-4o by OpenAI, represent a pivotal advancement in the evolution of large language models, and what potential applications could arise from these expanded functionalities?",
        "What are the primary advantages and challenges of deploying LLMs using RAG compared to fine-tuning, particularly in terms of handling specialized knowledge and reducing hallucinations?",
    ],
    cache_examples=False,
    retry_btn="Retry",
    undo_btn="Undo",
    clear_btn="Clear",
    submit_btn="Submit",
)

In [43]:
# Start the Gradio app for the chat interface.
rag_demo.launch()

Running on local URL:  http://127.0.0.1:7867

To create a public link, set `share=True` in `launch()`.


