In [None]:
# Load: fetch the source document(s) through the arXiv loader.
from langchain_community.document_loaders import ArxivLoader

# NOTE(review): looks like an arXiv paper identifier passed as the search query — confirm.
arxiv_query = "2405.17147"

loader = ArxivLoader(query=arxiv_query)
docs = loader.load()

# Display how many documents came back.
len(docs)

In [None]:
# Chunk: split the loaded documents into smaller, overlapping pieces.
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Each chunk is capped at chunk_size, and neighbouring chunks share
# chunk_overlap so text cut at a boundary still appears whole in one chunk.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
chunk_docs = text_splitter.split_documents(docs)

# Display how many chunks were produced.
len(chunk_docs)

In [None]:
# Embed: read the API credentials and build the embedding client.
from langchain_openai import OpenAIEmbeddings
import os
from dotenv import load_dotenv

# Load variables from a .env file (if present) before reading the environment.
load_dotenv()

openai_api_key = os.getenv("OPENAI_API_KEY")
pinecone_api_key = os.getenv("PINECONE_API_KEY")

# os.getenv returns None for unset variables; fail here with a readable
# message instead of letting a later cell fail inside a provider client.
missing_keys = [
    name
    for name, value in (
        ("OPENAI_API_KEY", openai_api_key),
        ("PINECONE_API_KEY", pinecone_api_key),
    )
    if not value
]
if missing_keys:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(missing_keys)
    )

embeddings = OpenAIEmbeddings(api_key=openai_api_key)
embeddings

In [None]:
# Create index: make sure the Pinecone index used by this notebook exists.
from pinecone import Pinecone, ServerlessSpec

pc = Pinecone(api_key=pinecone_api_key)
index_name = "app-demo"

# Only create the index when no index of that name is listed yet, so the
# cell can be re-run safely.
existing_index_names = pc.list_indexes().names()
if index_name not in existing_index_names:
    serverless_spec = ServerlessSpec(cloud="aws", region="us-east-1")
    pc.create_index(
        name=index_name,
        # NOTE(review): presumably matches the vector size produced by the
        # `embeddings` model configured earlier — confirm if the model changes.
        dimension=1536,
        metric="cosine",
        spec=serverless_spec,
    )

In [None]:
# Create embeddings: embed every chunk and upsert it into the Pinecone index.
import hashlib

from langchain_pinecone import PineconeVectorStore

# Derive each vector id from the chunk text. Without explicit ids the store
# assigns new ids on every run, so re-running this cell (or Restart & Run All)
# would add another copy of every chunk and the retriever would start
# returning duplicates. With content-derived ids a re-run overwrites in place.
# Keying the dict by digest also collapses chunks with identical text, so the
# id list never contains the same id twice.
unique_chunks = {}
for chunk in chunk_docs:
    digest = hashlib.sha256(chunk.page_content.encode("utf-8")).hexdigest()
    unique_chunks.setdefault(digest, chunk)

vector_store = PineconeVectorStore.from_documents(
    list(unique_chunks.values()),
    embeddings,
    index_name=index_name,
    ids=list(unique_chunks),
)


In [None]:
# Sample question used by the retrieval and single-turn RAG cells.
query = "What metrics are used to evaluate the quality of experience (QoE) for users of large language model (LLM) service"

# Retriever over the vector store, limited to the 3 best matches per query.
retriever = vector_store.as_retriever(search_kwargs={"k": 3})

# Use the Runnable `invoke` entry point (the same call the next cell uses)
# rather than the deprecated `get_relevant_documents`.
retriever.invoke(query)

In [None]:
def format_docs(docs):
    """Concatenate the text of several documents into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The ``page_content`` values in iteration order, separated by a
        blank line; an empty string when ``docs`` is empty.
    """
    page_texts = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(page_texts)

# Fetch the matches for the sample query and print them as one text block.
# The variable is `retriever`; the misspelled `retreiver` is not defined
# anywhere and would raise NameError.
retrieved_docs = retriever.invoke(query)
print(format_docs(retrieved_docs))

In [None]:
# Prompt template for the single-turn RAG chain.
# It carries two placeholders: {context}, filled with the formatted retrieved
# chunks, and {question}, filled with the user's query.
template = """You are an expert LLM assistant specialized in answering questions related to large language models (LLMs). Use the provided information and your knowledge to respond accurately and clearly to each question. 

Guidelines:
1. Provide concise and informative answers.
2. If the question is beyond the scope of your knowledge or the provided information, state, "I don't know."
3. Use examples where applicable to illustrate your answers.
4. Maintain a professional and helpful tone.

Context: {context}
Question: {question}

Answer:
"""


In [None]:
# Fill the template by hand to preview the final prompt text.
rendered_context = format_docs(retrieved_docs)
prompt = template.format(context=rendered_context, question=query)
print(prompt)

In [None]:
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI

# Chat model shared by every chain in this notebook.
llm = ChatOpenAI(api_key=openai_api_key)

custom_rag_template = PromptTemplate.from_template(template)

# Single-turn RAG pipeline. The leading dict feeds the incoming question to
# two branches: the retriever (whose documents are flattened to text by
# format_docs) supplies "context", and the passthrough supplies "question".
# The variable is `retriever`; the misspelled `retreiver` is not defined
# anywhere and would raise NameError.
custom_rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | custom_rag_template
    | llm
    | StrOutputParser()
)

In [None]:
# Run the single-turn chain on the sample question; the chain ends in
# StrOutputParser, so the cell displays the answer as a plain string.
custom_rag_chain.invoke(query)

In [None]:
# Contextualisation prompt: asks the model to rewrite a follow-up question
# so that it can be understood without the chat history.

from langchain.prompts.chat import ChatPromptTemplate, MessagesPlaceholder

contextualised_system_prompt = (
    "Given a chat history and the latest user question "
    "which might reference context in the chat history, "
    "formulate a standalone question which can be understood "
    "without the chat history. Do NOT answer the question, "
    "just reformulate it if needed and otherwise return it as is."
)

# Message order: the instruction, then the prior turns injected under
# "chat_history", then the newest user input.
contextualise_messages = [
    ("system", contextualised_system_prompt),
    MessagesPlaceholder("chat_history"),
    ("human", "{input}"),
]
contextualised_template = ChatPromptTemplate.from_messages(contextualise_messages)

In [None]:
# History-aware retriever: combines the chat model, the contextualisation
# prompt and the vector-store retriever.
from langchain.chains import create_history_aware_retriever

history_aware_retriever = create_history_aware_retriever(
    llm=llm,
    retriever=retriever,
    prompt=contextualised_template,
)

In [None]:
# System prompt for the conversational chain. {context} receives the
# retrieved documents; the question itself arrives as the "human" message.
system_prompt_template = """You are an expert LLM assistant specialized in answering questions related to large language models (LLMs). Use the provided information and your knowledge to respond accurately and clearly to each question. 

Guidelines:
1. Provide concise and informative answers.
2. If the question is beyond the scope of your knowledge or the provided information, state, "I don't know."
3. Use examples where applicable to illustrate your answers.
4. Maintain a professional and helpful tone.

Context: {context}

"""

# Message order: system instructions, prior turns, then the newest user input.
answer_messages = [
    ("system", system_prompt_template),
    MessagesPlaceholder("chat_history"),
    ("human", "{input}"),
]
system_template = ChatPromptTemplate.from_messages(answer_messages)


In [None]:
# Build the question-answer chain, then combine it with the history-aware
# retriever into the full RAG chain.
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain

question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=system_template)

rag_chain = create_retrieval_chain(
    retriever=history_aware_retriever,
    combine_docs_chain=question_answer_chain,
)  # runnable


In [None]:
#Managing chat history
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

# In-memory registry of chat histories keyed by session id. It lives in the
# kernel only, so re-running this cell starts every session from scratch.
store = dict()


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the chat history for ``session_id``, creating it on first use.

    Args:
        session_id: Key identifying one conversation in ``store``.

    Returns:
        The history object stored for that session; repeated calls with the
        same id return the same object.
    """
    try:
        return store[session_id]
    except KeyError:
        history = ChatMessageHistory()
        store[session_id] = history
        return history

# Wrap the RAG chain with per-session message history.
# The key names mirror the rest of the notebook: "input" is the question,
# "chat_history" is the placeholder used by both prompts, and "answer" is
# the output field recorded back into the history.
conversational_rag_chain = RunnableWithMessageHistory(
    runnable=rag_chain,
    get_session_history=get_session_history,
    input_messages_key="input",
    output_messages_key="answer",
    history_messages_key="chat_history",
)

In [None]:
import pprint

def conversational_chain(query, session_id="MySessionId0001"):
    """Ask one question in a conversation and pretty-print the raw result.

    Args:
        query: The user's question for this turn.
        session_id: Identifies which stored chat history the turn belongs
            to. Defaults to the notebook's demo session; pass another id to
            start or continue a separate conversation.

    Returns:
        The full result returned by ``conversational_rag_chain.invoke``
        (it is indexed with ``"answer"`` elsewhere in this notebook).
    """
    answer = conversational_rag_chain.invoke(
        {"input": query},
        config={"configurable": {"session_id": session_id}},
    )
    # Show the whole result structure, not just the answer text.
    pprint.pprint(answer)
    return answer

In [None]:
# First turn of the demo conversation (conversational_chain uses one fixed session id).
conversational_chain("What is QoE?")

In [None]:
# Follow-up turn in the same session: "it" can only be resolved through the
# chat history recorded for the previous question.
conversational_chain("How is it used in LLM model training?")

In [None]:

def conversational_chain(query, session_id="MySessionId0001"):
    """Ask one question in a conversation and return only the answer text.

    This redefines the ``conversational_chain`` from the earlier cell: once
    this cell has run, calls no longer pretty-print the full result and
    return just its ``"answer"`` entry.

    Args:
        query: The user's question for this turn.
        session_id: Identifies which stored chat history the turn belongs
            to. Defaults to the notebook's demo session; pass another id to
            start or continue a separate conversation.

    Returns:
        The ``"answer"`` value of the chain result.
    """
    result = conversational_rag_chain.invoke(
        {"input": query},
        config={"configurable": {"session_id": session_id}},
    )
    return result["answer"]

In [None]:
# Another turn in the same session; with the most recent definition of
# conversational_chain only the answer text is returned and displayed.
conversational_chain("What is the relationship of LLM and carbon emmision?")

In [None]:
import gradio as gr

def llm_response(query, memory = None):
    """Chat callback for the Gradio interface: answer ``query`` via the RAG chain.

    Args:
        query: The message the user just submitted.
        memory: The chat history supplied by the Gradio chat interface. Only
            its emptiness is inspected; the conversation context itself is
            kept in the session history behind ``conversational_rag_chain``.
            ``None`` (a direct call from code) leaves stored history alone.

    Returns:
        The ``"answer"`` value of the chain result.
    """
    session_id = "MySessionId0001"

    # An explicitly empty history means the UI is starting a new chat (first
    # message, or after Clear). Reset the stored history to match; otherwise
    # the model keeps answering from turns the user can no longer see,
    # including the test questions asked earlier under this same session id.
    # NOTE(review): every browser session shares this one session id, so a
    # reset from one tab also resets the others — acceptable for a
    # single-user demo, revisit before sharing the app.
    if memory is not None and len(memory) == 0:
        get_session_history(session_id).clear()

    return conversational_rag_chain.invoke(
        {"input": query},
        config={"configurable": {"session_id": session_id}},
    )["answer"]

# Chat UI around llm_response.
# NOTE(review): the clear_btn / undo_btn / retry_btn arguments appear to have
# been removed from gr.ChatInterface in Gradio 5 — confirm the installed
# Gradio version before upgrading.
rag_demo = gr.ChatInterface(
    llm_response,
    title="RAG Demo",
    chatbot=gr.Chatbot(height=300),
    # Placeholder spelling corrected ("querey" -> "query"); it is shown to users.
    textbox=gr.Textbox(placeholder="Enter query here:", scale=5),
    clear_btn=gr.Button("Clear"),
    undo_btn=gr.Button("Undo"),
    retry_btn=gr.Button("Retry"),
    submit_btn=gr.Button("Submit"),
)

In [None]:
# Start the Gradio app for the chat interface defined in the previous cell.
rag_demo.launch()