## QA RAG

In [None]:
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.chat_models import ChatOllama
from langchain.prompts import ChatPromptTemplate
from pprint import pprint

# Load the target web page into LangChain documents.
# Chunking and indexing are done in later cells, not here.
page_urls = ("http://www.infopub.co.kr/new/include/detail.asp?sku=05000274",)
loader = WebBaseLoader(web_paths=page_urls)
docs = loader.load()

# Quick look at what came back: document count, first document, its metadata.
print(len(docs))
first_doc = docs[0]
print(first_doc)
pprint(first_doc.metadata)

In [None]:
# Download the HuggingFace embedding model.
# Imported from `langchain_community`, like the other integrations in this
# notebook; the `langchain.embeddings` import path is deprecated.
from langchain_community.embeddings import HuggingFaceEmbeddings
from transformers import AutoTokenizer

# Single source of truth for the model id, so the embedder and the tokenizer
# used for chunk sizing cannot drift apart.
EMBEDDING_MODEL_NAME = "snunlp/KR-SBERT-V40K-klueNLI-augSTS"

embeddings_model = HuggingFaceEmbeddings(
    model_name=EMBEDDING_MODEL_NAME,
)

# Load the tokenizer of the same embedding model; it is used to tokenize
# text when splitting documents.
tokenizer = AutoTokenizer.from_pretrained(EMBEDDING_MODEL_NAME)

In [None]:
# Split the documents into chunks, measuring chunk size in tokens of the
# HuggingFace tokenizer rather than in characters.
chunking_options = {"chunk_size": 120, "chunk_overlap": 10}
text_splitter = RecursiveCharacterTextSplitter.from_huggingface_tokenizer(
    tokenizer=tokenizer,
    **chunking_options,
)
split_docs = text_splitter.split_documents(docs)

# Show how many chunks were produced and what the first one looks like.
print(len(split_docs))
print(split_docs[0])

In [None]:
# Embed the chunks into a Chroma vector store and expose it as a retriever.
vectorstore = Chroma.from_documents(
    documents=split_docs,
    embedding=embeddings_model,
)
retriever = vectorstore.as_retriever()

# Prompt: the model is told to answer only from the retrieved context.
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

# Chat model used by the first RAG chain.
llm = ChatOllama(model="qwen2")

def format_docs(docs):
    """Merge the text of several documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The ``page_content`` values in their original order, separated by a
        blank line. An empty input yields an empty string.
    """
    contents = [item.page_content for item in docs]
    return "\n\n".join(contents)


# RAG chain: the incoming question is sent to the retriever (whose hits are
# flattened by format_docs) and is also passed through unchanged; both feed
# the prompt, then the chat model, and the reply is parsed to a plain string.
chain_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}
rag_chain = chain_inputs | prompt | llm | StrOutputParser()

# Run the chain once with a sample question.
sample_question = "이 책의 특징을 3가지 요점으로 설명해주세요."
response = rag_chain.invoke(sample_question)
print(response)

## Test Data 만들기 - AutoRAG 활용

In [None]:
import nest_asyncio
# NOTE(review): presumably required so the async code used by the AutoRAG /
# RAGAS cells below can run inside the notebook's already-running event
# loop — confirm.
nest_asyncio.apply()

In [None]:
from pathlib import Path

from autorag.data.corpus import langchain_documents_to_parquet

# Convert the chunked LangChain documents into a corpus DataFrame and save
# it as parquet. The output directory is created first so that a fresh
# checkout (without `corpus_data/`) does not fail on the write.
corpus_file = 'corpus_data/pandas_book.parquet'
Path(corpus_file).parent.mkdir(parents=True, exist_ok=True)

corpus_df = langchain_documents_to_parquet(split_docs, corpus_file)
corpus_df.head()

In [None]:
import pandas as pd

from autorag.data.qacreation import generate_qa_llama_index, make_single_content_qa
from llama_index.llms.ollama import Ollama


# Prompt template for QA generation. `{{text}}` and `{{num_questions}}` are
# placeholders; presumably they are substituted by generate_qa_llama_index —
# verify against the AutoRAG version in use.
prompt = """
Generate question and answer pairs for the given passage.

Passage:
{{text}}

Number of questions to generate: {{num_questions}}

Example:
[Q]: 이 책의 글쓴이는 누구인가요?
[A]: 저자의 이름은 홍길동입니다.

Result:
"""

# Reload the corpus that was written to parquet earlier.
corpus_df = pd.read_parquet('corpus_data/pandas_book.parquet')

# LLM that writes the question/answer pairs.
llm = Ollama(model='gemma2', temperature=1.0)

# Generate one question per passage for up to 36 passages. The count is
# clamped to the corpus size so a corpus with fewer than 36 chunks does not
# request more passages than exist.
qa_df = make_single_content_qa(
    corpus_df,
    content_size=min(36, len(corpus_df)),
    qa_creation_func=generate_qa_llama_index,
    llm=llm,
    prompt=prompt,
    question_num_per_content=1,
)

qa_df.head()

In [None]:
# Export the generated QA pairs to Excel (without the index column); a
# manually edited version of this data is loaded in the next cell.
qa_df.to_excel('corpus_data/pandas_book_qa.xlsx', index=False)

In [None]:
# Load the QA data after it has been edited by hand
qa_data = pd.read_excel('corpus_data/pandas_book_qa_final.xlsx')
qa_data.head()

In [None]:
# Summarize the loaded QA table (columns, non-null counts, dtypes)
qa_data.info()

In [None]:
# Check the Python type of the first `retrieval_gt` value as loaded from Excel
# (presumably a string at this point, since it is parsed with literal_eval later)
type(qa_data['retrieval_gt'][0])

In [None]:
# Check the Python type of the first `generation_gt` value as loaded from Excel
# (presumably a string at this point, since it is parsed with literal_eval later)
type(qa_data['generation_gt'][0])

In [None]:
import ast


def _parse_literal(value):
    """Turn the text form of a Python literal back into the object.

    Values that are already a list or tuple are returned unchanged, so this
    cell can be re-run without ``ast.literal_eval`` failing on data that has
    already been parsed. Anything else is handed to ``ast.literal_eval``,
    which raises on input it cannot parse.
    """
    if isinstance(value, (list, tuple)):
        return value
    return ast.literal_eval(value)


# The ground-truth columns come back from Excel as text; restore them to
# Python objects.
for col in ['retrieval_gt', 'generation_gt']:
    qa_data[col] = qa_data[col].apply(_parse_literal)

qa_data.info()

In [None]:
# Re-check the type of the first `retrieval_gt` value after parsing
type(qa_data['retrieval_gt'][0])

In [None]:
# Re-check the type of the first `generation_gt` value after parsing
type(qa_data['generation_gt'][0])

## RAGAS - RAG Evaluation

In [None]:
# Smoke-test the chain with a single question before the bulk run
test_question = "이 책의 출판사는 어디인가요?"
response = rag_chain.invoke(test_question)
print(response)

In [None]:
# Keep only the question and its reference answer, renamed to `question` /
# `ground_truth`. An explicit copy is taken so that the column assignments
# made on `qa_data` afterwards operate on an independent frame and do not
# trigger pandas' SettingWithCopyWarning.
qa_data = (
    qa_data[['query', 'generation_gt']]
    .rename(columns={'query': 'question', 'generation_gt': 'ground_truth'})
    .copy()
)
qa_data.head()

In [None]:
# Each ground-truth cell holds a sequence; keep its first element as the
# single reference answer.
qa_data['ground_truth'] = qa_data['ground_truth'].apply(lambda x: x[0])

# Answer every question with the RAG chain.
qa_data['answer'] = qa_data['question'].apply(rag_chain.invoke)

# Record the text of the retrieved chunks for every question. The retriever
# is called through `invoke`, the same Runnable interface the chain uses,
# instead of the deprecated `get_relevant_documents`.
qa_data['contexts'] = qa_data['question'].apply(
    lambda question: [doc.page_content for doc in retriever.invoke(question)]
)
qa_data.head()

In [None]:
from datasets import Dataset 

# Convert the evaluation DataFrame into a `Dataset`, the object that is
# passed to `evaluate` below
dataset = Dataset.from_pandas(qa_data)

# Inspect the first record
dataset[0]

In [None]:
from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    context_recall,
    context_precision,
)

In [None]:
from ragas import evaluate
from langchain_community.chat_models import ChatOllama
from langchain_community.embeddings.ollama import OllamaEmbeddings

# Models handed to RAGAS: the same Ollama model name is used both as the
# LLM and as the embedding model.
langchain_llm = ChatOllama(model="gemma2")
langchain_embeddings = OllamaEmbeddings(model="gemma2")

# Score the first chain's answers on the four imported metrics.
ragas_metrics = [faithfulness, answer_relevancy, context_recall, context_precision]
result = evaluate(
    dataset,
    metrics=ragas_metrics,
    llm=langchain_llm,
    embeddings=langchain_embeddings,
    raise_exceptions=False,
)

result

## Gemma2 모델을 사용한 RAG Chain

In [None]:
# Second RAG chain: same prompt text and retriever as before, with the chat
# model switched to gemma2.
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

llm = ChatOllama(model="gemma2")

rag_chain2 = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# Copy the evaluation table and regenerate only the answers with the new
# chain; the other columns are carried over from `qa_data`.
qa_data2 = qa_data.copy()
qa_data2['answer'] = qa_data2['question'].apply(rag_chain2.invoke)
qa_data2.head()

In [None]:
# Evaluate the second chain's answers with the same metrics, LLM and
# embedding model as the first evaluation.
dataset2 = Dataset.from_pandas(qa_data2)

result2 = evaluate(
    dataset2,
    metrics=[faithfulness, answer_relevancy, context_recall, context_precision],
    llm=langchain_llm,
    embeddings=langchain_embeddings,
    raise_exceptions=False,
)

result2

## 모델 성능 비교

In [None]:
# Turn each evaluation result into a two-column (metric, score) table, one
# per model.
df1 = pd.DataFrame(list(result.items()), columns=['Metric', 'Qwen2'])
df2 = pd.DataFrame(list(result2.items()), columns=['Metric', 'Gemma2'])

# Join the two tables on the metric name for a side-by-side comparison.
df_result = df1.merge(df2, on='Metric')
df_result