In [None]:
## law_1.docx
- pinecone store 저장
    - index name: 임의
- RetrievalQA 구현
    - prompt: rlm/rag-prompt
    - 질문: 전세사기


In [20]:
from dotenv import load_dotenv
import os
from pinecone import Pinecone 
from langchain_pinecone import PineconeVectorStore
from langchain import hub
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_community.document_loaders import Docx2txtLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter


# --- Environment and clients ---------------------------------------------
load_dotenv()

# Fail fast with a readable message when the key is absent from .env / the
# environment, rather than surfacing a client configuration error later.
api_key = os.getenv('PINECONE_API_KEY')
if not api_key:
    raise RuntimeError(
        'PINECONE_API_KEY is not set; add it to .env or export it before running.'
    )
pc = Pinecone(api_key=api_key)

llm = ChatOpenAI()

# RAG prompt template pulled from the LangChain hub.
prompt = hub.pull('rlm/rag-prompt')

# --- Load and chunk the source document ----------------------------------
loader = Docx2txtLoader('law_1.docx')

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1500,
    chunk_overlap=200,
)

document_list = loader.load_and_split(text_splitter=text_splitter)

embedding = OpenAIEmbeddings(model='text-embedding-3-large')

# --- Store the chunks in Pinecone ----------------------------------------
INDEX_NAME = 'law-index'

# The index must already exist; this cell does not create it.
if INDEX_NAME not in pc.list_indexes().names():
    raise RuntimeError(
        f"Pinecone index '{INDEX_NAME}' does not exist; create it "
        'with a dimension matching the embedding model before running this cell.'
    )

# Re-running this cell used to upsert the same chunks again, leaving duplicate
# vectors in the index. Upload only when the index is still empty; otherwise
# just connect to what is already stored.
# NOTE(review): to re-ingest a changed document, clear the index first.
index_stats = pc.Index(INDEX_NAME).describe_index_stats()
if index_stats.total_vector_count == 0:
    database = PineconeVectorStore.from_documents(
        index_name=INDEX_NAME,
        embedding=embedding,
        documents=document_list,
    )
else:
    database = PineconeVectorStore(
        index_name=INDEX_NAME,
        embedding=embedding,
    )

In [21]:
# Open a vector-store handle on the 'law-index' index, reusing the
# `embedding` object defined in the previous cell.
database = PineconeVectorStore(embedding=embedding, index_name='law-index')

def format_docs(docs):
    """Join the `page_content` of each document into one string.

    Documents are separated by a blank line; an empty iterable yields ''.
    """
    page_texts = [document.page_content for document in docs]
    separator = '\n\n'
    return separator.join(page_texts)

# Retrieval step: fetch matching chunks and flatten them into one string.
retriever = database.as_retriever()
context_chain = retriever | format_docs

# The raw input string is passed through unchanged as the question.
rag_inputs = {
    'context': context_chain,
    'question': RunnablePassthrough(),
}

# inputs -> prompt -> chat model -> plain string
qa_chain = rag_inputs | prompt | llm | StrOutputParser()

question = '전세사기피해자란?'
qa_chain.invoke(question)

'전세사기피해자는 전세금을 사기당한 사람을 가리킵니다. 이는 부동산 계약 상 설명된 것과 다른 조건으로 전세금을 받고 사라지는 사기 피해를 의미합니다. 흔히 사기꾼이 부정한 방법으로 돈을 취득하는 것으로 이해할 수 있습니다.'