In [None]:
%pip install \
    langchain-core \
    langchain-openai \
    langchain-community \
    langchain-text-splitters \
    langchain-chroma \
    pypdf \
    python-dotenv \
    docx2txt

In [None]:
import hashlib
import os
from getpass import getpass

from langchain import hub
from langchain.chains import RetrievalQA
from langchain_chroma import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain_openai import ChatOpenAI,OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [None]:
# --- Configuration ----------------------------------------------------------
# Everything a reader might want to tune lives here.
PDF_PATH = 'CONSTITUTION OF THE REPUBLIC OF KOREA.pdf'
PERSIST_DIRECTORY = './chroma'
# NOTE(review): the collection name says "tax" although the indexed document is
# the constitution. It is kept unchanged so an existing ./chroma store keeps
# working - consider renaming it (and re-indexing) if that store is disposable.
COLLECTION_NAME = 'chroma-tax'
CHUNK_SIZE = 1200
CHUNK_OVERLAP = 200

# --- Credentials ------------------------------------------------------------
# Never write the key into the notebook. Reuse the value already present in the
# environment and only prompt (without echo) when it is missing or empty, so an
# exported key is no longer clobbered by an empty string.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

# --- Models -----------------------------------------------------------------
llm = ChatOpenAI(model='gpt-4o')
embedding = OpenAIEmbeddings(model='text-embedding-3-large')

# --- Load and chunk the source document -------------------------------------
loader = PyPDFLoader(PDF_PATH)
documents = loader.load()

splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP
)

document_list = splitter.split_documents(documents)

# --- Vector store -----------------------------------------------------------
# Each chunk gets a deterministic id derived from the source path, its position
# and its text. Re-running the cell therefore recognises chunks that are already
# stored instead of appending a second copy, and a changed chunk (different text
# or different splitter settings) gets a new id and is indexed again.
chunk_ids = [
    hashlib.sha256(
        f'{PDF_PATH}:{index}:{chunk.page_content}'.encode('utf-8')
    ).hexdigest()
    for index, chunk in enumerate(document_list)
]

database = Chroma(
    collection_name = COLLECTION_NAME,
    embedding_function = embedding,
    persist_directory = PERSIST_DIRECTORY
)

# Only embed and store what the persisted collection does not hold yet; this
# keeps re-runs cheap and prevents duplicate hits from the retriever.
# NOTE(review): entries written by earlier runs under auto-generated ids are not
# touched here - delete PERSIST_DIRECTORY once if the store already has duplicates.
already_indexed = set(database.get(ids=chunk_ids)['ids']) if chunk_ids else set()
new_chunks = [
    (chunk_id, chunk)
    for chunk_id, chunk in zip(chunk_ids, document_list)
    if chunk_id not in already_indexed
]
if new_chunks:
    database.add_documents(
        documents = [chunk for _, chunk in new_chunks],
        ids = [chunk_id for chunk_id, _ in new_chunks]
    )

# --- Retrieval-augmented QA chain -------------------------------------------
# The prompt template is fetched from the LangChain hub (network access needed).
prompt = hub.pull('rlm/rag-prompt')

qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever = database.as_retriever(),
    chain_type_kwargs={'prompt':prompt}
)

# --- Ask a question ---------------------------------------------------------
query = 'What is the article 1 of constitution of Republic of Korea '

# The chain is invoked with the question under the 'query' key; the printed
# mapping carries the question back together with the answer under 'result'.
ai_message = qa_chain.invoke({'query':query})
print(ai_message)



{'query': 'What is the article 1 of constitution of Republic of Korea ', 'result': 'Article 1 of the Constitution of the Republic of Korea states that the Republic of Korea shall be a democratic republic, and the sovereignty of the Republic of Korea shall reside in the people, with all state authority emanating from the people.'}
