In [4]:
from langchain_community.chat_models import ChatOllama
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain_core.callbacks.manager import CallbackManager

# Use one of the other chat models LangChain supports; here it is Ollama.
llm = ChatOllama(
    model="EEVE-Korean-10.8B:latest",
    # `callbacks` is the supported replacement for the deprecated
    # `callback_manager` argument; the handler writes tokens to stdout.
    callbacks=[StreamingStdOutCallbackHandler()],
)

  llm = ChatOllama(


In [2]:
from langchain.embeddings import HuggingFaceEmbeddings
import torch

# Hugging Face model identifier handed to HuggingFaceEmbeddings.
model_name = "jhgan/ko-sroberta-multitask"
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU so
# this cell does not fail on machines without CUDA.
model_kwargs = {'device': 'cuda' if torch.cuda.is_available() else 'cpu'}
# Embedding vectors are requested without normalisation.
encode_kwargs = {'normalize_embeddings': False}
embedding_model = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)

  embedding_model = HuggingFaceEmbeddings(
  from tqdm.autonotebook import tqdm, trange


In [5]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader, TextLoader

def load_and_split_documents(loaders, chunk_size=200, chunk_overlap=100, text_splitter=None):
    """Load the documents of every loader and cut them into chunks.

    Args:
        loaders: Iterable of document loaders; each must provide ``load()``
            returning a list of documents.
        chunk_size: ``chunk_size`` given to the default
            ``RecursiveCharacterTextSplitter``.
        chunk_overlap: ``chunk_overlap`` given to the default
            ``RecursiveCharacterTextSplitter``.
        text_splitter: Optional ready-made splitter exposing
            ``split_documents(documents)``. When supplied, ``chunk_size`` and
            ``chunk_overlap`` are ignored.

    Returns:
        One flat list holding the chunks of all loaders, in loader order.
        An empty ``loaders`` iterable yields an empty list.
    """
    if text_splitter is None:
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size, chunk_overlap=chunk_overlap
        )

    all_splits = []
    for loader in loaders:
        # Use load(), not load_and_split(): the latter already runs its own
        # default splitter, so the documents would be chunked twice.
        documents = loader.load()
        all_splits.extend(text_splitter.split_documents(documents))

    return all_splits

# Source files for the knowledge base: one PDF followed by two UTF-8 text files.
loaders = [PyPDFLoader('000660_SK_2023.pdf')]
loaders += [
    TextLoader(text_path, encoding='UTF8')
    for text_path in ('나형진.txt', 'history.txt')
]

# Load everything and break it into chunks for indexing.
all_splits = load_and_split_documents(loaders)

In [6]:
from langchain.vectorstores import Chroma
# Embed every chunk with the embedding model and index it in a Chroma store.
vector = Chroma.from_documents(
    embedding=embedding_model,
    documents=all_splits,
)
# Wrap the store in a retriever, keeping the default search settings.
retriever = vector.as_retriever()

In [7]:
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# Prompt that rewrites the latest user question into a standalone question.
contextualize_q_system_prompt = (
    "Given a chat history and the latest user question "
    "which might reference context in the chat history, formulate a standalone question "
    "which can be understood without the chat history. Do NOT answer the question, "
    "just reformulate it if needed and otherwise return it as is. "
    "Always in Korean."
)

# Message order: system instructions, then the prior conversation, then the new question.
contextualize_q_prompt = ChatPromptTemplate.from_messages([
    ("system", contextualize_q_system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
])

# Retriever built from the LLM, the base retriever and the rewrite prompt.
history_aware_retriever = create_history_aware_retriever(
    llm=llm,
    retriever=retriever,
    prompt=contextualize_q_prompt,
)


In [8]:
# Prompt used to answer the question from the retrieved context.
qa_system_prompt = """You are an assistant for question-answering tasks. \
Use the following pieces of retrieved context to answer the question. \
If you don't know the answer, just say that you don't know. \
Use three sentences maximum and keep the answer concise. \
Always answer in Korean.

{context}"""

qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", qa_system_prompt),
        # The answer step receives the user's original input, so it needs the
        # conversation to resolve follow-up questions. Marked optional so the
        # chain can still be invoked with only {"input": ...}.
        MessagesPlaceholder("chat_history", optional=True),
        ("human", "{input}"),
    ]
)
# Chain that stuffs the retrieved documents into {context} and calls the LLM.
question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
# Full pipeline: history-aware retrieval followed by answer generation.
rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)


In [15]:
# Run the chain once without supplying any chat history.
sessionQu = rag_chain.invoke({"input": "나형진은 무슨 학교를 졸업했어?"})

나형진은 전남대학교 수의학과를 졸업하였습니다.