In [1]:
import os
from dotenv import load_dotenv
# Load settings via python-dotenv so the API key never has to be hard-coded
# in the notebook, then read the key from the process environment.
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# os.getenv returns None for an unset variable. Later cells hand this value to
# the OpenAI embedding and chat clients, so stop here with a clear message
# rather than failing further down on a missing credential.
if not OPENAI_API_KEY:
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to a .env file or export it in "
        "the environment before running this notebook."
    )

In [2]:
from langchain_community.document_loaders import PyPDFLoader

# Input document for the demo; a relative path, so it is looked up from the
# notebook's working directory.
PDF_PATH = "attention_is_all_you_need.pdf"

# Create the PDF loader, then read the file into `pages`.
loader = PyPDFLoader(PDF_PATH)
pages = loader.load()

In [3]:
# Number of documents the loader returned (presumably one per PDF page — confirm).
len(pages)

15

In [4]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Chunking parameters, both counted in characters.
CHUNK_SIZE = 1000     # number of characters per chunk
CHUNK_OVERLAP = 200   # number of characters that overlap between chunks

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)

In [5]:
# Split the loaded pages into chunks with the configured text splitter,
# then display how many chunks were produced.
splits = text_splitter.split_documents(pages)
len(splits)

52

In [6]:
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma

embeddings_model = OpenAIEmbeddings(api_key=OPENAI_API_KEY)

# Give every chunk a stable id derived from its position in `splits`.
# NOTE(review): without explicit ids, re-running this cell in the same kernel
# appears to append a second copy of every chunk to the same in-memory
# collection, so the retrievers start returning duplicates. With fixed ids the
# insert should behave as an upsert instead — confirm against the Chroma docs.
# Caveat: if a re-run produces fewer chunks than before, higher-numbered ids
# from the earlier run would presumably remain; restart the kernel in that case.
chunk_ids = [f"chunk-{index}" for index in range(len(splits))]

# Convert the split documents to vectors with the embedding model and store
# the resulting vectors in Chroma.
db = Chroma.from_documents(splits, embeddings_model, ids=chunk_ids)

In [7]:
# Similarity search: retrieve the 3 documents with the highest similarity.
similarity_search_kwargs = {"k": 3}

similarity_retriever = db.as_retriever(
    search_type="similarity",
    search_kwargs=similarity_search_kwargs,
)

In [8]:
# MMR search, which takes diversity into account:
# fetch 10 documents, then select 3 of them.
mmr_search_kwargs = {"k": 3, "fetch_k": 10}

mmr_retriever = db.as_retriever(
    search_type="mmr",
    search_kwargs=mmr_search_kwargs,
)

In [9]:
# Prompt

from langchain_core.prompts import ChatPromptTemplate

# Prompt text with two placeholders: {context} receives the retrieved
# documents and {input} receives the user's question. Adjacent string literals
# are concatenated by Python, so this is a single template string.
template = (
    "Answer the question based only on the following context:\n"
    "<context>\n"
    "{context}\n"
    "</context>\n"
    "\n"
    "Question: {input}\n"
)

prompt = ChatPromptTemplate.from_template(template)

In [10]:
from langchain_openai import ChatOpenAI
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain

# Chat model that writes the final answer.
model = ChatOpenAI(
    api_key=OPENAI_API_KEY,
    model="gpt-3.5-turbo-0125",
    temperature=0,
)

# A chain is needed, so bundle the model and the prompt together.
document_chain = create_stuff_documents_chain(model, prompt)

# Put the similarity retriever in front of the document chain.
similarity_retriever_chain = create_retrieval_chain(
    similarity_retriever,
    document_chain,
)

# Ask the question through the similarity-based chain.
question = "what is the attention mechanism in transformers?"
similarity_response = similarity_retriever_chain.invoke({"input": question})

# Show the whole response.
similarity_response

{'input': 'what is the attention mechanism in transformers?',
 'context': [Document(metadata={'page': 4, 'source': 'attention_is_all_you_need.pdf'}, page_content='The Transformer uses multi-head attention in three different ways:\n•In "encoder-decoder attention" layers, the queries come from the previous decoder layer,\nand the memory keys and values come from the output of the encoder. This allows every\nposition in the decoder to attend over all positions in the input sequence. This mimics the\ntypical encoder-decoder attention mechanisms in sequence-to-sequence models such as\n[38, 2, 9].\n•The encoder contains self-attention layers. In a self-attention layer all of the keys, values\nand queries come from the same place, in this case, the output of the previous layer in the\nencoder. Each position in the encoder can attend to all positions in the previous layer of the\nencoder.\n•Similarly, self-attention layers in the decoder allow each position in the decoder to attend to\nall pos

In [11]:
# Run the same question through a chain that retrieves with MMR instead of
# plain similarity, reusing the existing document chain.
mmr_inputs = {"input": "what is the attention mechanism in transformers?"}

mmr_retriever_chain = create_retrieval_chain(
    mmr_retriever,
    document_chain,
)
mmr_response = mmr_retriever_chain.invoke(mmr_inputs)

# Show the whole response.
mmr_response

{'input': 'what is the attention mechanism in transformers?',
 'context': [Document(metadata={'page': 4, 'source': 'attention_is_all_you_need.pdf'}, page_content='The Transformer uses multi-head attention in three different ways:\n•In "encoder-decoder attention" layers, the queries come from the previous decoder layer,\nand the memory keys and values come from the output of the encoder. This allows every\nposition in the decoder to attend over all positions in the input sequence. This mimics the\ntypical encoder-decoder attention mechanisms in sequence-to-sequence models such as\n[38, 2, 9].\n•The encoder contains self-attention layers. In a self-attention layer all of the keys, values\nand queries come from the same place, in this case, the output of the previous layer in the\nencoder. Each position in the encoder can attend to all positions in the previous layer of the\nencoder.\n•Similarly, self-attention layers in the decoder allow each position in the decoder to attend to\nall pos