# Environment

In [1]:
# !pip install langchain langchain_openai rich
# !pip install pypdf chromadb rapidocr-onnxruntime

# Import

In [2]:
# Import packages and load the API key
import os
from rich import print as pprint
from langchain_openai import ChatOpenAI

from dotenv import load_dotenv
load_dotenv()

from langchain_community.document_loaders import PyPDFLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

In [3]:
# Create the chat model used by every chain below; the OpenAI API key is
# read from the OPENAI_API_KEY environment variable.
chat_model = ChatOpenAI(
    model_name="gpt-3.5-turbo",
    api_key=os.getenv("OPENAI_API_KEY"),
)

In [4]:
# Locate and load the PDF file.
# The directory can be overridden with the PDF_DIR environment variable so the
# notebook is not tied to one machine; the original location stays the default.
pdf_filepath = os.getenv(
    'PDF_DIR',
    '/Users/weikai/Library/CloudStorage/Dropbox/paper/CCWu',
)
pdf_filename = 'CCWu 2021 Modified distributed Bragg reflector for protecting organic light-emitting diode displays against ultraviolet light.pdf'

# `loader` is reused later to build the vector index; `docs` holds the loaded
# documents, whose count is reported as the number of pages.
loader = PyPDFLoader(file_path=os.path.join(pdf_filepath, pdf_filename))
docs = loader.load()
pprint(f'The PDF file has {len(docs)} pages')

In [5]:
# Create the embeddings model, then build a vector-store index from the PDF loader.
embeddings_model = OpenAIEmbeddings(model='text-embedding-3-large')
index_creator = VectorstoreIndexCreator(embedding=embeddings_model)
index = index_creator.from_loaders([loader])



In [6]:
# Ask the index a question, using the chat model to produce the answer.
query = "What is the key idea of this paper?"
response = index.query(
    question=query,
    llm=chat_model,
)
pprint(response)

In [7]:
# Embed the loaded documents into a Chroma collection persisted under
# ./chromadb; the collection metadata requests the cosine space.
# Kept as a bare expression so the notebook displays the resulting store.
Chroma.from_documents(
    documents=docs,
    embedding=embeddings_model,
    collection_metadata={"hnsw:space": "cosine"},
    persist_directory='./chromadb',
)

<langchain_community.vectorstores.chroma.Chroma at 0x11b7a6b70>

In [8]:
# Open the Chroma store persisted in ./chromadb with the same embeddings model.
db = Chroma(
    embedding_function=embeddings_model,
    persist_directory='./chromadb',
)

In [18]:
# Similarity-search retriever configured with k=2.
retriever = db.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 2},
)

# Quick check: run one query, report how many documents came back, then show them.
retrieved_docs = retriever.invoke("What is the key idea of this paper?")
print(f'傳回 {len(retrieved_docs)} 筆資料')
pprint(retrieved_docs)

傳回 2 筆資料


In [10]:
def _format_docs(documents):
    """Join the page text of the retrieved documents into one context string.

    Without this step the prompt would receive the Python repr of the
    document list (metadata and escaped newlines included) instead of text.
    """
    return "\n\n".join(document.page_content for document in documents)


# Parser that reduces the chat model's reply to a plain string.
str_parser = StrOutputParser()
# Prompt with two placeholders: the retrieved context and the user's question.
template = (
    "請根據以下內容加上自身判斷回答問題:\n"
    "{context}\n"
    "問題: {question}"
    )
prompt = ChatPromptTemplate.from_template(template)
# The input question is sent to the retriever (whose documents are formatted
# as text) and, unchanged, to the "question" slot; then prompt -> model -> str.
chain = (
    {"context": retriever | _format_docs, "question": RunnablePassthrough()}
    | prompt
    | chat_model
    | str_parser
)

In [11]:
# Run the retrieval chain on one question and pretty-print the answer.
chain_answer = chain.invoke("What is the key idea of this paper?")
pprint(chain_answer)

In [12]:
# Answer-generation chain that expects a dict with "context" (the retrieved
# documents) and "question".
# The documents are joined into plain text for the prompt only; without this
# the prompt would receive the Python repr of the document list. Whoever
# supplies the dict still holds the original documents.
rag_chain_from_docs = (
    RunnablePassthrough.assign(
        context=lambda inputs: "\n\n".join(
            document.page_content for document in inputs["context"]
        )
    )
    | prompt
    | chat_model
    | StrOutputParser()
)

In [13]:
from langchain_core.runnables import RunnableParallel

# Step 1: run the retriever and pass the question through, side by side.
retrieval_with_question = RunnableParallel(
    {"context": retriever, "question": RunnablePassthrough()}
)
# Step 2: add an "answer" key computed by rag_chain_from_docs, so the result
# carries the context and question alongside the answer.
rag_chain_with_source = retrieval_with_question.assign(answer=rag_chain_from_docs)

In [14]:
def chat(query):
    """Stream the RAG chain's result for `query`, printing it as it arrives.

    Each chunk yielded by ``rag_chain_with_source.stream(query)`` is treated
    as a mapping of key to partial value. A key's label is printed whenever
    the key differs from the previously printed one; further chunks of the
    same key are appended to the same output line.

    Args:
        query: The question passed to the chain.

    Returns:
        dict: Every key seen in the stream mapped to its accumulated value
        (chunks of the same key are combined with ``+=``). Previously this
        dict was built but discarded.
    """
    output = {}
    curr_key = None
    for chunk in rag_chain_with_source.stream(query):
        for key, piece in chunk.items():
            # Accumulate partial values so the full result can be returned.
            if key not in output:
                output[key] = piece
            else:
                output[key] += piece
            # Start a new labelled section when the key changes.
            if key != curr_key:
                print(f"\n\n{key}: {piece}", end="", flush=True)
            else:
                print(piece, end="", flush=True)
            curr_key = key
    return output

# The trailing semicolon keeps the notebook from also displaying the returned
# dict below the streamed text.
chat("What is the key idea of this paper?");



question: What is the key idea of this paper?

context: [Document(page_content='Research Article Vol. 29, No. 5 / 1 March 2021 / Optics Express 7660\nis the peak wavelength of the first ripple in the conventional DBR (Fig. 4(a)), the peak-to-valley\nmodulation of the total electric field in the conventional DBR (Fig. 5(a)) is larger than that in the\n2ndmodified DBR (Fig. 5(b)). Since the total field is composed of waves propagating toward +z\nand-z,largerpeak-to-valleymodulationindicatesthatmoresimilarintensitiesbetween +zand-z\npropagatingwaves(strongercollectivereflection). Meanwhile,thelowpeak-to-valleymodulation\ninthe2ndmodifiedDBRshowsthatadjustingthelayerthicknessesinthefirstpairandthelast\npair can effectively suppressing the intensity of the -z propagating (reflecting) wave and thus\nsubstantially enhance the overall transmittance.\nFig. 5.Simulated distributions of total electric field intensities in the 6-pair DBRs for\nwavelengthsof300nm,370nm,and493nm: (a)theconventiona