### Environment

#### Langsmith Tracing

In [12]:
from langsmith import Client
client = Client()

In [13]:
import os
import getpass
from dotenv import load_dotenv
load_dotenv()

# 載入 api_key 使用
def _set_env(var: str):
    if not os.getenv(var):
        os.environ[var] = getpass.getpass(f"{var}: ")


_set_env("CHATGPT_API_KEY")
_set_env("TAVILY_API_KEY")
OPENAI_API_KEY = os.getenv('CHATGPT_API_KEY')
TAVILY_API_KEY = os.getenv('TAVILY_API_KEY')

#### Langchain environment

In [14]:
from langchain_openai import OpenAIEmbeddings 
from langchain_community.vectorstores import FAISS
from langchain_openai import ChatOpenAI 
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
from langchain.chains import create_history_aware_retriever
from langchain_core.messages import HumanMessage, AIMessage
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.prompts import MessagesPlaceholder

In [15]:
from langchain_openai import OpenAIEmbeddings 
from langchain_openai import ChatOpenAI 

from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_community.document_loaders import CSVLoader
from langchain_community.vectorstores import FAISS
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.output_parsers import StrOutputParser
from typing_extensions import TypedDict
from typing import List
from langchain.schema import Document
from langgraph.graph import END, StateGraph

### Web Search Tool

In [16]:
web_search_tool = TavilySearchResults()

### Question Router

In [17]:
# 定義兩個工具的 DataModel
class web_search(BaseModel):
    """
    網路搜尋工具。若問題與課程評價"有關"，請使用web_search工具搜尋解答。
    """
    query: str = Field(description="使用網路搜尋時輸入的問題")

class vectorstore(BaseModel):
    """
    跟課程詳細資料有關的向量資料庫工具。若問題與課程詳細資料，例如 grading, description 等，請使用此工具搜尋解答。
    """
    query: str = Field(description="搜尋向量資料庫時輸入的問題")


# Prompt Template
instruction = """
你是將使用者問題導向向量資料庫或網路搜尋的專家。
向量資料庫包含有關成大選修課程的詳細相關資訊。對於這些主題的問題，請使用向量資料庫工具。其他情況則使用網路搜尋工具。
"""
route_prompt = ChatPromptTemplate.from_messages(
    [
        ("system",instruction),
        ("human", "{question}"),
    ]
)

# Route LLM with tools use
llm = ChatOpenAI(openai_api_key=OPENAI_API_KEY,model="gpt-3.5-turbo", temperature=0)

structured_llm_router = llm.bind_tools(tools=[web_search, vectorstore])

# 使用 LCEL 語法建立 chain
question_router = route_prompt | structured_llm_router

In [18]:
# 測試 Route 功能
response = question_router.invoke({"question": "東京的經緯度是多少?"})
print(response.additional_kwargs['tool_calls'])
response = question_router.invoke({"question": "請列出會計系 : 初級會計學（一）的 grading?"})
print(response.additional_kwargs['tool_calls'])
response = question_router.invoke({"question": "你好"})
response = question_router.invoke({"question": "請列資料結構的 description?"})
print(response.additional_kwargs['tool_calls'])
print('tool_calls' in response.additional_kwargs)

[{'id': 'call_uHJXldU3p5E4SEqjBESiLU4T', 'function': {'arguments': '{"query":"東京的經緯度是多少?"}', 'name': 'web_search'}, 'type': 'function'}]
[{'id': 'call_n9iLDnBRZfgXqKqO6NyjYyOL', 'function': {'arguments': '{"query":"初級會計學（一） grading"}', 'name': 'vectorstore'}, 'type': 'function'}]
[{'id': 'call_CMIUrW4bbCb0u3f90pMLvN3M', 'function': {'arguments': '{"query":"data structures"}', 'name': 'vectorstore'}, 'type': 'function'}]
True


### Retriever

In [24]:
#construct retreiver
embeddings_model = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
new_db = FAISS.load_local("faiss_index", embeddings_model, allow_dangerous_deserialization=True)

retriever = new_db.as_retriever()


In [None]:
from langchain_text_splitters import CharacterTextSplitter
# file_paths = [
#     "/Users/boruchen/Documents/langchain-practice/course_data/ncku_CoM_data.csv",
#     "/Users/boruchen/Documents/langchain-practice/course_data/ncku_hub_data.csv"
# ]

# all_documents = []

# for file_path in file_paths:
#     loader = CSVLoader(file_path=file_path)
#     documents = loader.load()
#     all_documents.extend(documents)  

# text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)

# docs = text_splitter.split_documents(all_documents)

# embeddings_model = OpenAIEmbeddings(openai_api_key= OPENAI_API_KEY)

# vectorstore = FAISS.from_documents(all_documents, embeddings_model)

# vectorstore.save_local("faiss_index")



new_db = FAISS.load_local("faiss_index", embeddings_model, allow_dangerous_deserialization=True)
retriever = new_db.as_retriever()



In [25]:
response = retriever.invoke("工資系課程")

In [30]:
print(len(response)) ## we need llm to get more courses data then just four courses.

4


### RAG Responder

In [31]:
llm_model = ChatOpenAI(openai_api_key=OPENAI_API_KEY,temperature=0, model="gpt-3.5-turbo") 


### Contextualize question ###
prompt_search_query = ChatPromptTemplate.from_messages([
    MessagesPlaceholder(variable_name="chat_history"),
    ("user", "{input}"),
    ("user", "Given the above conversation, generate a search query to look up in order to get information relevant to the conversation")
])

retriever_chain = create_history_aware_retriever(llm_model, retriever, prompt_search_query)



### Answer question ###
prompt_get_answer = ChatPromptTemplate.from_messages([
    ('system', 'Answer the user\'s questions based on the below context:\n\n{context}'),
    MessagesPlaceholder(variable_name="chat_history"),
    ('user', '{input}'),
])

document_chain = create_stuff_documents_chain(llm_model, prompt_get_answer)

retrieval_chain_combine = create_retrieval_chain(retriever_chain, document_chain)

In [32]:
retriever.invoke("初級會計學的出席分數")

[Document(metadata={'source': '/Users/boruchen/Documents/langchain-practice/ncku_CoM_data.csv', 'row': 0}, page_content='Course Name: 初級會計學（一）\nGrading: 期中考 Midterm Exam\t30\n期末考 Term exam\t40\n出席 Participation\t10\n作業 Assignments\t20\nDepartment: 會計系Accountancy\nTeaching Strategies: 其他:略 others\t100\nSyllabus Progress: 1\tCourse Introduction  and  Ch.1  Accounting in Action\n2\tCh.1  Accounting in Action\n3\tCh.2  The Recording Process\n4\tCh.2  Analyzing and Recording Transactions\n5\tCh.3  Adjusting the Accounts\n6\tCh.3  Adjusting the Accounts\n7\tCh.4  Completing the Accounting Cycle\n8\tCh.4  Completing the Accounting Cycle\n9\tMid-Term Exam(Ch 1~4)\n10\tCh.5  Accounting for Merchandising Operations.\n11\tCh.5  Accounting for Merchandising Operations.\n12\tCh.6  Inventories.\n13\tCh.6  Inventories.\n14\tCh.7  Fraud, Internal Control, and Cash\n15\tCh.7  Fraud, Internal Control, and Cash\n16\tproject presentation\n17\tproject presentation\n18\tFinal Exam (Ch 5~7)\nCourse Descripti

In [33]:
# testing retrieverchain
chat_history = []
query = '初級會計學出席分數佔比'
# 使用 retriever_chain 生成檢索查詢
search_query_response = retriever_chain.invoke({
    "input": query,
    "chat_history": chat_history
})

In [34]:
def rag_invoke(query: str, chat_history: list) -> str:
    """
    向 LLM 提問並返回回應。

    :param query: 要提問的問題
    :param chat_history: 之前的聊天記錄
    :return: LLM 的回答
    """
    response = retrieval_chain_combine.invoke({"input": query, "chat_history": chat_history})
    chat_history.append(HumanMessage(content=query))
    chat_history.append(AIMessage(content=response['answer']))
    return response['answer']

#### Testing

In [35]:
chat_history = []
query = "請列出資料結構這堂課的分數百分比"
response = retrieval_chain_combine.invoke({"input": query, "chat_history": chat_history}) 
print(response['answer'])
chat_history.append(HumanMessage(content=query))
chat_history.append(AIMessage(content=response['answer']))

query2 = "那這堂課的課程描述是什麼"
response2 = retrieval_chain_combine.invoke({
    "input": query2,
    "chat_history": chat_history
}) 
print(response2['answer'])

資料結構這堂課的分數百分比如下：
- 期中考 (Midterm Exam): 40%
- 期末考 (Term Exam): 25%
- 平時測驗 (Quizzes): 25%
- 出席 (Participation): 10%
這堂資料結構課程旨在幫助學生選擇適當的資料結構來應對大型應用程式的複雜性。課程將涵蓋結構化問題解決、資料抽象、軟體工程原則以及算法的比較分析。課程包含許多C++範例程式，因此具備C++編碼能力對這門課至關重要。課程將涵蓋以下8個主題：
1. C++程式環境介紹
2. 資料結構基本概念
3. 陣列和結構
4. 堆疊和佇列
5. 列表
6. 樹
7. 排序
8. 雜湊


In [36]:
query1 = "請列出會計系 : 初級會計學（一）的 grading"
rag_invoke(query1, chat_history)

'會計系：初級會計學（一）的 grading如下：\n- 期中考 (Midterm Exam): 30%\n- 期末考 (Term Exam): 40%\n- 出席 (Participation): 10%\n- 作業 (Assignments): 20%'

In [37]:
query2 = "我問過哪些課程的資訊，幫我列出來"
rag_invoke(query2, chat_history)

'您問過以下課程的資訊：\n1. 初級會計學（一）\n2. 會計與金融數據分析\n3. 初級會計學\n4. 高級會計學（二）'

In [38]:
query3 = "可以告訴我我上一次問的那堂課的出席分數佔比嗎"
rag_invoke(query3, chat_history)

'您上一次問的是「會計系：初級會計學（一）」，該課程的出席分數佔比為10%。'

### Retrieval Grader

In [39]:
class GradeDocuments(BaseModel):
    """
    確認提取文章與問題是否有關(yes/no)
    """

    binary_score: str = Field(description="請問文章與問題是否相關。('yes' or 'no')")

# Prompt Template
instruction = """
你是一個評分的人員，負責評估文件與使用者問題的關聯性。
如果文件包含與使用者問題相關的關鍵字或語意，則將其評為相關。
輸出 'yes' or 'no' 代表文件與問題的相關與否。
"""
grade_prompt = ChatPromptTemplate.from_messages(
    [
        ("system",instruction),
        ("human", "文件: \n\n {document} \n\n 使用者問題: {question}"),
    ]
)

# Grader LLM
llm = ChatOpenAI(openai_api_key=OPENAI_API_KEY ,model="gpt-3.5-turbo", temperature=0)
structured_llm_grader = llm.with_structured_output(GradeDocuments)

# 使用 LCEL 語法建立 chain
retrieval_grader = grade_prompt | structured_llm_grader

In [40]:
# 測試 grader 功能
question = "會計系有什麼課?"
docs = retriever_chain.invoke({
    "input": query,
    "chat_history": chat_history
})

response =  retrieval_grader.invoke({"question": question, "document": docs})
print(response)

# 測試 grader 功能
question = "今天天氣好嗎?"
docs = retriever_chain.invoke({
    "input": question,
    "chat_history": chat_history
})
doccs = docs[0].page_content
response =  retrieval_grader.invoke({"question": question, "document": docs})
print(response)

binary_score='yes'
binary_score='no'


### Hallucination Grader

In [41]:
class GradeHallucinations(BaseModel):
    """
    確認答案是否為虛構(yes/no)
    """

    binary_score: str = Field(description="答案是否由為虛構。('yes' or 'no')")

# Prompt Template
instruction = """
你是一個評分的人員，負責確認LLM的回應是否為虛構的。
以下會給你一個文件與相對應的LLM回應，請輸出 'yes' or 'no'做為判斷結果。
'Yes' 代表LLM的回答是虛構的，未基於文件內容 'No' 則代表LLM的回答並未虛構，而是基於文件內容得出。
"""
hallucination_prompt = ChatPromptTemplate.from_messages(
    [
        ("system",instruction),
        ("human", "文件: \n\n {documents} \n\n LLM 回應: {generation}"),
    ]
)


# Grader LLM
llm = ChatOpenAI(openai_api_key=OPENAI_API_KEY, model="gpt-3.5-turbo", temperature=0)
structured_llm_grader = llm.with_structured_output(GradeHallucinations)

# 使用 LCEL 語法建立 chain
hallucination_grader = hallucination_prompt | structured_llm_grader

In [42]:
# 測試 grader 功能
question = "今天天氣很好?"
docs = retriever.invoke(question)
generation = retrieval_chain_combine.invoke({"input": question, "chat_history": chat_history})
hallucination_grader.invoke({"documents": generation['context'], "generation": generation})

GradeHallucinations(binary_score='no')

In [43]:
generation['context']

[Document(metadata={'source': '/Users/boruchen/Documents/langchain-practice/ncku_CoM_data.csv', 'row': 0}, page_content='Course Name: 初級會計學（一）\nGrading: 期中考 Midterm Exam\t30\n期末考 Term exam\t40\n出席 Participation\t10\n作業 Assignments\t20\nDepartment: 會計系Accountancy\nTeaching Strategies: 其他:略 others\t100\nSyllabus Progress: 1\tCourse Introduction  and  Ch.1  Accounting in Action\n2\tCh.1  Accounting in Action\n3\tCh.2  The Recording Process\n4\tCh.2  Analyzing and Recording Transactions\n5\tCh.3  Adjusting the Accounts\n6\tCh.3  Adjusting the Accounts\n7\tCh.4  Completing the Accounting Cycle\n8\tCh.4  Completing the Accounting Cycle\n9\tMid-Term Exam(Ch 1~4)\n10\tCh.5  Accounting for Merchandising Operations.\n11\tCh.5  Accounting for Merchandising Operations.\n12\tCh.6  Inventories.\n13\tCh.6  Inventories.\n14\tCh.7  Fraud, Internal Control, and Cash\n15\tCh.7  Fraud, Internal Control, and Cash\n16\tproject presentation\n17\tproject presentation\n18\tFinal Exam (Ch 5~7)\nCourse Descripti

In [44]:
docs

[Document(metadata={'source': '/Users/boruchen/Documents/langchain-practice/ncku_hub_data.csv', 'row': 20}, page_content='課程名稱: A9-222 運動與健康\n收穫: 7\n甜度: 8\n涼度: 9'),
 Document(metadata={'source': '/Users/boruchen/Documents/langchain-practice/ncku_hub_data.csv', 'row': 4}, page_content='課程名稱: A9-319 科技與國防\n收穫: 5\n甜度: 8\n涼度: 9'),
 Document(metadata={'source': '/Users/boruchen/Documents/langchain-practice/ncku_hub_data.csv', 'row': 28}, page_content='課程名稱: A9-204 職能治療與健康\n收穫: 7\n甜度: 7\n涼度: 9'),
 Document(metadata={'source': '/Users/boruchen/Documents/langchain-practice/ncku_hub_data.csv', 'row': 69}, page_content='課程名稱: A9-060 英語演說\n收穫: 8\n甜度: 7\n涼度: 4')]

### Answer Grader

In [45]:
class GradeAnswer(BaseModel):
    """
    確認答案是否可回應問題
    """

    binary_score: str = Field(description="答案是否回應問題。('yes' or 'no')")

# Prompt Template
instruction = """
你是一個評分的人員，負責確認答案是否回應了問題。
輸出 'yes' or 'no'。 'Yes' 代表答案確實回應了問題， 'No' 則代表答案並未回應問題。
"""
# Prompt
answer_prompt = ChatPromptTemplate.from_messages(
    [
        ("system",instruction),
        ("human", "使用者問題: \n\n {question} \n\n 答案: {generation}"),
    ]
)

# LLM with function call
llm = ChatOpenAI(openai_api_key=OPENAI_API_KEY, model="gpt-3.5-turbo", temperature=0)
structured_llm_grader = llm.with_structured_output(GradeAnswer)

# 使用 LCEL 語法建立 chain
answer_grader = answer_prompt | structured_llm_grader

In [46]:
#測試 grader 功能
question = "今天天氣真好?"
docs = retriever.invoke(question)
generation = retrieval_chain_combine.invoke({"input": question, "chat_history": chat_history})
print(generation)
answer_grader.invoke({"question": question,"generation": generation})

{'input': '今天天氣真好?', 'chat_history': [HumanMessage(content='請列出資料結構這堂課的分數百分比'), AIMessage(content='資料結構這堂課的分數百分比如下：\n- 期中考 (Midterm Exam): 40%\n- 期末考 (Term Exam): 25%\n- 平時測驗 (Quizzes): 25%\n- 出席 (Participation): 10%'), HumanMessage(content='請列出會計系 : 初級會計學（一）的 grading'), AIMessage(content='會計系：初級會計學（一）的 grading如下：\n- 期中考 (Midterm Exam): 30%\n- 期末考 (Term Exam): 40%\n- 出席 (Participation): 10%\n- 作業 (Assignments): 20%'), HumanMessage(content='我問過哪些課程的資訊，幫我列出來'), AIMessage(content='您問過以下課程的資訊：\n1. 初級會計學（一）\n2. 會計與金融數據分析\n3. 初級會計學\n4. 高級會計學（二）'), HumanMessage(content='可以告訴我我上一次問的那堂課的出席分數佔比嗎'), AIMessage(content='您上一次問的是「會計系：初級會計學（一）」，該課程的出席分數佔比為10%。')], 'context': [Document(metadata={'source': '/Users/boruchen/Documents/langchain-practice/ncku_CoM_data.csv', 'row': 0}, page_content='Course Name: 初級會計學（一）\nGrading: 期中考 Midterm Exam\t30\n期末考 Term exam\t40\n出席 Participation\t10\n作業 Assignments\t20\nDepartment: 會計系Accountancy\nTeaching Strategies: 其他:略 others\t100\nSyllabus Progress: 1\tCourse 

GradeAnswer(binary_score='no')

In [47]:
generation

{'input': '今天天氣真好?',
 'chat_history': [HumanMessage(content='請列出資料結構這堂課的分數百分比'),
  AIMessage(content='資料結構這堂課的分數百分比如下：\n- 期中考 (Midterm Exam): 40%\n- 期末考 (Term Exam): 25%\n- 平時測驗 (Quizzes): 25%\n- 出席 (Participation): 10%'),
  HumanMessage(content='請列出會計系 : 初級會計學（一）的 grading'),
  AIMessage(content='會計系：初級會計學（一）的 grading如下：\n- 期中考 (Midterm Exam): 30%\n- 期末考 (Term Exam): 40%\n- 出席 (Participation): 10%\n- 作業 (Assignments): 20%'),
  HumanMessage(content='我問過哪些課程的資訊，幫我列出來'),
  AIMessage(content='您問過以下課程的資訊：\n1. 初級會計學（一）\n2. 會計與金融數據分析\n3. 初級會計學\n4. 高級會計學（二）'),
  HumanMessage(content='可以告訴我我上一次問的那堂課的出席分數佔比嗎'),
  AIMessage(content='您上一次問的是「會計系：初級會計學（一）」，該課程的出席分數佔比為10%。')],
 'context': [Document(metadata={'source': '/Users/boruchen/Documents/langchain-practice/ncku_CoM_data.csv', 'row': 0}, page_content='Course Name: 初級會計學（一）\nGrading: 期中考 Midterm Exam\t30\n期末考 Term exam\t40\n出席 Participation\t10\n作業 Assignments\t20\nDepartment: 會計系Accountancy\nTeaching Strategies: 其他:略 others\t100\nSyllabus Prog

### Memory

In [None]:
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_text_splitters import RecursiveCharacterTextSplitter
### Statefully manage chat history ###
store = {}


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    if session_id not in store:
        store[session_id] = ChatMessageHistory()
    return store[session_id]


conversational_rag_chain = RunnableWithMessageHistory(
    retrieval_chain_combine,
    get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)

#### testing

In [None]:
conversational_rag_chain.invoke(
    {"input": "資料結構的評分如何?"},
    config={
        "configurable": {"session_id": "abc123"}
    },  # constructs a key "abc123" in `store`.
)["answer"]

'資料結構的評分方式如下：\n- 期中考 Midterm Exam 40%\n- 期末考 Term exam 25%\n- 平時測驗 Quizzes 25%\n- 出席 Participation 10%'

In [None]:
conversational_rag_chain.invoke(
    {"input": "我剛剛是問哪一門課?"},
    config={
        "configurable": {"session_id": "abc123"}
    },  # constructs a key "abc123" in `store`.
)["answer"]

'您剛剛問的是「資料結構」這門課程。'

In [74]:
conversational_rag_chain.invoke(
    {"input": "我剛剛是問哪一門課?"},
    config={
        "configurable": {"session_id": "zxc123"}
    },  # constructs a key "abc123" in `store`.
)["answer"]

'您剛剛問的是「基本財務學」這門課。'

In [None]:
print(store)

{'abc123': InMemoryChatMessageHistory(messages=[HumanMessage(content='我問過哪些課程?'), AIMessage(content='您問過以下課程：\n1. 工業與資訊管理專題（二）\n2. 商事法\n3. 計算機概論\n4. 服務學習（一）'), HumanMessage(content='資料結構的評分如何?'), AIMessage(content='資料結構的評分方式如下：\n- 期中考 Midterm Exam 40%\n- 期末考 Term exam 25%\n- 平時測驗 Quizzes 25%\n- 出席 Participation 10%'), HumanMessage(content='我剛剛是問哪一門課?'), AIMessage(content='您剛剛問的是「資料結構」這門課程。')]), 'zxc123': InMemoryChatMessageHistory(messages=[HumanMessage(content='我剛剛是問哪一門課?'), AIMessage(content='您剛剛問的是「工業與資訊管理專題（二）」這門課。')])}


In [None]:
for user in store:
    print(user)

abc123
zxc123


## LangGraph

### Graph state

In [70]:
class GraphState(TypedDict):
    """
    State of graph.

    Attributes:
        question: question
        generation: LLM generation
        documents: list of documents
    """
    question : str
    generation : str
    documents : List[str]
    # chat_history: List[str]
    session_id : int

### Nodes / Conditional edges

In [65]:
def retrieve(state):
    """
    Retrieve documents related to the question.

    Args:
        state (dict):  The current state graph

    Returns:
        state (dict): New key added to state, documents, that contains list of related documents.
    """

    print("---RETRIEVE---")
    question = state["question"]
    session_id = state["session_id"]
    # chat_history = state.get("chat_history", [])


    # Retrieval
    user_chat_history = get_session_history(session_id)
    documents = retriever_chain.invoke({"input": question,
    "chat_history":user_chat_history})

    return {"documents":documents, "question":question, "chat_history": chat_history}

def web_search(state):
    """
    Web search based on the re-phrased question.

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): Updates documents key with appended web results
    """

    print("---WEB SEARCH---")
    question = state["question"]
    chat_history = state["chat_history"]
    # documents = state["documents"] if state["documents"] else []

    # Web search
    docs = web_search_tool.invoke({"query": question})
    web_results = [Document(page_content=d["content"]) for d in docs]

    documents = web_results

    return {"documents": documents, "question": question, "chat_history":chat_history}

def retrieval_grade(state):
    """
    filter retrieved documents based on question.

    Args:
        state (dict):  The current state graph

    Returns:
        state (dict): New key added to state, documents, that contains list of related documents.
    """

    # Grade documents
    print("---CHECK DOCUMENT RELEVANCE TO QUESTION---")

    documents = state["documents"]
    question = state["question"]
    chat_history = state["chat_history"]
    session_id = state["session_id"]

    # Score each doc
    filtered_docs = []
    for d in documents:
        score = retrieval_grader.invoke({"question": question, "document": d.page_content})
        grade = score.binary_score
        if grade == "yes":
            print("  -GRADE: DOCUMENT RELEVANT-")
            filtered_docs.append(d)
        else:
            print("  -GRADE: DOCUMENT NOT RELEVANT-")
            continue
    chat_history = state["chat_history"]
    return {"documents": filtered_docs, "question": question, "chat_history": chat_history}

def rag_generate(state):
    """
    Generate answer using  vectorstore / web search

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): New key added to state, generation, that contains LLM generation
    """

    print("---GENERATE IN RAG MODE---")
    question = state["question"]
    documents = state["documents"]
    chat_history = state["chat_history"]


    # RAG generation
    # generation = rag_invoke(question, chat_history)
    generation = retrieval_chain_combine.invoke({"input": question, "chat_history": chat_history})
    return {"documents": documents, "question": question, "generation": generation, "chat_history": chat_history}

# def plain_answer(state):
#     """
#     Generate answer using the LLM without vectorstore.

#     Args:
#         state (dict): The current graph state

#     Returns:
#         state (dict): New key added to state, generation, that contains LLM generation
#     """

#     print("---GENERATE PLAIN ANSWER---")
#     question = state["question"]
#     generation = llm_chain.invoke({"question": question})
#     return {"question": question, "generation": generation}


### Edges ###
def route_question(state):
    """
    Route question to web search or RAG.

    Args:
        state (dict): The current graph state

    Returns:
        str: Next node to call
    """

    print("---ROUTE QUESTION---")
    question = state["question"]
    source = question_router.invoke({"question": question})

    # Fallback to plain LLM or raise error if no decision
    if "tool_calls" not in source.additional_kwargs:
        print("  -ROUTE TO PLAIN LLM-")
        return "plain_answer"
    if len(source.additional_kwargs["tool_calls"]) == 0:
      raise "Router could not decide source"

    # Choose datasource
    datasource = source.additional_kwargs["tool_calls"][0]["function"]["name"]
    if datasource == 'web_search':
        print("  -ROUTE TO WEB SEARCH-")
        return "web_search"
    elif datasource == 'vectorstore':
        print("  -ROUTETO VECTORSTORE-")
        return "vectorstore"

def route_retrieval(state):
    """
    Determines whether to generate an answer, or use websearch.

    Args:
        state (dict): The current graph state

    Returns:
        str: Binary decision for next node to call
    """

    print("---ROUTE RETRIEVAL---")
    filtered_documents = state["documents"]

    if not filtered_documents:
        # All documents have been filtered check_relevance
        print("  -DECISION: ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, ROUTE TO WEB SEARCH-")
        return "web_search"
    else:
        # We have relevant documents, so generate answer
        print("  -DECISION: GENERATE WITH RAG LLM-")
        return "rag_generate"

def grade_rag_generation(state):
    """
    Determines whether the generation is grounded in the document and answers question.

    Args:
        state (dict): The current graph state

    Returns:
        str: Decision for next node to call
    """

    print("---CHECK HALLUCINATIONS---")
    question = state["question"]
    documents = state["documents"]
    generation = state["generation"]

    score = hallucination_grader.invoke({"documents": documents, "generation": generation})
    grade = score.binary_score

    # Check hallucination
    if grade == "no":
        print("  -DECISION: GENERATION IS GROUNDED IN DOCUMENTS-")
        # Check question-answering
        print("---GRADE GENERATION vs QUESTION---")
        score = answer_grader.invoke({"question": question,"generation": generation})
        grade = score.binary_score
        if grade == "yes":
            print("  -DECISION: GENERATION ADDRESSES QUESTION-")
            return "useful"
        else:
            print("  -DECISION: GENERATION DOES NOT ADDRESS QUESTION-")
            return "not useful"
    else:
        print("  -DECISION: GENERATION IS NOT GROUNDED IN DOCUMENTS, RE-TRY-")
        return "not supported"

In [50]:
workflow = StateGraph(GraphState)

# Define the nodes
workflow.add_node("web_search", web_search) # web search
workflow.add_node("retrieve", retrieve) # retrieve
workflow.add_node("retrieval_grade", retrieval_grade) # retrieval grade
workflow.add_node("rag_generate", rag_generate) # rag
# workflow.add_node("plain_answer", plain_answer) # llm

# Build graph
workflow.set_conditional_entry_point(
    route_question,
    {
        "web_search": "web_search",
        "vectorstore": "retrieve",
        # "plain_answer": "plain_answer",
    },
)
workflow.add_edge("retrieve", "retrieval_grade")
workflow.add_edge("web_search", "retrieval_grade")
workflow.add_conditional_edges(
    "retrieval_grade",
    route_retrieval,
    {
        "web_search": "web_search",
        "rag_generate": "rag_generate",
    },
)
workflow.add_conditional_edges(
    "rag_generate",
    grade_rag_generation,
    {
        "not supported": "rag_generate", # Hallucinations: re-generate
        "not useful": "web_search", # Fails to answer question: fall-back to web-search
        "useful": END,
    },
)
# workflow.add_edge("plain_answer", END)

# Compile
app = workflow.compile()

In [63]:
def run(question, session_id):
    inputs = {"question": question,
              "session_id": session_id}
    for output in app.stream(inputs):
        print("\n")

    # Final generation
    if 'rag_generate' in output.keys():
        print(output['rag_generate']['generation']['answer'])
    elif 'plain_answer' in output.keys():
        print(output['plain_answer']['generation'])

In [52]:
run("初級會計學的評分標準?")

---ROUTE QUESTION---
  -ROUTETO VECTORSTORE-
---RETRIEVE---


---CHECK DOCUMENT RELEVANCE TO QUESTION---
  -GRADE: DOCUMENT RELEVANT-
  -GRADE: DOCUMENT RELEVANT-
  -GRADE: DOCUMENT RELEVANT-
  -GRADE: DOCUMENT NOT RELEVANT-
---ROUTE RETRIEVAL---
  -DECISION: GENERATE WITH RAG LLM-


---GENERATE IN RAG MODE---
---CHECK HALLUCINATIONS---
  -DECISION: GENERATION IS GROUNDED IN DOCUMENTS-
---GRADE GENERATION vs QUESTION---
  -DECISION: GENERATION ADDRESSES QUESTION-


初級會計學的評分標準如下：
- 期中考 Midterm Exam 佔30%
- 期末考 Term exam 佔40%
- 出席 Participation 佔10%
- 作業 Assignments 佔20%


In [53]:
retriever_chain.invoke({"input":"工資系的作業研究課程概述", "chat_history":chat_history})

[Document(metadata={'source': '/Users/boruchen/Documents/langchain-practice/ncku_CoM_data.csv', 'row': 130}, page_content='Course Name: 作業研究\nGrading: \nDepartment: 工資系Industrial and Information Management\nTeaching Strategies: \nSyllabus Progress: \nCourse Description: ■ 同步遠距教學主播學校  \xa0\xa0\xa0\xa0課程之收播學校與系所  \xa0\xa0\xa0\xa0校內系所：工業與資訊管理學系'),
 Document(metadata={'source': '/Users/boruchen/Documents/langchain-practice/ncku_CoM_data.csv', 'row': 142}, page_content='Course Name: 實驗設計導論\nGrading: 出席 Participation\t5\n作業 Assignments\t25\n期中考 Midterm Exam\t30\n期末考 Term exam\t40\nDepartment: 工資系Industrial and Information Management\nTeaching Strategies: 講授 Lecture\t100\nSyllabus Progress: \nCourse Description: The knowledge of design of experiment is useful, if not essential, to anyone pursuing\r\ngraduate work in any of the engineering, management, physical science, or behavioral\r\nscience fields. Students who work on quality engineering and product (process) design\r\nshould also benefit

In [54]:
run("工資系的企業通訊網路評分方式？")

---ROUTE QUESTION---
  -ROUTETO VECTORSTORE-
---RETRIEVE---


---CHECK DOCUMENT RELEVANCE TO QUESTION---
  -GRADE: DOCUMENT RELEVANT-
  -GRADE: DOCUMENT RELEVANT-
  -GRADE: DOCUMENT NOT RELEVANT-
  -GRADE: DOCUMENT NOT RELEVANT-
---ROUTE RETRIEVAL---
  -DECISION: GENERATE WITH RAG LLM-


---GENERATE IN RAG MODE---
---CHECK HALLUCINATIONS---
  -DECISION: GENERATION IS GROUNDED IN DOCUMENTS-
---GRADE GENERATION vs QUESTION---
  -DECISION: GENERATION ADDRESSES QUESTION-


企業通訊網路這門課的評分方式如下：
- 期中考 Midterm Exam 50%
- 期末考 Term exam 25%
- 平時測驗 Quizzes 20%
- 其他 others: wiring 5%


In [55]:
run("工資系的線性代數評分方式？")

---ROUTE QUESTION---
  -ROUTETO VECTORSTORE-
---RETRIEVE---


---CHECK DOCUMENT RELEVANCE TO QUESTION---
  -GRADE: DOCUMENT NOT RELEVANT-
  -GRADE: DOCUMENT RELEVANT-
  -GRADE: DOCUMENT RELEVANT-
  -GRADE: DOCUMENT NOT RELEVANT-
---ROUTE RETRIEVAL---
  -DECISION: GENERATE WITH RAG LLM-


---GENERATE IN RAG MODE---
---CHECK HALLUCINATIONS---
  -DECISION: GENERATION IS GROUNDED IN DOCUMENTS-
---GRADE GENERATION vs QUESTION---
  -DECISION: GENERATION ADDRESSES QUESTION-


工資系的線性代數評分方式如下：
- 平時測驗（Quizzes）15%
- 期中考（Midterm Exam）25%
- 期末考（Term exam）25%
- 作業（Assignments）35%


In [56]:
run("行銷管理的課程大綱為何？")

---ROUTE QUESTION---
  -ROUTETO VECTORSTORE-
---RETRIEVE---


---CHECK DOCUMENT RELEVANCE TO QUESTION---
  -GRADE: DOCUMENT RELEVANT-
  -GRADE: DOCUMENT NOT RELEVANT-
  -GRADE: DOCUMENT NOT RELEVANT-
  -GRADE: DOCUMENT NOT RELEVANT-
---ROUTE RETRIEVAL---
  -DECISION: GENERATE WITH RAG LLM-


---GENERATE IN RAG MODE---
---CHECK HALLUCINATIONS---
  -DECISION: GENERATION IS GROUNDED IN DOCUMENTS-
---GRADE GENERATION vs QUESTION---
  -DECISION: GENERATION ADDRESSES QUESTION-


行銷管理課程的大綱如下：
1. Introduction
2. Ch1 - An overview of Marketing Management
3. Ch1 - An overview of Marketing Management
4. Ch2 - Company and Marketing Strategy
5. Ch2 - Company and Marketing Strategy
6. Ch3/4 - Analyzing the marketing environment and gain customer insights
7. Time reserved for team brainstorming
8. Ch5 - Understanding consumer and business buyer behavior
9. Presentation I
10. Ch6 - Creating value for target customers
11. Ch8 - Developing new products and managing the product life cycle (optional)
12. 

In [57]:
run('列出兩堂通識課甜度較高的課程')

---ROUTE QUESTION---
  -ROUTETO VECTORSTORE-
---RETRIEVE---


---CHECK DOCUMENT RELEVANCE TO QUESTION---
  -GRADE: DOCUMENT RELEVANT-
  -GRADE: DOCUMENT RELEVANT-
  -GRADE: DOCUMENT RELEVANT-
  -GRADE: DOCUMENT RELEVANT-
---ROUTE RETRIEVAL---
  -DECISION: GENERATE WITH RAG LLM-


---GENERATE IN RAG MODE---
---CHECK HALLUCINATIONS---
  -DECISION: GENERATION IS GROUNDED IN DOCUMENTS-
---GRADE GENERATION vs QUESTION---
  -DECISION: GENERATION ADDRESSES QUESTION-


以下是兩堂通識課甜度較高的課程：
1. 課程名稱: A9-404 通識教育:變動世界之素養與技能
   甜度: 9

2. 課程名稱: A9-237 傾聽與對話:冰山理論奇幻之旅
   甜度: 9
