<a href="https://colab.research.google.com/github/akapo/class-llm/blob/main/chapter3/llm_app.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 실습 환경 설정

In [None]:
!pip install -qU openai
!pip install -qU langchain langchain-openai langchain-community langchainhub langchain-experimental langgraph
!pip install -qU huggingface_hub langchain-google-genai langchain-anthropic
!pip install -Uq sentence-transformers
!pip install -qU python-dotenv
!pip install -qU pymupdf pypdf unstructured markdown
!pip install -qU numexpr
!pip install -qU faiss-cpu
!pip install -qU chromadb
!pip install -qU bs4
!pip install -qU google-search-results duckduckgo-search
!pip install -qU streamlit
!pip install -qU streamlit-chat

In [None]:
import os  # required below; the original cell used os.environ without importing os

import langchain

# Silence LangChain's global debug/verbose logging for the notebook session.
langchain.debug = False
langchain.verbose = False

# LangSmith monitoring.
os.environ["LANGCHAIN_PROJECT"] = "chain-monitor"
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
# SECURITY: never commit an API key to a notebook. Provide LANGCHAIN_API_KEY
# through the environment (.env file, Colab secrets, shell export). Any key that
# was previously published in this cell should be revoked.
# Tracing is only switched on when a key is actually available, so the
# notebook still runs without LangSmith credentials.
if os.environ.get("LANGCHAIN_API_KEY"):
    os.environ["LANGCHAIN_TRACING_V2"] = "true"

# **LLM활용 인공지능 앱 만들기**

**Web index 파일**  
- 저장위치: **home.py**

In [None]:
import streamlit as st

# Landing page of the multi-page Streamlit app (saved as home.py).
# set_page_config is issued before any other Streamlit call in this script.
st.set_page_config(
    page_title="Langchain Streamlit App Examples",
    page_icon='💬',
    layout='wide'
)

st.header("Chatbot Implementations with Langchain + Streamlit")
# Badge that links to the project's GitHub repository.
st.write("""
[![view source code ](https://img.shields.io/badge/GitHub%20Repository-gray?logo=github)](https://github.com/akapo/llm-app)
""")
# Introductory text followed by a bullet list describing each page of the app.
st.write("""
Langchain은 LLM(언어 모델)을 사용하여 애플리케이션 개발을 간소화하도록 설계된 강력한 프레임워크입니다. 다양한 구성 요소의 포괄적인 통합을 제공하여 강력한 응용 프로그램을 만들기 위해 조립 프로세스를 단순화합니다.

Langchain의 힘을 활용하면 챗봇 생성이 쉬워집니다. 다음은 다양한 사용 사례에 맞는 챗봇 구현의 몇 가지 예입니다.

- **💬translato**: 번역 서비스 앱(다양한 언어 지원).
- **online_chatbot**: 인터넷에 접속하여 정보를 검색하는 챗봇 구현.
- **💽memorye chatbot**: 컨텍스트를 유지하며 기억력을 가지는 챗봇 구현.
- **📄rag_chatbot**: 입력해준 문서를 기반으로 답변을 생성하는 챗봇 구현.
- **⭐coteacher**: 프로그래밍 인공지능 보조교사 구현.
- **🎓Knowlegebase**: 인공지능 보조교사에게 지식 주입.

각 챗봇의 샘플 사용법을 살펴보려면 해당 챗봇 섹션으로 이동하세요.""")

## **1) 번역 서비스 앱**

**번역 서비스 앱**  
- 저장위치: **pages/1_💬translator.py**

In [None]:
from dotenv import load_dotenv
load_dotenv()

import streamlit as st
from langchain_community.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

# Target languages offered in the sidebar.
langs = ["English", "Japanese", "Chinese",
         "Korean",  "Italian", "French", "Spanish",
         "Russian", "Vietnamese"]

st.set_page_config(page_title="언어 번역 서비스", page_icon="💬", layout='wide')
st.header('언어 번역 서비스')

# Radio buttons in the left sidebar select the output language.
with st.sidebar:
    language = st.radio('번역을 원하는 언어(출력)를 선택해주세요.:', langs)

# Text entered by the user in the text area (source language is auto-detected
# by the model).
prompt = st.text_area('번역을 원하는 텍스트를 입력하세요(언어 자동감지)')

# The target language is a template variable instead of being concatenated
# into the template text. The old concatenation had no separator, producing
# "... translate this text to KoreanPrint only the translation results."
trans_template = PromptTemplate(
    input_variables=['language', 'trans'],
    template='Your task is to translate this text to {language}. '
             'Print only the translation results.\nTEXT: {trans}'
)

llm = ChatOpenAI(model_name='gpt-4o-mini', temperature=0.0)

trans_chain = LLMChain(
    llm=llm, prompt=trans_template, verbose=True, output_key='translate')

# Translate only when the button is pressed and there is text to translate.
if st.button("번역") and prompt:
    # .invoke replaces the deprecated direct call `trans_chain({...})`.
    response = trans_chain.invoke({'language': language, 'trans': prompt})
    st.info(response['translate'])

## **2) 기본 챗봇**

In [None]:
# 챗봇 기본
from dotenv import load_dotenv
load_dotenv()

import streamlit as st
# Minimal chat UI: echoes the user's message and answers with a fixed string.
st.title("기본 챗봇 ")

prompt = st.chat_input("하고 싶은 말")

if prompt:  # true only for a non-empty submitted string (neither None nor "")
    with st.chat_message("user"): # render inside a "user" chat bubble
        st.markdown(prompt) # show the submitted text, interpreted as Markdown

    with st.chat_message("assistant"): # render inside an "assistant" chat bubble
        response = "대답함"   # placeholder: the reply is always this fixed string
        st.markdown(response) # show the reply, interpreted as Markdown

# 문제점: 이전 대화 내용 사라짐. 항상 응답이 "대답함"임

In [None]:
# 대화내용 보존 업그레이드
from dotenv import load_dotenv
load_dotenv()

import streamlit as st
# StreamlitChatMessageHistory 추가
from langchain_community.chat_message_histories import StreamlitChatMessageHistory

# Chat UI that keeps the conversation across Streamlit reruns.
st.title("대화내용 보존 챗봇")

# chat history
history = StreamlitChatMessageHistory() # create the message history object

# Replay every message recorded so far so the transcript survives the rerun.
for message in history.messages:
    st.chat_message(message.type).write(message.content) # one bubble per recorded message

query = st.chat_input("하고 싶은 말")

if query:
    with st.chat_message("user"):
        history.add_user_message(query) # record what the user said
        st.markdown(query)

    with st.chat_message("assistant"):
        response = "대답함"  # placeholder: the reply is still a fixed string
        history.add_ai_message(response) # record what the assistant said
        st.markdown(response)

# 문제점: 항상 응답이 "대답함"임

In [None]:
# Chat Model 추가하여 AI응답 만들어냄
from dotenv import load_dotenv
load_dotenv()

import streamlit as st
from langchain_community.chat_message_histories import StreamlitChatMessageHistory
# 추가
from langchain_openai import ChatOpenAI
from langchain.schema import HumanMessage, AIMessage

# Chat UI whose replies come from a chat model instead of a fixed string.
st.title("AI응답 챗봇")

# Create the chat model.
llm = ChatOpenAI(model_name ='gpt-4o-mini', temperature=0.5)

# chat history
history = StreamlitChatMessageHistory()

# Replay the recorded transcript.
for message in history.messages:
    st.chat_message(message.type).write(message.content)

query = st.chat_input("하고 싶은 말")

if query:
    with st.chat_message("user"):
        history.add_user_message(query)
        st.markdown(query)

    with st.chat_message("assistant"):
        # Only the current query is sent to the model; earlier turns are not
        # included, so the model has no memory of the conversation here.
        messages = [HumanMessage(content=query)]
        response = llm.invoke(messages) # obtain the model's reply for that single message
        history.add_ai_message(response)
        st.markdown(response.content)   # the reply text lives in response.content

# 문제점: '2024년 국민의힘 대표 선거 결과를 알려줘' -> '죄송하지만, 2023년 10월까지의 정보만 가지고 있으며 ...'

## **3) 고급 챗봇**

**외부 정보 검색 기능이 추가된 온라인 챗봇**  
- 저장위치: **pages/2_🌐online_chatbot.py**

In [None]:
# 외부 정보 검색 기능 추가
# 원달러 환율을 알려줄 수 있음
from dotenv import load_dotenv
load_dotenv()

import streamlit as st
from langchain_community.chat_message_histories import StreamlitChatMessageHistory
from langchain_openai import ChatOpenAI
from langchain.schema import HumanMessage, AIMessage
# 추가
from langchain import hub
from langchain_community.callbacks import StreamlitCallbackHandler
from langchain.agents import AgentExecutor, create_openai_tools_agent, load_tools

# 외부 검색 가능한 도구를 추가한 AgentExcutor 생성
def create_agent_chain():
    """Build an AgentExecutor whose agent can call web-search tools.

    Returns:
        An AgentExecutor wrapping an OpenAI tools agent that is equipped with
        the "ddg-search" and "wikipedia" tools.
    """
    chat_model = ChatOpenAI(model_name='gpt-4o', temperature=0.5)

    # Tool set and the tools-agent prompt pulled from the LangChain hub.
    search_tools = load_tools(["ddg-search", "wikipedia"])
    agent_prompt = hub.pull("hwchase17/openai-tools-agent")

    tools_agent = create_openai_tools_agent(chat_model, search_tools, agent_prompt)
    executor = AgentExecutor(agent=tools_agent, tools=search_tools)
    return executor

# Page script: chat UI backed by the tool-using agent from create_agent_chain().
st.set_page_config(page_title="온라인 챗봇", page_icon="🌐", layout='wide')
st.header('온라인 챗봇')

history = StreamlitChatMessageHistory()

# Replay the recorded transcript.
for message in history.messages:
    st.chat_message(message.type).write(message.content)

query = st.chat_input("하고 싶은 말")

if query:
    with st.chat_message("user"):
        history.add_user_message(query)
        st.markdown(query)

    with st.chat_message("assistant"):
        # The callback handler is bound to a fresh container inside the
        # assistant bubble and passed to the agent through the run config.
        # NOTE(review): presumably it renders the agent's intermediate steps
        # into that container while the agent runs - confirm.
        callback = StreamlitCallbackHandler(st.container())
        agent_chain = create_agent_chain()
        response = agent_chain.invoke(
            {"input": query},
            {"callbacks": [callback]},
        )
        # Removed from the previous step: direct llm.invoke on a single HumanMessage.
        # The agent now produces the reply instead.
        history.add_ai_message(response["output"])
        st.markdown(response["output"])  # the agent's answer is under the "output" key

# 문제점: 기억이 없음. 내 이름을 알려줘도 모름. 1 to 50 게임도 못함.

**기억력있는 온라인 챗봇**  
- 저장위치: **pages/3_💽memorye chatbot.py**

In [None]:
# ConversationBufferMemory로 기억력 추가
# 1 to 50 게임 가능
from dotenv import load_dotenv
load_dotenv()

import streamlit as st
from langchain_community.chat_message_histories import StreamlitChatMessageHistory
from langchain_openai import ChatOpenAI
from langchain.schema import HumanMessage, AIMessage
from langchain import hub
from langchain_community.callbacks import StreamlitCallbackHandler
from langchain_community.agent_toolkits.load_tools import load_tools
from langchain.agents import AgentExecutor, create_openai_tools_agent
# 추가
from langchain.memory import ConversationBufferMemory

# 외부 검색 가능한 도구를 추가한 AgentExcutor 생성
def create_agent_chain(history):
    """Build a tool-using AgentExecutor that remembers the conversation.

    Args:
        history: Chat message history used as the backing store of the
            agent's ConversationBufferMemory.

    Returns:
        An AgentExecutor with the "ddg-search" and "wikipedia" tools and a
        memory exposed to the prompt under the "chat_history" key.
    """
    chat_model = ChatOpenAI(model_name='gpt-4o', temperature=0.5)

    search_tools = load_tools(["ddg-search", "wikipedia"])
    agent_prompt = hub.pull("hwchase17/openai-tools-agent")
    tools_agent = create_openai_tools_agent(chat_model, search_tools, agent_prompt)

    # The memory reads from and writes to the supplied history object.
    conversation_memory = ConversationBufferMemory(
        chat_memory=history,
        memory_key="chat_history",
        return_messages=True,
    )

    return AgentExecutor(
        agent=tools_agent,
        tools=search_tools,
        memory=conversation_memory,
    )

# Page script: chat UI backed by the memory-enabled agent.
st.set_page_config(page_title="기억력 챗봇", page_icon="💽", layout='wide')
st.header('기억력 있는 온라인 챗봇')

history = StreamlitChatMessageHistory()

# Replay the recorded transcript.
for message in history.messages:
    st.chat_message(message.type).write(message.content)

query = st.chat_input("하고 싶은 말")

if query:
    with st.chat_message("user"):
        # No explicit history.add_user_message(query) here: the history object
        # is handed to the agent's memory in create_agent_chain(history).
        # NOTE(review): presumably the memory records both turns itself - confirm.
        st.markdown(query)

    with st.chat_message("assistant"):
        callback = StreamlitCallbackHandler(st.container())
        agent_chain = create_agent_chain(history) # pass the history so the agent can use it as memory
        response = agent_chain.invoke(
            {"input": query},
            {"callbacks": [callback]},
        )
        # No explicit history.add_ai_message(...) either, for the same reason.
        st.markdown(response["output"])  # the agent's answer is under the "output" key

""" Test 항목
1. 이름 기억 확인
2. 인터넷을 통해 알 수 있는 최신 사건 질의
   1) (아주 최근 있었던 일) 2024년 국민의힘 정당 대표는 누구야?
   2) 지금 이 시각 청주 날씨는?
   3) BTS 멤버의 나이는?
3. 수학 추론 질의
   1) 20년 후에 두 배로 돌려주는 예금 상품이 출시되었데.. 연 복리 몇% 상품인거야?
   2) 10년만에 두배가 되려면 이율이 몇 % 여야 해?
   4) 연복리 6%인 예금으로 10년 만에 1억을 만들려면 1년에 얼마씩 저금해야 할까?
   5) 매년 이자의 15.4%를 이자소득세로 내야해. 다시 계산해줘
   6) 확실해? 연도별 원금, 이자, 세금, 누적합계를 보여주는 표를 만들어줘.
4. 기타 질문
   1) 오늘이 몇 일이야?
"""

**ReAct 버전** - PythonREPL 버그 있음

In [None]:
# ConversationBufferMemory로 기억력 추가
# 1 to 50 게임 가능
from dotenv import load_dotenv
load_dotenv()

import streamlit as st
from langchain_community.chat_message_histories import StreamlitChatMessageHistory
from langchain_openai import ChatOpenAI
from langchain.schema import HumanMessage, AIMessage
from langchain import hub
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_experimental.utilities import PythonREPL
from langchain_community.callbacks import StreamlitCallbackHandler
from langchain.agents import Tool, load_tools, create_react_agent, AgentExecutor
# 추가
from langchain.memory import ConversationBufferMemory

# 외부 검색 가능한 도구를 추가한 AgentExcutor 생성
def create_agent_chain(history):
    """Build a ReAct-style AgentExecutor with web search, a Python REPL and memory.

    Args:
        history: Chat message history used as the backing store of the
            agent's ConversationBufferMemory.

    Returns:
        An AgentExecutor configured with the Tavily search tool, a Python REPL
        tool, parsing-error recovery and conversation memory.
    """
    llm = ChatOpenAI(model_name="gpt-4o", temperature=0)

    # `max_results` is the documented option for the number of search hits;
    # the former `k=5` is not a field of TavilySearchResults.
    tavily_tool = TavilySearchResults(max_results=5)

    # SECURITY: this tool executes model-generated Python on the host running
    # the app. Only use it in a sandboxed / trusted environment.
    python_repl = PythonREPL()
    python_repl_tool = Tool(
        name="python_repl",
        description="A Python shell. Use this to execute python commands. \
        Input should be a valid python command. \
        If you want to see the output of a value, you should print it out with `print(...)`.",
        func=python_repl.run,
    )

    tools = [tavily_tool, python_repl_tool]

    prompt = hub.pull("hwchase17/react-chat")

    # Conversation memory backed by the supplied history.
    # NOTE(review): the react-chat prompt appears to be a plain-text template;
    # with return_messages=True the history may be interpolated as a list of
    # message objects - confirm whether return_messages=False reads better.
    memory = ConversationBufferMemory(
        chat_memory=history, memory_key="chat_history", return_messages=True)

    agent = create_react_agent(llm, tools, prompt)
    return AgentExecutor(agent=agent, tools=tools, handle_parsing_errors=True,
                memory=memory)

# Page script: chat UI backed by the ReAct agent with memory.
st.set_page_config(page_title="메모리 챗봇", page_icon="💽", layout='wide')
st.header('기억력 있는 온라인 챗봇')

# chat history
# session_state.app_name remembers which page last ran. When a different page
# name is found there, the chat history is cleared so this page starts empty.
if('app_name' not in st.session_state):
    st.session_state.app_name = 'memory_chatbot'
elif(st.session_state.app_name != 'memory_chatbot'):
    st.session_state.app_name = 'memory_chatbot'
    StreamlitChatMessageHistory().clear();

history = StreamlitChatMessageHistory()

# Replay the recorded transcript.
for message in history.messages:
    st.chat_message(message.type).write(message.content)

query = st.chat_input("하고 싶은 말")

if query:
    with st.chat_message("user"):
        # No explicit history.add_user_message(query): the history object is
        # handed to the agent's memory in create_agent_chain(history).
        st.markdown(query)

    with st.chat_message("assistant"):
        callback = StreamlitCallbackHandler(st.container())
        agent_chain = create_agent_chain(history) # pass the history so the agent can use it as memory
        response = agent_chain.invoke(
            {"input": query},
            {"callbacks": [callback]},
        )
        # No explicit history.add_ai_message(...) either, for the same reason.
        st.markdown(response["output"])  # the agent's answer is under the "output" key

## **4) RAG 챗봇**

**RAG Q&A 앱**
- 저장위치: **4_📄rag_chatbot1.py**

In [None]:
# RAG Q&A 앱 (RetrievalQA로 구현, 기억력 없음)
from dotenv import load_dotenv
load_dotenv()

import os
import streamlit as st
from langchain_community.chat_message_histories import StreamlitChatMessageHistory
from langchain_openai import ChatOpenAI
from langchain.schema import HumanMessage, AIMessage
#추가
from langchain_community.callbacks import StreamlitCallbackHandler
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import (
    TextLoader, JSONLoader, UnstructuredMarkdownLoader, PyMuPDFLoader)
from langchain.chains import RetrievalQA
from langchain_openai import OpenAIEmbeddings
import faiss
from langchain.vectorstores import FAISS
from langchain.docstore import InMemoryDocstore

st.title("RAG Q&A 앱")

embedding_model = OpenAIEmbeddings()
# Empty in-memory FAISS vector store, rebuilt every time this script runs.
# NOTE(review): 1536 presumably matches the vector size of the default
# OpenAIEmbeddings model - confirm if the embedding model is changed.
vs = FAISS(
    embedding_function=embedding_model,
    index=faiss.IndexFlatL2(1536),
    docstore=InMemoryDocstore(), index_to_docstore_id={})

def vs_add_file(file_path):
    """Load one file, split it into chunks and add them to the vector store.

    Args:
        file_path: Path of a ``.txt``, ``.md``, ``.pdf`` or ``.json`` file.

    Raises:
        ValueError: If the file extension is not one of the supported types.
    """
    # Match the extension case-insensitively so "NOTES.TXT" or "Doc.PDF" work.
    lowered = file_path.lower()
    if lowered.endswith('.txt'):
        loader = TextLoader(file_path)
    elif lowered.endswith('.md'):
        loader = UnstructuredMarkdownLoader(file_path)
    elif lowered.endswith('.pdf'):
        loader = PyMuPDFLoader(file_path)
    elif lowered.endswith('.json'):
        # JSONLoader requires a jq schema; '.' selects the whole document and
        # text_content=False allows non-string JSON values to be serialized.
        loader = JSONLoader(file_path, jq_schema='.', text_content=False)
    else:
        # Previously an unknown extension fell through and failed later with
        # an UnboundLocalError on raw_doc.
        raise ValueError(f"Unsupported file type: {file_path}")
    raw_doc = loader.load()

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000, chunk_overlap=200)
    docs = text_splitter.split_documents(raw_doc)

    # Skip the store call when the file produced no chunks.
    if docs:
        vs.add_documents(docs)

def save_file(file):
    """Write an uploaded file into the local ``tmp`` folder.

    Args:
        file: Object exposing ``name`` (str) and ``getvalue()`` (bytes), such
            as the items returned by ``st.file_uploader``.

    Returns:
        The relative path ``./tmp/<file name>`` the bytes were written to.
    """
    import os  # local import keeps the function self-contained

    folder = 'tmp'
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(folder, exist_ok=True)

    # Keep only the final path component so a crafted upload name such as
    # "../../app.py" cannot write outside the tmp folder.
    safe_name = os.path.basename(file.name)
    file_path = f'./{folder}/{safe_name}'
    with open(file_path, 'wb') as f:
        f.write(file.getvalue())
    return file_path

# Save every uploaded file to disk and index it in the vector store.
# This loop runs on each execution of the script, so the files currently held
# by the uploader are saved and added again every time.
uploaded_files = st.file_uploader("Choose a data file", accept_multiple_files=True)
for file in uploaded_files:
    file_path = save_file(file)
    vs_add_file(file_path)

# retriever
def create_retriever():
    """Assemble the RetrievalQA chain used to answer questions.

    Returns:
        A "stuff"-type RetrievalQA chain over the module-level vector store
        ``vs`` (MMR search, k=8, fetch_k=12) that also returns the source
        documents it used.
    """
    chat_model = ChatOpenAI(model_name='gpt-4o-mini', temperature=0.5)

    mmr_options = {'k': 8, 'fetch_k': 12}
    doc_retriever = vs.as_retriever(search_type='mmr', search_kwargs=mmr_options)

    qa = RetrievalQA.from_chain_type(
        llm=chat_model,
        chain_type="stuff",
        retriever=doc_retriever,
        return_source_documents=True,
    )
    return qa

# 쿼리 및 응답 처리
# Read a question, answer it with the RetrievalQA chain and list the sources.
query = st.chat_input("하고 싶은 말")

if query:
    with st.chat_message("user"):
        st.markdown(query)

    with st.chat_message("assistant"):
        callback = StreamlitCallbackHandler(st.container())

        # Despite its name, `retriever` holds the whole QA chain returned by
        # create_retriever(), not a bare document retriever.
        retriever = create_retriever()
        response = retriever.invoke(
            {"query": query},
            {"callbacks": [callback]},
        )
        st.markdown(response["result"])

        # Show each source document in its own popover, numbered from 1 and
        # titled with the base name of the file it came from.
        for idx, doc in enumerate(response['source_documents'],1):
            filename = os.path.basename(doc.metadata['source'])
            ref_title = f":blue[Reference {idx}: *{filename}*]"
            with st.popover(ref_title):
                st.caption(doc.page_content)

"""
1. 윤초시와 소녀는 어떤 관계인가?
2. 소설의 끝에서 소녀는 어떠한 결말을 맞이해? 근거를 들어서 설명해줘."""

**RAG 챗봇 (ConversationalRetrievalChain 구현)** - Chroma로 구현

In [None]:
# 실습 PASS
# RAG 챗봇 (ConversationalRetrievalChain 구현)
# ConversationalRetrievalChain을 사용함에도 불구하고 기억력 유지가 안됨
# 이유는 알지만 쉽게 해결하는 방법을 못찾겠음
from dotenv import load_dotenv
load_dotenv()

import os
import streamlit as st
from langchain_community.chat_message_histories import StreamlitChatMessageHistory
from langchain_openai import ChatOpenAI
from langchain.schema import HumanMessage, AIMessage
from langchain import hub
from langchain_community.callbacks import StreamlitCallbackHandler
from langchain_community.agent_toolkits.load_tools import load_tools
from langchain.agents import AgentExecutor, create_openai_tools_agent
from langchain.memory import ConversationBufferMemory
#추가
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import (
    TextLoader, JSONLoader, UnstructuredMarkdownLoader, PyMuPDFLoader)
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain.chains import ConversationalRetrievalChain

st.title("RAG 챗봇")

# Vector store setup: a Chroma collection named "langchain_store" that is
# configured with the persist directory "chroma-db/".
db_dir = "chroma-db/"
embedding_model = OpenAIEmbeddings()

vs = Chroma("langchain_store", embedding_model, persist_directory = db_dir)

def vs_add_file(file_path):
    """Load one file, split it into chunks and add them to the vector store.

    Args:
        file_path: Path of a ``.txt``, ``.md``, ``.pdf`` or ``.json`` file.

    Raises:
        ValueError: If the file extension is not one of the supported types.
    """
    # Match the extension case-insensitively so "NOTES.TXT" or "Doc.PDF" work.
    lowered = file_path.lower()
    if lowered.endswith('.txt'):
        loader = TextLoader(file_path)
    elif lowered.endswith('.md'):
        loader = UnstructuredMarkdownLoader(file_path)
    elif lowered.endswith('.pdf'):
        loader = PyMuPDFLoader(file_path)
    elif lowered.endswith('.json'):
        # JSONLoader requires a jq schema; '.' selects the whole document and
        # text_content=False allows non-string JSON values to be serialized.
        loader = JSONLoader(file_path, jq_schema='.', text_content=False)
    else:
        # Previously an unknown extension fell through and failed later with
        # an UnboundLocalError on raw_doc.
        raise ValueError(f"Unsupported file type: {file_path}")
    raw_doc = loader.load()

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000, chunk_overlap=200)
    docs = text_splitter.split_documents(raw_doc)

    # Skip the store call when the file produced no chunks.
    if docs:
        vs.add_documents(docs)

def save_file(file):
    """Write an uploaded file into the local ``tmp`` folder.

    Args:
        file: Object exposing ``name`` (str) and ``getvalue()`` (bytes), such
            as the items returned by ``st.file_uploader``.

    Returns:
        The relative path ``./tmp/<file name>`` the bytes were written to.
    """
    import os  # local import keeps the function self-contained

    folder = 'tmp'
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(folder, exist_ok=True)

    # Keep only the final path component so a crafted upload name such as
    # "../../app.py" cannot write outside the tmp folder.
    safe_name = os.path.basename(file.name)
    file_path = f'./{folder}/{safe_name}'
    with open(file_path, 'wb') as f:
        f.write(file.getvalue())
    return file_path

# Save every uploaded file to disk and index it in the vector store.
# This loop runs on each execution of the script, so the files currently held
# by the uploader are saved and added again every time.
uploaded_files = st.file_uploader("Choose a data file", accept_multiple_files=True)
for file in uploaded_files:
    file_path = save_file(file)
    vs_add_file(file_path)

# Chat history: replay the recorded transcript.
history = StreamlitChatMessageHistory()

for message in history.messages:
    st.chat_message(message.type).write(message.content)

# chains
def create_qa_chain(history):
    """Build a ConversationalRetrievalChain backed by the given chat history.

    Args:
        history: Chat message history used as the backing store of the
            chain's ConversationBufferMemory.

    Returns:
        A verbose ConversationalRetrievalChain over the module-level vector
        store ``vs`` (MMR search, k=2, fetch_k=4) that also returns the source
        documents it used.
    """
    chat_model = ChatOpenAI(model_name='gpt-4o-mini', temperature=0.5)

    # input_key / output_key tell the memory which fields of the chain's
    # inputs and outputs to record.
    conversation_memory = ConversationBufferMemory(
        chat_memory=history,
        memory_key="chat_history",
        input_key='question',
        output_key='answer',
        return_messages=True,
    )

    mmr_options = {'k': 2, 'fetch_k': 4}
    doc_retriever = vs.as_retriever(search_type='mmr', search_kwargs=mmr_options)

    return ConversationalRetrievalChain.from_llm(
        llm=chat_model,
        retriever=doc_retriever,
        memory=conversation_memory,
        return_source_documents=True,
        verbose=True,
    )

#
# Read a question, answer it with the conversational chain and list the sources.
query = st.chat_input("하고 싶은 말")

if query:
    with st.chat_message("user"):
        st.markdown(query)

    with st.chat_message("assistant"):
        callback = StreamlitCallbackHandler(st.container())

        qa_chain = create_qa_chain(history)
        response = qa_chain.invoke(
            {"question": query},
            {"callbacks": [callback]},
        )
        st.markdown(response["answer"])

        # Show each source document in its own popover, numbered from 1 and
        # titled with the base name of the file it came from.
        for idx, doc in enumerate(response['source_documents'],1):
            print(doc)  # NOTE(review): looks like leftover debug output to the server console
            filename = os.path.basename(doc.metadata['source'])
            ref_title = f":blue[Reference {idx}: *{filename}*]"
            with st.popover(ref_title):
                st.caption(doc.page_content)

**RAG 챗봇 (create_history_aware_retriever 활용)**  - FAISS로 구현
- 저장위치: **4_📄rag_chatbot2.py**

In [None]:
# RAG 챗봇 (create_history_aware_retriever 구현)
from dotenv import load_dotenv
load_dotenv()

import os
import streamlit as st
from langchain_community.chat_message_histories import StreamlitChatMessageHistory
from langchain_openai import ChatOpenAI
from langchain.schema import HumanMessage, AIMessage
#추가
from langchain_community.callbacks import StreamlitCallbackHandler
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import (
    TextLoader, JSONLoader, UnstructuredMarkdownLoader, PyMuPDFLoader)
from langchain_core.output_parsers import StrOutputParser
from langchain_core.outputs import LLMResult
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.chains import create_history_aware_retriever
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import OpenAIEmbeddings
import faiss
from langchain.vectorstores import FAISS
from langchain.docstore import InMemoryDocstore
st.set_page_config(page_title="RAG 챗봇", page_icon="📄", layout='wide')
st.header('RAG 챗봇')

embedding_model = OpenAIEmbeddings()

# Empty in-memory FAISS vector store, rebuilt every time this script runs.
# NOTE(review): 1536 presumably matches the vector size of the default
# OpenAIEmbeddings model - confirm if the embedding model is changed.
vs = FAISS(
    embedding_function=embedding_model,
    index=faiss.IndexFlatL2(1536),
    docstore=InMemoryDocstore(), index_to_docstore_id={})

def vs_add_file(file_path):
    """Load one file, split it into chunks and add them to the vector store.

    Args:
        file_path: Path of a ``.txt``, ``.md``, ``.pdf`` or ``.json`` file.

    Raises:
        ValueError: If the file extension is not one of the supported types.
    """
    # Match the extension case-insensitively so "NOTES.TXT" or "Doc.PDF" work.
    lowered = file_path.lower()
    if lowered.endswith('.txt'):
        loader = TextLoader(file_path)
    elif lowered.endswith('.md'):
        loader = UnstructuredMarkdownLoader(file_path)
    elif lowered.endswith('.pdf'):
        loader = PyMuPDFLoader(file_path)
    elif lowered.endswith('.json'):
        # JSONLoader requires a jq schema; '.' selects the whole document and
        # text_content=False allows non-string JSON values to be serialized.
        loader = JSONLoader(file_path, jq_schema='.', text_content=False)
    else:
        # Previously an unknown extension fell through and failed later with
        # an UnboundLocalError on raw_doc.
        raise ValueError(f"Unsupported file type: {file_path}")
    raw_doc = loader.load()

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500, chunk_overlap=100)
    docs = text_splitter.split_documents(raw_doc)

    # Skip the store call when the file produced no chunks.
    if docs:
        vs.add_documents(docs)

def save_file(file):
    """Write an uploaded file into the local ``tmp`` folder.

    Args:
        file: Object exposing ``name`` (str) and ``getvalue()`` (bytes), such
            as the items returned by ``st.file_uploader``.

    Returns:
        The relative path ``./tmp/<file name>`` the bytes were written to.
    """
    import os  # local import keeps the function self-contained

    folder = 'tmp'
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(folder, exist_ok=True)

    # Keep only the final path component so a crafted upload name such as
    # "../../app.py" cannot write outside the tmp folder.
    safe_name = os.path.basename(file.name)
    file_path = f'./{folder}/{safe_name}'
    with open(file_path, 'wb') as f:
        f.write(file.getvalue())
    return file_path

# Save every uploaded file to disk and index it in the vector store.
# This loop runs on each execution of the script, so the files currently held
# by the uploader are saved and added again every time.
uploaded_files = st.file_uploader("Choose a data file", accept_multiple_files=True)
for file in uploaded_files:
    file_path = save_file(file)
    vs_add_file(file_path)

# chat history
# session_state.app_name remembers which page last ran. When a different page
# name is found there, the chat history is cleared so this page starts empty.
if('app_name' not in st.session_state):
    st.session_state.app_name = 'rag_chatbot'
elif(st.session_state.app_name != 'rag_chatbot'):
    st.session_state.app_name = 'rag_chatbot'
    StreamlitChatMessageHistory().clear();

history = StreamlitChatMessageHistory()

# Replay the recorded transcript.
for message in history.messages:
    st.chat_message(message.type).write(message.content)

# create_retriever_chain
def format_docs(docs):
    """Concatenate the ``page_content`` of each document, separated by blank lines."""
    contents = [document.page_content for document in docs]
    return "\n\n".join(contents)

def create_retriever_chain(history):
    """Build the history-aware retrieval + answer chain.

    The returned runnable expects a dict with "input" and "chat_history". It
    first lets an LLM turn the conversation into a search query, retrieves
    matching documents from the module-level vector store ``vs``, formats them
    into "context", and then answers with a streaming chat model.

    Args:
        history: Accepted for interface compatibility; not used inside.

    Returns:
        A runnable producing the answer as a plain string.
    """
    # Step 1: prompt + model that rewrite the conversation as a search query.
    query_prompt = ChatPromptTemplate.from_messages(
        [
            MessagesPlaceholder(variable_name="chat_history"),
            ("user", "{input}"),
            ("user", "위의 대화에서, 대화와 관련된 정보를 찾기 위한 검색 쿼리를 생성해 주세요."),
        ]
    )
    query_llm = ChatOpenAI(model_name='gpt-4o-mini', temperature=0.5)

    doc_retriever = vs.as_retriever(
        search_type='mmr',
        search_kwargs={'k': 8, 'fetch_k': 12},
    )

    history_aware = create_history_aware_retriever(
        query_llm, doc_retriever, query_prompt
    )

    # Step 2: prompt + streaming model that answer from the retrieved context.
    answer_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", "아래의 문맥만을 고려하여 질문에 답하세요.\n\n{context}"),
            MessagesPlaceholder(variable_name="chat_history"),
            ("user", "{input}"),
        ]
    )

    # The callback handler is attached to the answering model itself.
    stream_handler = StreamlitCallbackHandler(st.container())

    answer_llm = ChatOpenAI(
        model_name='gpt-4o',
        temperature=0.5,
        streaming=True,
        callbacks=[stream_handler],
    )

    answer_chain = answer_prompt | answer_llm | StrOutputParser()

    # Join both steps: fill "context" from retrieval, then answer.
    with_context = RunnablePassthrough.assign(context=history_aware | format_docs)
    return with_context | answer_chain

# 쿼리 및 응답
query = st.chat_input("하고 싶은 말")

if query:
    # Snapshot the transcript BEFORE recording the new turn. The prompts
    # already append the current question via "{input}", so passing a history
    # that also contains it would send the question to the model twice.
    prior_messages = list(history.messages)

    with st.chat_message("user"):
        history.add_user_message(query)
        st.markdown(query)

    with st.chat_message("assistant"):
        # The Streamlit callback handler is created inside
        # create_retriever_chain and attached to the answering model there.
        retriever = create_retriever_chain(history)
        response = retriever.invoke(
            {"input": query, "chat_history": prior_messages},
        )
        history.add_ai_message(response)
        st.markdown(response)

"""
1. 한국사.pdf
  (출력 토큰 수 제한으로 한꺼번에 다 물어보면 안됨)
  1,2,3,4,5번 문제를 예쁘게 출력하고 정답과 그 이유를 알려줘
2. [만숑쌤]한국사 막판 키워드 정리(배포용).pdf
   1) 성종이 한일은?
   2) '아니 조선시대' or '아니 고려시대'
"""

**LangGraph 이용 버전**

In [None]:
# RAG 챗봇 (LangGraph)
# 소스코드를 더 다듬어야 함.
from dotenv import load_dotenv
load_dotenv()

import bs4
from langchain.agents import AgentExecutor, create_tool_calling_agent
from langchain.tools.retriever import create_retriever_tool
from langchain_community.vectorstores import Chroma
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain.schema import HumanMessage, AIMessage
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langgraph.checkpoint.memory import MemorySaver
from langgraph.prebuilt import create_react_agent

# In-memory checkpointer for the agent's conversation state.
# MemorySaver replaces SqliteSaver.from_conn_string(":memory:"): in current
# langgraph releases the SQLite saver lives in a separate package and
# from_conn_string returns a context manager, so it can no longer be passed
# directly as `checkpointer`.
memory = MemorySaver()
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

### Construct retriever ###
# Load the blog post, keeping only the elements with the listed CSS classes.
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
docs = loader.load()

# Split into overlapping chunks and index them in a Chroma vector store.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)
vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())
retriever = vectorstore.as_retriever()


### Build retriever tool ###
tool = create_retriever_tool(
    retriever,
    "blog_post_retriever",
    "Searches and returns excerpts from the Autonomous Agents blog post.",
)
tools = [tool]

agent_executor = create_react_agent(llm, tools, checkpointer=memory)

# thread_id selects which checkpointed conversation this call belongs to.
config = {"configurable": {"thread_id": "abc123"}}
response = agent_executor.invoke(
    {"messages": [HumanMessage(content="Autonomous Agents에서 Self-Reflection이 뭐야? 한글로 설명해줘.")]}, config
)
print(response["messages"])

[HumanMessage(content='Autonomous Agents에서 Self-Reflection이 뭐야? 한글로 설명해줘.', id='a841438c-f1ba-4001-aeb8-1cfe5932b4e5'), AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_8RqXPkLd1xvi7phqrMmTcu6L', 'function': {'arguments': '{"query":"Self-Reflection"}', 'name': 'blog_post_retriever'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 19, 'prompt_tokens': 78, 'total_tokens': 97}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_661538dc1f', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-f40106e8-a76a-4f8d-9c50-2eb33009d0ca-0', tool_calls=[{'name': 'blog_post_retriever', 'args': {'query': 'Self-Reflection'}, 'id': 'call_8RqXPkLd1xvi7phqrMmTcu6L', 'type': 'tool_call'}], usage_metadata={'input_tokens': 78, 'output_tokens': 19, 'total_tokens': 97}), ToolMessage(content='Another quite distinct approach, LLM+P (Liu et al. 2023), involves relying on an external classical planner to do long-horizon planning. This approa

## **5) 인공지능 보조교사 (FAISS ver.)**

**프로그래밍 인공지능 보조교사 구현**  
- 저장위치: **5_⭐coteacher.py**

In [None]:
from dotenv import load_dotenv
load_dotenv()

import os
import streamlit as st
from langchain_community.chat_message_histories import StreamlitChatMessageHistory
from langchain_openai import ChatOpenAI
from langchain.schema import HumanMessage, AIMessage
#추가
from langchain_community.callbacks import StreamlitCallbackHandler
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain.chains import RetrievalQA
from langchain_core.output_parsers import StrOutputParser
from langchain_core.outputs import LLMResult
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.chains import create_history_aware_retriever
from langchain_core.runnables import RunnablePassthrough
import faiss
from langchain.vectorstores import FAISS
from langchain.docstore import InMemoryDocstore

st.set_page_config(page_title="인공지능 보조교사", page_icon="⭐", layout='wide')
st.header('프로그래밍 인공지능 보조교사')

embedding_model = OpenAIEmbeddings()
# FAISS vector store: start empty when ./faiss does not exist, otherwise load
# the index saved in that directory.
faiss_dir = './faiss'
if(os.path.isdir(faiss_dir) == False):
    vs = FAISS(
            embedding_function=embedding_model, index=faiss.IndexFlatL2(1536),
            docstore=InMemoryDocstore(), index_to_docstore_id={})
else :
    # NOTE(review): allow_dangerous_deserialization=True opts in to loading
    # serialized data from disk - only point faiss_dir at a directory this
    # app created itself.
    vs = FAISS.load_local(faiss_dir, embedding_model,
            allow_dangerous_deserialization=True)

# chat history
# session_state.app_name remembers which page last ran. When a different page
# name is found there, the chat history is cleared so this page starts empty.
if('app_name' not in st.session_state):
    st.session_state.app_name = 'coteacher'
elif(st.session_state.app_name != 'coteacher'):
    st.session_state.app_name = 'coteacher'
    StreamlitChatMessageHistory().clear();

history = StreamlitChatMessageHistory()

if len(history.messages) == 0:  # no conversation yet: seed it with a greeting
    hello = "안녕하세요? 무슨이야기를 해볼까요?"
    history.add_ai_message(hello)

# Replay the recorded transcript.
for message in history.messages:
    st.chat_message(message.type).write(message.content)

# create_retriever_chain
def format_docs(docs):
    """Concatenate the ``page_content`` of each document, separated by blank lines."""
    contents = [document.page_content for document in docs]
    return "\n\n".join(contents)

def create_retriever_chain(history):
    """Build the history-aware retrieval + answer chain of the co-teacher.

    The returned runnable expects a dict with "input" and "chat_history". It
    first lets an LLM turn the conversation into a search query, retrieves
    matching documents from the module-level vector store ``vs``, formats them
    into "context", and then answers with a streaming chat model that follows
    the co-teacher system prompt.

    Args:
        history: Accepted for interface compatibility; not used inside.

    Returns:
        A runnable producing the answer as a plain string.
    """
    # Step 1: prompt + model that rewrite the conversation as a search query.
    query_prompt = ChatPromptTemplate.from_messages(
        [
            MessagesPlaceholder(variable_name="chat_history"),
            ("user", "{input}"),
            ("user", "위의 대화에서, 대화와 관련된 정보를 찾기 위한 검색 쿼리를 생성해 주세요."),
        ]
    )
    query_llm = ChatOpenAI(model_name='gpt-4o-mini', temperature=0.5)

    doc_retriever = vs.as_retriever(
        search_type='mmr',
        search_kwargs={'k': 4, 'fetch_k': 8},
    )

    history_aware = create_history_aware_retriever(
        query_llm, doc_retriever, query_prompt
    )

    # System prompt defining the assistant-teacher persona and its rules.
    system_template = """
    You are an assistant teacher teaching programming desigend by 정진쌤.
    Please answer the student's questions appropriately.
    However, refuse requests to provide the correct answer code, to create a program, or to provide sample code.
    If requested, you can provide a psudo-code.
    If you don't know the answer, just say that you don't know, don't try to make up an answer.
    Use the following context to answer the question at the end.
    Please answer in Korean unless otherwise requested.\n
    {context}
    """

    # Step 2: prompt + streaming model that answer from the retrieved context.
    answer_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_template),
            MessagesPlaceholder(variable_name="chat_history"),
            ("user", "{input}"),
        ]
    )

    # The callback handler is attached to the answering model itself.
    stream_handler = StreamlitCallbackHandler(st.container())

    answer_llm = ChatOpenAI(
        model_name='gpt-4o-mini',
        temperature=0.5,
        streaming=True,
        callbacks=[stream_handler],
    )

    answer_chain = answer_prompt | answer_llm | StrOutputParser()

    # Join both steps: fill "context" from retrieval, then answer.
    with_context = RunnablePassthrough.assign(context=history_aware | format_docs)
    return with_context | answer_chain

# 쿼리 및 응답
query = st.chat_input("하고 싶은 말")

if query:
    # Snapshot the transcript BEFORE recording the new turn. The prompts
    # already append the current question via "{input}", so passing a history
    # that also contains it would send the question to the model twice.
    prior_messages = list(history.messages)

    with st.chat_message("user"):
        history.add_user_message(query)
        st.markdown(query)

    with st.chat_message("assistant"):
        # The Streamlit callback handler is created inside
        # create_retriever_chain and attached to the answering model there.
        retriever = create_retriever_chain(history)
        response = retriever.invoke(
            {"input": query, "chat_history": prior_messages},
        )
        history.add_ai_message(response)
        st.markdown(response)

**인공지능 보조교사 Knowledgebase 생성기**
- 저장위치: **6_🎓knowlegebase.py**

In [None]:
from dotenv import load_dotenv
load_dotenv()

import os
import streamlit as st
#추가
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import (
    TextLoader, JSONLoader, UnstructuredMarkdownLoader, PyMuPDFLoader)
from langchain_openai import OpenAIEmbeddings
import faiss
from langchain.vectorstores import FAISS
from langchain.docstore import InMemoryDocstore

st.set_page_config(page_title="챗봇", page_icon="⭐", layout='wide')
st.header('인공지능 보조교사')
st.markdown("#### **Knowledgebase 생성기**")

embedding_model = OpenAIEmbeddings()

# FAISS vector store: reuse the index saved under faiss_dir when one exists,
# otherwise start from an empty index.
faiss_dir = './faiss'
# Test for the saved index file rather than just the folder: save_local()
# writes "index.faiss" by default, and a folder without it (e.g. created by
# hand) would make load_local() fail.
if os.path.isfile(os.path.join(faiss_dir, 'index.faiss')):
    # allow_dangerous_deserialization is needed because the docstore is
    # pickled; only load index folders that this app wrote itself.
    vs = FAISS.load_local(faiss_dir, embedding_model,
            allow_dangerous_deserialization=True)
else:
    # NOTE(review): 1536 has to equal the vector size produced by
    # embedding_model -- confirm if the OpenAIEmbeddings model is changed.
    vs = FAISS(
            embedding_function=embedding_model, index=faiss.IndexFlatL2(1536),
            docstore=InMemoryDocstore(), index_to_docstore_id={})

def vs_add_file(file_path):
    """Load a file, split it into chunks, add them to the FAISS store and save it.

    Args:
        file_path: Path of a .txt, .md, .pdf or .json file. The extension is
            matched case-insensitively.

    Returns:
        None. A file with any other extension is skipped with a warning
        instead of failing on an unbound ``raw_doc``.
    """
    loader_factories = (
        ('.txt', TextLoader),
        ('.md', UnstructuredMarkdownLoader),
        ('.pdf', PyMuPDFLoader),
        # JSONLoader cannot be built without a jq_schema. '.' selects the
        # whole document, and text_content=False lets non-string JSON values
        # be serialized into the page content.
        ('.json', lambda path: JSONLoader(
            path, jq_schema='.', text_content=False)),
    )

    lowered_path = file_path.lower()
    for suffix, make_loader in loader_factories:
        if lowered_path.endswith(suffix):
            raw_doc = make_loader(file_path).load()
            break
    else:
        st.warning(f"지원하지 않는 파일 형식입니다: {file_path}")
        return

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500, chunk_overlap=100)
    docs = text_splitter.split_documents(raw_doc)

    if docs:
        vs.add_documents(docs)

    # Persist the index so the next run can load it from faiss_dir.
    vs.save_local(faiss_dir)

def save_file(file):
    """Write an uploaded file into the local ``tmp`` folder and return its path.

    Args:
        file: Object exposing ``name`` (str) and ``getvalue()`` returning the
            bytes to write. The caller passes the items returned by
            ``st.file_uploader``.

    Returns:
        The path of the written file, formatted as ``./tmp/<file name>``.
    """
    import os

    folder = 'tmp'
    # exist_ok replaces the separate exists() check, which could race with
    # another session creating the folder at the same moment.
    os.makedirs(folder, exist_ok=True)

    # The name comes from the client: keep only its last path component so a
    # name such as "../x" cannot be written outside the folder.
    safe_name = os.path.basename(file.name)
    file_path = f'./{folder}/{safe_name}'
    with open(file_path, 'wb') as f:
        f.write(file.getvalue())
    return file_path

# Every file picked in the uploader is saved locally and then indexed.
uploaded_files = st.file_uploader("Choose a data file", accept_multiple_files=True)
for uploaded in uploaded_files:
    vs_add_file(save_file(uploaded))

# Show the completion notice only when at least one file was uploaded.
if uploaded_files:
    st.markdown("#### 업로드 완료 되었습니다.")

## **5) 인공지능 보조교사 (Chroma ver.)**

**프로그래밍 인공지능 보조교사 구현**  

In [None]:
# 보조교사 챗봇
from dotenv import load_dotenv
load_dotenv()

import os
import streamlit as st
from langchain_community.chat_message_histories import StreamlitChatMessageHistory
from langchain_openai import ChatOpenAI
from langchain.schema import HumanMessage, AIMessage
#추가
from langchain_community.callbacks import StreamlitCallbackHandler
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain.chains import RetrievalQA
from langchain_core.output_parsers import StrOutputParser
from langchain_core.outputs import LLMResult
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.chains import create_history_aware_retriever
from langchain_core.runnables import RunnablePassthrough

st.set_page_config(layout='wide', page_title="챗봇", page_icon="⭐")
st.header('프로그래밍 인공지능 보조교사')

# Vector store setup
embedding_model = OpenAIEmbeddings()
db_dir = "chroma-db/"
vs = Chroma(
    collection_name="langchain_store",
    embedding_function=embedding_model,
    persist_directory=db_dir,
)

# Chat history: when the session was last used by a different page, wipe the
# stored conversation before this page starts using it.
came_from_other_app = (
    'app_name' in st.session_state
    and st.session_state.app_name != 'coteacher'
)
st.session_state.app_name = 'coteacher'
if came_from_other_app:
    StreamlitChatMessageHistory().clear()

history = StreamlitChatMessageHistory()

# Open an empty conversation with a greeting from the assistant.
if not history.messages:
    history.add_ai_message("안녕하세요? 무슨이야기를 해볼까요?")

# Replay the stored conversation, one chat bubble per message.
for past_message in history.messages:
    bubble = st.chat_message(past_message.type)
    bubble.write(past_message.content)

# create_retriever_chain
def format_docs(docs):
    """Join the text of the given documents into one string.

    Args:
        docs: Iterable of objects that expose a ``page_content`` attribute.

    Returns:
        The ``page_content`` values separated by a blank line; an empty
        string when ``docs`` is empty.
    """
    contents = [document.page_content for document in docs]
    return "\n\n".join(contents)

def create_retriever_chain(history):
    """Build the chain that retrieves context and answers as the co-teacher.

    The chain has two stages: a history-aware retriever that asks an LLM for
    a search query based on the conversation and runs it against ``vs``, and
    a question-answering stage that receives the retrieved text as
    ``{context}``.

    Args:
        history: Not read by this function; kept so existing callers that
            pass the chat history keep working.

    Returns:
        A runnable that is invoked with a dict holding ``"input"`` and
        ``"chat_history"`` and whose output goes through ``StrOutputParser``.
    """
    # Stage 1: rewrite the question with the conversation in mind, then search.
    query_prompt = ChatPromptTemplate.from_messages([
        MessagesPlaceholder(variable_name="chat_history"),
        ("user", "{input}"),
        ("user", "위의 대화에서, 대화와 관련된 정보를 찾기 위한 검색 쿼리를 생성해 주세요."),
    ])
    query_llm = ChatOpenAI(model_name='gpt-4o-mini', temperature=0.5)
    mmr_retriever = vs.as_retriever(
        search_type='mmr',
        search_kwargs={'k': 2, 'fetch_k': 4},
    )
    history_aware_retriever = create_history_aware_retriever(
        query_llm, mmr_retriever, query_prompt
    )

    # Stage 2: answer with the co-teacher persona, given the retrieved context.
    system_text = """
    You are an assistant teacher teaching programming desigend by 정진쌤.
    Please answer the student's questions appropriately.
    However, refuse requests to provide the correct answer code, to create a program, or to provide sample code.
    If requested, you can provide a psudo-code.
    If you don't know the answer, just say that you don't know, don't try to make up an answer.
    Use the following context to answer the question at the end.
    Please answer in Korean unless otherwise requested.\n
    {context}
    """
    answer_prompt = ChatPromptTemplate.from_messages([
        ("system", system_text),
        MessagesPlaceholder(variable_name="chat_history"),
        ("user", "{input}"),
    ])

    # The handler is given a fresh Streamlit container created at the point
    # where this function is called.
    stream_handler = StreamlitCallbackHandler(st.container())
    answer_llm = ChatOpenAI(
        model_name='gpt-4o-mini',
        temperature=0.5,
        streaming=True,
        callbacks=[stream_handler],
    )
    answer_chain = answer_prompt | answer_llm | StrOutputParser()

    # Join the two stages: add the formatted documents under "context",
    # then hand the enriched input to the answering chain.
    with_context = RunnablePassthrough.assign(
        context=history_aware_retriever | format_docs
    )
    return with_context | answer_chain

# Query and response
query = st.chat_input("하고 싶은 말")

if query:
    # Store the user's turn in the history and echo it in a user bubble.
    with st.chat_message("user"):
        history.add_user_message(query)
        st.markdown(query)

    # The chain is built inside the assistant bubble, so the st.container()
    # it creates for its callback handler is created within this bubble.
    with st.chat_message("assistant"):
        chain = create_retriever_chain(history)
        chain_input = {"input": query, "chat_history": history.messages}
        answer = chain.invoke(chain_input)
        history.add_ai_message(answer)
        st.markdown(answer)

**인공지능 보조교사 Knowledgebase 생성기**

In [None]:
from dotenv import load_dotenv
load_dotenv()

import os
import streamlit as st
#추가
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import (
    TextLoader, JSONLoader, UnstructuredMarkdownLoader, PyMuPDFLoader)
from langchain_openai import OpenAIEmbeddings

# The imports listed for this page do not include Chroma, so bring it in here;
# without it the Chroma(...) call below raises NameError when the page runs
# as its own script.
from langchain_community.vectorstores import Chroma

st.set_page_config(page_title="챗봇", page_icon="⭐", layout='wide')
st.header('인공지능 보조교사')
st.markdown("#### **Knowledgebase 생성기**")

# Vector store setup: the "langchain_store" collection kept under db_dir.
db_dir = "chroma-db/"
embedding_model = OpenAIEmbeddings()

vs = Chroma("langchain_store", embedding_model, persist_directory = db_dir)

def vs_add_file(file_path):
    """Load a file, split it into chunks and add them to the vector store.

    Args:
        file_path: Path of a .txt, .md, .pdf or .json file. The extension is
            matched case-insensitively.

    Returns:
        None. A file with any other extension is skipped with a warning
        instead of failing on an unbound ``raw_doc``.
    """
    loader_factories = (
        ('.txt', TextLoader),
        ('.md', UnstructuredMarkdownLoader),
        ('.pdf', PyMuPDFLoader),
        # JSONLoader cannot be built without a jq_schema. '.' selects the
        # whole document, and text_content=False lets non-string JSON values
        # be serialized into the page content.
        ('.json', lambda path: JSONLoader(
            path, jq_schema='.', text_content=False)),
    )

    lowered_path = file_path.lower()
    for suffix, make_loader in loader_factories:
        if lowered_path.endswith(suffix):
            raw_doc = make_loader(file_path).load()
            break
    else:
        st.warning(f"지원하지 않는 파일 형식입니다: {file_path}")
        return

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000, chunk_overlap=200)
    docs = text_splitter.split_documents(raw_doc)

    if docs:
        vs.add_documents(docs)

def save_file(file):
    """Write an uploaded file into the local ``tmp`` folder and return its path.

    Args:
        file: Object exposing ``name`` (str) and ``getvalue()`` returning the
            bytes to write. The caller passes the items returned by
            ``st.file_uploader``.

    Returns:
        The path of the written file, formatted as ``./tmp/<file name>``.
    """
    import os

    folder = 'tmp'
    # exist_ok replaces the separate exists() check, which could race with
    # another session creating the folder at the same moment.
    os.makedirs(folder, exist_ok=True)

    # The name comes from the client: keep only its last path component so a
    # name such as "../x" cannot be written outside the folder.
    safe_name = os.path.basename(file.name)
    file_path = f'./{folder}/{safe_name}'
    with open(file_path, 'wb') as f:
        f.write(file.getvalue())
    return file_path

# Every file picked in the uploader is saved locally and then indexed.
uploaded_files = st.file_uploader("Choose a data file", accept_multiple_files=True)
for uploaded in uploaded_files:
    vs_add_file(save_file(uploaded))

# Show the completion notice only when at least one file was uploaded.
if uploaded_files:
    st.markdown("#### 업로드 완료 되었습니다.")