In [25]:
import streamlit as st
import pandas as pd
from langchain.prompts import PromptTemplate
from datetime import datetime
from dotenv import load_dotenv
import os

# Environment configuration
ENV_PATH = './.env'
CACHE_DIR = './.cache'
FILES_DIR = os.path.join(CACHE_DIR, 'files')
EMBEDDINGS_DIR = os.path.join(CACHE_DIR, 'embeddings')

# Load environment variables from the file named by ENV_PATH. Passing the path
# explicitly keeps the constant above authoritative instead of depending on
# python-dotenv's implicit search for a .env file.
load_dotenv(dotenv_path=ENV_PATH)


True

In [4]:
# Build the chat prompt
from langchain_core.prompts import PromptTemplate, ChatPromptTemplate

# Message order: system instructions (carrying the {context} placeholder),
# a fixed assistant greeting, then the user's question ({user_input}).
chat_template = ChatPromptTemplate.from_messages([
    (
        "system",
        """당신은 느린 학습자 관련 교육 전문가입니다. 당신은 느린 학습자 관련 질문을 받고, 이에 대한 전문적인 답변을 제공합니다.
             아래 컨텍스트를 사용하여 질문에 답변하십시오. 답을 모르면 "모르겠습니다."라고 답변하세요. 답을 지어내지 마세요.
        컨텍스트: {context}""",
    ),
    ("ai", "안녕하세요! 느린 학습자와 관련해서 어떤 사항이 궁금하신가요?"),
    ("human", "{user_input}"),
])


In [51]:
#LLM model
from langchain_openai import ChatOpenAI
from langchain.globals import set_llm_cache
from langchain.cache import InMemoryCache
from langchain.callbacks import get_openai_callback #토큰 추적
from langchain.memory import ConversationBufferWindowMemory
from langchain.chains import ConversationChain
from langchain_core.runnables.history import RunnableWithMessageHistory


# API key is read from the environment variable named "openai_api_key".
openai_api_key = os.getenv("openai_api_key")

# Chat model used by the conversation chain below.
llm = ChatOpenAI(model="gpt-4o", api_key=openai_api_key, verbose=True)

# Caching: use the in-memory LLM cache.
# TODO: compare timing of the in-memory cache against the SQLite cache.
set_llm_cache(InMemoryCache())

# Token-usage tracking (currently disabled):
# with get_openai_callback() as cb:
#   result = llm.invoke("What is the capital of South Korea?")
#   print(result)

# Conversation chain backed by a windowed buffer memory (window size k=8).
window_memory = ConversationBufferWindowMemory(k=8)
conversation = ConversationChain(llm=llm, memory=window_memory, verbose=True)

In [52]:
# First turn of the demo conversation; the reply is shown as the cell output.
opening_message = "Hi, what's up?"
conversation.predict(input=opening_message)



[1m> Entering new ConversationChain chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:

Human: Hi, what's up?
AI:[0m

[1m> Finished chain.[0m


"Hello! I'm just here, ready to chat with you. How can I assist you today? We could talk about anything from the latest in technology, to your favorite movies, or even some fun facts. What's on your mind?"

In [53]:
# Second turn of the demo conversation; the reply is shown as the cell output.
capital_question = "What is the capital of Korea?"
conversation.predict(input=capital_question)



[1m> Entering new ConversationChain chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:
Human: Hi, what's up?
AI: Hello! I'm just here, ready to chat with you. How can I assist you today? We could talk about anything from the latest in technology, to your favorite movies, or even some fun facts. What's on your mind?
Human: What is the capital of Korea?
AI:[0m

[1m> Finished chain.[0m


'Korea is divided into two countries: North Korea and South Korea. The capital of South Korea is Seoul, a vibrant city known for its modern skyscrapers, high-tech subways, and pop culture. The capital of North Korea is Pyongyang, which is known for its monuments and grand architecture. Do you have any specific interest in either of these cities or countries?'

In [3]:
# Load .env before any environment variable is read in this cell
# (a repeated call does not override variables that are already set).
load_dotenv()

# I. Streamlit UI
st.title('RAG 시스템')

# Model selection and API key entry (when needed)
st.subheader('모델 및 파라미터 설정')
selected_model = st.sidebar.selectbox(
    'LLM 모델을 선택하세요',
    ['Openai-GPT-4o', 'Google-Gemma-2-9b', 'Meta-Llama-3.1-8b'],
    key='selected_model',
)

# Always bind openai_api_key so the check below cannot raise NameError when a
# non-OpenAI model is selected; the default is the key from the environment.
openai_api_key = os.getenv("openai_api_key")
if selected_model == 'Openai-GPT-4o':
    with st.sidebar:
        # type='password' masks the key instead of echoing it on screen.
        entered_key = st.text_input('Openai API Key를 입력해주세요', type='password')
    # A key typed into the sidebar wins; a blank field keeps the environment
    # key rather than overwriting it with an empty string.
    openai_api_key = entered_key or openai_api_key

# User input: halt the script run when a question arrives but no key is available.
if user_input := st.chat_input("질문을 입력하세요"):
    if not openai_api_key:
        st.info("Openai API key를 입력해주세요")
        st.stop()

# II. LangChain
# LangSmith settings
os.environ["LANGCHAIN_PROJECT"] = "사내 LLM 구축 프로젝트"

2024-08-12 16:40:19.218 
  command:

    streamlit run /root/.cache/pypoetry/virtualenvs/langchain-rag-T8wIn6co-py3.12/lib/python3.12/site-packages/ipykernel_launcher.py [ARGUMENTS]
2024-08-12 16:40:19.219 Session state does not function when running a script without `streamlit run`


In [10]:
# Embedder
from langchain_community.embeddings import HuggingFaceEmbeddings

# Embedding model: BAAI/bge-m3 on CPU, with normalize_embeddings enabled.
embeddings_model = HuggingFaceEmbeddings(
    model_name='BAAI/bge-m3',
    model_kwargs={'device': 'cpu'},
    encode_kwargs={'normalize_embeddings': True},
)

# Smoke test: embed a handful of short sentences.
sample_texts = [
    '안녕하세요!',
    '어! 오랜만이에요',
    '이름이 어떻게 되세요?',
    '날씨가 추워요',
    'Hello LLM!',
]
embeddings = embeddings_model.embed_documents(sample_texts)

# Cell output: (number of embedded texts, length of the first vector).
len(embeddings), len(embeddings[0])


(5, 1024)

In [None]:
#Vectorstore


In [None]:
# Loader
from langchain_unstructured import UnstructuredLoader

@st.cache_data(show_spinner="파일 로딩중...")
def load_file(file):
  file_contents = file.read
  file_path = os.path.join(FILES_DIR, file.name)