In [1]:
# Install into the running kernel's environment (%pip) rather than whichever
# `pip` the shell happens to resolve (!pip).
# faiss-cpu, sentence-transformers and openai back the FAISS,
# SentenceTransformerEmbeddings and ChatOpenAI classes used further down.
# NOTE(review): the imports below use the legacy `langchain.*` module paths;
# pin a langchain version that still provides them (TODO confirm which).
%pip install -q langchain openai streamlit PyPDF2 faiss-cpu sentence-transformers



In [2]:
import streamlit as st
from PyPDF2 import PdfReader
from langchain.embeddings import OpenAIEmbeddings, SentenceTransformerEmbeddings
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain, RetrievalQA
from langchain.memory import ConversationBufferWindowMemory
from langchain.vectorstores import FAISS
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [3]:
def get_pdf_text(pdf_docs):
    """Extract and concatenate the text of every page of the given PDFs.

    Args:
        pdf_docs: Iterable of objects accepted by ``PdfReader`` (this notebook
            passes the files returned by ``st.file_uploader``).

    Returns:
        str: The text of all pages, in document order then page order, joined
        without any separator. Empty when ``pdf_docs`` is empty.
    """
    page_texts = []
    for pdf in pdf_docs:
        for page in PdfReader(pdf).pages:
            # Guard against a falsy extraction result (None or "") so a page
            # without extractable text cannot break the concatenation.
            page_texts.append(page.extract_text() or "")
    # Join once at the end instead of growing a string page by page.
    return "".join(page_texts)


def get_text_chunks(text, chunk_size=1000, chunk_overlap=200):
    """Split text into smaller, overlapping chunks.

    Args:
        text: The full text to split.
        chunk_size: Maximum chunk length, measured with ``len``.
        chunk_overlap: Number of characters shared between neighbouring chunks.

    Returns:
        The list of chunks produced by ``RecursiveCharacterTextSplitter.split_text``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        # The recursive splitter takes a list via `separators`; the singular
        # `separator` keyword belongs to CharacterTextSplitter and is rejected
        # here. Newline stays the preferred split point, with space and
        # per-character fallbacks so oversized lines can still be cut down.
        separators=["\n", " ", ""],
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
    )
    return text_splitter.split_text(text)


def get_vectorstore(text_chunks, model_name="all-MiniLM-L6-v2"):
    """Embed the text chunks and index them in a FAISS vector store.

    Args:
        text_chunks: Non-empty list of text chunks to embed.
        model_name: Model name passed to ``SentenceTransformerEmbeddings``.

    Returns:
        The vector store built by ``FAISS.from_texts``.

    Raises:
        ValueError: If ``text_chunks`` is empty, e.g. because no text could be
            extracted from the uploaded documents.
    """
    # Fail early with a readable message: an empty list gives the vector
    # store nothing to index.
    if not text_chunks:
        raise ValueError("text_chunks is empty: there is no text to embed")

    embeddings = SentenceTransformerEmbeddings(model_name=model_name)
    return FAISS.from_texts(text_chunks, embedding=embeddings)

In [4]:
import os
from getpass import getpass

# Never hardcode the key in the notebook. Reuse OPENAI_API_KEY when the
# environment already provides it (so a real key is not overwritten), and
# otherwise prompt for it without echoing the input.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

def get_conversation_chain(vectorstore, model_name="gpt-3.5-turbo-16k-0613", temperature=0):
    """Build a conversational retrieval chain over the given vector store.

    Args:
        vectorstore: Object exposing ``as_retriever()``; its retriever supplies
            the documents the chain answers from.
        model_name: Chat model name passed to ``ChatOpenAI``.
            NOTE(review): the default is a dated snapshot name; confirm it is
            still served before relying on it.
        temperature: Sampling temperature passed to ``ChatOpenAI``.

    Returns:
        The chain created by ``ConversationalRetrievalChain.from_llm``.
    """
    # Windowed buffer memory that stores earlier turns under 'chat_history'
    # and hands them back as message objects.
    memory = ConversationBufferWindowMemory(
        memory_key='chat_history',
        return_messages=True
    )

    llm = ChatOpenAI(temperature=temperature, model_name=model_name)

    # NOTE(review): get_chat_history is an identity function, so the history
    # is forwarded to the chain exactly as the memory returns it; confirm
    # this is the formatting the question-condensing prompt should receive.
    conversation_chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vectorstore.as_retriever(),
        get_chat_history=lambda h: h,
        memory=memory
    )

    return conversation_chain

In [5]:
# Only PDFs are accepted because every uploaded file is parsed with PdfReader.
user_uploads = st.file_uploader(
    "파일을 업로드해주세요~",
    type=["pdf"],
    accept_multiple_files=True,
)

# With accept_multiple_files=True the uploader returns a list (empty until a
# file is chosen), never None, so test for truthiness instead of `is not None`.
if user_uploads:
    if st.button("Upload"):
        with st.spinner("문서 로딩중..."):

            # Extract the raw text from the uploaded PDFs.
            raw_text = get_pdf_text(user_uploads)

            # Split the text into smaller chunks.
            text_chunks = get_text_chunks(raw_text)

            if not text_chunks:
                # Nothing to index (e.g. image-only PDFs): tell the user
                # instead of failing while building the vector store.
                st.error("업로드한 PDF에서 텍스트를 추출하지 못했습니다.")
            else:
                # Embed the chunks and build the vector store.
                vectorstore = get_vectorstore(text_chunks)

                # Keep the chain in session state so it survives Streamlit reruns.
                st.session_state.conversation = get_conversation_chain(vectorstore)

2025-11-13 12:55:58.197 
  command:

    streamlit run C:\Users\chogu\anaconda3\envs\11m\lib\site-packages\ipykernel_launcher.py [ARGUMENTS]
