In [1]:
from langchain.document_loaders import PyPDFLoader
from langchain_openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough
from langchain_core.runnables import RunnableParallel
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain import hub

from datetime import datetime

from dotenv import load_dotenv
import os
import math
load_dotenv()


True

In [2]:
# Initialise the chat model; the API key is read from the environment.
chat_model = ChatOpenAI(
    api_key=os.environ.get("OPENAI_API_KEY"),
    model_name="gpt-3.5-turbo-1106",
    temperature=0.25,
)

# Load the PDF from the given path, one document per page.
loader = PyPDFLoader("docs/ai_trand.pdf")
docs = loader.load()

# Split the loaded pages into overlapping chunks before embedding them.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=2048,
    chunk_overlap=256,
)
splits = text_splitter.split_documents(docs)

# Embed the chunks and index them in a Chroma vector store.
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=OpenAIEmbeddings(),
)

# Retriever over the indexed PDF chunks, plus the RAG prompt pulled from the hub.
retriever = vectorstore.as_retriever()
prompt = hub.pull("rlm/rag-prompt")

In [3]:
def format_docs(docs):
    """Concatenate the text of several documents into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values joined in order, separated by a blank line
        (``"\\n\\n"``). An empty iterable yields an empty string.
    """
    page_texts = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(page_texts)


# RAG pipeline, composed with the `|` operator:
#   1. Build a mapping with two keys from the chain input:
#      - "context": the input goes to `retriever`, and the retriever's output
#        is piped into `format_docs` (joins each document's page_content).
#      - "question": the input routed through RunnablePassthrough()
#        (presumably forwarded unchanged — confirm against the LangChain docs).
#   2. Pipe that mapping into `prompt`, then `chat_model`, then StrOutputParser().
# Invoked below as rag_chain.invoke(<prompt text>); the result is handled as a str.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | chat_model
    | StrOutputParser()
)


In [4]:
def split_long_text(text, line_length=80):
    """Cut ``text`` into consecutive fixed-width pieces.

    Args:
        text: The string to cut up.
        line_length: Maximum number of characters per piece (default 80).

    Returns:
        A list of substrings taken in order; every piece has ``line_length``
        characters except possibly the last, which holds the remainder.
        An empty ``text`` yields an empty list.
    """
    pieces = []
    # Walk the start offsets 0, line_length, 2*line_length, ... up to len(text).
    for start in range(0, len(text), line_length):
        stop = start + line_length
        pieces.append(text[start:stop])
    return pieces

# Run every ".txt" prompt file found in ./Prompts through the RAG chain and
# save each answer to ./Result as "<prompt name>_result_<unix timestamp>.txt",
# wrapped to 80 characters per line.
now_dir = os.getcwd()

# Build paths with os.path.join so the cell runs on any OS. Hand-written
# "\Prompts" / "\Result\\" literals only work on Windows and contain invalid
# escape sequences ("\P", "\R") that newer Python versions warn about.
prompt_dir = os.path.join(now_dir, "Prompts")
result_dir = os.path.join(now_dir, "Result")

if os.path.isdir(prompt_dir):
    prompt_file = os.listdir(prompt_dir)
    if len(prompt_file) == 0:
        print("Prompt폴더에 프롬프트 txt파일을 생성해주세요")

    for file in prompt_file:
        # splitext keeps dots inside the name ("my.prompt.txt" -> "my.prompt"),
        # whereas splitting on "." and taking [0] would truncate it to "my".
        file_stem, file_ext = os.path.splitext(file)
        if file_ext != ".txt":
            continue

        # The context manager closes the handle even if reading fails.
        with open(os.path.join(prompt_dir, file), 'r', encoding='UTF8') as f:
            lines = f.readlines()

        # An empty file has no first line to send; skip it instead of
        # crashing the whole run with an IndexError.
        if not lines:
            print("빈 프롬프트 파일이므로 건너뜁니다: " + file)
            continue

        # Only the first line of the file is used as the prompt.
        response = rag_chain.invoke(lines[0])

        now = datetime.now()
        file_name = file_stem + "_result_" + str(math.floor(now.timestamp())) + file_ext

        # Create the output folder on demand so a fresh checkout works.
        os.makedirs(result_dir, exist_ok=True)
        with open(os.path.join(result_dir, file_name), 'w', encoding='UTF8') as w:
            for line in split_long_text(response, line_length=80):
                w.write(line + '\n')
else:
    print("Prompt폴더를 생성해서 프롬프트 txt파일을 생성해주세요")