In [11]:
import os
import warnings
# Silence every warning category for the rest of the session.
# Note: this also hides deprecation notices emitted by the libraries used below.
warnings.filterwarnings("ignore")

In [12]:
from langchain_community.document_loaders import PyMuPDFLoader

# Load each PDF with PyMuPDFLoader and gather the returned documents in one flat list.
pdf_files = ["alchohol.pdf", "cocktail.pdf"]  # input PDF paths
pages = []
for pdf_path in pdf_files:
    pages.extend(PyMuPDFLoader(pdf_path).load())

In [13]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the loaded documents into overlapping chunks.
## Chunk size is 500, and consecutive chunks overlap by 50 characters.
splitter_options = {"chunk_size": 500, "chunk_overlap": 50}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)
docs = text_splitter.split_documents(pages)

In [14]:
from langchain.embeddings import HuggingFaceEmbeddings

# Embedding model used to turn each chunk into a vector before it is stored.
# Use the GPU when CUDA is available and fall back to the CPU otherwise, so the
# notebook also runs on machines without a CUDA device instead of failing.
import torch

embedding_device = 'cuda' if torch.cuda.is_available() else 'cpu'
embeddings = HuggingFaceEmbeddings(
    model_name='BAAI/bge-m3',
    model_kwargs={'device': embedding_device},
    # Ask the encoder to normalize the embedding vectors it returns.
    encode_kwargs={'normalize_embeddings': True},
)

In [15]:
# Build the Chroma vector store and persist it to disk.
from langchain.vectorstores import Chroma

# Storage location for the vector store.
## Creates a 'vectorstore' directory under the current working directory.
vectorstore_path = 'vectorstore'
os.makedirs(vectorstore_path, exist_ok=True)

# Embed the chunks exactly once, directly into the persistent store. Building a
# throw-away in-memory store first would embed every chunk a second time only
# for the result to be overwritten by this assignment.
# NOTE(review): re-running this cell presumably adds the same chunks to the
# existing directory again — confirm, and clear or reuse the store if so.
vectorstore = Chroma.from_documents(docs, embeddings, persist_directory=vectorstore_path)
# Write the vector store data to disk.
vectorstore.persist()
print("Vectorstore created and persisted")

Vectorstore created and persisted


In [16]:
from langchain_community.chat_models import ChatOllama

# Run the LLM locally through Ollama.
## The original note pointed to the Ollama usage guide for downloading
## llama3-ko-instruct; the call below actually requests the llama3.2:1b tag.
model = ChatOllama(
    model="llama3.2:1b",
    temperature=0,
)

In [17]:
# Expose the vector store as a retriever; search_kwargs asks for k=3.
retriever = vectorstore.as_retriever(
    search_kwargs={'k': 3},
)

In [18]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate


# Prompt template text: instructions, then the {context} placeholder,
# a blank line, and the {question} placeholder.
template = (
    "As a friendly chatbot, please answer the question as thoroughly and kindly as possible. All answers should be in English:\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
)

# Build the chat prompt from the template string; it has {context} and {question} placeholders.
prompt = ChatPromptTemplate.from_template(template)

def format_docs(docs):
    """Join the ``page_content`` of every item in ``docs`` into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The contents in their original order, separated by a blank line
        (``'\\n\\n'``). An empty input yields an empty string.
    """
    contents = (doc.page_content for doc in docs)
    return '\n\n'.join(contents)

In [19]:
# Baseline: run a plain prompt with no retrieved context (no RAG).
simple_prompt = ChatPromptTemplate.from_template(
    "As a friendly chatbot, please answer the question as thoroughly and kindly as possible. All answers should be in English:\n"
    "\n"
    "Question: {question}\n"
)

# question -> prompt -> LLM -> plain string
simple_chain = {'question': RunnablePassthrough()} | simple_prompt | model | StrOutputParser()

# Run the baseline chain without RAG.
query = "What were the social, economic, and public health impacts of increased alcohol consumption during the COVID-19 pandemic, and what policy approaches can effectively address these issues?"
answer_without_rag = simple_chain.invoke(query)

print(f"Query (No RAG): {query}")
print(f"Answer (No RAG): {answer_without_rag}")

Query (No RAG): What's the impact of COVID-19?
Answer (No RAG): The COVID-19 pandemic has had a significant impact on global health, economies, and societies. Here are some key effects:

1. **Global spread**: The virus was first detected in Wuhan, China in December 2019, but it quickly spread to other parts of the world, infecting millions of people.
2. **Economic disruption**: The pandemic led to widespread lockdowns, travel restrictions, and supply chain disruptions, causing significant economic losses for many countries.
3. **Healthcare strain**: The rapid spread of COVID-19 put a huge burden on healthcare systems worldwide, with many hospitals facing shortages of personal protective equipment (PPE), staff, and resources.
4. **Social distancing measures**: Governments implemented social distancing measures, such as mask mandates, travel restrictions, and stay-at-home orders, to slow the spread of the virus.
5. **Vaccine development**: The pandemic accelerated the development of COVI

In [20]:
# Wire up the RAG chain: the retriever output is piped through format_docs to
# fill {context}, while the incoming question is passed through as {question}.
rag_inputs = {'context': retriever | format_docs, 'question': RunnablePassthrough()}
rag_chain = rag_inputs | prompt | model | StrOutputParser()

# Run the chain on the current `query`.
answer = rag_chain.invoke(query)

print(f"Query (RAG): {query}")
print(f"Answer (RAG): {answer}")

Query (RAG): What's the impact of COVID-19?
Answer (RAG): The text discusses how COVID-19 can be seen as an exemplar of our ambivalent relationship with alcohol and its consequences. It highlights two groups that need attention: those already struggling with alcohol dependence, who may find online services more appealing than others who lack technology or privacy.

To answer your question directly, the impact of COVID-19 is multifaceted and can be seen in various aspects:

1. **Public health**: The pandemic has led to a significant increase in cases of alcohol-related problems, such as binge drinking and excessive consumption.
2. **Social and economic impacts**: The shift to online services may have exacerbated existing social issues, like isolation and loneliness, which are often linked to alcohol use.
3. **Mental health**: COVID-19 has been linked to increased rates of anxiety, depression, and other mental health concerns, potentially related to the stress and uncertainty caused by t