In [1]:
from uuid import uuid4

import chromadb
import nltk
import pacmap
import numpy as np
import plotly.express as px
import torch
from langchain import PromptTemplate
from langchain.llms import HuggingFacePipeline
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser
from langchain_chroma import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.document_loaders.url import UnstructuredURLLoader
from langchain_community.vectorstores.utils import DistanceStrategy
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline


### 문서 로드 & 전처리

In [None]:
# Fetch the NLTK resources requested by this notebook (tokenizer + POS tagger data).
for nltk_resource in ("punkt", "averaged_perceptron_tagger"):
    nltk.download(nltk_resource)

In [None]:
version = "v4.49.0"

# Korean Transformers documentation pages to index, given as paths relative
# to the versioned "/ko" documentation root.
base_url = f"https://huggingface.co/docs/transformers/{version}/ko"
page_paths = [
    "pipeline_tutorial",
    "autoclass_tutorial",
    "preprocessing",
    "training",
    "run_scripts",
    "tokenizer_summary",
    "attention",
    "pad_truncation",
    "pipeline_webserver",
    "tasks_explained",
    "hpo_train",
    "tasks/sequence_classification",
    "tasks/token_classification",
    "tasks/question_answering",
    "tasks/language_modeling",
    "tasks/masked_language_modeling",
    "tasks/translation",
    "tasks/summarization",
]
urls = [f"{base_url}/{page_path}" for page_path in page_paths]

# Download every page and parse it into one document per URL.
loader = UnstructuredURLLoader(urls=urls, show_progress_bar=True)
docs = loader.load()

In [None]:
# Inspect the first loaded page, broken into pieces at every "→" character.
docs[0].page_content.split(sep="→")

In [None]:
"""
튜토리얼에 나온 것처럼 ToC를 사용하고 싶었는데 
UnstructuredURLLoader에서 ToC까지 가져오지 못함
"""

# Trim each page in place: keep only the text after the last
# "to get started" marker and before the first "< > Update on GitHub" marker.
# A page that lacks a marker is left unchanged on that side.
for doc in docs:
    _, _, after_header = doc.page_content.rpartition("to get started\n\n")
    before_footer, _, _ = after_header.partition("< > Update on GitHub\n\n")
    doc.page_content = before_footer

In [None]:
# Sentence-embedding model used for both indexing and querying.
# NOTE(review): the indexed pages are Korean; confirm this checkpoint handles
# Korean text well enough for retrieval.
embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"

# `normalize_embeddings=True` is forwarded to the encoder through `encode_kwargs`.
embedding_model = HuggingFaceEmbeddings(
    encode_kwargs=dict(normalize_embeddings=True),
    model_name=embedding_model_name,
)

In [None]:
collection_name = "collection_huggingface_transformer"

# Chroma vector store persisted to ./chroma.db; texts are embedded with
# `embedding_model` when they are added or queried.
vector_store = Chroma(
    persist_directory="./chroma.db",
    embedding_function=embedding_model,
    collection_name=collection_name,
)

In [None]:
# One fresh random UUID string per document, used as its id in the store.
uuids = [str(uuid4()) for _doc in docs]

# Embed and store the documents; the cell displays the value returned by add_documents.
vector_store.add_documents(docs, ids=uuids)

In [None]:
# Sanity-check what is stored. Only ids and metadata are requested so the cell
# output does not contain the full text of every indexed page.
vector_store.get(include=["metadatas"])

In [None]:
# Look up a single stored document by id. The ids are regenerated with uuid4()
# every time the notebook runs, so use one from `uuids` rather than a
# hard-coded value copied from an earlier session.
vector_store.get(uuids[0])

In [None]:
# Maintenance cell: wipe the persisted collection.
# Deleting unconditionally would break every later cell on "Restart & Run All",
# because they all query `vector_store`. Flip the flag to True only when you
# want to start over, then re-run the cells that create and fill the store.
RESET_COLLECTION = False

if RESET_COLLECTION:
    vector_store.delete_collection()

In [None]:
user_query = "pipeline이 무엇인지 알려줘."

# `similarity_search` returns a plain list of documents, which cannot be
# composed with `|` in the chain below. Build a real retriever (a Runnable)
# that returns the 2 most similar documents for whatever query it receives.
retriever = vector_store.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 2},
)

# Quick check with the sample query: show only which pages were retrieved.
[doc.metadata.get("source") for doc in retriever.invoke(user_query)]

### 프롬프트 & 체인 준비

In [None]:
# The reader model used in this notebook (yanolja/EEVE-Korean-Instruct) is not a
# Llama-3 model, so Llama-3 header tokens such as <|start_header_id|> would be
# tokenized as ordinary text. Its model card documents a plain-text
# "Human: ... / Assistant:" chat format, which is used here instead.
# The template is assembled line by line so no source-code indentation leaks
# into the prompt.
template = (
    "A chat between a curious user and an artificial intelligence assistant. "
    "The assistant gives helpful, detailed, and polite answers to the user's questions.\n"
    "Human: 당신은 QA(Question-Answering)을 수행하는 Assistant입니다. "
    "다음의 Context를 이용하여 Question에 답변하세요.\n"
    "최소 3문장 최대 5문장으로 답변하세요.\n"
    '주어진 Context가 없다면 "정보가 부족하여 답변할 수 없습니다."를 출력하세요.\n'
    "\n"
    "Context: {context}\n"
    "\n"
    "Question: {question}\n"
    "Assistant:\n"
)

prompt = PromptTemplate(input_variables=["context", "question"], template=template)
prompt.pretty_print()

In [None]:
def format_docs(docs):
    """Merge retrieved documents into a single context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` of every document, in order, separated by a
        blank line. An empty iterable yields an empty string.
    """
    return "\n\n".join(doc.page_content for doc in docs)

READER_MODEL_NAME = "yanolja/EEVE-Korean-Instruct-10.8B-v1.0"

# 4-bit NF4 quantization with double quantization and fp16 compute.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    llm_int8_enable_fp32_cpu_offload=True
)

# The quantization config must be passed explicitly; otherwise it is ignored
# and the 10.8B-parameter model is loaded unquantized. `device_map="auto"`
# lets the weights be placed across the available devices.
model = AutoModelForCausalLM.from_pretrained(
    READER_MODEL_NAME,
    quantization_config=bnb_config,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(READER_MODEL_NAME)

# Text-generation pipeline that returns only the newly generated text.
READER_LLM = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    do_sample=True,
    temperature=0.2,
    repetition_penalty=1.1,
    return_full_text=False,
    max_new_tokens=500,  # generation argument is `max_new_tokens` (plural)
)

In [None]:
# Wrap the transformers pipeline so it can be used as a LangChain LLM.
llm = HuggingFacePipeline(pipeline=READER_LLM)

# RAG chain: the incoming question is passed through unchanged as "question",
# while "context" is produced by piping `retriever` into `format_docs`
# (`retriever` therefore has to support `|` composition, i.e. be a Runnable).
# The filled prompt goes to the LLM and the result is parsed to a string.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [None]:
# Run the whole chain on a sample question and display the generated answer.
question = "Hugging Face Transformers에서 pipeline은 어떤 매개변수를 사용해? 코드도 알려줘."
result = rag_chain.invoke(input=question)
result