https://www.youtube.com/watch?v=04jCXo5kzZE&list=PLQIgLu3Wf-q_Ne8vv-ZXuJ4mztHJaQb_v&index=12

# 자료 인스톨

In [None]:
import torch

In [None]:

# Display the installed PyTorch version as the cell's output.
torch.__version__

In [None]:
# Quietly install/upgrade bitsandbytes from the package index, and transformers,
# peft and accelerate straight from their GitHub repositories.
!pip install -q -U bitsandbytes
!pip install -q -U git+https://github.com/huggingface/transformers.git
!pip install -q -U git+https://github.com/huggingface/peft.git
!pip install -q -U git+https://github.com/huggingface/accelerate.git

# 2.양자화

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

# 4-bit quantization settings, later passed to from_pretrained through its
# `quantization_config` argument: NF4 quantization type, double quantization
# enabled, bfloat16 as the compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

In [None]:
# Install langchain, plus accelerate and bitsandbytes (the latter explicitly from pypi.org).
# NOTE(review): accelerate and bitsandbytes are also installed by the first
# install cell — confirm whether this second install is still needed.
!pip install langchain
!pip install accelerate
!pip install -i https://pypi.org/simple/ bitsandbytes

# 3.LLM 모델 구성

In [None]:
# Model identifier handed to both from_pretrained calls below.
model_id = "kyujinpy/Ko-PlatYi-6B"

# Load the causal LM with the 4-bit quantization config; device_map="auto"
# leaves device placement to the loader.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    quantization_config=bnb_config,
)

# Matching tokenizer for the same identifier.
tokenizer = AutoTokenizer.from_pretrained(model_id)


In [None]:
# Print the loaded model object for inspection.
print(model)

In [None]:
# Use the first CUDA device when one is available, otherwise fall back to CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)

# Single-turn conversation: one user message, sent to the model as written.
messages = [{"role": "user", "content": "은행의 기준 금리에 대해서 설명해줘"}]

# add_generation_prompt=True appends the chat template's assistant-turn marker,
# so the model starts an answer rather than possibly continuing the user turn.
encodeds = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    return_tensors="pt",
)
model_inputs = encodeds.to(device)

# Generate up to 1000 new tokens; do_sample=True means output varies per run.
generated_ids = model.generate(model_inputs, max_new_tokens=1000, do_sample=True)
decoded = tokenizer.batch_decode(generated_ids)

# Last expression of the cell: the notebook displays the first decoded sequence.
decoded[0]

# 4.LangChain 구성

In [None]:
import locale

def getpreferredencoding(do_setlocale=True):
    """Report UTF-8 as the preferred encoding, whatever the argument.

    ``do_setlocale`` is accepted but never read; the result is always the
    same string.
    """
    forced_encoding = "UTF-8"
    return forced_encoding


# Swap the function in on the locale module, so every later call to
# locale.getpreferredencoding() gets the fixed answer above.
locale.getpreferredencoding = getpreferredencoding

In [None]:
# !pip -q install langchain pypdf chromadb sentence-transformers faiss-gpu
# !pip install transformers==4.28.0

In [None]:
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.chains import LLMChain
from transformers import pipeline

# Wrap the already-loaded model and tokenizer in a text-generation pipeline.
text_generation_pipeline = pipeline(
    task="text-generation",  # "summarization",
    model=model,
    tokenizer=tokenizer,
    # temperature only influences sampling-based decoding, so sampling has to be
    # switched on explicitly for the 0.2 setting to take effect.
    do_sample=True,
    temperature=0.2,
    return_full_text=True,  # output contains the prompt followed by the completion
    max_new_tokens=300,
)

# Yi prompt (https://replicate.com/01-ai/yi-34b-chat)
#  <|im_start|>system
#  You are a helpful assistant<|im_end|>
#  <|im_start|>user
#  {prompt}<|im_end|>
#  <|im_start|>assistant

# Prompt text with two placeholders: {context} and {question}.
# NOTE(review): the [INST] tags and the "/" after the opening tag differ from
# the Yi chat format quoted in the comment above — confirm this is intended.
prompt_template = """
### [INST]/
Instruction : Answer the question based on your knowledge.
Here is context to help:
{context}

### QUESTION:
{question}

[/INST]
"""

# Prompt object that fills the two placeholders of the template.
prompt = PromptTemplate(
    template=prompt_template,
    input_variables=["context", "question"],
)

# LangChain LLM wrapper around the transformers text-generation pipeline.
koplatyi_llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

# Chain that renders the prompt and sends it to the wrapped model.
llm_chain = LLMChain(prompt=prompt, llm=koplatyi_llm)


In [None]:
# Install the `unstructured` package ahead of the cells that import UnstructuredExcelLoader.
!pip install unstructured

# 5.DB Loading

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.document_loaders import PyPDFLoader # PDF document loader
from langchain.schema.runnable import RunnablePassthrough
from langchain_community.document_loaders import UnstructuredExcelLoader # Excel document loader

In [None]:
# loader = PyPDFLoader("/content/outlooks.pdf")
# # loader = PyPDFLoader("/content/economic-outlook2024.pdf")
# pages = loader.load_and_split()

# text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
# texts = text_splitter.split_documents(pages)

# Create the Excel loader for the news spreadsheet (the mode="elements" option is left disabled).
loader = UnstructuredExcelLoader("/content/news_2024-03-08_99_99.xlsx") #, mode="elements")
# Last expression of the cell: display the loader object.
loader


In [None]:
# Read the spreadsheet into documents.
docs = loader.load()

# Split the documents with chunk_size=600 and chunk_overlap=100.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=100,
    chunk_size=600,
)
texts = text_splitter.split_documents(docs)

# Preview the first five chunks, each followed by a blank line.
for tx in texts[:5]:
    display(tx)
    print()

## 6.임베딩 구성 및 Data

In [None]:
from langchain.embeddings import HuggingFaceEmbeddings

# Embedding model identifier handed to HuggingFaceEmbeddings below.
model_name = "jhgan/ko-sbert-nli"
# model_name = "digit82/kobart-summarization"

# Encoding options: ask for normalized embeddings.
encode_kwargs = {"normalize_embeddings": True}

hf = HuggingFaceEmbeddings(encode_kwargs=encode_kwargs, model_name=model_name)

# from transformers import PreTrainedTokenizerFast, BartForConditionalGeneration, AutoTokenizer, BertTokenizer
# tokenizer = AutoTokenizer.from_pretrained('hyunwoongko/kobart')

In [None]:
%%time
# Build the FAISS vector store from the chunks in `texts`, using `hf` as the embedding model.
db = FAISS.from_documents(texts,hf)

# CPU times: user 1min 1s, sys: 3.94 s, total: 1min 4s
# Wall time: 1min 6s

# GPU times: user 7.33 s, sys: 7.51 ms, total: 7.33 s
# Wall time: 7.34 s

In [None]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate

# Similarity search returning the top 3 chunks per query. The keywords must be
# spelled `search_type` / `search_kwargs`; misspelled names are not recognized
# by the retriever, so the requested k would not be applied.
retriever = db.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)

# prompt = ChatPromptTemplate.from_template(template)
# context:  determined by the retriever from the incoming query
# question: the incoming query itself, forwarded by RunnablePassthrough
# Both values are then handed to llm_chain for processing.
rag_chain = {"context": retriever, "question": RunnablePassthrough()} | llm_chain



In [None]:
import warnings
# Suppress all warnings from this point on.
warnings.filterwarnings('ignore')

# Send one Korean question through the RAG chain; the result is read below
# through its 'text' (answer) and 'context' (retrieved documents) keys.
result = rag_chain.invoke("가상화폐관련 뉴스는 무엇인가?")
print(f"**답변**\n>>>>>{result['text']}\n\n")

# Print each retrieved document's content and its 'source' metadata. The loop
# variable `i` stays bound after the loop and is inspected in the next cell.
for i in result['context']:
  # print(i)
  print(f" ■ 근거: {i.page_content}\n ■ 출처: {i.metadata['source']} \n") # - {i.metadata['page']} \n\n")

In [None]:
# Display the last retrieved document, still bound to `i` from the loop in the previous cell.
i

In [None]:
model_id = "kyujinpy/Ko-PlatYi-6B"

# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)

# NOTE(review): this repeats the tokenizer/model load from the
# "3.LLM 모델 구성" cell, this time passing `device` as device_map instead of
# "auto" — confirm the second load is intended.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map=device,
)
# print(model)

In [None]:
from langchain.memory import ConversationSummaryBufferMemory
from langchain.chat_models import ChatOpenAI
from langchain.schema.runnable import RunnablePassthrough
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder

In [None]:
# generated_ids
# decoded

# Conversation memory configured with a 400-token limit. Its `llm` argument must
# be a LangChain language model, so the HuggingFacePipeline wrapper
# (`koplatyi_llm`, created in the LangChain setup cell) is passed instead of the
# raw transformers `model`.
memory = ConversationSummaryBufferMemory(
    llm=koplatyi_llm,
    max_token_limit=400,
    memory_key="chat_history",
    return_messages=True,
)


def load_memory(input):
    """Return the stored chat history for injection into the prompt.

    Args:
        input: The chain input passed in by ``RunnablePassthrough.assign``;
            it is printed for debugging and otherwise unused.

    Returns:
        The value stored under the memory's ``chat_history`` key.
    """
    print(input)
    return memory.load_memory_variables({})["chat_history"]


# Chat prompt: fixed system message, the accumulated history, then the new question.
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful AI talking to human"),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{question}"),
])

# No name `llm` is defined anywhere in this notebook; the chain ends in the
# same `koplatyi_llm` wrapper that backs the memory above.
chain = RunnablePassthrough.assign(chat_history=load_memory) | prompt | koplatyi_llm


def invoke_chain(question):
    """Run one conversational turn, store it in memory, and print the result.

    Args:
        question: The user's message for this turn.
    """
    result = chain.invoke({"question": question})
    # Chat models return a message object exposing `.content`, while plain LLM
    # wrappers such as HuggingFacePipeline return a string; accept both.
    answer = result if isinstance(result, str) else result.content
    memory.save_context(
        {"input": question},
        {"output": answer},
    )
    print(result)


In [None]:

invoke_chain("My name is nam.")
invoke_chain("What's my name?")