In [None]:
from getpass import getpass

from huggingface_hub import login

# Get a token: https://huggingface.co/docs/api-inference/quicktour#get-your-api-token
# Prompt for the token instead of hard-coding it, so the secret never ends up
# in the saved notebook.
HUGGINGFACEHUB_API_TOKEN = getpass("Hugging Face API token: ")

# Log in with the token that was just entered. The original call passed an
# empty string, which is not a usable token.
login(token=HUGGINGFACEHUB_API_TOKEN)

In [None]:
# template = """A chat between a curious user and an artificial intelligence assistant. """\
#            """The assistant gives helpful, detailed, and polite answers to the user's questions.\nHuman: {question}\nAssistant:\n"""

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Tokenizer and causal-LM weights come from the same SOLAR checkpoint.
tokenizer = AutoTokenizer.from_pretrained(
    "Upstage/SOLAR-10.7B-v1.0",
)
# Load the weights as float16 with device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    "Upstage/SOLAR-10.7B-v1.0",
    torch_dtype=torch.float16,
    device_map="auto",
)

In [None]:
text = "한국의 수도는 어디인가요? "
# Move the encoded prompt to the model's device. The model was loaded with
# device_map="auto", so tensors left on the CPU can end up on a different
# device than the weights.
inputs = tokenizer(text, return_tensors="pt").to(model.device)

# Generate at most 64 new tokens and print the decoded sequence without
# special tokens.
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))


In [None]:
import os
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Checkpoint to load. Alternatives that were tried:
#   'beomi/Llama-3-Open-Ko-8B-Instruct-preview'
#   'yanolja/EEVE-Korean-2.8B-v1.0'
model_id = 'yanolja/EEVE-Korean-10.8B-v1.0'

tokenizer = AutoTokenizer.from_pretrained(model_id)

# Both the dtype and the device map are left on "auto".
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype="auto",
)
# Put the model in evaluation mode; as the cell's last expression the returned
# model is also displayed.
model.eval()

In [None]:
# System prompt and user instruction.
# The original cell assigned PROMPT/instruction twice (the first pair was dead
# code), and the effective PROMPT ended with a "Human: {prompt}\nAssistant:\n"
# template whose placeholder was never filled, so the literal "{prompt}" leaked
# into the system message. The roles are supplied by `messages` below, so only
# the system text is kept.
PROMPT = "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions."
instruction = '한국의 수도는 어디인가요? 아래 선택지 중 골라주세요.\n\n(A) 경성\n(B) 부산\n(C) 평양\n(D) 서울\n(E) 전주'

messages = [
    {"role": "system", "content": PROMPT},
    {"role": "user", "content": instruction},
]

# Render the conversation with the tokenizer's chat template and place the
# token ids on the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    return_tensors="pt"
).to(model.device)

# Always stop on the tokenizer's EOS token. "<|eot_id|>" is added only when the
# vocabulary really contains it: for other tokenizers the lookup yields None or
# the unknown-token id, neither of which may be used as a stop token.
terminators = [tokenizer.eos_token_id]
eot_id = tokenizer.convert_tokens_to_ids("<|eot_id|>")
if eot_id is not None and eot_id != tokenizer.unk_token_id:
    terminators.append(eot_id)

outputs = model.generate(
    input_ids,
    max_new_tokens=512,
    eos_token_id=terminators,
    do_sample=True,
    temperature=1,
    top_p=0.9,
)

# Drop the prompt tokens so only the newly generated continuation is printed.
response = outputs[0][input_ids.shape[-1]:]
print(tokenizer.decode(response, skip_special_tokens=True))

In [None]:
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

import time
from pprint import pprint


# Download the 4-bit (Q4_K_M) GGUF file from the Hugging Face Hub.
model_name_or_path = "MLP-KTLim/llama-3-Korean-Bllossom-8B-gguf-Q4_K_M" # repo id
model_basename = "llama-3-Korean-Bllossom-8B-Q4_K_M.gguf" # file name

# Alternative checkpoint:
# model_name_or_path = "teddylee777/Llama-3-Open-Ko-8B-gguf" # repo id
# model_basename = "Llama-3-Open-Ko-8B-Q8_0.gguf" # file name

model_path = hf_hub_download(repo_id=model_name_or_path, filename=model_basename)
#print(model_path)

# CPU-only alternative:
# lcpp_llm = Llama(
#     model_path=model_path,
#     n_threads=-1,
#     )

# GPU configuration.
# The original call also passed `device=1`, which is not a `Llama` option and
# was silently ignored. To choose a GPU, pass `main_gpu=<index>` or restrict
# the visible devices with CUDA_VISIBLE_DEVICES.
lcpp_llm = Llama(
    model_path=model_path,
    n_threads=20, # CPU cores
    n_batch=512, # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.
    n_gpu_layers=-1, # Change this value based on your model and your GPU VRAM pool.
    n_ctx=4096, # Context window
)

In [None]:
# Single-turn prompt: a fixed preamble followed by the user's question.
prompt_template = "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.\nHuman: {prompt}\nAssistant:\n"
text = '한국의 수도는 어디인가요? 아래 선택지 중 골라주세요.\n\n(A) 경성\n(B) 부산\n(C) 평양\n(D) 서울\n(E) 전주'

prompt = prompt_template.format_map({"prompt": text})

# Time one completion call, then show the response and the elapsed seconds.
start = time.time()
response = lcpp_llm(
    prompt=prompt,
    echo=True,       # return the prompt together with the completion
    stop=['</s>'],   # stop as soon as this string is produced
    top_k=50,
    top_p=0.95,
    temperature=0.5,
    max_tokens=256,
)
pprint(response)
print(time.time() - start)

In [None]:
import os

# Export the token entered earlier under the HUGGINGFACEHUB_API_TOKEN
# environment variable.
os.environ.update(HUGGINGFACEHUB_API_TOKEN=HUGGINGFACEHUB_API_TOKEN)

In [None]:
from langchain_community.llms import HuggingFaceEndpoint
from langchain.chains import LLMChain
from langchain_core.prompts import PromptTemplate

In [None]:
# Example question and a two-line "think step by step" prompt template.
question = "Who won the FIFA World Cup in the year 1994? "
template = (
    "Question: {question}\n"
    "Answer: Let's think step by step."
)
prompt = PromptTemplate.from_template(template)

In [None]:
from langchain import LLMChain
from langchain.prompts import PromptTemplate
from langchain.llms import HuggingFacePipeline

# Hugging Face model ID to run through a local transformers pipeline.
# Alternative: 'yanolja/EEVE-Korean-10.8B-v1.0'
# (The unused GGUF repo id that was assigned here has been dropped; the
# llama.cpp cells define their own `model_name_or_path`.)
model_id = "yanolja/EEVE-Korean-2.8B-v1.0"

# Create the HuggingFacePipeline object.
llm = HuggingFacePipeline.from_model_id(
    model_id=model_id,
    device=-1,               # -1: CPU (default); 0 and above select that CUDA device for inference
    task="text-generation",  # text generation
    # Generation settings go in `pipeline_kwargs`; `model_kwargs` is forwarded
    # to the model/tokenizer loaders, where these values were misplaced.
    # NOTE(review): temperature only matters when sampling; add
    # "do_sample": True here if sampling is intended.
    pipeline_kwargs={"temperature": 0.1,
                     "max_length": 64},
)

# Template ("Question: ... / Answer: ").
template = """질문: {question}

답변: """

# Create the prompt template.
prompt = PromptTemplate.from_template(template)

# Create the LLM chain object.
llm_chain = LLMChain(prompt=prompt, llm=llm)

In [None]:
# Ask one question ("The capital of South Korea?") and print the chain's answer.
question = "대한민국의 수도?"
print(llm_chain.run(question=question))

In [None]:
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

import time
from pprint import pprint

# Fetch the 4-bit (Q4_K_M) GGUF file from the Hugging Face Hub.
model_name_or_path = "heegyu/EEVE-Korean-Instruct-10.8B-v1.0-GGUF" # repo id
model_basename = "ggml-model-Q4_K_M.gguf" # file name
model_path = hf_hub_download(
    filename=model_basename,
    repo_id=model_name_or_path,
)
#print(model_path)

# CPU-only variant (disabled):
# lcpp_llm = Llama(
#     model_path=model_path,
#     n_threads=2,
#     )

# GPU variant: run the code below to use the GPU.
lcpp_llm = Llama(
    model_path=model_path,
    verbose=True,
    n_ctx=4096,        # context window
    n_gpu_layers=-1,   # tune to your model and available GPU VRAM
    n_batch=512,       # between 1 and n_ctx; depends on available VRAM
    n_threads=2,       # CPU cores
)


# Preamble plus the user's multiple-choice question, rendered as one prompt.
prompt_template = "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.\nHuman: {prompt}\nAssistant:\n"
text = '한국의 수도는 어디인가요? 아래 선택지 중 골라주세요.\n\n(A) 경성\n(B) 부산\n(C) 평양\n(D) 서울\n(E) 전주'

prompt = prompt_template.format_map({"prompt": text})

# Run a single completion and report how long it took.
start = time.time()
response = lcpp_llm(
    prompt=prompt,
    echo=True,       # return the prompt together with the completion
    stop=['</s>'],   # stop as soon as this string is produced
    top_k=50,
    top_p=0.95,
    temperature=0.5,
    max_tokens=256,
)
pprint(response)
print(time.time() - start)


In [None]:
# Same single-turn prompt as before: preamble + the user's question.
prompt_template = "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.\nHuman: {prompt}\nAssistant:\n"
text = '한국의 수도는 어디인가요? 아래 선택지 중 골라주세요.\n\n(A) 경성\n(B) 부산\n(C) 평양\n(D) 서울\n(E) 전주'

prompt = prompt_template.format_map({"prompt": text})

# Time the completion call; print the response, then the elapsed seconds.
start = time.time()
response = lcpp_llm(
    prompt=prompt,
    echo=True,       # return the prompt together with the completion
    stop=['</s>'],   # stop as soon as this string is produced
    top_k=50,
    top_p=0.95,
    temperature=0.5,
    max_tokens=256,
)
pprint(response)
print(time.time() - start)

In [None]:
set CUDA_DOCKER_ARCH=all
set LLAMA_CUBLAS=1
set CMAKE_ARGS=-DLLAMA_CUBLAS=on
set FORCE_CMAKE=1
pip install llama-cpp-python --force-reinstall --upgrade --no-cache-dir

In [None]:
# !CMAKE_ARGS="-DLLAMA_CUDA=on" pip install llama-cpp-python
# !huggingface-cli download MLP-KTLim/llama-3-Korean-Bllossom-8B-gguf-Q4_K_M --local-dir='~/.cache/huggingface/hub/'

from llama_cpp import Llama
from transformers import AutoTokenizer

import os

model_id = 'MLP-KTLim/llama-3-Korean-Bllossom-8B-gguf-Q4_K_M'
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = Llama(
    # "~" is not expanded when the path is opened, so resolve it to the user's
    # home directory explicitly; otherwise the file is reported as missing.
    model_path=os.path.expanduser('~/.cache/huggingface/hub/llama-3-Korean-Bllossom-8B-Q4_K_M.gguf'),
    n_ctx=512,
    n_gpu_layers=-1        # Number of model layers to offload to GPU
)

# Bilingual (Korean + English) system prompt.
PROMPT = \
'''당신은 유용한 AI 어시스턴트입니다. 사용자의 질의에 대해 친절하고 정확하게 답변해야 합니다.
You are a helpful AI assistant, you'll need to answer users' queries in a friendly and accurate manner.'''

instruction = 'Your Instruction'

messages = [
    {"role": "system", "content": f"{PROMPT}"},
    {"role": "user", "content": f"{instruction}"}
    ]

# Render the conversation to a plain string; llama.cpp does the tokenizing.
prompt = tokenizer.apply_chat_template(
    messages,
    tokenize = False,
    add_generation_prompt=True
)

generation_kwargs = {
    "max_tokens":512,
    "stop":["<|eot_id|>"],
    "top_p":0.9,
    "temperature":0.6,
    "echo":True, # Echo the prompt in the output
}

# The prompt is echoed back, so slice it off and print only the completion.
resonse_msg = model(prompt, **generation_kwargs)
print(resonse_msg['choices'][0]['text'][len(prompt):])


In [None]:
# Fixed preamble followed by the user's question, as one prompt string.
prompt_template = "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.\nHuman: {prompt}\nAssistant:\n"
text = '한국의 수도는 어디인가요? 아래 선택지 중 골라주세요.\n\n(A) 경성\n(B) 부산\n(C) 평양\n(D) 서울\n(E) 전주'

prompt = prompt_template.format_map({"prompt": text})

# One timed completion call against the currently loaded `lcpp_llm`.
start = time.time()
response = lcpp_llm(
    prompt=prompt,
    echo=True,       # return the prompt together with the completion
    stop=['</s>'],   # stop as soon as this string is produced
    top_k=50,
    top_p=0.95,
    temperature=0.5,
    max_tokens=256,
)
pprint(response)
print(time.time() - start)

In [None]:
from langchain_community.llms import LlamaCpp
from langchain_core.callbacks import CallbackManager, StreamingStdOutCallbackHandler
from langchain_core.prompts import PromptTemplate

import os

# Prompt: fixed preamble, a "Korean only" instruction, then the user's question.
template = """A chat between a curious user and an artificial intelligence assistant. """\
           """The assistant gives helpful, detailed, and polite answers to the user's questions.\n Use only Korean.\n"""\
           """Human: {question}\nAssistant:\n"""

prompt = PromptTemplate.from_template(template)
# Callbacks support token-wise streaming
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

n_gpu_layers = -1  # The number of layers to put on the GPU. The rest will be on the CPU. If you don't know how many layers there are, you can use -1 to move all to GPU.
n_batch = 512  # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.

# Make sure the model path is correct for your system!
llm = LlamaCpp(
    # "~" is not expanded when the file is opened, so resolve it to the home
    # directory here; otherwise the model file is reported as missing.
    model_path=os.path.expanduser("~/.huggingface/hub/models--heegyu--EEVE-Korean-Instruct-10.8B-v1.0-GGUF/snapshots/9bf4892cf2017362dbadf99bd9a3523387135362/ggml-model-Q4_K_M.gguf"),
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    max_tokens=512,
    temperature=0.5,
    top_p=0.95,
    top_k=50,
    callback_manager=callback_manager,
    # NOTE(review): the previous comment said verbose "is required" for the
    # callback manager while passing False; confirm which setting is wanted.
    verbose=False,
)

# Pipe the prompt into the model and ask a small arithmetic word problem.
llm_chain = prompt | llm
question = """사과 한 박스에는 사과가 30개 들어있는데, 처음에는 사과 3박스가 있었고, 내가 사과 5개를 먹었어. 남은 사과는 총 몇개야?"""
llm_chain.invoke({"question": question})

In [None]:
from langchain_community.llms import LlamaCpp
from langchain_core.callbacks import CallbackManager, StreamingStdOutCallbackHandler
from langchain_core.prompts import PromptTemplate

# GPU offload and batching settings.
n_gpu_layers = -1  # -1 moves all layers to the GPU; otherwise the remainder stays on the CPU.
n_batch = 512      # Between 1 and n_ctx; depends on how much VRAM is available.

# Adjust the model path for your system. No streaming callback is attached in
# this cell.
llm = LlamaCpp(
    model_path="/work/.huggingface/hub/models--heegyu--EEVE-Korean-Instruct-10.8B-v1.0-GGUF/snapshots/9bf4892cf2017362dbadf99bd9a3523387135362/ggml-model-Q4_K_M.gguf",
    verbose=False,
    stop=['</s>'],
    top_k=50,
    top_p=0.95,
    temperature=1.0,
    max_tokens=1024,
    n_batch=n_batch,
    n_gpu_layers=n_gpu_layers,
)

# Send the raw question (no prompt template) and print the answer with a label.
question = """사과 한 박스에는 사과가 30개 들어있는데, 처음에는 사과 3박스가 있었고, 내가 사과 5개를 먹었어. 남은 사과는 총 몇개야?"""

print(f'[답변]: {llm.invoke(question)}')

In [None]:
# !huggingface-cli download MLP-KTLim/llama-3-Korean-Bllossom-8B-gguf-Q4_K_M

from llama_cpp import Llama
from transformers import AutoTokenizer

model_id = 'MLP-KTLim/llama-3-Korean-Bllossom-8B-gguf-Q4_K_M'
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = Llama(
    model_path='/work/.huggingface/hub/models--MLP-KTLim--llama-3-Korean-Bllossom-8B-gguf-Q4_K_M/snapshots/a2084328644c0b4438a9ac1935cbb34d7801af7c/llama-3-Korean-Bllossom-8B-Q4_K_M.gguf',
    #n_batch=512,
    # Set the context window explicitly: the library default (512 tokens) is
    # smaller than the 2048 tokens requested below, which would cut the answer
    # short. 4096 matches the other llama.cpp cells in this notebook.
    n_ctx=4096,
    n_gpu_layers=-1        # Number of model layers to offload to GPU
)

# Bilingual (Korean + English) system prompt.
PROMPT = \
'''당신은 유용한 AI 어시스턴트입니다. 사용자의 질의에 대해 친절하고 정확하게 답변해야 합니다.
You are a helpful AI assistant, you'll need to answer users' queries in a friendly and accurate manner.'''

instruction = '''사과 한 박스에는 사과가 30개 들어있는데, 처음에는 사과 3박스가 있었고, 내가 사과 5개를 먹었어. 남은 사과는 총 몇개야?'''

messages = [
    {"role": "system", "content": f"{PROMPT}"},
    {"role": "user", "content": f"{instruction}"}
    ]

# Render the conversation to a plain string; llama.cpp does the tokenizing.
prompt = tokenizer.apply_chat_template(
    messages,
    tokenize = False,
    add_generation_prompt=True
)

generation_kwargs = {
    "max_tokens":2048,
    "top_k": 50,
    "top_p":0.9,
    "temperature":0.9,
    "stop":["<|eot_id|>"],
    "echo":True, # Echo the prompt in the output
}

# The prompt is echoed back, so slice it off and print only the completion.
resonse_msg = model(prompt, **generation_kwargs)
print(resonse_msg['choices'][0]['text'][len(prompt):])


In [1]:
import bs4
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough


In [None]:
# Load the news article, split it into chunks, and index it (this cell does the
# loading). Only <div> elements whose class matches one of the listed names are
# parsed.
loader = WebBaseLoader(
    web_paths=("https://n.news.naver.com/mnews/article/016/0002315162",),
    bs_kwargs={
        "parse_only": bs4.SoupStrainer(
            "div",
            attrs={
                "class": [
                    "newsct_article _article_body",
                    "media_end_head_title",
                ],
            },
        ),
    },
)
docs = loader.load()
# Report the number of loaded documents, then display them.
print(f"문서의 수: {len(docs)}")
docs

In [3]:
# Split the loaded documents into chunks of up to 1000 characters that overlap
# by 50.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=50,
    chunk_size=1000,
)
splits = text_splitter.split_documents(docs)

In [None]:
# Build the vector store.
# Load the embedding model. It is imported from `langchain_community`, the
# package this notebook already uses for its other integrations; the
# `langchain.embeddings` import path is deprecated.
from langchain_community.embeddings import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings(
    model_name="jhgan/ko-sroberta-multitask", encode_kwargs={'normalize_embeddings': True})

# Embed the chunks and index them with FAISS.
vectorstore = FAISS.from_documents(documents=splits, embedding=embeddings)

# Retriever used to look up information contained in the news article.
retriever = vectorstore.as_retriever()

In [5]:
def format_docs(docs):
    """Merge retrieved documents into one text block.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` values joined by a blank line, in input order.
    """
    contents = [doc.page_content for doc in docs]
    separator = "\n\n"
    return separator.join(contents)

In [None]:
from langchain_community.llms import LlamaCpp
from langchain_core.callbacks import CallbackManager, StreamingStdOutCallbackHandler
from langchain_core.prompts import PromptTemplate

# Prompt for retrieval-augmented answering. The retrieved text is injected
# through {context}. The original template had no such placeholder, so the
# chain's "context" input was silently dropped and the model never saw the
# article.
template = """A chat between a curious user and an artificial intelligence assistant. """\
           """The assistant gives helpful, detailed, and polite answers to the user's questions.\n"""\
           """Use the following context to answer the question.\n"""\
           """Context:\n{context}\n"""\
           """Human: {question}\nAssistant:\n"""

prompt = PromptTemplate.from_template(template)
# Callbacks support token-wise streaming
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

n_gpu_layers = -1  # The number of layers to put on the GPU. The rest will be on the CPU. If you don't know how many layers there are, you can use -1 to move all to GPU.
n_batch = 512  # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.

# Make sure the model path is correct for your system!
llm = LlamaCpp(
    model_path="/work/.huggingface/hub/models--heegyu--EEVE-Korean-Instruct-10.8B-v1.0-GGUF/snapshots/9bf4892cf2017362dbadf99bd9a3523387135362/ggml-model-Q4_K_M.gguf",
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    # The prompt now carries the retrieved chunks, so set the context window
    # explicitly; 4096 matches the llama.cpp cells elsewhere in this notebook.
    n_ctx=4096,
    max_tokens=1024,
    temperature=0.1,
    top_p=0.95,
    top_k=50,
    stop=['</s>'],
    callback_manager=callback_manager,
    # NOTE(review): the previous comment said verbose "is required" for the
    # callback manager while passing False; confirm which setting is wanted.
    verbose=False,
)

# Retrieve documents for the question, merge them into one context string,
# fill the prompt, run the model, and parse the output to a string.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

rag_chain.invoke("SH공사에서 방화 스카이포레라는 상표에 대해 알려줘.")

In [None]:
# Ask the RAG chain a second question.
rag_chain.invoke("LH에서 창원에 공급하는 브랜드 이름이 무엇인가요?")

In [None]:
# pip install transformers==4.41.1
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the aya-23 tokenizer and model from the same checkpoint.
model_id = "CohereForAI/aya-23-8B"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

# Single user turn, rendered through the tokenizer's chat template.
messages = [
    {
        "role": "user",
        "content": "Anneme onu ne kadar sevdiğimi anlatan bir mektup yaz",
    },
]
input_ids = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    return_tensors="pt",
    tokenize=True,
)
# Rendered prompt, as recorded in the original comment:
## <BOS_TOKEN><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Anneme onu ne kadar sevdiğimi anlatan bir mektup yaz<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>

# Sample up to 100 new tokens at temperature 0.3.
gen_tokens = model.generate(
    input_ids,
    temperature=0.3,
    do_sample=True,
    max_new_tokens=100,
)

# Decode the whole returned sequence (special tokens included) and print it.
gen_text = tokenizer.decode(gen_tokens[0])
print(gen_text)


In [1]:
from langchain_community.llms import LlamaCpp
from langchain_core.callbacks import CallbackManager, StreamingStdOutCallbackHandler
from langchain_core.prompts import PromptTemplate

# Stream generated tokens to stdout through a callback handler.
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

# GPU offload and batching settings.
n_gpu_layers = -1  # -1 moves all layers to the GPU; otherwise the remainder stays on the CPU.
n_batch = 512      # Between 1 and n_ctx; depends on how much VRAM is available.

# Adjust the model path for your system.
model = LlamaCpp(
    model_path="/work/.huggingface/hub/models--heegyu--EEVE-Korean-Instruct-10.8B-v1.0-GGUF/snapshots/9bf4892cf2017362dbadf99bd9a3523387135362/ggml-model-Q4_K_M.gguf",
    verbose=False,
    echo=True,
    stop=['</s>'],
    callback_manager=callback_manager,
    top_k=50,
    top_p=0.95,
    temperature=0.5,
    max_tokens=2048,
    n_batch=n_batch,
    n_gpu_layers=n_gpu_layers,
)

In [None]:
# Define the template: a Korean instruction that asks the model to explain
# {question} and to answer using the fixed FORMAT section.
template = """
당신은 친절하게 답변해 주는 친절 봇입니다. 사용자의 질문에 [FORMAT]에 맞추어 답변해 주세요.
답변은 항상 한글로 작성해 주세요.

질문:
{question}에 대하여 설명해 주세요.

FORMAT:
- 개요:
- 예시:
- 출처:
"""
# Build the PromptTemplate and display it as the cell's output.
prompt_template = PromptTemplate.from_template(template)
prompt_template

In [None]:
# Fill the template's only variable, `question`. The original call passed
# `country`, which the template does not define, so formatting raised KeyError.
prompt = prompt_template.format(question='대한민국')
prompt

In [2]:
# Create the string output parser.
from langchain_core.output_parsers import StrOutputParser
output_parser = StrOutputParser()

In [None]:
# Superseded example: a prompt template asking for the capital of a given country.
# prompt = PromptTemplate.from_template("{country}의 수도는 어디인가요?")

# Prompt template that asks the model to explain {question} using the fixed FORMAT section.
template = """
당신은 친절하게 답변해 주는 친절 봇입니다. 사용자의 질문에 [FORMAT]에 맞추어 답변해 주세요.
답변은 항상 한글로 작성해 주세요.

질문:
{question}에 대하여 설명해 주세요.

FORMAT:
- 개요:
- 예시:
- 출처:
"""

# Alternative template (disabled), kept verbatim:
# template = """
# 당신은 영어를 가르치는 10년차 영어 선생님입니다. 상황에 [FORMAT]에 영어 회화를 작성해 주세요.

# 상황:
# {question}

# FORMAT:
# - 영어 회화:
# - 한글 해석:
# """


prompt = PromptTemplate.from_template(template)


# Initialise the string output parser.
output_parser = StrOutputParser()

# Connect the prompt, the model, and the output parser into one chain.
chain = prompt | model | output_parser

# Run the chain with `question` filled in and print the result.
# Earlier variant: chain.invoke({"country": "대한민국"})
print(chain.invoke({"question": "저는 식당에 가서 음식을 주문하고 싶어요"}))


In [None]:
# Create the prompt as a PromptTemplate object.
prompt = PromptTemplate.from_template("{topic} 에 대해 쉽게 설명해주세요.")

# Template variables for this run. The dict is named `prompt_input` rather than
# `input` so the builtin `input()` is not shadowed for the rest of the session.
prompt_input = {"topic": "양자역학"}

# Render the prompt on its own by passing the variables to `invoke`.
prompt.invoke(prompt_input)

# Connect the prompt and the model with the pipe (|) operator and invoke the
# pair with the same variables; the model's output is the cell's result.
(prompt | model).invoke(prompt_input)

In [None]:
# runnable 프로토콜
# stream: 응답의 청크를 스트리밍함
# invoke: 입력에 대한 체인을 호출함
# batch: 입력 목록에 대해 체인을 호출함

In [2]:
from langchain.prompts import ChatPromptTemplate

# Chat prompt asking for a three-sentence explanation of {topic}, piped into the model.
prompt = ChatPromptTemplate.from_template('{topic} 에 대하여 3문장으로 설명해줘.')
chain = prompt | model

In [None]:
# Input schema of the chain.
chain.input_schema.schema()

In [None]:
import json

def print_schema(schema):
    """Print ``schema`` as JSON indented by four spaces.

    Args:
        schema: A JSON-serialisable object (for example a schema dict).
    """
    rendered = json.dumps(schema, indent=4)
    print(rendered)
    
# Example JSON Schema for an object with a string `name` and a non-negative
# integer `age`. `required` belongs at the object level, next to `properties`;
# the original nested it inside `properties`, which declared a third property
# called "required" instead of marking the fields as mandatory.
input_schema = {
    'type': 'object',
    'properties': {
        'name': {'type': 'string'},
        'age': {'type': 'integer', 'minimum': 0},
    },
    'required': ['name', 'age'],
}

# Print the example schema as indented JSON.
print_schema(input_schema)

In [None]:
# Turn text containing \uXXXX escape sequences into readable characters.
unicode_str = "\u3010\u697d\u5929\u5e02\u5834\u3011\u30a2\u30ab\u30a6\u30f3\u30c8\u306e\u652f\u6255\u3044\u65b9\u6cd5\u3092\u78ba\u8a8d\u3067\u304d\u305a\u3001\u6ce8\u6587\u3092\u30ad\u30e3\u30f3\u30bb\u30eb\u3067\u304d\u307e\u305b\u3093."
# Encoding with 'backslashreplace' rewrites every non-Latin-1 character as a
# \uXXXX escape and leaves existing escapes untouched, so 'unicode_escape' can
# decode both already-decoded text (like this literal) and text that still
# holds literal backslash escapes. The previous UTF-8 round trip produced
# mojibake, because 'unicode_escape' reads raw bytes as Latin-1.
actual_char = unicode_str.encode('latin-1', 'backslashreplace').decode('unicode_escape')
actual_char

In [None]:
# Show the model's input schema.
model.input_schema.schema()


In [None]:
# The chain's output schema is the output schema of its last step.
# NOTE(review): the original comment described that step as a ChatModel that
# emits ChatMessage; confirm against the `model` bound in this session (the
# definition visible in this notebook is a LlamaCpp LLM).
chain.output_schema.schema()

In [None]:
# Stream the chain's response for the topic '멀티모달' and iterate over the chunks.
for s in chain.stream({"topic": "멀티모달"}):
    # Chat models stream message chunks that expose `.content`, while plain
    # LLMs (such as the LlamaCpp `model` defined in this notebook) stream bare
    # strings. Accept both so the loop does not fail with AttributeError.
    chunk_text = getattr(s, "content", s)
    # Print without a newline and flush at once so text appears as it arrives.
    print(chunk_text, end="", flush=True)

In [None]:
# Call the chain's invoke method with a dict whose topic is 'ChatGPT'.
chain.invoke({"topic": "ChatGPT"})

In [None]:
# Run the chain over a list of topics in one batch call.
chain.batch([{"topic": "ChatGPT"}, {"topic": "Instagram"}])

In [None]:
# Run the chain over five topics in one batch call, passing
# max_concurrency=3 through `config`.
chain.batch(
    [
        {"topic": "ChatGPT"},
        {"topic": "Instagram"},
        {"topic": "멀티모달"},
        {"topic": "프로그래밍"},
        {"topic": "머신러닝"},
    ],
    config={"max_concurrency": 3},
)