## 프롬프트 생성

In [35]:
from langchain import PromptTemplate

# Prompt text with a single {product} placeholder.
template = "{product}를 홍보하기 위한 좋은 문구를 추천해줘."

# Wrap the text in a PromptTemplate that names `product` as its only input.
prompt = PromptTemplate(input_variables=["product"], template=template)

# Render the prompt for one concrete product; the notebook displays the result.
prompt.format(product="카메라")

'카메라를 홍보하기 위한 좋은 문구를 추천해줘.'

## LLM 호출

### OpenAI API 사용하기

In [3]:
import os

# Placeholder value only: put a real OpenAI key here locally and keep it out
# of version control.
os.environ.update({"OPENAI_API_KEY": "sk-"})

In [None]:
from langchain.chat_models import ChatOpenAI

# OpenAI chat model; temperature=0 sets the "creativity" of sampling to zero.
llm1 = ChatOpenAI(
    temperature=0,
    model_name="gpt-3.5-turbo-0125",
)

prompt = "진희는 강아지를 키우고 있습니다. 진희가 키우고 있는 동물은?"
# `predict` is deprecated; `invoke` is what the other model cells in this
# notebook call. It returns a message object, so `.content` is taken to keep
# the cell result a plain string as before.
llm1.invoke(prompt).content

### 허깅페이스 hub 모델 사용하기

In [15]:
import os

# Placeholder value only: put a real Hugging Face Hub token here locally and
# keep it out of version control.
os.environ.update({"HUGGINGFACEHUB_API_TOKEN": "hf_"})

In [None]:
from langchain import HuggingFaceHub

# Model served from the Hugging Face Hub; generation settings travel in
# `model_kwargs`.
llm2 = HuggingFaceHub(
    repo_id="google/flan-t5-xxl",
    model_kwargs=dict(temperature=0.8, max_length=512),
)

# Same question as the other model cells so the answers can be compared.
prompt = "진희는 강아지를 키우고 있습니다. 진희가 키우고 있는 동물은?"
llm2.invoke(prompt)

### Gemini AI chat models

In [2]:
import os

# Placeholder value only (empty here): put a real Google API key in locally
# and keep it out of version control.
os.environ.update({"GOOGLE_API_KEY": ""})

In [4]:

from langchain_google_genai import ChatGoogleGenerativeAI

# Gemini chat model with temperature 0.
llm3 = ChatGoogleGenerativeAI(temperature=0, model="gemini-pro")

# Same question as the other model cells; the notebook displays the returned
# message object.
prompt = "진희는 강아지를 키우고 있습니다. 진희가 키우고 있는 동물은?"
llm3.invoke(prompt)

AIMessage(content='강아지', response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'safety_ratings': [{'category': 'HARM_CATEGORY_SEXUALLY_EXPLICIT', 'probability': 'NEGLIGIBLE', 'blocked': False}, {'category': 'HARM_CATEGORY_HATE_SPEECH', 'probability': 'NEGLIGIBLE', 'blocked': False}, {'category': 'HARM_CATEGORY_HARASSMENT', 'probability': 'NEGLIGIBLE', 'blocked': False}, {'category': 'HARM_CATEGORY_DANGEROUS_CONTENT', 'probability': 'NEGLIGIBLE', 'blocked': False}]}, id='run-091066e0-3372-419f-8b87-e6dc0ecb1e01-0')

### 모델 성능 비교

In [None]:
from langchain.model_laboratory import ModelLaboratory

# Line up the three models built in the earlier cells and send them one
# shared question.
model_lab = ModelLaboratory.from_llms(
    [
        llm1,
        llm2,
        llm3,
    ]
)
model_lab.compare("대한민국의 가을은 몇 월부터 몇 월까지야?")

### 출력 파서

In [4]:
from langchain.output_parsers import CommaSeparatedListOutputParser
from langchain import PromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
# from langchain.chat_models import ChatOpenAI

# OpenAI alternative, kept disabled:
# llm = ChatOpenAI(temperature=0,
#                  max_tokens=2048,
#                  model_name="gpt-3.5-turbo-0125")
llm = ChatGoogleGenerativeAI(temperature=0, model="gemini-pro")

# The parser provides the text that tells the model which output format to use.
output_parser = CommaSeparatedListOutputParser()
format_instructions = output_parser.get_format_instructions()

# `subject` is supplied when the prompt is formatted; the format instructions
# are bound up front as a partial variable.
prompt = PromptTemplate(
    template="7개의 팀을 보여줘 {subject}.\n{format_instructions}",
    partial_variables={"format_instructions": format_instructions},
    input_variables=["subject"],
)

In [5]:
query = "한국의 야구팀은?"

# `predict` raises a deprecation warning; `invoke` is its replacement. It
# returns a message object, so `.content` is taken to get the reply text that
# the parser expects.
output = llm.invoke(prompt.format(subject=query)).content

# Turn the model's comma-separated reply into a Python list.
parsed_result = output_parser.parse(output)

print(output)
print(parsed_result)

  warn_deprecated(


두산 베어스, 삼성 라이온즈, LG 트윈스, KIA 타이거즈, 롯데 자이언츠, 한화 이글스, 키움 히어로즈
['두산 베어스', '삼성 라이온즈', 'LG 트윈스', 'KIA 타이거즈', '롯데 자이언츠', '한화 이글스', '키움 히어로즈']


## 데이터 연결

### PDF 파일 불러오기

In [7]:
from langchain.document_loaders import PyPDFLoader

# Load the PDF from the local data folder.
loader = PyPDFLoader("./data/The_Adventures_of_Tom_Sawyer.pdf")
document = loader.load()

# Show the first 100 characters of the entry at index 5 of the loaded list.
document[5].page_content[0:100]

'Chapter 1    The Fence \n \nTom Sawyer lived with his aunt because his mother and \nfather were dead. T'

### 임베딩 처리

In [None]:
from langchain.embeddings import OpenAIEmbeddings

# OpenAI embedding client with its default settings.
embeddings = OpenAIEmbeddings()

text = "진희는 강아지를 키우고 있습니다. 진희가 키우고 있는 동물은?"

# Embed the sentence; the last line displays it, now turned into a vector.
text_embedding = embeddings.embed_query(text)
text_embedding

In [58]:
from langchain.embeddings import HuggingFaceEmbeddings

# Sentence-transformers model used through the Hugging Face embeddings wrapper.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

text = "진희는 강아지를 키우고 있습니다. 진희가 키우고 있는 동물은?"

# Embed the sentence and display the resulting vector.
text_embedding = embeddings.embed_query(text)
text_embedding

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

[0.013278813101351261, 0.07225915789604187, 0.09263098984956741, -0.003979590255767107, 0.0015617485623806715, -0.10306371748447418, 0.10929881036281586, 0.055662062019109726, -0.03116741217672825, -0.0502031184732914, 0.08312951773405075, -0.008924420922994614, 0.0950632393360138, -0.06980786472558975, 0.03955896198749542, -0.10899192094802856, 0.049438703805208206, 0.037364762276411057, -0.1240922138094902, -0.0033154746051877737, 0.04840953275561333, -0.031085100024938583, 0.008206969127058983, 0.06326048076152802, -0.06804244965314865, -0.010208186693489552, 0.004926975350826979, -0.014940351247787476, -0.0014766291715204716, -0.006598887965083122, -0.040159545838832855, 0.08289805799722672, 0.014144709333777428, -0.011793539859354496, -0.09415140002965927, 0.0021563491318374872, -0.019053086638450623, -0.03773898631334305, -0.003271099179983139, 0.046856120228767395, -0.1811162233352661, -0.11718787252902985, 0.03504839166998863, -0.06848108023405075, 0.06553437560796738, 0.035228

In [8]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Google Generative AI embedding model.
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
)

text = "진희는 강아지를 키우고 있습니다. 진희가 키우고 있는 동물은?"

# Embed the sentence and display the resulting vector.
text_embedding = embeddings.embed_query(text)
text_embedding

[0.051796567,
 0.0012831187,
 -0.061716117,
 -0.019468084,
 0.06413684,
 -0.0029500334,
 -0.005008839,
 -0.035008013,
 0.028707424,
 0.040209964,
 -0.044103753,
 0.03324932,
 -0.022462213,
 0.024522226,
 -0.018280877,
 -0.05598827,
 0.02232451,
 0.011281302,
 0.0487188,
 -0.0057292846,
 0.006892528,
 0.012051351,
 -0.01913069,
 0.0087253805,
 0.017330216,
 -0.023209892,
 0.013236641,
 -0.0348444,
 -0.039317638,
 -0.0023582242,
 -0.02595366,
 0.03395715,
 -0.07083947,
 0.0037453463,
 0.0010758026,
 -0.051002707,
 -0.026740054,
 0.013611408,
 0.017121235,
 0.0035125078,
 0.03222491,
 -0.043666262,
 -0.04705308,
 0.018607069,
 -0.03519223,
 0.03237643,
 -0.028474594,
 -0.004485306,
 0.013898104,
 -0.040032804,
 0.025934827,
 0.022557713,
 0.0738044,
 -0.02703792,
 -0.018603696,
 -0.028168276,
 0.041772258,
 -0.0024704982,
 0.022514986,
 0.035138723,
 -0.010367743,
 0.010792714,
 0.029123085,
 0.023394154,
 -0.041342124,
 -0.043447718,
 -0.03078189,
 0.012571238,
 0.04819209,
 0.011711805,

#### 검색기(RetrievalQA) 활용

In [9]:
from langchain.vectorstores import FAISS
from langchain_google_genai import GoogleGenerativeAIEmbeddings
# from langchain.embeddings import OpenAIEmbeddings

# OpenAI alternative, kept disabled:
# embeddings = OpenAIEmbeddings()
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
)

# Build a FAISS store from the documents loaded in the PDF cell, embedded
# with the model above.
db = FAISS.from_documents(document, embeddings)

In [10]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.chains import RetrievalQA
# from langchain.chat_models import ChatOpenAI

# OpenAI alternative, kept disabled:
# llm = ChatOpenAI(temperature=0,
#            model_name="gpt-3.5-turbo-0125")
llm = ChatGoogleGenerativeAI(model="gemini-pro",
                             temperature=0)

# Expose the FAISS store built in the previous cell as a retriever.
retriever = db.as_retriever()

# Question-answering chain that pairs the chat model with the retriever.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
)
query = "마을 무덤에 있던 남자를 죽인 사람은 누구니?"

# Calling the chain object directly is deprecated and emits a warning;
# `invoke` takes the same input dict.
result = qa.invoke({"query": query})
result

  warn_deprecated(


{'query': '마을 무덤에 있던 남자를 죽인 사람은 누구니?',
 'result': '마을 무덤에 있던 남자를 죽인 사람은 이야기에서 언급되지 않았습니다.'}

## Chain

In [11]:
from langchain.chains import LLMChain
from langchain import PromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
# from langchain.chat_models import ChatOpenAI

# OpenAI alternative, kept disabled:
# llm = ChatOpenAI(temperature=0,
#                  model_name="gpt-3.5-turbo-0125")
llm = ChatGoogleGenerativeAI(model="gemini-pro",
                             temperature=0)

# One-variable prompt asking for the capital of {country}.
prompt = PromptTemplate(
    input_variables=["country"],
    template="{country}의 수도는 어디야?",
)

# Link the prompt and the model into a single chain.
chain = LLMChain(llm=llm, prompt=prompt)

# `run` is deprecated and emits a warning; `invoke` takes the inputs as a dict
# and returns a dict. No output_key is set on this chain, so the answer is
# read from LLMChain's default "text" entry.
chain.invoke({"country": "대한민국"})["text"]

  warn_deprecated(
  warn_deprecated(


'서울'

### SequentialChain으로 여러 체인 연결하기

In [12]:
from langchain.chains import SequentialChain

# Step 1: translate the incoming sentence into Korean; the result is published
# under the "translation" key.
prompt1 = PromptTemplate(
    input_variables=["sentence"],
    template="다음 문장을 한글로 번역하세요.\n\n{sentence}",
)
chain1 = LLMChain(llm=llm, prompt=prompt1, output_key="translation")

# Step 2: summarise the translation in one sentence. The {translation}
# placeholder matches chain1's output_key, which is how the steps connect.
prompt2 = PromptTemplate.from_template(
    "다음 문장을 한 문장으로 요약하세요.\n\n{translation}",
)
chain2 = LLMChain(llm=llm, prompt=prompt2, output_key="summary")

# chain1 followed by chain2; both intermediate and final outputs are returned.
all_chains = SequentialChain(
    chains=[chain1, chain2],
    input_variables=["sentence"],
    output_variables=["translation", "summary"],
)
sentence="""
One limitation of LLMs is their lack of contextual information (e.g., access to some specific documents or emails). You can combat this by giving LLMs access to the specific external data.
For this, you first need to load the external data with a document loader. LangChain provides a variety of loaders for different types of documents ranging from PDFs and emails to websites and YouTube videos."""

# Calling the chain object directly is deprecated; `invoke` with an explicit
# input dict is the replacement.
result = all_chains.invoke({"sentence": sentence})
result

{'sentence': '\nOne limitation of LLMs is their lack of contextual information (e.g., access to some specific documents or emails). You can combat this by giving LLMs access to the specific external data.\nFor this, you first need to load the external data with a document loader. LangChain provides a variety of loaders for different types of documents ranging from PDFs and emails to websites and YouTube videos.',
 'translation': 'LLM의 한 가지 한계는 맥락 정보가 부족하다는 것입니다(예: 특정 문서나 이메일 접근). LLM에 특정 외부 데이터에 대한 액세스 권한을 부여하여 이를 해결할 수 있습니다. 이를 위해서는 먼저 문서 로더를 사용하여 외부 데이터를 로드해야 합니다. LangChain은 PDF, 이메일, 웹사이트, YouTube 동영상 등 다양한 유형의 문서에 대한 다양한 로더를 제공합니다.',
 'summary': 'LLM의 맥락 정보 부족 한계는 LangChain의 문서 로더를 사용하여 외부 데이터에 액세스 권한을 부여하여 해결할 수 있습니다.'}

## 메모리

In [19]:
from langchain_google_genai import ChatGoogleGenerativeAI, HarmBlockThreshold, HarmCategory
from langchain import ConversationChain
# from langchain.chat_models import ChatOpenAI

# OpenAI alternative, kept disabled:
# llm = ChatOpenAI(temperature=0,
#                  model_name="gpt-3.5-turbo-0125")
llm = ChatGoogleGenerativeAI(temperature=0, model="gemini-pro")

# verbose=True makes the chain print the formatted prompt for each turn.
conversation = ConversationChain(verbose=True, llm=llm)

# Korean-language variant of the same three turns, kept disabled:
# conversation.predict(input="진희는 강아지를 한마리 키우고 있습니다.")
# conversation.predict(input="영수는 고양이를 두마리 키우고 있습니다.")
# conversation.predict(input="진희와 영수가 키우는 동물은 총 몇마리?")

# Two statements followed by a question that needs both of them; only the
# last call's return value is displayed by the notebook.
conversation.predict(input="Chris has a dog.")
conversation.predict(input="Emma has 2 cats.")
conversation.predict(input="How many animals do Chris and Emma have?")



[1m> Entering new ConversationChain chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:

Human: Chris has a dog.
AI:[0m

[1m> Finished chain.[0m


[1m> Entering new ConversationChain chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:
Human: Chris has a dog.
AI: Chris has a dog named Buddy. Buddy is a 3-year-old golden retriever. He is very friendly and loves to play fetch. Chris and Buddy often go for walks in the park.
Human: Emma has 2 cats.
AI:[0m

[1m> Finished chain.[0m




'Chris and Emma have 3 animals. Chris has a dog named Buddy, and Emma has 2 cats named Mittens and Whiskers.'

## 에이전트/툴

In [22]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.agents import load_tools
from langchain.agents import initialize_agent
from langchain.agents import AgentType
# from langchain.chat_models import ChatOpenAI

# OpenAI alternative, kept disabled:
# llm = ChatOpenAI(temperature=0,
#                  model_name="gpt-3.5-turbo-0125")
llm = ChatGoogleGenerativeAI(model="gemini-pro",
                             temperature=0)

# Tools offered to the agent; the same chat model is handed to load_tools.
tools = load_tools(["wikipedia", "llm-math"], llm=llm)

# NOTE(review): `description` does not look like a parameter of
# initialize_agent; it is presumably forwarded as an extra keyword and has no
# effect. Its text ("use when a calculation is needed") reads like a tool
# description. Confirm against the installed version, then drop it or move it
# to the tool definition.
agent = initialize_agent(tools, llm,
                 agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
                 description="계산이 필요할 때 사용",
                 verbose=True)

# `run` is deprecated; `invoke` takes the question under the "input" key and
# the final answer is read from the "output" entry of the returned dict.
# Korean-language variant of the question, kept disabled:
# agent.invoke({"input": "에드 시런이 태어난 해는? 2024년도 기준으로 에드 시런은 몇 살?"})["output"]
agent.invoke({"input": "What year was Ed Sheeran born? And how old is he in 2024?"})["output"]



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: wikipedia
Action Input: Ed Sheeran[0m
Observation: [36;1m[1;3mPage: Ed Sheeran
Summary: Edward Christopher Sheeran  ( SHEER-ən; born 17 February 1991) is an English singer-songwriter. Born in Halifax, West Yorkshire, and raised in Framlingham, Suffolk, he began writing songs around the age of eleven. In early 2011, Sheeran independently released the extended play No. 5 Collaborations Project. He signed with Asylum Records the same year.
Sheeran's debut album, + ("Plus"), was released in September 2011 and topped the UK Albums Chart. It contained his first hit single "The A Team". In 2012, Sheeran won the Brit Awards for Best British Male Solo Artist and British Breakthrough Act. Sheeran's second studio album, × ("Multiply"), topped charts around the world upon its release in June 2014. It was named the second-best-selling album worldwide of 2015. In the same year, × won Album of the Year at the 2015 Brit Awards, an

'Ed Sheeran was born in 1991 and will be 33 years old in 2024.'