In [75]:
# 2-1 LLM and Chat Models
from langchain_openai import OpenAI, ChatOpenAI # LLM, Chat model

llm = OpenAI()
chat = ChatOpenAI()

a = llm.invoke("How many planets are in the solar system?")
b = chat.invoke("How many planets are in the solar system?")

a, b

('\n\nThere are eight planets in the solar system: Mercury, Venus, Earth, Mars, Jupiter, Saturn, Uranus, and Neptune.',
 AIMessage(content='There are eight planets in the solar system: Mercury, Venus, Earth, Mars, Jupiter, Saturn, Uranus, and Neptune.', response_metadata={'token_usage': {'completion_tokens': 27, 'prompt_tokens': 16, 'total_tokens': 43}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-2e9ccd02-200b-49b5-9220-6469d39b5e18-0', usage_metadata={'input_tokens': 16, 'output_tokens': 27, 'total_tokens': 43}))

In [76]:
## 2-2. Predict Messages
from langchain_openai import ChatOpenAI
# HumanMessage - 인간이 작성하는 Message
# AIMessage - AI에 의해서 보내지는 Message
# SystemMessage - LLM에 설정들을 제공하기 위한 Message
from langchain.schema import HumanMessage, AIMessage, SystemMessage

chat = ChatOpenAI(
    temperature=0.1, # 모델의 창의성을 조절하는 옵션 (높을 수록 창의적임)
)

messages = [
    SystemMessage(content="You are a geography expert. And you only reply in Italian."),
    AIMessage(content="Ciao, mi chiamo Paolo!"),
    HumanMessage(content="What is the distance between the Mexico and Thailand. Also, what is your name?"),
]

chat.invoke(messages)

AIMessage(content='Ciao! La distanza tra il Messico e la Thailandia è di circa 16.000 chilometri. Come ti chiami?', response_metadata={'token_usage': {'completion_tokens': 31, 'prompt_tokens': 53, 'total_tokens': 84}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-d65532a0-253a-4c36-8e87-308c9259c095-0', usage_metadata={'input_tokens': 53, 'output_tokens': 31, 'total_tokens': 84})

In [77]:
## 2-3. Prompt Templates
from langchain_openai import ChatOpenAI
# PromptTemplate - 문자열을 이용한 template 생성
# ChatPromptTemplate - message를 이용하여 template 생성
from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate, AIMessagePromptTemplate

chat = ChatOpenAI(
    temperature=0.1, # 모델의 창의성을 조절하는 옵션 (높을 수록 창의적임)
)

# 일반 문자열 출력 예제
# template = PromptTemplate.from_template("What is the distance between {country_a} and {country_b}")
# prompt = template.format(country_a="Mexico", country_b="Thailand")
# chat.predict(prompt)

# 메시지 출력 예제
messages = [
    SystemMessagePromptTemplate.from_template("You are a geography expert. And you only reply in {language}."),
    AIMessagePromptTemplate.from_template("Ciao, mi chiamo {name}!"),
    HumanMessagePromptTemplate.from_template("What is the distance between the {country_a} and {country_b}. Also, what is your name?")
]

template = ChatPromptTemplate.from_messages(messages)

prompt = template.format_messages(language="Italian", name="Paolo", country_a="Mexico", country_b="Thailand")

chat.invoke(prompt)

AIMessage(content='Ciao! La distanza tra il Messico e la Thailandia è di circa 16.000 chilometri. Come ti chiami?', response_metadata={'token_usage': {'completion_tokens': 31, 'prompt_tokens': 53, 'total_tokens': 84}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-d65532a0-253a-4c36-8e87-308c9259c095-0', usage_metadata={'input_tokens': 53, 'output_tokens': 31, 'total_tokens': 84})

In [78]:
# 2-4. OutputParser and LCEL
from langchain_openai import ChatOpenAI
from langchain.schema import BaseOutputParser
# PromptTemplate - 문자열을 이용한 template 생성
# ChatPromptTemplate - message를 이용하여 template 생성
from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate, AIMessagePromptTemplate

chat = ChatOpenAI(
    temperature=0.1, # 모델의 창의성을 조절하는 옵션 (높을 수록 창의적임)
)

# 문자열 출력을 파싱하는 BaseOutputParser 확장하는 커스텀 OutputParser
class CommaOutputParser(BaseOutputParser):
    def parse(self, text: str) -> str:
        items = text.strip().split(",")
        return list(map(str.strip, items))
    

# OutputParser 예제 (LCEL 적용 전)
# p = CommaOutputParser()
# messages = [
#     SystemMessagePromptTemplate.from_template("You are a list gernerating machine. Everything you are asked will be answered with a comma separated list of max {max_items} in lowercase. Do Not reply with else."),
#     HumanMessagePromptTemplate.from_template("{question}")
# ]
# template = ChatPromptTemplate.from_messages(messages)
# prompt = template.format_messages(max_items=10, question="What are the colors?")
# res = chat.invoke(prompt)
# p.parse(res.content)

# OutputParser 예제 (LCEL 적용 후)
messages = [
    SystemMessagePromptTemplate.from_template("You are a list gernerating machine. Everything you are asked will be answered with a comma separated list of max {max_items} in lowercase. Do Not reply with else."),
    HumanMessagePromptTemplate.from_template("{question}")
]
template = ChatPromptTemplate.from_messages(messages)
chain = template | chat | CommaOutputParser()
chain.invoke({
    "max_items":10, 
    "question":"What are the colors?",
})

['red',
 'blue',
 'green',
 'yellow',
 'orange',
 'purple',
 'pink',
 'black',
 'white',
 'brown']

In [79]:
# 2-5. Chaining Chains
from langchain_openai import ChatOpenAI
# PromptTemplate - 문자열을 이용한 template 생성
# ChatPromptTemplate - message를 이용하여 template 생성
from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate, AIMessagePromptTemplate
from langchain.callbacks import StreamingStdOutCallbackHandler

chat = ChatOpenAI(
    temperature=0.1, # 모델의 창의성을 조절하는 옵션 (높을 수록 창의적임)
    streaming=True, # streaming 옵션을 활성화하여 대화형 모드로 설정
    callbacks=[StreamingStdOutCallbackHandler()], # 콜백 함수를 설정
)

chef_message =  [
    SystemMessagePromptTemplate.from_template("You are a world-class international chef. You create easy to fllow recipies for any type of cuisine with easy to find ingredients."),
    HumanMessagePromptTemplate.from_template("I want to cook {cuisine} food.")
]

chef_prompt = ChatPromptTemplate.from_messages(chef_message)

chef_chain = chef_prompt | chat

veg_chef_message =  [
    SystemMessagePromptTemplate.from_template("You are a vegetarian chef specialized on marking tranditional recipies vegetarian. You find alternatibe ingredients and explain their preparation. You don't redically modify the recipe. If there is no alternative for a food just say you don't know how to replace it."),
    HumanMessagePromptTemplate.from_template("{recipe}")
]

veg_chef_prompt = ChatPromptTemplate.from_messages(veg_chef_message)

veg_chef_chain = veg_chef_prompt | chat

# RunnableMap 사용
final_chain = {"recipe": chef_chain} | veg_chef_chain

final_chain.invoke({
    "cuisine":"indian",
})

AIMessage(content="To make this Chicken Tikka Masala recipe vegetarian, we can replace the chicken with a suitable alternative. Here's how you can modify the recipe:\n\n**Vegetarian Tikka Masala**\n\n**Ingredients:**\n- 1 lb firm tofu, pressed and cut into bite-sized cubes\n- 1 cup plain yogurt (you can use plant-based yogurt for a vegan version)\n- 2 tablespoons lemon juice\n- 2 teaspoons ground cumin\n- 2 teaspoons paprika\n- 1 teaspoon ground turmeric\n- 1 teaspoon garam masala\n- 1 teaspoon ground coriander\n- 1 teaspoon chili powder (adjust to taste)\n- Salt and pepper to taste\n- 2 tablespoons vegetable oil\n- 1 onion, finely chopped\n- 3 cloves garlic, minced\n- 1-inch piece of ginger, grated\n- 1 can (14 oz) crushed tomatoes\n- 1 cup coconut cream (or heavy cream alternative for a vegan version)\n- Fresh cilantro, chopped (for garnish)\n\n**Instructions:**\n1. In a bowl, combine yogurt, lemon juice, cumin, paprika, turmeric, garam masala, coriander, chili powder, salt, and pepp

In [80]:
# 3-1. FewShotPromptTemplate
from langchain_openai import ChatOpenAI
# PromptTemplate - 문자열을 이용한 template 생성
# ChatPromptTemplate - message를 이용하여 template 생성
from langchain.prompts import PromptTemplate, ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate, AIMessagePromptTemplate
from langchain.prompts.few_shot import FewShotPromptTemplate
from langchain.callbacks import StreamingStdOutCallbackHandler

chat = ChatOpenAI(
    temperature=0.1, # 모델의 창의성을 조절하는 옵션 (높을 수록 창의적임)
    streaming=True, # streaming 옵션을 활성화하여 대화형 모드로 설정
    callbacks=[StreamingStdOutCallbackHandler()], # 콜백 함수를 설정
)

# 모델에게 전달하는 답변 예제
examples = [
    {
        "question": "What do you know about France?",
        "answer": """
        Here is what I know:
        Capital: Paris
        Language: French
        Food: Wine and Cheese
        Currency: Euro
        """,
    },
    {
        "question": "What do you know about Italy?",
        "answer": """
        I know this:
        Capital: Rome
        Language: Italian
        Food: Pizza and Pasta
        Currency: Euro
        """,
    },
    {
        "question": "What do you know about Greece?",
        "answer": """
        I know this:
        Capital: Athens
        Language: Greek
        Food: Souvlaki and Feta Cheese
        Currency: Euro
        """,
    }]



example_prompt = PromptTemplate.from_template("Human: {question}\nAI: {answer}")

prompt = FewShotPromptTemplate(
    example_prompt=example_prompt, # Prompt 방식
    examples=examples, # 답변 예제
    suffix="Human: Wat do you know about {country}?", # 모든 형식화된 예제 마지막 내용
    input_variables=["country"] # suffix 입력 변수 (유효성 검사)
)

chain = prompt | chat

chain.invoke({
    "country":"Germ",
})

AIMessage(content='AI: \n        I\'m sorry, I\'m not sure what you mean by "Germ." Did you mean Germany?', response_metadata={'finish_reason': 'stop', 'model_name': 'gpt-3.5-turbo-0125'}, id='run-a639704b-2eb3-4730-8a41-3c3a437becde-0')

In [81]:
# 3-2. FewShotChatMessagePromptTemplate
from langchain_openai import ChatOpenAI
# PromptTemplate - 문자열을 이용한 template 생성
# ChatPromptTemplate - message를 이용하여 template 생성
from langchain.prompts import PromptTemplate, ChatMessagePromptTemplate, ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate, AIMessagePromptTemplate
from langchain.prompts.few_shot import FewShotPromptTemplate, FewShotChatMessagePromptTemplate
from langchain.callbacks import StreamingStdOutCallbackHandler

chat = ChatOpenAI(
    temperature=0.1, # 모델의 창의성을 조절하는 옵션 (높을 수록 창의적임)
    streaming=True, # streaming 옵션을 활성화하여 대화형 모드로 설정
    callbacks=[StreamingStdOutCallbackHandler()], # 콜백 함수를 설정
)

# 모델에게 전달하는 답변 예제
examples = [
    {
        "country": "France?",
        "answer": """
        Here is what I know:
        Capital: Paris
        Language: French
        Food: Wine and Cheese
        Currency: Euro
        """,
    },
    {
        "country": "Italy?",
        "answer": """
        I know this:
        Capital: Rome
        Language: Italian
        Food: Pizza and Pasta
        Currency: Euro
        """,
    },
    {
        "country": "Greece?",
        "answer": """
        I know this:
        Capital: Athens
        Language: Greek
        Food: Souvlaki and Feta Cheese
        Currency: Euro
        """,
    }]


example_prompt = ChatPromptTemplate.from_messages([
    HumanMessagePromptTemplate.from_template("What do you know about {country}?"),
    AIMessagePromptTemplate.from_template("{answer}"),
]
)

prompt = FewShotChatMessagePromptTemplate(
    example_prompt=example_prompt, # Prompt 방식
    examples=examples, # 답변 예제
)

final_prompt = ChatPromptTemplate.from_messages([
    SystemMessagePromptTemplate.from_template("You are a geography expert, you give short answers"),
    prompt,
    HumanMessagePromptTemplate.from_template("What do you know about {country}?")
])

chain = final_prompt | chat

chain.invoke({
    "country":"Germany",
})


AIMessage(content='\n        I know this:\n        Capital: Berlin\n        Language: German\n        Food: Bratwurst and Sauerkraut\n        Currency: Euro\n        ', response_metadata={'finish_reason': 'stop', 'model_name': 'gpt-3.5-turbo-0125'}, id='run-213f3cfb-0c41-4b51-aab7-5925964cf6d6-0')

In [82]:
# 3-3. LengthBasedExampleSelector
from langchain_openai import ChatOpenAI
# PromptTemplate - 문자열을 이용한 template 생성
# ChatPromptTemplate - message를 이용하여 template 생성
from langchain.prompts import PromptTemplate, ChatMessagePromptTemplate, ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate, AIMessagePromptTemplate
from langchain.prompts.few_shot import FewShotPromptTemplate, FewShotChatMessagePromptTemplate
from langchain.callbacks import StreamingStdOutCallbackHandler
# from langchain.prompts.example_selector import LengthBasedExampleSelector
from langchain.prompts.example_selector.base import BaseExampleSelector

class RandomExampleSelector(BaseExampleSelector):
    def __init__(self, examples):
        self.examples = examples
    
    def add_example(self, example):
        self.examples.append(example)

    def select_examples(self, input_variables):
        from random import choice
        return [choice(self.examples)]

chat = ChatOpenAI(
    temperature=0.1, # 모델의 창의성을 조절하는 옵션 (높을 수록 창의적임)
    streaming=True, # streaming 옵션을 활성화하여 대화형 모드로 설정
    callbacks=[StreamingStdOutCallbackHandler()], # 콜백 함수를 설정
)

# 모델에게 전달하는 답변 예제
examples = [
    {
        "question": "What do you know about France?",
        "answer": """
        Here is what I know:
        Capital: Paris
        Language: French
        Food: Wine and Cheese
        Currency: Euro
        """,
    },
    {
        "question": "What do you know about Italy?",
        "answer": """
        I know this:
        Capital: Rome
        Language: Italian
        Food: Pizza and Pasta
        Currency: Euro
        """,
    },
    {
        "question": "What do you know about Greece?",
        "answer": """
        I know this:
        Capital: Athens
        Language: Greek
        Food: Souvlaki and Feta Cheese
        Currency: Euro
        """,
    }]

example_prompt = PromptTemplate.from_template("Human: {question}\nAI: {answer}")

# 예제 선택
example_selector = RandomExampleSelector(
    examples=examples, # 답변 예제
)

prompt = FewShotPromptTemplate(
    example_prompt=example_prompt, # Prompt 방식
    example_selector=example_selector, # 답변 선택
    suffix="Human: Wat do you know about {country}?", # 모든 형식화된 예제 마지막 내용
    input_variables=["country"] # suffix 입력 변수 (유효성 검사)
)

prompt.format(country="Brazil")

'Human: What do you know about Italy?\nAI: \n        I know this:\n        Capital: Rome\n        Language: Italian\n        Food: Pizza and Pasta\n        Currency: Euro\n        \n\nHuman: Wat do you know about Brazil?'

In [83]:
# 3-4. Serialization and Composition
from langchain_openai import ChatOpenAI
# PromptTemplate - 문자열을 이용한 template 생성
# ChatPromptTemplate - message를 이용하여 template 생성
from langchain.prompts import PromptTemplate, ChatMessagePromptTemplate, ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate, AIMessagePromptTemplate
from langchain.prompts.few_shot import FewShotPromptTemplate, FewShotChatMessagePromptTemplate
from langchain.callbacks import StreamingStdOutCallbackHandler
# Prompt 파일 불러오기
# from langchain.prompts import load_prompt
# Prompt Pipeline 불러오기
from langchain.prompts.pipeline import PipelinePromptTemplate

# Prompt 파일 불러오기
# prompt = load_prompt("./prompt.json")
# prompt = load_prompt("./prompt.yaml")

chat = ChatOpenAI(
    temperature=0.1, # 모델의 창의성을 조절하는 옵션 (높을 수록 창의적임)
    streaming=True, # streaming 옵션을 활성화하여 대화형 모드로 설정
    callbacks=[StreamingStdOutCallbackHandler()], # 콜백 함수를 설정
)

# Prompt 파일 불러오기
# prompt.format(country="Germany")

intro = PromptTemplate.from_template(
    """
    You are a role playing assistant.
    And you are impersonating a {character}
"""
)

example = PromptTemplate.from_template(
    """
    This is an example of how you talk:

    Human: {example_question}
    You: {example_answer}
"""
)

start = PromptTemplate.from_template(
    """
    Start now!

    Human: {question}
    You:
"""
)

final = PromptTemplate.from_template(
    """
    {intro}
                                     
    {example}
                              
    {start}
"""
)

prompts = [
    ("intro", intro),
    ("example", example),
    ("start", start),
]

full_prompt = PipelinePromptTemplate(
    final_prompt=final,
    pipeline_prompts=prompts,
)

# full_prompt.format(character="Pirate", example_question="What is your location?", example_answer="Arrrrg! That is a secret!! Arg arg!!", question="What is your fav food?")

chain = full_prompt | chat

chain.invoke({
    "character":"Pirate",
    "example_question":"What is your location?",
    "example_answer":"Arrrrg! That is a secret!! Arg arg",
    "question": "What is your fav food?"
})


AIMessage(content='Arrrrg! Me favorite food be a hearty stew made with fresh seafood and plenty of spices! Aye, it be a meal fit for a pirate like meself! Arrrrg!', response_metadata={'finish_reason': 'stop', 'model_name': 'gpt-3.5-turbo-0125'}, id='run-24515c0d-3d20-4b5c-8ffb-4b6a73447353-0')

In [84]:
# 3-5. Caching
from langchain_openai import ChatOpenAI
from langchain.callbacks import StreamingStdOutCallbackHandler
from langchain.globals import set_llm_cache
from langchain.cache import InMemoryCache, SQLiteCache

# 메모리 캐시
# set_llm_cache(InMemoryCache())
# set_debug(False)

# 데이터베이스 캐시
set_llm_cache(SQLiteCache('cache.db'))

chat = ChatOpenAI(
    temperature=0.1, # 모델의 창의성을 조절하는 옵션 (높을 수록 창의적임)
    # streaming=True, # streaming 옵션을 활성화하여 대화형 모드로 설정
    # callbacks=[StreamingStdOutCallbackHandler()], # 콜백 함수를 설정
)

chat.predict("How do you make italian pasta?")

'To make Italian pasta, you will need the following ingredients:\n\n- 2 cups of all-purpose flour\n- 2 large eggs\n- Pinch of salt\n\nHere is a step-by-step guide to making Italian pasta:\n\n1. On a clean work surface, pour the flour and create a well in the center.\n2. Crack the eggs into the well and add a pinch of salt.\n3. Using a fork, gradually mix the eggs into the flour until a dough forms.\n4. Knead the dough for about 10 minutes until it is smooth and elastic.\n5. Wrap the dough in plastic wrap and let it rest for at least 30 minutes.\n6. After resting, roll out the dough using a pasta machine or a rolling pin until it is thin.\n7. Cut the dough into your desired shape, such as fettuccine or spaghetti.\n8. Cook the pasta in a large pot of boiling salted water for 2-3 minutes or until al dente.\n9. Drain the pasta and toss with your favorite sauce or toppings.\n10. Serve hot and enjoy your homemade Italian pasta!'

In [88]:
# 3-6. Serialization
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAI
from langchain.callbacks import get_openai_callback
from langchain.llms.loading import load_llm

# chat = ChatOpenAI(
#     temperature=0.1, # 모델의 창의성을 조절하는 옵션 (높을 수록 창의적임)
# )

# 지출 비용 확인
# with get_openai_callback() as usage:
#     a = chat.predict("What is the recipe for soju?")
#     b = chat.predict("What is the recipe for bread?")
#     print(a, b, "\n")
#     print(usage)

# Serialization
# 모델 저장
# chat = OpenAI(
#     temperature=0.1, # 모델의 창의성을 조절하는 옵션 (높을 수록 창의적임)
#     max_tokens=450,
#     model="gpt-3.5-turbo-16k",
# )

# chat.save('model.json')

# 모델 불러오기
chat = load_llm('model.json')

print(chat)

[1mOpenAIChat[0m
Params: {'model_name': 'gpt-3.5-turbo-16k', 'temperature': 0.1, 'top_p': 1, 'frequency_penalty': 0, 'presence_penalty': 0, 'n': 1, 'logit_bias': {}, 'max_tokens': 450}




In [90]:
# 4-1. ConversationBufferMemory
from langchain.memory import ConversationBufferMemory

# 쳇 모델 용으로 사용할 경우 return_messages=True로 설정
memory = ConversationBufferMemory(return_messages=True)

memory.save_context({"input": "Hi!"}, {"output": "How are you?"})

memory.load_memory_variables({})

{'history': [HumanMessage(content='Hi!'), AIMessage(content='How are you?')]}

In [92]:
# 4-2. ConversationBufferWindowMemory
from langchain.memory import ConversationBufferWindowMemory

# 쳇 모델 용으로 사용할 경우 return_messages=True로 설정, K는 메모리의 크기
memory = ConversationBufferWindowMemory(return_messages=True, k=4)

def add_message(input, output):
    memory.save_context({"input": input}, {"output": output})

add_message(1, 1)
add_message(2, 2)
add_message(3, 3)
add_message(4, 4)
add_message(5, 5)

memory.load_memory_variables({})

{'history': [HumanMessage(content='2'),
  AIMessage(content='2'),
  HumanMessage(content='3'),
  AIMessage(content='3'),
  HumanMessage(content='4'),
  AIMessage(content='4'),
  HumanMessage(content='5'),
  AIMessage(content='5')]}

In [94]:
# 4-3. ConversationSummaryMemory
from langchain_openai import ChatOpenAI
from langchain.memory import ConversationSummaryMemory

chat = ChatOpenAI(
    temperature=0.1, # 모델의 창의성을 조절하는 옵션 (높을 수록 창의적임)
)

memory = ConversationSummaryMemory(llm=chat)

def add_message(input, output):
    memory.save_context({"input": input}, {"output": output})

def get_history():
    return memory.load_memory_variables({})

add_message("HI I am a human, I live in South Korea", "Wow that is cool! I am a robot living in the cloud.")
add_message("South Korea is so pretty", "I wish I could visit there.")

get_history()


{'history': 'The human introduces themselves as living in South Korea, and the AI responds by mentioning it is a robot living in the cloud. The human comments on the beauty of South Korea, and the AI expresses a wish to visit there.'}

In [1]:
# 4-4. ConversationSummaryBufferMemory
from langchain_openai import ChatOpenAI
from langchain.memory import ConversationSummaryBufferMemory

chat = ChatOpenAI(
    temperature=0.1, # 모델의 창의성을 조절하는 옵션 (높을 수록 창의적임)
)

# max_token_limit은 메모리에 저장할 최대 토큰 수
memory = ConversationSummaryBufferMemory(
    llm=chat,
    max_token_limit=10,
    return_messages=True
)

def add_message(input, output):
    memory.save_context({"input": input}, {"output": output})

def get_history():
    return memory.load_memory_variables({})

add_message("HI I am a human, I live in South Korea", "Wow that is cool! I am a robot living in the cloud.")
add_message("South Korea is so pretty", "I wish I could visit there.")

get_history()

{'history': [SystemMessage(content='The human introduces themselves as living in South Korea. The AI responds by mentioning it is a robot living in the cloud and expresses a desire to visit South Korea because it is so pretty.')]}

In [4]:
# 4-5. ConversationKGMemory
from langchain_openai import ChatOpenAI
from langchain.memory import ConversationKGMemory

chat = ChatOpenAI(
    temperature=0.1, # 모델의 창의성을 조절하는 옵션 (높을 수록 창의적임)
)

# max_token_limit은 메모리에 저장할 최대 토큰 수
memory = ConversationKGMemory(
    llm=chat,
    return_messages=True,
)

def add_message(input, output):
    memory.save_context({"input": input}, {"output": output})

add_message("HI I am a Lee, I live in South Korea", "Wow that is cool! I am a robot living in the cloud.")
add_message("Lee likes kimchi", "wow that is cool!")

memory.load_memory_variables({"input": "who is Lee"})
memory.load_memory_variables({"input": "what does Lee like"})

{'history': [SystemMessage(content='On Lee: Lee lives in South Korea. Lee likes kimchi.')]}

In [12]:
# 4-6. Memory on LLMChain
from langchain_openai import ChatOpenAI
from langchain.memory import ConversationSummaryBufferMemory
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

chat = ChatOpenAI(
    temperature=0.1, # 모델의 창의성을 조절하는 옵션 (높을 수록 창의적임)
)

# max_token_limit은 메모리에 저장할 최대 토큰 수
memory = ConversationSummaryBufferMemory(
    llm=chat,
    max_token_limit=120,
    memory_key="chat_history",
)



template = """
    You are a helpful AI talking to a human.

    {chat_history}
    Human:{question}
    You:
"""

prompt = PromptTemplate.from_template(template)

chain = LLMChain(
    llm=chat,
    memory=memory,
    prompt=prompt,
    verbose=True, 
)

chain.predict(question="My name is Seo")
chain.predict(question="I live Seoul in South Korea") 
chain.predict(question="What is my name?")

memory.load_memory_variables({})



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
    You are a helpful AI talking to a human.

    
    Human:My name is Seo
    You:
[0m

[1m> Finished chain.[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
    You are a helpful AI talking to a human.

    Human: My name is Seo
AI: Hello Seo! How can I assist you today?
    Human:I live Seoul in South Korea
    You:
[0m

[1m> Finished chain.[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
    You are a helpful AI talking to a human.

    Human: My name is Seo
AI: Hello Seo! How can I assist you today?
Human: I live Seoul in South Korea
AI: That's great to know! How can I assist you with information or tasks related to Seoul or South Korea?
    Human:What is my name?
    You:
[0m

[1m> Finished chain.[0m


{'chat_history': "Human: My name is Seo\nAI: Hello Seo! How can I assist you today?\nHuman: I live Seoul in South Korea\nAI: That's great to know! How can I assist you with information or tasks related to Seoul or South Korea?\nHuman: What is my name?\nAI: Your name is Seo."}

In [16]:
# 4-7. Chat Based Memory
from langchain_openai import ChatOpenAI
from langchain.memory import ConversationSummaryBufferMemory
from langchain.chains import LLMChain
from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate, MessagesPlaceholder

chat = ChatOpenAI(
    temperature=0.1, # 모델의 창의성을 조절하는 옵션 (높을 수록 창의적임)
)

# max_token_limit은 메모리에 저장할 최대 토큰 수
memory = ConversationSummaryBufferMemory(
    llm=chat,
    max_token_limit=120,
    memory_key="chat_history",
    return_messages=True,
)



prompt = ChatPromptTemplate.from_messages([
    SystemMessagePromptTemplate.from_template("You are a helpful AI talking to a human."),
    MessagesPlaceholder(variable_name="chat_history"), # 많은 메시지들 사이를 구분하기 위한 placeholder
    HumanMessagePromptTemplate.from_template("{question}"),
])

chain = LLMChain(
    llm=chat,
    memory=memory,
    prompt=prompt,
    verbose=True, 
)

chain.predict(question="My name is Seo")
chain.predict(question="I live Seoul in South Korea") 
chain.predict(question="What is my name?")

memory.load_memory_variables({})



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: You are a helpful AI talking to a human.
Human: My name is Seo[0m

[1m> Finished chain.[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: You are a helpful AI talking to a human.
Human: My name is Seo
AI: Nice to meet you, Seo! How can I assist you today?
Human: I live Seoul in South Korea[0m

[1m> Finished chain.[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: You are a helpful AI talking to a human.
Human: My name is Seo
AI: Nice to meet you, Seo! How can I assist you today?
Human: I live Seoul in South Korea
AI: That's great to know, Seo! Is there anything specific you would like to know or talk about regarding Seoul or South Korea?
Human: What is my name?[0m

[1m> Finished chain.[0m


'Your name is Seo.'

In [39]:
# 4-8. LCEL Based Memory
from langchain_openai import ChatOpenAI
from langchain.memory import ConversationSummaryBufferMemory
from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate, MessagesPlaceholder
from langchain.schema.runnable import RunnablePassthrough

chat = ChatOpenAI(
    temperature=0.1, # 모델의 창의성을 조절하는 옵션 (높을 수록 창의적임)
)

# max_token_limit은 메모리에 저장할 최대 토큰 수
memory = ConversationSummaryBufferMemory(
    llm=chat,
    max_token_limit=120,
    return_messages=True,
)

prompt = ChatPromptTemplate.from_messages([
    SystemMessagePromptTemplate.from_template("You are a helpful AI talking to a human."),
    MessagesPlaceholder(variable_name="history"), # 많은 메시지들 사이를 구분하기 위한 placeholder
    HumanMessagePromptTemplate.from_template("{question}"),
])

def load_memory(_):
    return memory.load_memory_variables({})["history"]

chain = RunnablePassthrough.assign(history=load_memory) | prompt | chat

def invoke_chain(question):
    result = chain.invoke({"question": question})
    memory.save_context({"input": question}, {"output": result.content})
    return result

invoke_chain("My name is Seo")
invoke_chain("I live Seoul in South Korea")
invoke_chain("What is my name?")

AIMessage(content='Your name is Seo.', response_metadata={'token_usage': {'completion_tokens': 6, 'prompt_tokens': 96, 'total_tokens': 102}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-ed30c92a-055d-47bc-b4aa-e56cbd8d5508-0', usage_metadata={'input_tokens': 96, 'output_tokens': 6, 'total_tokens': 102})

In [62]:
# 5-1. Data Loaders and Splitters
from langchain_openai import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter

# chunk_size - 텍스트를 분할하는 크기
# chunk_overlap - 분할된 텍스트의 중복 크기
# separator - 텍스트를 분할하는 구분자
splitter = CharacterTextSplitter(
    chunk_size=600,
    chunk_overlap=100,
    separator="\n"
)

loader = UnstructuredFileLoader("./files/chapter_one.pdf")

len(loader.load_and_split(text_splitter=splitter))

Created a chunk of size 699, which is longer than the specified 600
Created a chunk of size 793, which is longer than the specified 600
Created a chunk of size 845, which is longer than the specified 600
Created a chunk of size 1236, which is longer than the specified 600
Created a chunk of size 775, which is longer than the specified 600
Created a chunk of size 2532, which is longer than the specified 600
Created a chunk of size 671, which is longer than the specified 600
Created a chunk of size 718, which is longer than the specified 600
Created a chunk of size 1388, which is longer than the specified 600
Created a chunk of size 892, which is longer than the specified 600
Created a chunk of size 1861, which is longer than the specified 600
Created a chunk of size 635, which is longer than the specified 600
Created a chunk of size 1110, which is longer than the specified 600
Created a chunk of size 1701, which is longer than the specified 600
Created a chunk of size 760, which is long

50

In [65]:
# 5-2. Tiktoken
from langchain_openai import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter

# chunk_size - 텍스트를 분할하는 크기
# chunk_overlap - 분할된 텍스트의 중복 크기
# separator - 텍스트를 분할하는 구분자
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=600,
    chunk_overlap=100,
    separator="\n",
)

loader = UnstructuredFileLoader("./files/chapter_one.pdf")

loader.load_and_split(text_splitter=splitter)

[Document(page_content='1–1Introduction This two-year course in physics is presented from the point of view that you, the reader, are going to be a physicist. This is not necessarily the case of course, but that is what every professor in every subject assumes! If you are going to be a physicist, you will have a lot to study: two hundred years of the most rapidly developing field of knowledge that there is. So much knowledge, in fact, that you might think that you cannot learn all of it in four years, and truly you cannot; you will have to go to graduate school too!\nSurprisingly enough, in spite of the tremendous amount of work that has been done for all this time it is possible to condense the enormous mass of results to a large extent—that is, to find laws which summarize all our knowledge. Even so, the laws are so hard to grasp that it is unfair to you to start exploring this tremendous subject without some kind of map or outline of the relationship of one part of the subject of sc

In [None]:
# 5-3. Vectors
# Not Example

In [89]:
# 5-4. Vectors Store
from langchain_openai import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain.embeddings import CacheBackedEmbeddings
from langchain.vectorstores import Chroma
from langchain.storage import LocalFileStore

# chunk_size - 텍스트를 분할하는 크기
# chunk_overlap - 분할된 텍스트의 중복 크기
# separator - 텍스트를 분할하는 구분자
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=600,
    chunk_overlap=100,
    separator="\n",
)

loader = UnstructuredFileLoader("./files/chapter_one.pdf")

docs = loader.load_and_split(text_splitter=splitter)

embeddings = OpenAIEmbeddings()

# cache_dir - 캐시 디렉토리
cache_dir = LocalFileStore("./.cache/")

# 캐시된 임베딩을 사용하여 Vector Store 초기화
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings,
    cache_dir,
)

# Vector Store 초기화
vectorstore = Chroma.from_documents(docs, cached_embeddings)

# 유사도 검색
result = vectorstore.similarity_search("what is introduction")

result

In [88]:
# 5-5. Langsmith
# Not Example

'1–1Introduction This two-year course in physics is presented from the point of view that you, the reader, are going to be a physicist. This is not necessarily the case of course, but that is what every professor in every subject assumes! If you are going to be a physicist, you will have a lot to study: two hundred years of the most rapidly developing field of knowledge that there is. So much knowledge, in fact, that you might think that you cannot learn all of it in four years, and truly you cannot; you will have to go to graduate school too!\nSurprisingly enough, in spite of the tremendous amount of work that has been done for all this time it is possible to condense the enormous mass of results to a large extent—that is, to find laws which summarize all our knowledge. Even so, the laws are so hard to grasp that it is unfair to you to start exploring this tremendous subject without some kind of map or outline of the relationship of one part of the subject of science to another. Follo

In [4]:
# 5-6. RetrievalQA
from langchain_openai import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain.embeddings import CacheBackedEmbeddings
from langchain.vectorstores import Chroma
from langchain.storage import LocalFileStore
from langchain.chains import RetrievalQA

llm = ChatOpenAI(
    temperature=0.1, # 모델의 창의성을 조절하는 옵션 (높을 수록 창의적임)
)

# chunk_size - 텍스트를 분할하는 크기
# chunk_overlap - 분할된 텍스트의 중복 크기
# separator - 텍스트를 분할하는 구분자
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=600,
    chunk_overlap=100,
    separator="\n",
)

loader = UnstructuredFileLoader("./files/chapter_one.pdf")

docs = loader.load_and_split(text_splitter=splitter)

embeddings = OpenAIEmbeddings()

# cache_dir - 캐시 디렉토리
cache_dir = LocalFileStore("./.cache/")

# 캐시된 임베딩을 사용하여 Vector Store 초기화
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings,
    cache_dir,
)

# Vector Store 초기화
vectorstore = Chroma.from_documents(docs, cached_embeddings)

chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="map_reduce", # map_reduce, stuff
    retriever=vectorstore.as_retriever(),  
)

chain.run("What is Physics?")

'Physics is the study of the fundamental principles that govern the natural world. It involves exploring the relationships between matter and energy, space and time, and the forces that act upon them. Physics is presented as a rapidly developing field of knowledge that has been studied for over two hundred years, condensing a large amount of results into laws that summarize our knowledge. The correct statement of the laws of physics involves unfamiliar ideas that require advanced mathematics for their description. Physics is considered to be an approximation to the complete truth, as our knowledge is always evolving and subject to correction.'

In [15]:
# 5-7. Stuff LCEL Chain
from langchain_openai import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain.embeddings import CacheBackedEmbeddings
from langchain.vectorstores import Chroma
from langchain.storage import LocalFileStore
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough

llm = ChatOpenAI(
    temperature=0.1, # 모델의 창의성을 조절하는 옵션 (높을 수록 창의적임)
)

# chunk_size - 텍스트를 분할하는 크기
# chunk_overlap - 분할된 텍스트의 중복 크기
# separator - 텍스트를 분할하는 구분자
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=600,
    chunk_overlap=100,
    separator="\n",
)

loader = UnstructuredFileLoader("./files/chapter_one.pdf")

docs = loader.load_and_split(text_splitter=splitter)

embeddings = OpenAIEmbeddings()

# cache_dir - 캐시 디렉토리
cache_dir = LocalFileStore("./.cache/")

# 캐시된 임베딩을 사용하여 Vector Store 초기화
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings,
    cache_dir,
)

# Vector Store 초기화
vectorstore = Chroma.from_documents(docs, cached_embeddings)

retriver = vectorstore.as_retriever();

prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpfull assistant. Answer questions using only the following context. If you don't know the answer just say you don't knowm, don't make it up:\n{context}"),
    ("human", "{question}"),
])

chain = {"context": retriver, "question": RunnablePassthrough()} | prompt | llm

chain.invoke("What is Physics?")

AIMessage(content='Physics is a rapidly developing field of knowledge that studies the fundamental principles of the universe, including matter, energy, motion, and force.', response_metadata={'token_usage': {'completion_tokens': 27, 'prompt_tokens': 2148, 'total_tokens': 2175}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-a5fc7e62-2709-4a2c-82ed-bec24e253b5a-0', usage_metadata={'input_tokens': 2148, 'output_tokens': 27, 'total_tokens': 2175})

In [None]:
# 5-8. Map Reduce LCEL Chain
from langchain_openai import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain.embeddings import CacheBackedEmbeddings
from langchain.vectorstores import Chroma
from langchain.storage import LocalFileStore
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda

llm = ChatOpenAI(
    temperature=0.1, # 모델의 창의성을 조절하는 옵션 (높을 수록 창의적임)
)

# chunk_size - 텍스트를 분할하는 크기
# chunk_overlap - 분할된 텍스트의 중복 크기
# separator - 텍스트를 분할하는 구분자
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=600,
    chunk_overlap=100,
    separator="\n",
)

loader = UnstructuredFileLoader("./files/chapter_one.pdf")

docs = loader.load_and_split(text_splitter=splitter)

embeddings = OpenAIEmbeddings()

# cache_dir - 캐시 디렉토리
cache_dir = LocalFileStore("./.cache/")

# 캐시된 임베딩을 사용하여 Vector Store 초기화
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings,
    cache_dir,
)

# Vector Store 초기화
vectorstore = Chroma.from_documents(docs, cached_embeddings)

retriver = vectorstore.as_retriever();

map_doc_prompt = ChatPromptTemplate.from_messages([
    ("system", 
    """
    Use the following portion of a long document to see if any of the
    text is relevant to answer the question. Return any relevant text
    verbatim.
    """
    ),
    ("human", "{question}")
])

map_doc_chain = map_doc_prompt | llm

def map_docs(inputs):
    documents = inputs["documents"]
    question = inputs["question"]
    return "\n\n".join(
        map_doc_chain.invoke(
            {"question": question, "context": doc.page_content}
        ).content 
        for doc in documents
    )
    
map_chain = {"documents": retriver, "question": RunnablePassthrough()} | RunnableLambda(map_docs) 

final_prompt = ChatPromptTemplate.from_messages([
    ('system', 
    """
    Given the following extracted parts of a long document and a 
    question, create a final answer.
    If you don't know the answer, just say you don't know. Don't try
    to make up an anwser.
    {context}
    """
     ),
     ("human", "{question}")
])

chain = {"context": map_chain, "question": RunnablePassthrough()} | final_prompt | llm

chain.invoke("What is Physics?")

In [10]:
# 7-1. HuggingFaceHub
from langchain.llms import HuggingFaceHub
from langchain.prompts import PromptTemplate

prompt = PromptTemplate.from_template("What is the meaning of {word}")

llm = HuggingFaceHub(
    # repo_id="mistralai/Mistral-7B-Instruct-v0.3",
    repo_id="meta-llama/Meta-Llama-3-8B-Instruct", 
    model_kwargs={
        "max_new_tokens": 250,
    }
)

chain = prompt | llm

chain.invoke({"word": "potato"})

'What is the meaning of potato?\nThe word "potato" comes from the Spanish word "patata," which was borrowed from the Taino language of the Caribbean. The Taino people called the potato "batata," which referred to the sweet potato. The Spanish conquistador Hernán Cortés brought the potato back to Europe in the 16th century, where it became a staple crop.\nThe meaning of potato is a type of root vegetable that is native to the Andean region of South America. It is a starchy vegetable that is high in carbohydrates and low in fat. Potatoes are a good source of fiber, potassium, and several important vitamins and minerals. They are also a versatile ingredient that can be prepared in a variety of ways, including baking, boiling, mashing, and frying.\nIn addition to its culinary uses, the potato has also played an important role in human history and culture. It was a staple crop for many indigenous cultures in the Americas, and it played a key role in the development of European cuisine. The 

In [2]:
# 7-2. HuggingFacePipeline
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate

prompt = PromptTemplate.from_template("A {word} is a")

llm = HuggingFacePipeline.from_model_id(
    model_id="openai-community/gpt2",
    task="text-generation",
    pipeline_kwargs={
        "max_new_tokens": 50,
    },
)

chain = prompt | llm

chain.invoke({"word": "potato"})

'A potato is a type of white potato with three-quarter legs.\n\nA yellowish, oval in shape, is actually a potato that comes from an animal.\n\nA potato has a large, yellowish outer shell and also has a thick outer shell.'

In [None]:
# 7-3. GPT4ALL
from langchain_community.llms import GPT4All
from langchain.prompts import PromptTemplate

prompt = PromptTemplate.from_template("A {word} is a")

llm = GPT4All(model="./gpt4all-falcon-q4_0.gguf", n_threads=1)

chain = prompt | llm

chain.invoke({"word": "tomato"})

In [9]:
# 8-8. Function Calling
from langchain_openai import ChatOpenAI
from langchain.prompts import PromptTemplate
import json

# 날씨 함수 정의
def get_weather(lon, lat):
    print(f"Call an API...{lon}, {lat}")

# 날씨 함수 스키마
function = {
    "name": "get_weather",
    "description": "function that takes longitude and latitude to find the weather of a place",
    "parameters": {
        "type": "object",
        "properties": {
            "lon": {"type": "string", "description": "The longitue coordinate"},
            "lat": {"type": "string", "description": "The latitude coordinate"},
        },
    },
    "required": ["lon", "lat"]
}

llm = ChatOpenAI(
    temperature=0.1  # 모델의 창의성을 조절하는 옵션 (높을 수록 창의적임)
).bind(
    function_call="auto",  # 자동으로 함수 호출
    functions=[function],  # 함수 정의
)

# 템플릿 생성
prompt = PromptTemplate.from_template("Who is the weather in {city}")

# 체인 생성
chain = prompt | llm

# 체인 실행
response = chain.invoke({"city": "Seoul"})

# 함수 호출
response = response.additional_kwargs["function_call"]["arguments"]

# JSON 파싱
r = json.loads(response)

# 함수 호출
get_weather(**r)

Call an API...126.9779, 37.5665


In [1]:
# 10-1. Audio Extraction
import subprocess

def extract_audio_from_video(video_path, audio_path):
    command = ["ffmpeg", "-i", video_path, "-vn", audio_path]
    subprocess.run(command)

extract_audio_from_video("./files/podcast.mp4", "./files/audio.mp3")

ffmpeg version 7.0.1 Copyright (c) 2000-2024 the FFmpeg developers
  built with Apple clang version 15.0.0 (clang-1500.3.9.4)
  configuration: --prefix=/opt/homebrew/Cellar/ffmpeg/7.0.1 --enable-shared --enable-pthreads --enable-version3 --cc=clang --host-cflags= --host-ldflags='-Wl,-ld_classic' --enable-ffplay --enable-gnutls --enable-gpl --enable-libaom --enable-libaribb24 --enable-libbluray --enable-libdav1d --enable-libharfbuzz --enable-libjxl --enable-libmp3lame --enable-libopus --enable-librav1e --enable-librist --enable-librubberband --enable-libsnappy --enable-libsrt --enable-libssh --enable-libsvtav1 --enable-libtesseract --enable-libtheora --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libxvid --enable-lzma --enable-libfontconfig --enable-libfreetype --enable-frei0r --enable-libass --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libspeex --

In [4]:
# 10-2. Cutting The Audio
import math
from pydub import AudioSegment

# 오디오를 10분 단위로 자르기
def cutting_audio_into_chunks(
    audio_path,
    chunks_path,
    chunk_size=10,
):
    track = AudioSegment.from_file(audio_path)
    ten_minutes = chunk_size * 60 * 1000
    chunks = math.ceil(len(track) / ten_minutes)
    for i in range(chunks):
        start_time = i * ten_minutes
        end_time = (i + 1) * ten_minutes
        chunk = track[start_time:end_time]
        chunk.export(f"{chunks_path}/chunk_{i}.mp3", format="mp3")

cutting_audio_into_chunks("./files/audio.mp3", "./files/chunks")

In [16]:
# 10-3. Whisper Transcript
import openai
import glob

# Get Audio to Text
def transcribe_chunks(chunk_path, destination):
    files = glob.glob(f"{chunk_path}/*.mp3")
    for file in files: 
        with open(file, "rb") as audio_file, open(destination, "a") as text_file:
            # transcribe audio
            transcript = openai.audio.transcriptions.create(
                model="whisper-1", file=open(audio_file.name, "rb"), language="en"
            )
            text_file.write(transcript.text)

transcribe_chunks("./files/chunks", "./files/transcript.txt")

In [7]:
# 11-1. Your First Agent
from langchain_openai import ChatOpenAI
from langchain.tools import StructuredTool
from langchain.agents import initialize_agent, AgentType

llm = ChatOpenAI(
    temperature=0.1 # 모델의 창의성을 조절하는 옵션 (높을 수록 창의적임)
)

def plus(a, b):
    return float(a) + float(b)

agent = initialize_agent(
    llm=llm,  # LLM 모델
    verbose=True,  # 상세 모드
    agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION,  # 에이전트 타입
    handle_parsing_errors=True,  # 파싱 에러 처리
    tools=[
        StructuredTool.from_function(
            func=plus,
            name="Sum Calculator",
            description="Use this to perform sums of two numbers. This tool take two arguments, both should be numbers.",
        )  # 함수 정의
    ],
)

prompot = "Cost of $355.39 + $924.87 + $721.2 + $1940.29 + $573.63 + $65.72 + $35.00 + $552.00 + $76.16 + $29.12"

agent.invoke(prompot)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I can use the Sum Calculator tool to find the total cost of all these amounts added together.
Action:
```
{
  "action": "Sum Calculator",
  "action_input": {"a": 355.39, "b": 924.87}
}
```[0m
Observation: [36;1m[1;3m1280.26[0m
Thought:[32;1m[1;3mI have calculated the sum of the first two amounts. Now, I will continue adding the remaining amounts to find the total cost.
Action:
```
{
  "action": "Sum Calculator",
  "action_input": {"a": 1280.26, "b": 721.2}
}
```[0m
Observation: [36;1m[1;3m2001.46[0m
Thought:[32;1m[1;3mI have calculated the sum of the first three amounts. Now, I will continue adding the remaining amounts to find the total cost.
Action:
```
{
  "action": "Sum Calculator",
  "action_input": {"a": 2001.46, "b": 1940.29}
}
```[0m
Observation: [36;1m[1;3m3941.75[0m
Thought:[32;1m[1;3mI have calculated the sum of the first four amounts. Now, I will continue adding the remaining amounts to 

{'input': 'Cost of $355.39 + $924.87 + $721.2 + $1940.29 + $573.63 + $65.72 + $35.00 + $552.00 + $76.16 + $29.12',
 'output': 'I have calculated the sum of the first four amounts. Now, I will continue adding the remaining amounts to find the total cost.'}

In [6]:
# 11-3. Zero-shot ReAct Agent
from langchain_openai import ChatOpenAI
from langchain.tools import Tool
from langchain.agents import initialize_agent, AgentType

llm = ChatOpenAI(temperature=0.1)  # 모델의 창의성을 조절하는 옵션 (높을 수록 창의적임)


def plus(input):
    a, b = input.split(",")
    return float(a) + float(b)


agent = initialize_agent(
    llm=llm,  # LLM 모델
    verbose=True,  # 상세 모드
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,  # 에이전트 타입
    handle_parsing_errors=True,  # 파싱 에러 처리
    tools=[
        Tool.from_function(
            func=plus,
            name="Sum Calculator",
            description="Use this to perform sums of two numbers. Use this tool by sending a pair of number separated by a comma.\nExample: 1,2",
        )  # 함수 정의
    ],
)

prompt = "Cost of $355.39 + $924.87 + $721.2 + $1940.29 + $573.63 + $65.72 + $35.00 + $552.00 + $76.16 + $29.12"

agent.invoke(prompt)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI need to add up all these numbers to find the total cost.
Action: Sum Calculator
Action Input: 355.39, 924.87[0m
Observation: [36;1m[1;3m1280.26[0m
Thought:[32;1m[1;3mI need to continue adding the remaining numbers to the total.
Action: Sum Calculator
Action Input: 1280.26, 721.2[0m
Observation: [36;1m[1;3m2001.46[0m
Thought:[32;1m[1;3mI need to continue adding the remaining numbers to the total.
Action: Sum Calculator
Action Input: 2001.46, 1940.29[0m
Observation: [36;1m[1;3m3941.75[0m
Thought:[32;1m[1;3mI need to continue adding the remaining numbers to the total.
Action: Sum Calculator
Action Input: 3941.75, 573.63[0m
Observation: [36;1m[1;3m4515.38[0m
Thought:[32;1m[1;3mI need to continue adding the remaining numbers to the total.
Action: Sum Calculator
Action Input: 4515.38, 65.72[0m
Observation: [36;1m[1;3m4581.1[0m
Thought:[32;1m[1;3mI need to continue adding the remaining numbers to the 

{'input': 'Cost of $355.39 + $924.87 + $721.2 + $1940.29 + $573.63 + $65.72 + $35.00 + $552.00 + $76.16 + $29.12',
 'output': '$5273.38'}

In [9]:
# 11-3. OpenAI Functions Agent
from langchain_openai import ChatOpenAI
from langchain.tools import BaseTool
from langchain.agents import initialize_agent, AgentType
from pydantic import BaseModel, Field
from typing import Type

# LLM 모델 초기화
llm = ChatOpenAI(temperature=0.1)  # 모델의 창의성을 조절하는 옵션 (높을 수록 창의적임)

# 계산기 툴 스키마
class CalculatorToolArgsSchema(BaseModel):
    a: float = Field(description="The first number")
    b: float = Field(description="The second number")

# 계산기 툴
class CalculatorTool(BaseTool):
    name = "CalculatorTool"
    description = """
    Use this to perform sums of two numbers.
    The first and second arguments should be numbers.
    Only receives two arguments.
    """
    args_schema: Type[CalculatorToolArgsSchema] = CalculatorToolArgsSchema

    def _run(self, a, b):
        return a + b

# 에이전트 초기화
agent = initialize_agent(
    llm=llm,  # LLM 모델
    verbose=True,  # 상세 모드
    agent=AgentType.OPENAI_FUNCTIONS,  # 에이전트 타입
    handle_parsing_errors=True,  # 파싱 에러 처리
    tools=[CalculatorTool()],
)

# 프롬프트
prompt = "Cost of $355.39 + $924.87 + $721.2 + $1940.29 + $573.63 + $65.72 + $35.00 + $552.00 + $76.16 + $29.12"

# 에이전트 실행
agent.invoke(prompt)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `CalculatorTool` with `{'a': 355.39, 'b': 924.87}`


[0m[36;1m[1;3m1280.26[0m[32;1m[1;3m
Invoking: `CalculatorTool` with `{'a': 1280.26, 'b': 721.2}`


[0m[36;1m[1;3m2001.46[0m[32;1m[1;3m
Invoking: `CalculatorTool` with `{'a': 2001.46, 'b': 1940.29}`


[0m[36;1m[1;3m3941.75[0m[32;1m[1;3m
Invoking: `CalculatorTool` with `{'a': 3941.75, 'b': 573.63}`


[0m[36;1m[1;3m4515.38[0m[32;1m[1;3m
Invoking: `CalculatorTool` with `{'a': 4515.38, 'b': 65.72}`


[0m[36;1m[1;3m4581.1[0m[32;1m[1;3m
Invoking: `CalculatorTool` with `{'a': 4581.1, 'b': 35.0}`


[0m[36;1m[1;3m4616.1[0m[32;1m[1;3m
Invoking: `CalculatorTool` with `{'a': 4616.1, 'b': 552.0}`


[0m[36;1m[1;3m5168.1[0m[32;1m[1;3m
Invoking: `CalculatorTool` with `{'a': 5168.1, 'b': 76.16}`


[0m[36;1m[1;3m5244.26[0m[32;1m[1;3m
Invoking: `CalculatorTool` with `{'a': 5244.26, 'b': 29.12}`


[0m[36;1m[1;3m5273.38[0m[32;1m[1;

{'input': 'Cost of $355.39 + $924.87 + $721.2 + $1940.29 + $573.63 + $65.72 + $35.00 + $552.00 + $76.16 + $29.12',
 'output': 'The total cost of $355.39 + $924.87 + $721.2 + $1940.29 + $573.63 + $65.72 + $35.00 + $552.00 + $76.16 + $29.12 is $5273.38.'}

In [3]:
# 11-5. Search Tool
from langchain_openai import ChatOpenAI
from langchain.tools import BaseTool
from langchain.utilities import DuckDuckGoSearchAPIWrapper
from langchain.agents import initialize_agent, AgentType
from pydantic import BaseModel, Field
from typing import Type


# LLM 모델 초기화
llm = ChatOpenAI(
    temperature=0.1
    )  # 모델의 창의성을 조절하는 옵션 (높을 수록 창의적임)

class StockMarketSymbolSearchToolArgsSchema(BaseModel):
    query: str = Field(description="The query you will search for")


class StockMarketSymbolSearchTool(BaseTool):
    name = "StockMarketSymbolSearchTool"
    description = "Use this tool to find the stock market symbol of a company."
    args_schema: Type[StockMarketSymbolSearchToolArgsSchema] = (
        StockMarketSymbolSearchToolArgsSchema
    )

    def _run(self, query):
        ddg = DuckDuckGoSearchAPIWrapper()
        return ddg.run(query=query)


# 에이전트 초기화
agent = initialize_agent(
    llm=llm,  # LLM 모델
    verbose=True,  # 상세 모드
    agent=AgentType.OPENAI_FUNCTIONS,  # 에이전트 타입
    handle_parsing_errors=True,  # 파싱 에러 처리
    tools=[StockMarketSymbolSearchTool()],
)

prompt = "Give me information on Cloudflare stock and help me analyze if it's a potential good investment. Also tell me what symbol does the stock have."

agent.invoke(prompt)

  warn_deprecated(




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `StockMarketSymbolSearchTool` with `{'query': 'Cloudflare'}`


[0m[36;1m[1;3mRead the latest posts from Cloudflare experts and leaders on topics such as Internet traffic, cybersecurity, elections, AI, and more. Learn how Cloudflare products and features can help you protect, optimize, and innovate your online presence. Missed the live action at Cloudflare Connect? No worries! Dive into the exhilarating moments, groundbreaking insights, and industry revelations with our exclusive highlight reel. Immerse yourself in the energy of the event and stay tuned, stay connected, and stay inspired. Finally, because Cloudflare also operates a CDN, websites that are already on Cloudflare will be given a "hot-path," and will load faster. We at Cloudflare are always striving to bring more privacy options to the open Internet, and we are excited to provide more private and secure browsing to Edge users. San Francisco, CA, May 2

{'input': "Give me information on Cloudflare stock and help me analyze if it's a potential good investment. Also tell me what symbol does the stock have.",
 'output': "The stock market symbol for Cloudflare is NET. Cloudflare, Inc. (NYSE: NET) is a leading connectivity cloud company that has shown strong growth in revenue. In the first quarter of 2024, Cloudflare grew revenue by 30% year-over-year to $378.6 million, fueled by a record number of net-new customers.\n\nTo analyze if Cloudflare is a potential good investment, we can consider the following factors:\n1. **Financial Performance**: Cloudflare's strong revenue growth indicates a healthy financial performance.\n2. **Market Position**: Cloudflare is a leading connectivity cloud company, which positions it well in the market.\n3. **Industry Trends**: The increasing importance of cybersecurity and internet connectivity could benefit Cloudflare's business.\n4. **Competition**: It's important to assess how Cloudflare compares to its 

In [25]:
# 11-6. Stock Information Tools
from langchain_openai import ChatOpenAI
from langchain.tools import BaseTool
from langchain.utilities import DuckDuckGoSearchAPIWrapper
from langchain.agents import initialize_agent
from pydantic import BaseModel, Field
from typing import Type
import os
import requests
import yfinance as yf

alpha_ventage_api_key = os.environ.get("ALPHA_VENTAGE_API_KEY")

# LLM 모델 초기화
llm = ChatOpenAI(
    model="gpt-4-turbo",
    temperature=0.1
)  # 모델의 창의성을 조절하는 옵션 (높을 수록 창의적임)


class StockMarketSymbolSearchToolArgsSchema(BaseModel):
    query: str = Field(description="The query you will search for")


class StockMarketSymbolSearchTool(BaseTool):
    name = "StockMarketSymbolSearchTool"
    description = "Use this tool to find the stock market symbol of a company."
    args_schema: Type[StockMarketSymbolSearchToolArgsSchema] = (
        StockMarketSymbolSearchToolArgsSchema
    )

    def _run(self, query):
        ddg = DuckDuckGoSearchAPIWrapper()
        return ddg.run(query=query)

class CampayHistoryToolArgsSchema(BaseModel):
    symbol: str = Field(description="Stock symbol of the company.\nExample:APPL,TSLA")

class CompanyHistoryTool(BaseTool):
    name = "CompanyHistory"
    description = """
    Use this to get an history of the financials of the company.
    You should enter a stock symbol.
    """
    args_schema: Type[CampayHistoryToolArgsSchema] = CampayHistoryToolArgsSchema

    def _run(self, symbol):
        c = yf.Ticker(symbol)
        return c.history(period="1mo")


class CompanyIncomeStatementTool(BaseTool):
    name = "CompanyIncomeStatement"
    description = """
    Use this to get the income statement of a company.
    You should enter a stock symbol.
    """
    args_schema: Type[CampayHistoryToolArgsSchema] = CampayHistoryToolArgsSchema

    def _run(self, symbol):
        c = yf.Ticker(symbol)
        return c.income_stmt


class CompanyBalanceSheetTool(BaseTool):
    name = "CompanyBalanceSheet"
    description = """
    Use this to get the balance sheet of a company stock.
    You should enter a stock symbol.
    """
    args_schema: Type[CampayHistoryToolArgsSchema] = CampayHistoryToolArgsSchema

    def _run(self, symbol):
        c = yf.Ticker(symbol)
        return c.balance_sheet

class CompanyCashflowTool(BaseTool):
    name = "CompanyCashflow"
    description = """
    Use this to get the cashflow of a company stock.
    You should enter a stock symbol.
    """
    args_schema: Type[CampayHistoryToolArgsSchema] = CampayHistoryToolArgsSchema

    def _run(self, symbol):
        c = yf.Ticker(symbol)
        return c.cashflow


# 에이전트 초기화
agent = initialize_agent(
    llm=llm,  # LLM 모델
    verbose=True,  # 상세 모드
    agent=AgentType.OPENAI_FUNCTIONS,  # 에이전트 타입
    handle_parsing_errors=True,  # 파싱 에러 처리
    tools=[
        StockMarketSymbolSearchTool(),
        CompanyHistoryTool(),
        CompanyIncomeStatementTool(),
        CompanyBalanceSheetTool(),
        CompanyCashflowTool(),
    ],
)

prompt = """
Give me financial information on Cloudflare stock,
considering it's history, financials, income statements, balance sheet and cashflow help me analyze if it's a potential good investment.
"""

result = agent.invoke(prompt)

result["output"]



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `StockMarketSymbolSearchTool` with `{'query': 'Plug Power'}`


[0m[36;1m[1;3mLATHAM, N.Y., July 18, 2024 (GLOBE NEWSWIRE) -- Plug Power Inc. ("Plug Power") (NASDAQ: PLUG), a global leader in comprehensive hydrogen solutions for the green hydrogen economy, today ... After Plug Power (PLUG-8.53%) stock plummeted 64% in 2023, investors rang in the new year with renewed hope that the fuel cell and hydrogen stock would be able to rebound from the previous year's ... Plug delivers its green hydrogen solutions directly to its customers and through joint venture partners into multiple environments, including material handling, e-mobility, power generation, and industrial applications. NASDAQ: PLUG. 3.21. +0.14 (4.56%) Volume: 60,403,759. 20 minutes minimum delay | July 15, 2024 | 4:00 PM. Plug Power Inc. (NASDAQ:PLUG) is active in a growth market, but the company's execution remains sub-par. The company burns through ca

"### Financial Analysis of Plug Power Inc. (NASDAQ: PLUG)\n\n#### Stock History\nPlug Power's stock has shown significant volatility in recent times. The stock price has fluctuated between $2.21 and $3.34 over the last month, indicating a potentially high-risk investment.\n\n#### Income Statement Highlights (2023)\n- **Net Income**: The company reported a substantial net loss of $1.37 billion.\n- **Revenue**: Total revenue was $891.34 million.\n- **Operating Income**: Operating loss was $1.04 billion.\n- **EBITDA**: Negative $1.26 billion, indicating operational challenges.\n- **Cost of Revenue**: $1.39 billion, which is higher than the gross profit, suggesting cost management issues.\n\n#### Balance Sheet Highlights (2023)\n- **Total Debt**: $968.85 million, with a net debt position of $64.16 million.\n- **Cash and Equivalents**: $135.03 million, which provides some liquidity but may be insufficient to cover the debt fully.\n- **Assets vs. Liabilities**: The company has a significant 