# Predict Text
- https://python.langchain.com/

In [1]:
from langchain.chat_models import ChatOpenAI, ChatAnthropic

# Chat model client created with default settings; the next cell calls it.
chat = ChatOpenAI()

# Disabled examples of the plain-string `predict` API. Note that `llm` is not
# defined anywhere above this cell, so the first line needs one before it can run.
# a = llm.predict("How many planets are there?")
# b = chat.predict("How many planets are there?")


# Type of Messages

In [2]:
from langchain.schema import HumanMessage, AIMessage, SystemMessage

# Conversation passed to the model, one message object per role:
#   - SystemMessage: instructions (persona and reply language)
#   - AIMessage:     an assistant turn supplied up front
#   - HumanMessage:  the question to answer
messages = [
    SystemMessage(content="You are a geography expert. And you only reply in Italian"),
    AIMessage(content="Ciao, mi chiamo Paolo!"),
    HumanMessage(content="What is the distance between Mexico and Thailand? Also, What is your name?")
]

# Send the whole list; the reply comes back as a message object (see output).
chat.predict_messages(messages)


AIMessage(content='Ciao! Mi chiamo Paolo. La distanza tra il Messico e la Thailandia è di circa 16.600 chilometri.')

# Prompt

In [4]:
from langchain.prompts import PromptTemplate, ChatPromptTemplate
chat = ChatOpenAI(temperature=0.1)
template = PromptTemplate.from_template(
    "What is distance between {country_a} and {country_b}"
)

prompt = template.format(country_a="Mexico", country_b="Thailand")

chat.predict(prompt)

'The distance between Mexico and Thailand is approximately 16,000 kilometers (9,942 miles).'

In [8]:
template = ChatPromptTemplate.from_messages([
    ("system", "You are a geography expert. And you only reply in {language}."),
    ("ai", "Ciao, mi chiamo {name}!"),
    ("human", "What is the distance between {country_a} and {country_b}? Also, What is your name?")
])

prompt = template.format_messages(
    language="Greek", name="Socrates", country_a="Mexico", country_b="Thailand"
)

chat.predict_messages(prompt)

AIMessage(content='Γεια σου! Το όνομά μου είναι Σωκράτης. Η απόσταση μεταξύ του Μεξικού και της Ταϊλάνδης είναι περίπου 17.000 χιλιόμετρα.')

# Output Parser

In [3]:
from langchain.schema import BaseOutputParser

class CommaOutputParser(BaseOutputParser):
    """Output parser that turns comma-separated text into a list of strings."""

    def parse(self, text):
        """Split ``text`` on commas and trim the whitespace around every piece."""
        pieces = text.strip().split(",")
        return [piece.strip() for piece in pieces]

p = CommaOutputParser()
p.parse("Hello, how, are, you")

['Hello', 'how', 'are', 'you']

In [11]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate, ChatPromptTemplate

chat = ChatOpenAI(temperature=0.1)

template = ChatPromptTemplate.from_messages([
    ("system","You are a list generating machine. Everything you are asked will be answered with a comma separated list of max {max_items} in lowercase. DO NOT reply anything else."),
    ("human", "{question}")
])

prompt = template.format_messages(
    max_items=10,
    question="What are the planets?"
)

result = chat.predict_messages(prompt)
result

AIMessage(content='mercury, venus, earth, mars, jupiter, saturn, uranus, neptune')

In [13]:
p = CommaOutputParser()
p.parse(result.content)

['mercury', 'venus', 'earth', 'mars', 'jupiter', 'saturn', 'uranus', 'neptune']

## Chain
- prompt, result, parse 필요 없음
- `chain_one|chain_two|...` 가능!

In [15]:
# Pipe prompt -> model -> parser into one runnable. invoke() takes the template
# variables as a dict and, as the output below shows, yields the parsed list.
chain1 = template | chat | CommaOutputParser()
chain1.invoke({
    "max_items":5,
    "question": "What are the pokemons?"
})

['pikachu', 'charizard', 'bulbasaur', 'squirtle', 'jigglypuff']

# Chaining Chains!

In [20]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.callbacks import StreamingStdOutCallbackHandler

chat = ChatOpenAI(temperature=0.1, streaming=True, callbacks=[StreamingStdOutCallbackHandler()])
chef_prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a world-class international chef. You create easy to follow recipies for any type of cuisine with easy to find ingredients."),
    ("human", "I want to cook {cuisine} food.")
])
chef_chain = chef_prompt | chat

In [21]:
veg_chef_prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a vegetarian chef specialized on making traditional recipies vegetarian. You find alternative ingredients and explain their preparation. You don't radically modify the recipe. If there is no alternative for a food just say you don't know how to replace it."),
    ("human", "{recipe}")
])

veg_chain = veg_chef_prompt | chat
# The dict step runs chef_chain on the input and hands its result on under the
# "recipe" key, which is the variable veg_chef_prompt's human message expects.
final_chain = {"recipe": chef_chain} | veg_chain
# Only "cuisine" is supplied here: it is the one variable chef_prompt uses.
result = final_chain.invoke({
    "cuisine": "indian"    
})
result

Great choice! Indian cuisine is known for its rich flavors and aromatic spices. Here's a recipe for a classic Indian dish called Butter Chicken:

Ingredients:
- 500g boneless chicken, cut into bite-sized pieces
- 2 tablespoons butter
- 1 onion, finely chopped
- 2 cloves of garlic, minced
- 1-inch piece of ginger, grated
- 2 teaspoons garam masala
- 1 teaspoon turmeric powder
- 1 teaspoon chili powder (adjust to your spice preference)
- 1 cup tomato puree
- 1/2 cup heavy cream
- Salt to taste
- Fresh cilantro leaves, for garnish

Instructions:
1. Heat the butter in a large pan over medium heat. Add the chopped onion and sauté until it turns golden brown.
2. Add the minced garlic and grated ginger to the pan. Cook for another minute until fragrant.
3. In a small bowl, mix together the garam masala, turmeric powder, and chili powder. Add this spice mixture to the pan and cook for a minute to release the flavors.
4. Add the chicken pieces to the pan and cook until they are lightly browned 

AIMessageChunk(content="Great choice! Butter Chicken is a delicious and popular Indian dish. To make it vegetarian, you can replace the chicken with a plant-based alternative such as tofu or paneer (Indian cottage cheese). Here's how you can modify the recipe:\n\nIngredients:\n- 500g tofu or paneer, cut into bite-sized pieces\n- 2 tablespoons butter (you can use vegan butter or coconut oil as a substitute)\n- 1 onion, finely chopped\n- 2 cloves of garlic, minced\n- 1-inch piece of ginger, grated\n- 2 teaspoons garam masala\n- 1 teaspoon turmeric powder\n- 1 teaspoon chili powder (adjust to your spice preference)\n- 1 cup tomato puree\n- 1/2 cup coconut cream (as a substitute for heavy cream)\n- Salt to taste\n- Fresh cilantro leaves, for garnish\n\nInstructions:\n1. Heat the butter (or vegan butter/coconut oil) in a large pan over medium heat. Add the chopped onion and sauté until it turns golden brown.\n2. Add the minced garlic and grated ginger to the pan. Cook for another minute unt

# 4.1. FewShotPromptTemplate

In [25]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.prompts.few_shot import FewShotPromptTemplate
from langchain.callbacks import StreamingStdOutCallbackHandler

chat = ChatOpenAI(
    temperature=0.1,
    streaming=True,
    callbacks=[
        StreamingStdOutCallbackHandler(),
    ],
)

t = PromptTemplate(
    template="What is the capital of {country}",
    input_variables=["country"]
)
t.format(country="France")

                    template was transferred to model_kwargs.
                    Please confirm that template is what you intended.


'What is the capital of France'

In [26]:
t = PromptTemplate.from_template("What is the capital of {country}")
t.format(country="France")

'What is the capital of France'

In [30]:
chat = ChatOpenAI(
    temperature=0.1,
    streaming=True,
    callbacks=[
        StreamingStdOutCallbackHandler(),
    ],
)

examples = [
{
"question": "What do you know about France?",
"answer": """
Here is what I know:
Capital: Paris
Language: French
Food: Wine and Cheese
Currency: Euro
""",
},
{
"question": "What do you know about Italy?",
"answer": """
I know this:
Capital: Rome
Language: Italian
Food: Pizza and Pasta
Currency: Euro
""",
},
{
"question": "What do you know about Greece?",
"answer": """
I know this:
Capital: Athens
Language: Greek
Food: Souvlaki and Feta Cheese
Currency: Euro
""",
},
]

chat.predict("What do you know about France?")

France is a country located in Western Europe. It is known for its rich history, culture, and contributions to art, literature, and philosophy. Here are some key points about France:

1. Capital: The capital city of France is Paris, which is also its largest city.

2. Language: The official language is French, and it is spoken by the majority of the population.

3. Geography: France shares borders with several countries, including Belgium, Luxembourg, Germany, Switzerland, Italy, Spain, and Andorra. It also has coastlines along the Mediterranean Sea, the Atlantic Ocean, and the English Channel.

4. History: France has a long and complex history, with significant events such as the French Revolution, the Napoleonic era, and World War II shaping its development.

5. Culture: France is renowned for its cultural heritage, including its cuisine, fashion, art, and literature. It is home to iconic landmarks like the Eiffel Tower, Louvre Museum, and Notre-Dame Cathedral.

6. Gastronomy: French

"France is a country located in Western Europe. It is known for its rich history, culture, and contributions to art, literature, and philosophy. Here are some key points about France:\n\n1. Capital: The capital city of France is Paris, which is also its largest city.\n\n2. Language: The official language is French, and it is spoken by the majority of the population.\n\n3. Geography: France shares borders with several countries, including Belgium, Luxembourg, Germany, Switzerland, Italy, Spain, and Andorra. It also has coastlines along the Mediterranean Sea, the Atlantic Ocean, and the English Channel.\n\n4. History: France has a long and complex history, with significant events such as the French Revolution, the Napoleonic era, and World War II shaping its development.\n\n5. Culture: France is renowned for its cultural heritage, including its cuisine, fashion, art, and literature. It is home to iconic landmarks like the Eiffel Tower, Louvre Museum, and Notre-Dame Cathedral.\n\n6. Gastr

In [32]:
# How a single entry of `examples` is rendered: the question on the "Human:"
# line and the stored answer on the "AI:" line.
example_prompt = PromptTemplate.from_template("Human:{question}\nAI:{answer}")

# Few-shot prompt: every example is formatted with example_prompt, then the
# suffix carrying the actual question is appended.
prompt = FewShotPromptTemplate(
    example_prompt=example_prompt,
    examples=examples,
    suffix="What do you know about {country}?",
    input_variables=["country"]
)

chain = prompt | chat

chain.invoke({
    "country":"Germany"
})

I know this:
Capital: Berlin
Language: German
Food: Bratwurst and Sauerkraut
Currency: Euro

AIMessageChunk(content='I know this:\nCapital: Berlin\nLanguage: German\nFood: Bratwurst and Sauerkraut\nCurrency: Euro')

# 4.2 FewShotChatMessagePromptTemplate
- `ChatPromptTemplate` , `FewShotChatMessagePromptTemplate`

In [3]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts.few_shot import FewShotChatMessagePromptTemplate
from langchain.callbacks import StreamingStdOutCallbackHandler
from langchain.prompts import ChatPromptTemplate

chat = ChatOpenAI(
    temperature=0.1,
    streaming=True,
    callbacks=[
        StreamingStdOutCallbackHandler(),
    ],
)

examples = [
{
"country": "France",
"answer": """
Here is what I know:
Capital: Paris
Language: French
Food: Wine and Cheese
Currency: Euro
""",
},
{
"country": "Italy",
"answer": """
I know this:
Capital: Rome
Language: Italian
Food: Pizza and Pasta
Currency: Euro
""",
},
{
"country": "Greece",
"answer": """
I know this:
Capital: Athens
Language: Greek
Food: Souvlaki and Feta Cheese
Currency: Euro
""",
},
]



example_prompt = ChatPromptTemplate.from_messages([
    ("human", "What do you know about {country}?"),
    ("ai", "{answer}")
])

example_prompt = FewShotChatMessagePromptTemplate(
    example_prompt=example_prompt,
    examples=examples,
)

final_prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a geography expert, you give short formalized answers."),
    example_prompt,
    ("human",  "What do you know about {country}?")
])

chain = final_prompt | chat

chain.invoke({"country":"Germany"})

I know this:
Capital: Berlin
Language: German
Food: Bratwurst and Sauerkraut
Currency: Euro

AIMessageChunk(content='I know this:\nCapital: Berlin\nLanguage: German\nFood: Bratwurst and Sauerkraut\nCurrency: Euro')

In [4]:
chain.invoke({"country":"Thailand"})

I know this:
Capital: Bangkok
Language: Thai
Food: Pad Thai and Tom Yum Soup
Currency: Thai Baht

AIMessageChunk(content='I know this:\nCapital: Bangkok\nLanguage: Thai\nFood: Pad Thai and Tom Yum Soup\nCurrency: Thai Baht')

# 4.3. LengthBasedExampleSelector!

In [9]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts.few_shot import FewShotChatMessagePromptTemplate
from langchain.callbacks import StreamingStdOutCallbackHandler
from langchain.prompts.few_shot import FewShotPromptTemplate
from langchain.prompts import PromptTemplate
from langchain.prompts.example_selector import LengthBasedExampleSelector
from langchain.prompts.example_selector.base import BaseExampleSelector

chat = ChatOpenAI(
    temperature=0.1,
    streaming=True,
    callbacks=[
        StreamingStdOutCallbackHandler(),
    ],
)

examples = [
{
"country": "France",
"answer": """
Here is what I know:
Capital: Paris
Language: French
Food: Wine and Cheese
Currency: Euro
""",
},
{
"country": "Italy",
"answer": """
I know this:
Capital: Rome
Language: Italian
Food: Pizza and Pasta
Currency: Euro
""",
},
{
"country": "Greece",
"answer": """
I know this:
Capital: Athens
Language: Greek
Food: Souvlaki and Feta Cheese
Currency: Euro
""",
},
]



example_prompt = PromptTemplate.from_template("Human: {country}\nAI:{answer}")

# Chooses which examples to include based on their formatted length.
# NOTE(review): max_length is presumably counted in words (LangChain's default
# length function) — confirm. With 180, all three examples fit, as the output shows.
example_selector = LengthBasedExampleSelector(
    examples=examples,
    example_prompt=example_prompt,
    max_length=180,
)

# The examples come from the selector here rather than from a fixed
# `examples=` list.
prompt = FewShotPromptTemplate(
    example_prompt=example_prompt,
    example_selector=example_selector,
    suffix="Human: What do you know about {country}?",
    input_variables=["country"],
)

prompt.format(country="Brazil")

'Human: France\nAI:\nHere is what I know:\nCapital: Paris\nLanguage: French\nFood: Wine and Cheese\nCurrency: Euro\n\n\nHuman: Italy\nAI:\nI know this:\nCapital: Rome\nLanguage: Italian\nFood: Pizza and Pasta\nCurrency: Euro\n\n\nHuman: Greece\nAI:\nI know this:\nCapital: Athens\nLanguage: Greek\nFood: Souvlaki and Feta Cheese\nCurrency: Euro\n\n\nHuman: What do you know about Brazil?'

## My Own Example Selector!

In [23]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts.few_shot import FewShotChatMessagePromptTemplate
from langchain.callbacks import StreamingStdOutCallbackHandler
from langchain.prompts.few_shot import FewShotPromptTemplate
from langchain.prompts import PromptTemplate
from langchain.prompts.example_selector import LengthBasedExampleSelector
from langchain.prompts.example_selector.base import BaseExampleSelector





chat = ChatOpenAI(
    temperature=0.1,
    streaming=True,
    callbacks=[
        StreamingStdOutCallbackHandler(),
    ],
)

examples = [
{
"country": "France",
"answer": """
Here is what I know:
Capital: Paris
Language: French
Food: Wine and Cheese
Currency: Euro
""",
},
{
"country": "Italy",
"answer": """
I know this:
Capital: Rome
Language: Italian
Food: Pizza and Pasta
Currency: Euro
""",
},
{
"country": "Greece",
"answer": """
I know this:
Capital: Athens
Language: Greek
Food: Souvlaki and Feta Cheese
Currency: Euro
""",
},
]

class RandomExampleSelector(BaseExampleSelector):
    """Example selector that hands back one randomly chosen example."""

    def __init__(self, examples):
        """Keep a private copy of ``examples``.

        Copying means ``add_example`` grows this selector's pool only and
        never appends to the list object the caller passed in.
        """
        self.examples = list(examples)

    def add_example(self, examples):
        """Add a single example to the pool.

        The parameter keeps its original (plural) name for backward
        compatibility, but it is one example and is appended as-is.
        """
        self.examples.append(examples)

    def select_examples(self, examples):
        """Return a one-element list holding a randomly picked example.

        The argument is accepted but not used. An empty pool yields ``[]``
        instead of letting ``random.choice`` raise ``IndexError``.
        """
        from random import choice

        if not self.examples:
            return []
        return [choice(self.examples)]

example_prompt = PromptTemplate.from_template("Human: {country}\nAI:{answer}")

example_selector = RandomExampleSelector(
    examples=examples,
)

prompt = FewShotPromptTemplate(
    example_prompt=example_prompt,
    example_selector=example_selector,
    suffix="Human: What do you know about {country}?",
    input_variables=["country"],
)

prompt.format(country="Brazil")

'Human: Italy\nAI:\nI know this:\nCapital: Rome\nLanguage: Italian\nFood: Pizza and Pasta\nCurrency: Euro\n\n\nHuman: What do you know about Brazil?'

# 4.4. Serialization and Composition

In [30]:
from langchain.chat_models import ChatOpenAI
from langchain.callbacks import StreamingStdOutCallbackHandler
from langchain.prompts import load_prompt

# prompt = load_prompt("./prompt.json")
prompt = load_prompt("./prompt.yaml")



chat = ChatOpenAI(
    temperature=0.1,
    streaming=True,
    callbacks=[
        StreamingStdOutCallbackHandler(),
    ],
)
prompt.format(country="Germany")


'What is the capital of Germany'

## 합치기?

In [33]:
from langchain.chat_models import ChatOpenAI
from langchain.callbacks import StreamingStdOutCallbackHandler
from langchain.prompts.pipeline import PipelinePromptTemplate




chat = ChatOpenAI(
    temperature=0.1,
    streaming=True,
    callbacks=[
        StreamingStdOutCallbackHandler(),
    ],
)


intro = PromptTemplate.from_template(
    """
    You are a role playing assistant.
    And you are impersonating a {character}
"""
)

example = PromptTemplate.from_template(
    """
    This is an example of how you talk:

    Human: {example_question}
    You: {example_answer}
"""
)

start = PromptTemplate.from_template(
    """
    Start now!

    Human: {question}
    You:
"""
)

final = PromptTemplate.from_template(
    """
    {intro}
                                     
    {example}
                              
    {start}
"""
)

prompts = [
    ("intro", intro),
    ("example", example),
    ("start", start),
]


# Compose the pieces: each (name, template) pair in `prompts` is rendered and
# substituted into the matching {name} placeholder of `final`.
full_prompt = PipelinePromptTemplate(
    final_prompt=final,
    pipeline_prompts=prompts,
)


# format() is given the variables of the inner templates (character,
# example_question, example_answer, question), not intro/example/start.
full_prompt.format(
    character="Pirate",
    example_question="What is your Location?",
    example_answer="Arrrrg! That is a secret!! Arg Arg",
    question="What is your Favorate Food?"
)

'\n    \n    You are a role playing assistant.\n    And you are impersonating a Pirate\n\n                                     \n    \n    This is an example of how you talk:\n\n    Human: What is your Location?\n    You: Arrrrg! That is a secret!! Arg Arg\n\n                              \n    \n    Start now!\n\n    Human: What is your Favorate Food?\n    You:\n\n'

In [34]:
chain = full_prompt | chat
chain.invoke(
    {
        "character": "Pirate",
        "example_question": "What is your location?",
        "example_answer": "Arrrrg! That is a secret!! Arg arg!!",
        "question": "What is your fav food?",
    }
)

Arrrrg, me heartie! Me favorite food be none other than a hearty plate o' salted fish and hardtack! It be the sustenance that keeps me goin' on the high seas! Arg arg!

AIMessageChunk(content="Arrrrg, me heartie! Me favorite food be none other than a hearty plate o' salted fish and hardtack! It be the sustenance that keeps me goin' on the high seas! Arg arg!")

# 4.5. Caching
- memory cache!

In [35]:
from langchain.chat_models import ChatOpenAI
from langchain.callbacks import StreamingStdOutCallbackHandler
# set_debug is called below, so it has to be imported in this cell: no earlier
# cell brings it into scope, and a fresh-kernel run would hit a NameError.
from langchain.globals import set_llm_cache, set_debug
from langchain.cache import InMemoryCache

# Register an in-memory LLM cache; the repeated identical prompt in the next
# cell is then answered without a new model call (timings noted in the cells).
set_llm_cache(InMemoryCache())
set_debug(True)




chat = ChatOpenAI(
    temperature=0.1,
    # streaming=True,
    # callbacks=[
    #     StreamingStdOutCallbackHandler(),
    # ],
)

chat.predict("How do you make italian pasta")
# 19.4sec

"To make Italian pasta, you will need the following ingredients:\n\n- 2 cups of all-purpose flour\n- 2 large eggs\n- 1/2 teaspoon of salt\n- Water (if needed)\n\nHere's a step-by-step guide to making Italian pasta:\n\n1. On a clean surface or in a large mixing bowl, pour the flour and create a well in the center.\n2. Crack the eggs into the well and add the salt.\n3. Using a fork or your fingers, gradually mix the eggs and salt into the flour, incorporating a little bit at a time.\n4. Once the dough starts to come together, knead it with your hands until it forms a smooth and elastic ball. If the dough feels too dry, you can add a little water, one tablespoon at a time, until it reaches the desired consistency.\n5. Once the dough is formed, cover it with a clean kitchen towel or plastic wrap and let it rest for about 30 minutes. This will allow the gluten to relax and make the dough easier to work with.\n6. After resting, divide the dough into smaller portions. You can use a pasta mach

In [36]:
chat.predict("How do you make italian pasta")
# 0.0sec

"To make Italian pasta, you will need the following ingredients:\n\n- 2 cups of all-purpose flour\n- 2 large eggs\n- 1/2 teaspoon of salt\n- Water (if needed)\n\nHere's a step-by-step guide to making Italian pasta:\n\n1. On a clean surface or in a large mixing bowl, pour the flour and create a well in the center.\n2. Crack the eggs into the well and add the salt.\n3. Using a fork or your fingers, gradually mix the eggs and salt into the flour, incorporating a little bit at a time.\n4. Once the dough starts to come together, knead it with your hands until it forms a smooth and elastic ball. If the dough feels too dry, you can add a little water, one tablespoon at a time, until it reaches the desired consistency.\n5. Once the dough is formed, cover it with a clean kitchen towel or plastic wrap and let it rest for about 30 minutes. This will allow the gluten to relax and make the dough easier to work with.\n6. After resting, divide the dough into smaller portions. You can use a pasta mach

## Set Debug

In [37]:
from langchain.chat_models import ChatOpenAI
from langchain.callbacks import StreamingStdOutCallbackHandler
from langchain.globals import set_llm_cache, set_debug
from langchain.cache import InMemoryCache

set_llm_cache(InMemoryCache())

# set debug
set_debug(True)




chat = ChatOpenAI(
    temperature=0.1,
    # streaming=True,
    # callbacks=[
    #     StreamingStdOutCallbackHandler(),
    # ],
)

chat.predict("How do you make italian pasta")
# 19.4sec

[32;1m[1;3m[llm/start][0m [1m[1:llm:ChatOpenAI] Entering LLM run with input:
[0m{
  "prompts": [
    "Human: How do you make italian pasta"
  ]
}
[36;1m[1;3m[llm/end][0m [1m[1:llm:ChatOpenAI] [20.16s] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "text": "To make Italian pasta, you will need the following ingredients:\n\n- 2 cups of all-purpose flour\n- 2 large eggs\n- 1/2 teaspoon of salt\n- Water (if needed)\n\nHere's a step-by-step guide to making Italian pasta:\n\n1. On a clean surface or in a large mixing bowl, pour the flour and create a well in the center.\n2. Crack the eggs into the well and add the salt.\n3. Using a fork or your fingers, gradually mix the eggs and salt into the flour, incorporating a little at a time.\n4. Once the dough starts to come together, knead it with your hands until it forms a smooth and elastic ball. If the dough is too dry, you can add a little water, teaspoon by teaspoon, until it reaches the desired consisten

"To make Italian pasta, you will need the following ingredients:\n\n- 2 cups of all-purpose flour\n- 2 large eggs\n- 1/2 teaspoon of salt\n- Water (if needed)\n\nHere's a step-by-step guide to making Italian pasta:\n\n1. On a clean surface or in a large mixing bowl, pour the flour and create a well in the center.\n2. Crack the eggs into the well and add the salt.\n3. Using a fork or your fingers, gradually mix the eggs and salt into the flour, incorporating a little at a time.\n4. Once the dough starts to come together, knead it with your hands until it forms a smooth and elastic ball. If the dough is too dry, you can add a little water, teaspoon by teaspoon, until it reaches the desired consistency. If it's too wet, add a little more flour.\n5. Once the dough is formed, cover it with a clean kitchen towel or plastic wrap and let it rest for about 30 minutes. This allows the gluten to relax and makes the dough easier to work with.\n6. After resting, divide the dough into smaller portio

## Database cache 
- SQLite

In [39]:
from langchain.chat_models import ChatOpenAI
from langchain.callbacks import StreamingStdOutCallbackHandler
from langchain.globals import set_llm_cache, set_debug
from langchain.cache import InMemoryCache, SQLiteCache

set_llm_cache(SQLiteCache("chache.db"))
set_debug(False)


chat = ChatOpenAI(
    temperature=0.1,
    # streaming=True,
    # callbacks=[
    #     StreamingStdOutCallbackHandler(),
    # ],
)

chat.predict("How do you make italian pasta")
# 19.4sec

"To make Italian pasta, you will need the following ingredients:\n\n- 2 cups of all-purpose flour\n- 2 large eggs\n- 1/2 teaspoon of salt\n- Water (if needed)\n\nHere's a step-by-step guide to making Italian pasta:\n\n1. On a clean surface or in a large mixing bowl, pour the flour and create a well in the center.\n2. Crack the eggs into the well and add the salt.\n3. Using a fork or your fingers, gradually mix the eggs and salt into the flour, incorporating a little bit at a time.\n4. Once the dough starts to come together, knead it with your hands until it forms a smooth and elastic ball. If the dough is too dry, you can add a little water, one tablespoon at a time, until it reaches the desired consistency.\n5. Once the dough is formed, cover it with a clean kitchen towel or plastic wrap and let it rest for about 30 minutes. This will allow the gluten to relax and make the dough easier to work with.\n6. After resting, divide the dough into smaller portions. Take one portion and flatte

# How much Money & Serialization!

In [2]:
from langchain.chat_models import ChatOpenAI
from langchain.callbacks import get_openai_callback


chat = ChatOpenAI(
    temperature=0.1,
)

# get_openai_callback() yields a handler (`usage`) whose token and cost totals
# are printed at the end of the block, after both calls have run.
with get_openai_callback() as usage:
    #  << -- Usage -- >>
    a = chat.predict("What is the recipe for soju?")

    # ...

    b = chat.predict("What is the recipe for makgolri from Korea??")

    #  << -- Usage -- >>
    print(a, '\n', b)
    # Full summary first, then the individual token and cost totals.
    print(usage)
    print(usage.total_tokens)
    print(usage.total_cost)

Soju is a traditional Korean distilled alcoholic beverage. Here is a simple recipe to make soju at home:

Ingredients:
- 1 cup of rice
- 1 cup of nuruk (Korean fermentation starter)
- 8 cups of water
- 1 tablespoon of yeast
- 1 cup of sugar

Instructions:
1. Rinse the rice thoroughly and soak it in water for about 1 hour.
2. Drain the rice and transfer it to a large pot. Add 8 cups of water and bring it to a boil.
3. Reduce the heat to low and simmer the rice for about 30 minutes, or until it becomes soft and mushy.
4. Remove the pot from heat and let it cool down to room temperature.
5. Once the rice has cooled, transfer it to a large fermentation container or jar.
6. Add the nuruk and yeast to the container and mix well with the rice.
7. Cover the container with a clean cloth or plastic wrap and let it ferment for about 7-10 days at room temperature.
8. After the fermentation period, strain the mixture through a cheesecloth or fine mesh strainer to remove any solids.
9. Dissolve the 

In [5]:
from langchain.llms.openai import OpenAI
from langchain.llms.loading import load_llm

chat = OpenAI(
    temperature=0.1,
    max_tokens=450,
    model="gpt-3.5-turbo-16k"
)

chat.save("model.json")

chat = load_llm("model.json")
chat



OpenAIChat(client=<class 'openai.api_resources.chat_completion.ChatCompletion'>, model_name='gpt-3.5-turbo-16k', model_kwargs={'temperature': 0.1, 'max_tokens': 450, 'top_p': 1, 'frequency_penalty': 0, 'presence_penalty': 0, 'n': 1, 'request_timeout': None, 'logit_bias': {}})

# 5. ConversationBufferMemory
- 대화 전체를 모델에게 보내는 것
- Text Completion할 때 유용함

In [1]:
from langchain.memory import ConversationBufferMemory

# Default mode: the history comes back as one plain-text string.
memory = ConversationBufferMemory()
memory.save_context({"input":"HI"},{"output":"How are you?"})
memory.load_memory_variables({})

{'history': 'Human: HI\nAI: How are you?'}

In [2]:
# With return_messages=True the history comes back as message objects.
memory = ConversationBufferMemory(return_messages=True)
memory.save_context({"input":"HI"},{"output":"How are you?"})
memory.load_memory_variables({})

{'history': [HumanMessage(content='HI'), AIMessage(content='How are you?')]}

- 대화 내용 전체가 저장됨 (비효율적)

In [3]:
memory = ConversationBufferMemory(return_messages=True)
memory.save_context({"input":"HI"},{"output":"How are you?"})
memory.save_context({"input":"HI"},{"output":"How are you?"})
memory.save_context({"input":"HI"},{"output":"How are you?"})
memory.load_memory_variables({})

{'history': [HumanMessage(content='HI'),
  AIMessage(content='How are you?'),
  HumanMessage(content='HI'),
  AIMessage(content='How are you?'),
  HumanMessage(content='HI'),
  AIMessage(content='How are you?')]}

# 5.1. ConversationBufferWindowMemory

In [4]:
from langchain.memory import ConversationBufferWindowMemory
memory = ConversationBufferWindowMemory(
    return_messages=True,
    k=4
)
def add_message(input, output):
    """Store one human/AI exchange in the notebook-level ``memory``."""
    human_turn = {"input": input}
    ai_turn = {"output": output}
    memory.save_context(human_turn, ai_turn)

In [7]:
add_message(1,1)
add_message(2,2)
add_message(3,3)
add_message(4,4)
memory.load_memory_variables({})

{'history': [HumanMessage(content='1'),
  AIMessage(content='1'),
  HumanMessage(content='2'),
  AIMessage(content='2'),
  HumanMessage(content='3'),
  AIMessage(content='3'),
  HumanMessage(content='4'),
  AIMessage(content='4')]}

In [8]:
add_message(5,5)
memory.load_memory_variables({})

{'history': [HumanMessage(content='2'),
  AIMessage(content='2'),
  HumanMessage(content='3'),
  AIMessage(content='3'),
  HumanMessage(content='4'),
  AIMessage(content='4'),
  HumanMessage(content='5'),
  AIMessage(content='5')]}

# 5.2. ConversationSummaryMemory

In [9]:
from langchain.memory import ConversationSummaryMemory
from langchain.chat_models import ChatOpenAI

llm = ChatOpenAI(temperature=0.1)

memory = ConversationSummaryMemory(llm=llm)


def add_message(input, output):
    """Save a single input/output pair to the notebook-level ``memory``."""
    exchange = ({"input": input}, {"output": output})
    memory.save_context(*exchange)


def get_history():
    """Return the variables the notebook-level ``memory`` currently exposes."""
    history = memory.load_memory_variables({})
    return history


add_message("Hi I'm Nicolas, I live in South Korea", "Wow that is so cool!")

In [10]:
add_message("South Korea is so pretty", "I wish I could go!")
get_history()

{'history': 'The human introduces themselves as Nicolas and mentions that they live in South Korea. The AI responds by expressing admiration for this fact and expresses a desire to visit South Korea because it is so pretty.'}

# 5.3. ConversationSummaryBufferMemory
- 오래된 메세지를 요약한다.
- 최근의 메세지는 그대로 저장한다.

In [1]:
from langchain.memory import ConversationSummaryBufferMemory
from langchain.chat_models import ChatOpenAI

llm = ChatOpenAI(temperature=0.1)

memory = ConversationSummaryBufferMemory(
    llm=llm,
    max_token_limit=150,
    return_messages=True,
)


def add_message(input, output):
    """Append one human/AI turn to the notebook-level ``memory``."""
    memory.save_context(
        {"input": input},
        {"output": output},
    )


def get_history():
    """Load the stored conversation variables from the notebook-level ``memory``."""
    no_inputs = {}
    return memory.load_memory_variables(no_inputs)


add_message("Hi I'm Nicolas, I live in South Korea", "Wow that is so cool!")

In [2]:
get_history()

{'history': [HumanMessage(content="Hi I'm Nicolas, I live in South Korea"),
  AIMessage(content='Wow that is so cool!')]}

In [3]:
add_message("South Korea is so pretty", "I wish I could go!!!")

In [4]:
get_history()

{'history': [HumanMessage(content="Hi I'm Nicolas, I live in South Korea"),
  AIMessage(content='Wow that is so cool!'),
  HumanMessage(content='South Korea is so pretty'),
  AIMessage(content='I wish I could go!!!')]}

In [10]:
add_message("How far is Korea from Argentina?", "I don't know! Super far!")


In [11]:
get_history()

{'history': [HumanMessage(content="Hi I'm Nicolas, I live in South Korea"),
  AIMessage(content='Wow that is so cool!'),
  HumanMessage(content='South Korea is so pretty'),
  AIMessage(content='I wish I could go!!!'),
  HumanMessage(content='South Korea is so pretty'),
  AIMessage(content='I wish I could go!!!'),
  HumanMessage(content='How far is Brazil from Argentina?'),
  AIMessage(content="I don't know! Super far!"),
  HumanMessage(content='How far is Korea from Argentina?'),
  AIMessage(content="I don't know! Super far!")]}

In [14]:
add_message("How far is Brazil from Argentina?", "I don't know! Super far!")

In [15]:
get_history()

{'history': [SystemMessage(content='The human introduces themselves as Nicolas and mentions that they live in South Korea. The AI responds by expressing excitement and finding it cool.'),
  HumanMessage(content='South Korea is so pretty'),
  AIMessage(content='I wish I could go!!!'),
  HumanMessage(content='South Korea is so pretty'),
  AIMessage(content='I wish I could go!!!'),
  HumanMessage(content='How far is Brazil from Argentina?'),
  AIMessage(content="I don't know! Super far!"),
  HumanMessage(content='How far is Korea from Argentina?'),
  AIMessage(content="I don't know! Super far!"),
  HumanMessage(content='How far is Brazil from Argentina?'),
  AIMessage(content="I don't know! Super far!"),
  HumanMessage(content='How far is Brazil from Argentina?'),
  AIMessage(content="I don't know! Super far!")]}

# 5.4. ConversationKGMemory
- Builds a knowledge graph 
- 중요한 것만 뽑아낸 요약본 - 히스토리가 아닌 entity를 가지고 옴

In [16]:
from langchain.memory import ConversationKGMemory
from langchain.chat_models import ChatOpenAI

llm = ChatOpenAI(temperature=0.1)

memory = ConversationKGMemory(
    llm=llm,
    return_messages=True,
)


def add_message(input, output):
    """Save one exchange into the module-level ``memory``.

    Args:
        input: Text stored under the ``"input"`` key.
        output: Text stored under the ``"output"`` key.
    """
    human_side = {"input": input}
    ai_side = {"output": output}
    memory.save_context(human_side, ai_side)


add_message("Hi I'm Nicolas, I live in South Korea", "Wow that is so cool!")

In [17]:
memory.load_memory_variables({"input": "who is Nicolas"})

{'history': [SystemMessage(content='On Nicolas: Nicolas is a person. Nicolas lives in South Korea.')]}

In [18]:
add_message("Nicolas likes kimchi", "Wow that is so cool!")


In [19]:
# Query with the same "input" key that the earlier lookup and save_context use.
memory.load_memory_variables({"input": "what does nicolas like"})


{'history': [SystemMessage(content='On Nicolas: Nicolas is a person. Nicolas lives in South Korea. Nicolas likes kimchi.')]}

# 5.5. Memory on LLMChain
- LLM Chain - `off-the-shelf-chain` > 일반적 목적의 체인
- Custom Chain? >> langchain expression language

In [24]:
from langchain.memory import ConversationSummaryBufferMemory
from langchain.chat_models import ChatOpenAI
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

llm = ChatOpenAI(temperature=0.1)

# Conversation memory whose contents are exposed under the "chat_history" key.
memory = ConversationSummaryBufferMemory(
    memory_key="chat_history",
    max_token_limit=120,
    llm=llm,
)

# The prompt consists of {question} only; it has no {chat_history} placeholder.
chain = LLMChain(
    prompt=PromptTemplate.from_template("{question}"),
    memory=memory,
    llm=llm,
    verbose=True,  # print the formatted prompt on every call
)

chain.predict(question="My name is Nico")



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mMy name is Nico[0m

[1m> Finished chain.[0m


'Nice to meet you, Nico! How can I assist you today?'

In [25]:
chain.predict(question="I live in Seoul")




[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mI live in Seoul[0m

[1m> Finished chain.[0m


"That's great! Seoul is the capital and largest city of South Korea. It is known for its vibrant culture, modern architecture, and delicious food. There are many attractions to explore in Seoul, such as Gyeongbokgung Palace, N Seoul Tower, Myeongdong shopping district, and the Han River. The city also offers a wide range of entertainment options, including K-pop concerts, traditional performances, and trendy nightlife. Enjoy your time in Seoul!"

In [26]:
chain.predict(question="What is my name?")




[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mWhat is my name?[0m

[1m> Finished chain.[0m


"I'm sorry, but I don't have access to personal information about individuals unless it has been shared with me in the course of our conversation."

In [27]:
memory.load_memory_variables({})

{'chat_history': "System: The human introduces themselves as Nico. The AI greets Nico and asks how it can assist them. Nico mentions that they live in Seoul. The AI responds by providing information about Seoul, including its status as the capital and largest city of South Korea, its vibrant culture, modern architecture, and delicious food. The AI also mentions various attractions and entertainment options available in Seoul, wishing Nico an enjoyable time in the city.\nHuman: What is my name?\nAI: I'm sorry, but I don't have access to personal information about individuals unless it has been shared with me in the course of our conversation."}

In [3]:
from langchain.memory import ConversationSummaryBufferMemory
from langchain.chat_models import ChatOpenAI
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

llm = ChatOpenAI(temperature=0.1)
memory = ConversationSummaryBufferMemory(
    llm=llm,
    max_token_limit=120,
    memory_key="chat_history",
)

# 추가
template = """
    You are a helpful AI talking to a human.

    {chat_history}
    Human:{question}
    You:
"""

# Build the chain with the prompt created from `template`.
chain = LLMChain(
    prompt=PromptTemplate.from_template(template),
    memory=memory,
    llm=llm,
    verbose=True,  # print the formatted prompt on every call
)

predict = chain.predict(question="My name is Nico")
# Print the reply and then its type, one per line.
print(predict, type(predict), sep="\n")



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
    You are a helpful AI talking to a human.

    
    Human:My name is Nico
    You:
[0m

[1m> Finished chain.[0m
Hello Nico! How can I assist you today?
<class 'str'>


In [31]:
chain.predict(question="What is my name?")




[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
    You are a helpful AI talking to a human.

    System: The human introduces themselves as Nico. The AI greets Nico and asks how it can assist them. Nico mentions that they live in Seoul. The AI responds by providing information about Seoul, including its status as the capital and largest city of South Korea, its vibrant culture, modern architecture, and delicious food. The AI also mentions various attractions and entertainment options available in Seoul, wishing Nico an enjoyable time in the city. Nico then asks the AI what their name is.
AI: I'm sorry, but I don't have access to personal information about individuals unless it has been shared with me in the course of our conversation.
Human: What is my name?
AI: I apologize for any confusion, but as an AI, I don't have access to personal information about individuals unless it has been shared with me in the course of our conversation.
Human: My name i

'Your name is Nico.'

# Chat Based Memory
- return >> 설정 없으면 문자열
- 대화 기반 Chat >> return_messages=true >> ChatPromptTemplate

In [2]:
from langchain.memory import ConversationSummaryBufferMemory
from langchain.chat_models import ChatOpenAI
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate, ChatPromptTemplate

llm = ChatOpenAI(temperature=0.1)

# return_messages=True: load_memory_variables() returns message objects
# rather than a single formatted string.
memory = ConversationSummaryBufferMemory(
    llm=llm,
    max_token_limit=120,
    memory_key="chat_history",
    return_messages=True,
)

# This chain's prompt is just the question; no string template is involved,
# so none is defined in this cell.
chain = LLMChain(
    llm=llm,
    memory=memory,
    prompt=PromptTemplate.from_template("{question}"),
    verbose=True,  # print the formatted prompt on every call
)

chain.predict(question="My name is Nico")

# Last expression of the cell: shows what the memory now holds.
memory.load_memory_variables({})



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mMy name is Nico[0m

[1m> Finished chain.[0m


{'chat_history': [HumanMessage(content='My name is Nico'),
  AIMessage(content='Nice to meet you, Nico! How can I assist you today?')]}

In [17]:
from langchain.memory import ConversationSummaryBufferMemory
from langchain.chat_models import ChatOpenAI
from langchain.chains import LLMChain
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder

llm = ChatOpenAI(temperature=0.1)

# The memory_key here is the same as the MessagesPlaceholder variable_name below.
memory = ConversationSummaryBufferMemory(
    return_messages=True,
    memory_key="chat_history",
    max_token_limit=120,
    llm=llm,
)

# A chat prompt is used instead of a string template.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful AI talking to a human"),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{question}"),
    ]
)

chain = LLMChain(
    prompt=prompt,
    memory=memory,
    llm=llm,
    verbose=True,  # print the formatted prompt on every call
)

chain.predict(question="My name is Nico")

# Last expression of the cell: shows what the memory now holds.
memory.load_memory_variables({})



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: You are a helpful AI talking to a human
Human: My name is Nico[0m

[1m> Finished chain.[0m


{'chat_history': [HumanMessage(content='My name is Nico'),
  AIMessage(content='Hello Nico! How can I assist you today?')]}

# 5.7. LCEL Based Memory!

In [18]:
from langchain.memory import ConversationSummaryBufferMemory
from langchain.chat_models import ChatOpenAI
from langchain.schema.runnable import RunnablePassthrough
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder

llm = ChatOpenAI(temperature=0.1)

# The memory key is the same as the MessagesPlaceholder variable name below.
memory = ConversationSummaryBufferMemory(
    memory_key="chat_history",
    return_messages=True,
    max_token_limit=120,
    llm=llm,
)

# A chat prompt is used instead of a string template.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful AI talking to a human"),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{question}"),
    ]
)

def load_memory(_):
    """Return what the module-level ``memory`` holds under ``"chat_history"``.

    The single positional argument is ignored.
    """
    stored = memory.load_memory_variables({})
    return stored["chat_history"]
    
# Add "chat_history" (from load_memory) to the input, then format and call the model.
chain = (
    RunnablePassthrough.assign(chat_history=load_memory)
    | prompt
    | llm
)


chain.invoke({"question": "My name is Nico"})

{'question': 'My name is Nico'}


AIMessage(content='Hello Nico! How can I assist you today?')

- 각 대화를 저장하려면 체인을 호출하는 함수를 만드는 것

In [31]:
from langchain.memory import ConversationSummaryBufferMemory
from langchain.chat_models import ChatOpenAI
from langchain.schema.runnable import RunnablePassthrough
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder

llm = ChatOpenAI(temperature=0.1)

# memory_key is not passed, so the default key "history" is used; the
# MessagesPlaceholder below uses that same name.
memory = ConversationSummaryBufferMemory(
    return_messages=True,
    max_token_limit=120,
    llm=llm,
)

# A chat prompt is used instead of a string template.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful AI talking to a human"),
        MessagesPlaceholder(variable_name="history"),
        ("human", "{question}"),
    ]
)

def load_memory(_):
    """Return what the module-level ``memory`` holds under ``"history"``.

    The single positional argument is ignored.
    """
    stored = memory.load_memory_variables({})
    return stored["history"]
    
# Add "history" (from load_memory) to the input, then format and call the model.
chain = (
    RunnablePassthrough.assign(history=load_memory)
    | prompt
    | llm
)

def invoke_chain(question):
    """Run the chain on one question, save the exchange, and print the reply.

    Uses the module-level ``chain`` and ``memory``. The question is saved as
    the ``"input"`` and the reply's ``content`` as the ``"output"``.

    Args:
        question: Text passed to the chain under the ``"question"`` key.
    """
    payload = {"question": question}
    reply = chain.invoke(payload)
    memory.save_context({"input": question}, {"output": reply.content})
    print(reply)

In [32]:
invoke_chain("Hello, My name is HyeonSeung.")

content="Hello HyeonSeung! It's nice to meet you. How can I assist you today?"


In [33]:
invoke_chain("What is my name?")

content='Your name is HyeonSeung.'


In [34]:
invoke_chain("Do you have girlfriend?")

content="As an AI, I don't have personal relationships or emotions, so I don't have a girlfriend or any personal experiences. However, I'm here to assist and provide information on various topics. Is there anything specific you'd like to know or discuss?"


In [35]:
memory.load_memory_variables({})

{'history': [SystemMessage(content='The human introduces themselves as HyeonSeung.'),
  AIMessage(content="Hello HyeonSeung! It's nice to meet you. How can I assist you today?"),
  HumanMessage(content='What is my name?'),
  AIMessage(content='Your name is HyeonSeung.'),
  HumanMessage(content='Do you have girlfriend?'),
  AIMessage(content="As an AI, I don't have personal relationships or emotions, so I don't have a girlfriend or any personal experiences. However, I'm here to assist and provide information on various topics. Is there anything specific you'd like to know or discuss?")]}

In [28]:
invoke_chain("What is my name?")

content="I'm sorry, but I don't have access to personal information about individuals unless it has been shared with me in the course of our conversation. I am designed to respect user privacy and confidentiality. My primary function is to provide information and assist with tasks to the best of my abilities. How can I assist you today?"


# 5.8. Recap

# 6.0 RAG
- Retrieval Augmented Generation(검색 증강 생성)
- 검색 엔진이라는 것임

# 6.1. Data Loaders and Splitters 
- Retrieval
- Source > Load > Transform > Embed > Store > Retrieve

In [1]:
from langchain.document_loaders import TextLoader

loader = TextLoader("./files/Demian test.txt")
loader.load()

[Document(page_content='DEMI AN \n\n* \n\n\n\nHERMANN \n\nHESSE \n\nDEMIAN \n\n* \n\n\nTranslated by W. J. Strachan \n\n\n\nLondon \n\n\n\nPrologue \n\n\nI cannot tell my story without going a long way back. \nIf it were possible I would go back much farther still to \nthe very earliest years of my childhood and beyond them \nto my family origins. \n\nWhen poets write novels they are apt to behave as if \nthey were gods, with the power to look beyond and com- \nprehend any human story and serve it up as if the \nAlmighty himself, omnipresent, were relating it in all \nits naked truth. That I am no more able to do than the \npoets. But my story is more important to me than any \npoet’s story to him, for it is my own — and it is the story \nof a human being — not an invented, idealised person \nbut a real, live, unique being. What constitutes a real, \nlive human being is more of a mystery than ever these \ndays, and men — each one of whom is a valuable, unique \nexperiment on the part o

In [2]:
from langchain.chat_models import ChatOpenAI    
from langchain.document_loaders import PyPDFLoader

loader = PyPDFLoader("./files/Demian test.pdf")
loader.load()

[Document(page_content='DEMI AN  \n \n*  \n \n \n \nHERMANN  \n \nHESSE  \n \nDEMIAN  \n \n*  \n \n \nTranslated by W. J. Strachan  \n \n \n \nLondon  \n \n \n \nPrologue  \n \n ', metadata={'source': './files/Demian test.pdf', 'page': 0}),
 Document(page_content='I cannot tell my story without going a long way back.  \nIf it were possible I would go back much farther still to  \nthe very earliest years of my childhood and beyond them  \nto my family origins.  \n \nWhen poets write novels they are apt to behave as if  \nthey were gods, with the power to look beyond and com -  \nprehend any human story and serve it up as if the  \nAlmighty himself, omnipresent, were relating it in all  \nits naked truth. That I am no more able to do than the  \npoets. But my story is more important to me than any  \npoet’s story to him, for it is my own — and it is the story  \nof a human being — not an invented, idealised person  \nbut a real, live, unique being. What constitutes a real,  \nlive human 

In [4]:
from langchain.chat_models import ChatOpenAI    
from langchain.document_loaders import UnstructuredFileLoader

loader = UnstructuredFileLoader("./files/Demian test.docx")
loader.load()

[nltk_data] Downloading package punkt to /config/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /config/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


[Document(page_content='DEMI AN \n\nHERMANN \n\nHESSE \n\nDEMIAN \n\nTranslated by W. J. Strachan \n\nLondon \n\nPrologue \n\nI cannot tell my story without going a long way back. \n\n\n\nIf it were possible I would go back much farther still to \n\nthe very earliest years of my childhood and beyond them \n\nto my family origins. \n\nWhen poets write novels they are apt to behave as if \n\nthey were gods, with the power to look beyond and com- \n\nprehend any human story and serve it up as if the \n\nAlmighty himself, omnipresent, were relating it in all \n\nits naked truth. That I am no more able to do than the \n\npoets. But my story is more important to me than any \n\npoet’s story to him, for it is my own — and it is the story \n\nof a human being — not an invented, idealised person \n\nbut a real, live, unique being. What constitutes a real, \n\nlive human being is more of a mystery than ever these \n\ndays, and men — each one of whom is a valuable, unique \n\nexperiment on the pa

## Split the Documents
- docs들을 나눠야 함

In [14]:
from langchain.chat_models import ChatOpenAI    
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

loader = UnstructuredFileLoader("./files/Demian test.docx")
splitter = RecursiveCharacterTextSplitter(
    chunk_size=200,
    chunk_overlap=100
)
# Load and split once and reuse the result, instead of running the whole
# load twice and throwing the first result away.
split_docs = loader.load_and_split(text_splitter=splitter)
print(len(split_docs))

144


- 특정 문자열을 기준으로 나누고 싶을 때

In [2]:
from langchain.chat_models import ChatOpenAI    
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter

# Split on newline characters.
splitter = CharacterTextSplitter(
    chunk_overlap=100,
    chunk_size=600,
    separator="\n",
)
loader = UnstructuredFileLoader("./files/Demian test.docx")
# Print how many documents the split produces.
print(len(loader.load_and_split(text_splitter=splitter)))

31


# 6.2. Tiktoken
- platform.openai.com/tokenizer
- OpenAI와 일치한 방법으로 counting하는 것.

In [4]:
from langchain.text_splitter import CharacterTextSplitter

# Newline-separated splitter built through the tiktoken encoder constructor.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    chunk_overlap=100,
    chunk_size=600,
    separator="\n",
)

# UnstructuredFileLoader is imported by an earlier cell, not by this one.
loader = UnstructuredFileLoader("./files/Demian test.docx")

# 6.3. Vectors
- https://turbomaze.github.io/word2vecjson/
- https://youtu.be/2eWuYf-aZE4?si=iWqaM8fgJpLmGeNq

# 6.4. Vector Store


In [8]:
from langchain.embeddings import OpenAIEmbeddings

embedder = OpenAIEmbeddings()

vector = embedder.embed_query("Hello, My name is HyeonSeung.")
len(vector)

1536

In [10]:
vector = embedder.embed_documents([
    "hi",
    "how",
    "are",
    "you doing"
])
print(len(vector),len(vector[0]))

4 1536


## Chroma : Vector Store 
- 이거 어떻게 작동하는지 알아야함

In [5]:
from langchain.vectorstores import Chroma 
from langchain.embeddings import OpenAIEmbeddings
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter


# Newline-separated splitter built through the tiktoken encoder constructor.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    chunk_overlap=100,
    chunk_size=600,
    separator="\n",
)

# Load the .docx file and split it in one step.
loader = UnstructuredFileLoader("./files/Demian test.docx")
docs = loader.load_and_split(text_splitter=splitter)

# Build the Chroma vector store from the split documents.
embeddings = OpenAIEmbeddings()
vectorstore = Chroma.from_documents(docs, embeddings)

In [6]:
results = vectorstore.similarity_search("Why does the author describe himself not as a scholar, but as a seeker, and how does this self-perception influence his life and the story he tells?")
len(results)

4

In [14]:
from langchain.vectorstores import Chroma 
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.storage import LocalFileStore
# Newline-separated splitter built through the tiktoken encoder constructor.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    chunk_overlap=100,
    chunk_size=600,
    separator="\n",
)

# Load the .docx file and split it in one step.
loader = UnstructuredFileLoader("./files/Demian test.docx")
docs = loader.load_and_split(text_splitter=splitter)

# Wrap the OpenAI embeddings with a file store rooted at ./.cache/.
cache_dir = LocalFileStore("./.cache/")
embeddings = OpenAIEmbeddings()
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(embeddings, cache_dir)

# Build the Chroma vector store through the cache-backed embeddings.
vectorstore = Chroma.from_documents(docs, cached_embeddings)


In [15]:
# Rebuild the cache-backed embeddings and the vector store.
# `docs` is not defined in this cell; it comes from the cell before it.
cache_dir = LocalFileStore("./.cache/")
embeddings = OpenAIEmbeddings()
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(embeddings, cache_dir)

vectorstore = Chroma.from_documents(docs, cached_embeddings)

In [13]:
!mkdir .cache

- `![data-connection.jpg](images/data-connection.jpg)`
![data-connection.jpg](images/data-connection.jpg)

# 6.6. RetrievalQA
## LCEL
- LangChain Expression Language!! 
- [https://python.langchain.com/docs/modules/chains/document](https://python.langchain.com/docs/modules/chains/document)

In [4]:
from langchain.vectorstores import Chroma 
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.storage import LocalFileStore
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders import UnstructuredFileLoader


llm = ChatOpenAI(temperature=0.1)

# Newline-separated splitter built through the tiktoken encoder constructor.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    chunk_overlap=100,
    chunk_size=600,
    separator="\n",
)

# Load the .docx file and split it in one step.
loader = UnstructuredFileLoader("./files/Demian test.docx")
docs = loader.load_and_split(text_splitter=splitter)

# Wrap the OpenAI embeddings with a file store rooted at ./.cache/.
cache_dir = LocalFileStore("./.cache/")
embeddings = OpenAIEmbeddings()
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(embeddings, cache_dir)

vectorstore = Chroma.from_documents(docs, cached_embeddings)

# "stuff" chain type, using the vector store as the retriever.
chain = RetrievalQA.from_chain_type(
    retriever=vectorstore.as_retriever(),
    chain_type="stuff",
    llm=llm,
)
chain.run("Why does the author describe himself not as a scholar, but as a seeker, and how does this self-perception influence his life and the story he tells?")

"The author describes himself as a seeker rather than a scholar because he is not interested in acquiring knowledge from books or studying the stars. Instead, he seeks wisdom and understanding from within himself and from his own experiences. This self-perception as a seeker influences his life and the story he tells by emphasizing his personal journey of self-discovery and the lessons he learns along the way. He acknowledges that his story is not a pleasant one and does not possess the harmony of invented tales, but rather reflects the mixture of nonsense, chaos, madness, and dreams that make up the lives of those who have stopped deceiving themselves. This self-perception as a seeker allows the author to explore the complexities of human existence and the individual's quest for self-realization."

- Stuff
- Refine
- Map Reduce
- Map ReRank

In [5]:
# Build the RetrievalQA chain again; `llm` and `vectorstore` come from the cell above.
chain = RetrievalQA.from_chain_type(
    retriever=vectorstore.as_retriever(),
    chain_type="stuff",
    llm=llm,
)
chain.run("Why does the author describe himself not as a scholar, but as a seeker, and how does this self-perception influence his life and the story he tells?")

'The author describes himself as a seeker rather than a scholar because he is not interested in acquiring knowledge from books or studying the stars. Instead, he is drawn to the lessons that he hears whispering in his blood, suggesting a more intuitive and experiential approach to understanding the world. This self-perception as a seeker influences his life and the story he tells by emphasizing his personal journey of self-discovery and the importance of individual experiences. He acknowledges that his story is not a pleasant one and lacks the gentle harmony of invented tales, but rather is a mixture of nonsense, chaos, madness, and dreams. This suggests that his story is raw and authentic, reflecting the struggles and complexities of being human.'

# 6.7. Recap

1. load files
2. split
3. embeddings
4. vectorstore
5. Search
6. Retriever > RetrievalQA 
- [What is Retriever?](https://python.langchain.com/docs/modules/data_connection/retrievers)

# 6.8. Stuff LCEL Chain
- [https://python.langchain.com/docs/expression_language/interface](https://python.langchain.com/docs/expression_language/interface)
- ![LCEL](images/LCEL.png)

In [8]:
from langchain.vectorstores import Chroma , FAISS
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.storage import LocalFileStore
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders import UnstructuredFileLoader
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough

llm = ChatOpenAI(temperature=0.1)

# Newline-separated splitter built through the tiktoken encoder constructor.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    chunk_overlap=100,
    chunk_size=600,
    separator="\n",
)

# Load the .docx file and split it in one step.
loader = UnstructuredFileLoader("./files/Demian test.docx")
docs = loader.load_and_split(text_splitter=splitter)

# Wrap the OpenAI embeddings with a file store rooted at ./.cache/.
cache_dir = LocalFileStore("./.cache/")
embeddings = OpenAIEmbeddings()
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(embeddings, cache_dir)

# This cell uses FAISS as the vector store.
vectorstore = FAISS.from_documents(docs, cached_embeddings)

retriever = vectorstore.as_retriever()

prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful assistant. Answer questions using only the following context. if you don't know the answer just say you don't  know, don't make it up:\n{context}"),
    ("human", "{question}")
])

# The pipeline is wrapped in parentheses so it can span several lines.
# Without them the statement ends at the closing brace and the indented
# "| prompt" line that follows is a syntax error.
chain = (
    {
        "context": retriever,
        "question": RunnablePassthrough(),
    }
    | prompt
    | llm
)

chain.invoke("Why does the author describe himself not as a scholar, but as a seeker, and how does this self-perception influence his life and the story he tells?")

AIMessage(content="The author describes himself as a seeker because he is constantly searching for knowledge and understanding. This self-perception influences his life and the story he tells by shaping his perspective and approach to life. As a seeker, he is open to new experiences and ideas, and he is willing to question and challenge conventional beliefs. This mindset leads him on a journey of self-discovery and exploration, which is reflected in the story he tells. His story is not a pleasant one, as it delves into the complexities of human existence and the struggles of finding one's true self. It is a mixture of nonsense and chaos, madness and dreams, reflecting the author's honest and unfiltered exploration of life.")

# 6.9. Map Reduce LCEL Chain!

In [2]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import FAISS
from langchain.storage import LocalFileStore
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda

llm = ChatOpenAI(temperature=0.1)

# File store rooted at ./.cache/, used below to back the embeddings.
cache_dir = LocalFileStore("./.cache/")

# Newline-separated splitter built through the tiktoken encoder constructor.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    chunk_overlap=100,
    chunk_size=600,
    separator="\n",
)

# This cell loads the .txt version of the file.
loader = UnstructuredFileLoader("./files/Demian test.txt")
docs = loader.load_and_split(text_splitter=splitter)

embeddings = OpenAIEmbeddings()
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(embeddings, cache_dir)

vectorstore = FAISS.from_documents(docs, cached_embeddings)
retriever = vectorstore.as_retriever()


map_doc_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """
            Use the following portion of a long document to see if any of the text is relevant to answer the question. Return any relevant text verbatim. If there is no relevant text, return : ''
            -------
            {context}
            """,
        ),
        ("human", "{question}"),
    ]
)

map_doc_chain = map_doc_prompt | llm


def map_docs(inputs):
    """Run ``map_doc_chain`` over each document and join the replies.

    Args:
        inputs: Mapping with a ``"documents"`` entry (an iterable of objects
            exposing ``page_content``) and a ``"question"`` entry.

    Returns:
        The ``content`` of every per-document reply, in document order,
        joined with a blank line between them.
    """
    documents = inputs["documents"]
    question = inputs["question"]
    extracts = []
    for document in documents:
        reply = map_doc_chain.invoke(
            {"context": document.page_content, "question": question}
        )
        extracts.append(reply.content)
    return "\n\n".join(extracts)


# Supply "documents" from the retriever and pass the question through
# unchanged, then hand both to map_docs.
map_chain = (
    {"documents": retriever, "question": RunnablePassthrough()}
    | RunnableLambda(map_docs)
)

final_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """
            Given the following extracted parts of a long document and a question, create a final answer. 
            If you don't know the answer, just say that you don't know. Don't try to make up an answer.
            ------
            {context}
            """,
        ),
        ("human", "{question}"),
    ]
)

# The output of map_chain becomes {context}; the question is passed through unchanged.
chain = (
    {
        "context": map_chain,
        "question": RunnablePassthrough(),
    }
    | final_prompt
    | llm
)

chain.invoke("Why does the author describe himself not as a scholar, but as a seeker, and how does this self-perception influence his life and the story he tells?")

AIMessage(content="The author describes himself as a seeker rather than a scholar because he no longer seeks knowledge from books or the stars. Instead, he listens to the lessons whispered in his blood. This self-perception influences his life and the story he tells by emphasizing his personal journey of self-discovery and the importance of individual experiences. He acknowledges that his story may not possess the gentle harmony of invented tales, but rather reflects the mixture of nonsense, chaos, madness, and dreams that make up his life as a seeker. This perspective adds depth and authenticity to his narrative, highlighting the unique and significant nature of every individual's story.")