In [None]:
# 3.0 Predict Text with LLMs VS Chat Models

from langchain.llms.openai import OpenAI
from langchain.chat_models import ChatOpenAI

# One completion-style LLM and one chat model, both with default settings.
llm, chat = OpenAI(), ChatOpenAI()

# Ask both models the same question (llm first, then chat).
a, b = (
    model.predict("How many planets are there?")
    for model in (llm, chat)
)

# Show both answers as the cell output.
a, b

In [None]:
# 3.1 Predict Messages with Chat Model

# import message constructors
from langchain.schema import HumanMessage, AIMessage, SystemMessage
from langchain.chat_models import ChatOpenAI

chat = ChatOpenAI(temperature=0.1)

# Conversation so far: a system instruction, an earlier AI turn and the new
# human question. Each (class, text) pair becomes one message object.
messages = [
    message_type(content=text)
    for message_type, text in (
        (SystemMessage, "You are a geography expert. You only reply in Italian."),
        (AIMessage, "Ciao, mi chiamo Paolo."),
        (HumanMessage, "What is the distance btw Mexico and Thailand? Also, what is your name?"),
    )
]

# Hand the whole message list to the chat model.
chat.predict_messages(messages)

In [None]:
# 3.2 PromptTemplate

from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate

chat = ChatOpenAI(temperature=0.1)

# String template with two placeholders that are filled in below.
template = PromptTemplate.from_template(
    "What is the distance btw {country_a} and {country_b}"
)
prompt = template.format(**{"country_a": "Mexico", "country_b": "Thailand"})

# The formatted prompt is a plain string, so predict() is used.
chat.predict(prompt)

In [None]:
# 3.2 PromptTemplate without from_template method

# Build the template through the constructor, listing its placeholder explicitly.
t = PromptTemplate(
    input_variables=["country"],
    template="What is the capital of {country}",
)

In [None]:
# 3.2 ChatPromptTemplate

from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate

chat = ChatOpenAI(temperature=0.1)

# Chat template given as (role, text) tuples; the placeholders are filled below.
template = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a geographic expert. You only reply in {language}."),
        ("ai", "Ciao, Mi chiamo {name}!"),
        ("human", "What is the distance btw {country_a} and {country_b}"),
    ]
)

# format_messages() returns the filled-in messages for the chat model.
prompt = template.format_messages(
    **{
        "language": "Greek",
        "name": "Paolo",
        "country_a": "Mexico",
        "country_b": "Thailand",
    }
)

chat.predict_messages(prompt)

In [None]:
# 3.3 OutputParser: convert the LLM's text response into a list

from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.schema import BaseOutputParser

class CommaOutputParser(BaseOutputParser):
    """Output parser that turns a comma-separated reply into a list of strings."""

    def parse(self, text):
        """Split ``text`` on commas and trim whitespace around every item.

        Example: "Hello,How,are , you" -> ['Hello', 'How', 'are', 'you'].
        """
        # Strip each piece individually. The previous `map(str.strip(), ...)`
        # called str.strip with no string and raised TypeError before mapping.
        return [item.strip() for item in text.strip().split(",")]
    
# Quick check of the parser on a hand-written string.
p = CommaOutputParser()
p.parse("Hello,How,are , you")

chat = ChatOpenAI(temperature=0.1)

# The system message asks for a comma-separated list, which the parser can split.
template = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a list generatin machine. Everything you are asked will be answered with a comma separated list of max {max_items} in lowercase. Do NOT reply with anything else."),
        ("human", "{question}"),
    ]
)

prompt = template.format_messages(
    **{"max_items": 10, "question": "What are the colours?"}
)

# The model answers with a message, e.g. AIMessage(content='red, orange ... ')
result = chat.predict_messages(prompt)

# Parsing the message content gives a list, e.g. ['red', 'orange' ...]
p.parse(result.content)

In [None]:
# 3.3 LangChain Expression Language (LCEL)
from langchain.chat_models import ChatOpenAI
from langchain.schema import BaseOutputParser
from langchain.prompts import ChatPromptTemplate

chat = ChatOpenAI(temperature=0.1)


class CommaOutputParser(BaseOutputParser):
    """Output parser that splits a comma-separated reply into trimmed items."""

    def parse(self, text):
        """Return the comma-separated pieces of ``text`` with whitespace removed."""
        return [piece.strip() for piece in text.strip().split(",")]


p = CommaOutputParser()

template = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a list generatin machine. Everything you are asked will be answered with a comma separated list of max {max_items} in lowercase. Do NOT reply with anything else."),
        ("human", "{question}"),
    ]
)

# LCEL pipeline: prompt template -> chat model -> output parser.
chain = template | chat | CommaOutputParser()

# invoke() takes the template variables as a dict.
chain.invoke(dict(max_items=5, question="What are the Pokemons?"))

In [None]:
# 3.4 Chaining Chains

from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.callbacks import StreamingStdOutCallbackHandler

# Chat model that streams its output to stdout through the callback handler.
chat = ChatOpenAI(
    callbacks=[StreamingStdOutCallbackHandler()],
    streaming=True,
    temperature=0.1,
)

# Prompt for the first chain: produce a recipe for the requested cuisine.
chef_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a world-class international chef. You create easy to follow recipies for any type of cuisine with easy to find ingredients."),
        ("human", "I want to cook {cuisine} food."),
    ]
)

# Prompt for the second chain: rewrite a given recipe as vegetarian.
vegeterian_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a vegetarian chef specialized on making traditional recipes vegetarian. You find alternative ingredients and explain their preparation. You don't radically modify the recipe. If there is no alternative for a food just say you don't know how to recipe it."),
        ("human", "{recipe}"),
    ]
)

chef_chain = chef_prompt | chat
veg_chain = vegeterian_prompt | chat

# Execution order:
#   1. invoke() passes {"cuisine": ...} into the pipeline
#   2. the dict (a runnable map) runs chef_chain and stores its output under "recipe"
#   3. veg_chain receives {"recipe": ...}
final_chain = {"recipe": chef_chain} | veg_chain

final_chain.invoke({"cuisine": "indian"})

In [None]:
# 4.1 FewShotPromptTemplate

from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate, FewShotPromptTemplate
from langchain.callbacks import StreamingStdOutCallbackHandler

# Chat model that streams its output to stdout.
chat = ChatOpenAI(
    temperature=0.1,
    streaming=True,
    # `callbacks` takes a list of handlers (as in the other cells of this
    # notebook), not a bare handler instance.
    callbacks=[StreamingStdOutCallbackHandler()],
)

# Few-shot examples: each dict pairs a question with an answer written in the
# format the model should imitate. The keys must match the placeholders of
# `example_template` below.
examples = [
    {
        "question": "What do you know about France?",
        "answer": """
        Here is what I know:
        Capital: Paris
        Language: French
        Food: Wine and Cheese
        Currency: Euro
        """,
    },
    {
        "question": "What do you know about Italy?",
        "answer": """
        I know this:
        Capital: Rome
        Language: Italian
        Food: Pizza and Pasta
        Currency: Euro
        """,
    },
    {
        "question": "What do you know about Greece?",
        "answer": """
        I know this:
        Capital: Athens
        Language: Greek
        Food: Souvlaki and Feta Cheese
        Currency: Euro
        """,
    },
]

# How a single example is rendered; placeholders are the example dict keys.
example_template = """
    Human: {question}
    AI: {answer}
"""
example_prompt = PromptTemplate.from_template(example_template)

prompt = FewShotPromptTemplate(
    example_prompt=example_prompt,
    examples=examples,
    # Same format as the examples; this is where the user's question goes.
    suffix="Human: What do you know about {country}?",
    # Variables expected by the suffix.
    input_variables=["country"]
)
prompt.format(country="Germany")

# Pipe the few-shot prompt into the chat model and ask about a new country.
chain = prompt | chat
chain.invoke({"country": "Germany"})

In [None]:
# 4.2 FewShotChatMessagePromptTemplate

from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate, FewShotChatMessagePromptTemplate
from langchain.callbacks import StreamingStdOutCallbackHandler

# Chat model that streams its output to stdout.
chat = ChatOpenAI(
    callbacks=[StreamingStdOutCallbackHandler()],
    streaming=True,
    temperature=0.1,
)

# Few-shot examples keyed by "country" and "answer"; these keys are the
# placeholders used by the per-example chat prompt below.
examples = [
    {
        "country": "France",
        "answer": """
        Here is what I know:
        Capital: Paris
        Language: French
        Food: Wine and Cheese
        Currency: Euro
        """,
    },
    {
        "country": "Italy",
        "answer": """
        I know this:
        Capital: Rome
        Language: Italian
        Food: Pizza and Pasta
        Currency: Euro
        """,
    },
    {
        "country": "Greece",
        "answer": """
        I know this:
        Capital: Athens
        Language: Greek
        Food: Souvlaki and Feta Cheese
        Currency: Euro
        """,
    },
]

# Each example is rendered as a human message followed by an AI message.
example_prompt = ChatPromptTemplate.from_messages(
    [
        ("human", "What do you know about {country}"),
        ("ai", "{answer}"),
    ]
)
# The name is rebound: from here on it is the few-shot wrapper around the
# per-example prompt built just above.
example_prompt = FewShotChatMessagePromptTemplate(
    examples=examples,
    example_prompt=example_prompt,
)

# Final prompt: system instruction, the few-shot examples, then the real question.
final_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a geography expert. You give short answers."),
        example_prompt,
        ("human", "What do you know about {country}?"),
    ]
)

chain = final_prompt | chat
chain.invoke(dict(country="Germany"))

In [None]:
# 4.3 LengthBasedExampleSelector

# PromptTemplate and FewShotPromptTemplate are used below, so this cell imports
# them itself instead of relying on an earlier cell having been run.
from langchain.prompts import (
    ChatPromptTemplate,
    FewShotPromptTemplate,
    LengthBasedExampleSelector,
    PromptTemplate,
)

# Few-shot examples; the keys match the placeholders of `example_prompt`.
examples = [
    {
        "question": "What do you know about France?",
        "answer": """
        Here is what I know:
        Capital: Paris
        Language: French
        Food: Wine and Cheese
        Currency: Euro
        """,
    },
    {
        "question": "What do you know about Italy?",
        "answer": """
        I know this:
        Capital: Rome
        Language: Italian
        Food: Pizza and Pasta
        Currency: Euro
        """,
    },
    {
        "question": "What do you know about Greece?",
        "answer": """
        I know this:
        Capital: Athens
        Language: Greek
        Food: Souvlaki and Feta Cheese
        Currency: Euro
        """,
    },
]

# The selector and FewShotPromptTemplate format each example as a string, so
# this must be a string PromptTemplate (it was a ChatPromptTemplate before).
example_prompt = PromptTemplate.from_template("Human: {question}\nAI: {answer}")

# The selector is given a length budget (max_length) for the examples it returns.
example_selector = LengthBasedExampleSelector(
    examples=examples,
    example_prompt=example_prompt,
    max_length=80,
)

prompt = FewShotPromptTemplate(
    example_prompt=example_prompt,
    example_selector=example_selector,
    suffix="Human: What do you know about {country}",
    input_variables=["country"],
)
prompt.format(country="Brazil")

In [None]:
# 4.3 Custom Example Selector
from langchain.chat_models import ChatOpenAI
from langchain.callbacks import StreamingStdOutCallbackHandler
from langchain.prompts import PromptTemplate
from langchain.prompts.example_selector.base import BaseExampleSelector

# Chat model that streams its output to stdout.
chat = ChatOpenAI(
    callbacks=[StreamingStdOutCallbackHandler()],
    streaming=True,
    temperature=0.1,
)

# Pool of few-shot examples handed to the example selector further down.
examples = [
    {
        "question": "What do you know about France?",
        "answer": """
        Here is what I know:
        Capital: Paris
        Language: French
        Food: Wine and Cheese
        Currency: Euro
        """,
    },
    {
        "question": "What do you know about Italy?",
        "answer": """
        I know this:
        Capital: Rome
        Language: Italian
        Food: Pizza and Pasta
        Currency: Euro
        """,
    },
    {
        "question": "What do you know about Greece?",
        "answer": """
        I know this:
        Capital: Athens
        Language: Greek
        Food: Souvlaki and Feta Cheese
        Currency: Euro
        """,
    },
]

class RandomExampleSelector(BaseExampleSelector):
    """Example selector that returns a single, randomly chosen example."""

    def __init__(self, examples):
        """Keep a reference to the list of example dicts to draw from."""
        self.examples = examples

    def add_example(self, example):
        """Append ``example`` to this selector's pool."""
        self.examples.append(example)

    def select_examples(self, input_variables):
        """Return a list holding one random example from the pool.

        ``input_variables`` is accepted to match the selector interface but is
        not used. An empty pool yields an empty list.
        """
        from random import choice

        if not self.examples:
            # random.choice raises IndexError on an empty sequence.
            return []
        # Draw from this instance's pool, not the notebook-level `examples`
        # global, so the selector honours what it was constructed with.
        return [choice(self.examples)]

# FewShotPromptTemplate is used below but is missing from this cell's imports;
# import it here so the cell does not depend on an earlier cell having run.
from langchain.prompts import FewShotPromptTemplate

# How each selected example is rendered.
example_prompt = PromptTemplate.from_template("Human: {question}\nAI: {answer}")
example_selector = RandomExampleSelector(examples=examples)

prompt = FewShotPromptTemplate(
    example_prompt=example_prompt,
    example_selector=example_selector,
    suffix="Human: What do you know about {country}?",
    # Must name the placeholder used in `suffix` (was misspelled "coutry").
    input_variables=["country"],
)

prompt.format(country="Brazil")

In [None]:
# 4.4 Serialization: Save and load prompt
from langchain.prompts import load_prompt

# Load the same kind of prompt from a JSON file and from a YAML file.
json_prompt, yaml_prompt = (
    load_prompt(path) for path in ("./prompt.json", "./prompt.yaml")
)

# Format the YAML-loaded prompt to check that it works.
yaml_prompt.format(country="Germany")

In [None]:
# 4.4 Composition: Combine prompts
# The original line here was the incomplete statement `from langchain`, which
# is a SyntaxError. PromptTemplate is the name this cell needs from langchain.
from langchain.prompts import PromptTemplate
from langchain.prompts.pipeline import PipelinePromptTemplate

# Part 1: who the assistant is impersonating.
intro = PromptTemplate.from_template(
    """
    You are a role-playing assistant.
    And you are impersonating a {character}.
"""
)

# Part 2: one example exchange showing how the character talks.
example = PromptTemplate.from_template(
    """
    This is an example of how you talk:

    Human: {example_question}
    You: {example_answer}
"""
)

# Part 3: the actual question to answer.
start = PromptTemplate.from_template(
    """
    Start now!

    Human: {question}
    You:
"""
)

# Outer template whose placeholders are named after the three parts above.
final = PromptTemplate.from_template(
    """
    {intro}

    {example}

    {start}
"""
)

# (placeholder name in `final`, template that fills it) pairs.
prompts = [
    ("intro", intro),
    ("example", example),
    ("start", start),
]

full_prompt = PipelinePromptTemplate(
    final_prompt=final,
    pipeline_prompts=prompts,
)

# The composed prompt takes the variables of all the partial templates.
full_prompt.format(
    character="Pirate",
    example_question="What is your location?",
    example_answer="Arrg!! This is a secret!! ARrrg",
    question="What is your fav food?"
)

# NOTE: `chat` is not created in this cell; it must already exist from an
# earlier cell.
chain = full_prompt | chat
chain.invoke({
    "character": "Pirate",
    "example_question": "What is your location?",
    "example_answer": "Arrg!! This is a secret!! ARrrg",
    "question": "What is your fav food?"
})

# Sections 4.5-4.6 are about saving money.

In [None]:
# 4.5 Caching: Save and reuse all responses of LM
from langchain.chat_models import ChatOpenAI
from langchain.globals import set_llm_cache
from langchain.cache import InMemoryCache, SQLiteCache

# Install an in-memory LLM cache, then create the chat model.
set_llm_cache(InMemoryCache())
chat = ChatOpenAI(temperature=0.1)

chat.predict("How do you make italian lasagna")

# Switch to a cache stored in a database file.
set_llm_cache(SQLiteCache("cache.db"))

In [None]:
# 4.5 set_debug: show log
from langchain.globals import set_debug

# Turn on LangChain's global debug flag so that logs are shown.
set_debug(value=True)

In [None]:
# 4.6 Usage cost per call

from langchain.callbacks import get_openai_callback

# Calls made inside the context manager are recorded on `usage`.
# NOTE: `chat` is not created in this cell; it must already exist.
with get_openai_callback() as usage:
    a = chat.predict("What is the recipe for soju?")
    b = chat.predict("What is the recipe for sourdough?")
    print(a, b, "\n")
    # One value per line: the usage object, the total cost, the prompt tokens
    # and the completion tokens.
    print(
        usage,
        usage.total_cost,
        usage.prompt_tokens,
        usage.completion_tokens,
        sep="\n",
    )

In [None]:
# 4.6 Serialization: save the LLM settings
from langchain.llms.openai import OpenAI
from langchain.llms.loading import load_llm

# Configure an LLM and write its settings to a JSON file.
saved_llm = OpenAI(model="gpt-3.5-turbo", max_tokens=450, temperature=0.1)
saved_llm.save("model.json")

# Rebuild an LLM from the saved file and display it.
loaded_llm = load_llm("model.json")
loaded_llm

In [None]:
# 5.0 ConversationBufferMemory
from langchain.memory import ConversationBufferMemory

# Default buffer memory.
text_memory = ConversationBufferMemory()
# return_messages=True returns the history as chat messages that a chat model can use.
chat_memory = ConversationBufferMemory(return_messages=True)

# Store one exchange in the text memory and read it back.
text_memory.save_context(inputs={"input": "Hi!"}, outputs={"output": "How are you?"})
text_memory.load_memory_variables({})

In [None]:
# 5.1 ConversationBufferWindowMemory
from langchain.memory import ConversationBufferWindowMemory

# Window memory; k sets the size of the window that is kept.
memory = ConversationBufferWindowMemory(k=2, return_messages=True)


def add_message(input, output):
    """Save one exchange (human input, AI output) into ``memory``."""
    human_side = {"input": input}
    ai_side = {"output": output}
    memory.save_context(human_side, ai_side)


# Two exchanges, then read the memory back.
add_message(1, 1)
add_message(2, 2)
memory.load_memory_variables({})

# A third exchange, then read the memory back again.
add_message(3, 3)
memory.load_memory_variables({})

In [None]:
# 5.2 ConversationSummaryMemory
from langchain.memory import ConversationSummaryMemory
from langchain.chat_models import ChatOpenAI

llm = ChatOpenAI(temperature=0.1)
# The summary memory is given the chat model created above.
memory = ConversationSummaryMemory(llm=llm)


def add_message(input, output):
    """Save one exchange (human input, AI output) into ``memory``."""
    human_side = {"input": input}
    ai_side = {"output": output}
    memory.save_context(human_side, ai_side)

def get_history():
    """Return the variables currently held by the notebook-level ``memory``.

    The result of ``memory.load_memory_variables({})`` is passed back to the
    caller; without the ``return`` the function always produced ``None`` and
    the cell displayed nothing.
    """
    return memory.load_memory_variables({})

# Record two exchanges, then read the history back.
add_message(
    input="Hi I'm Nicolas, I live in South Korea",
    output="Wow that is so cool!",
)
add_message(
    input="South Kddorea is so pretty",
    output="I wish I could go!!!",
)
get_history()

In [None]:
# 5.3 ConversationSummaryBufferMemory
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationSummaryBufferMemory

llm = ChatOpenAI(temperature=0.1)
memory = ConversationSummaryBufferMemory(
    llm=llm,
    return_messages=True,
    # Maximum number of tokens kept before messages are summarized.
    max_token_limit=150,
)


def add_message(input, output):
    """Save one exchange (human input, AI output) into ``memory``."""
    human_side = {"input": input}
    ai_side = {"output": output}
    memory.save_context(human_side, ai_side)


def get_history():
    """Return whatever ``memory`` currently holds."""
    history = memory.load_memory_variables({})
    return history


# Record four exchanges, then read the history back.
add_message(
    input="Hi I'm Nicolas, I live in South Korea",
    output="Wow that is so cool!",
)
add_message(
    input="South Korea is so pretty",
    output="I wish I could go!!!",
)
add_message(
    input="How far is Korea from Argentina?",
    output="I don't know! Super far!",
)
add_message(
    input="How far is Brazil from Argentina?",
    output="I don't know! Super far!",
)
get_history()

In [None]:
# 5.4 ConversationKGMemory
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationKGMemory

llm = ChatOpenAI(temperature=0.1)
memory = ConversationKGMemory(return_messages=True, llm=llm)


def add_message(input, output):
    """Save one exchange (human input, AI output) into ``memory``."""
    human_side = {"input": input}
    ai_side = {"output": output}
    memory.save_context(human_side, ai_side)


# Unlike the earlier memory cells, loading here is driven by an input question.
add_message(
    input="Hi I'm Nicolas, I live in South Korea",
    output="Wow that is so cool!",
)
memory.load_memory_variables({"input": "Who is Nicolas"})

add_message(input="Nicolas likes kimchi", output="Wow that is so cool!")
memory.load_memory_variables({"input": "What does Nicolas like"})

In [None]:
# 5.5 Memory on LLMChain(off-the-shelf;general-purpose)
from langchain.memory import ConversationSummaryBufferMemory
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

llm = ChatOpenAI(temperature=0.1)

# memory_key names the template variable the history is exposed under; it
# matches the {chat_history} placeholder in the template below.
memory = ConversationSummaryBufferMemory(
    memory_key="chat_history",
    max_token_limit=80,
    llm=llm,
)

# The prompt leaves room for the history and for the new question.
template = """
You are a helpful AI talking to Human.

{chat_history}
Human: {question}
You:
"""

chain = LLMChain(
    prompt=PromptTemplate.from_template(template),
    memory=memory,
    llm=llm,
    # verbose output is used here for debugging
    verbose=True,
)

# Three consecutive turns; the last one asks about something said in the first.
chain.predict(question="My name is Bach")
chain.predict(question="I live in Seoul")
chain.predict(question="What is my name?")

In [None]:
#5.6 Chat Based Memory
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.memory import ConversationSummaryBufferMemory
from langchain.chains import LLMChain

llm = ChatOpenAI(temperature=0.1)

# return_messages=True makes the memory return message objects, which is what
# the MessagesPlaceholder below is filled with.
memory = ConversationSummaryBufferMemory(
    return_messages=True,
    memory_key="chat_history",
    max_token_limit=120,
    llm=llm,
)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful AI talking to human."),
        # Placeholder for an arbitrary number of history messages.
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{question}"),
    ]
)

chain = LLMChain(prompt=prompt, memory=memory, llm=llm, verbose=True)

# Three consecutive turns; the last one asks about something said in the first.
chain.predict(question="My name is Bach")
chain.predict(question="I live in Seoul")
chain.predict(question="What is my name?")

In [None]:
# 6.1 Data Loaders and Splitters
from langchain.document_loaders import TextLoader, PyPDFLoader, UnstructuredFileLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.text_splitter import CharacterTextSplitter

# Three loader types; only the UnstructuredFileLoader is actually loaded here.
txt_loader = TextLoader("./files/document.txt")
pdf_loader = PyPDFLoader("./files/document.pdf")
loader = UnstructuredFileLoader("./files/document.pdf")
docs = loader.load()

# Splits on sentence or paragraph boundaries.
splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=100,
    chunk_size=1500,  # maximum characters per chunk
)

# Splits on a specific separator string.
line_break_splitter = CharacterTextSplitter(
    chunk_overlap=50,
    chunk_size=1000,
    separator="\n",
)

# Measures chunk_size in tokens, counted the way the model counts them,
# instead of in characters (len).
token_splitter = CharacterTextSplitter.from_tiktoken_encoder(
    chunk_overlap=100,
    chunk_size=600,
    separator="\n",
)

# Option 1: split documents that are already loaded.
splitter.split_documents(docs)

# Option 2: load and split in one call; the cell shows the number of chunks.
len(loader.load_and_split(text_splitter=splitter))

64

In [None]:
#6.4 Embedding Models

from langchain.embeddings import OpenAIEmbeddings

# Instantiate the embedding model. The original bound the class itself
# (`OpenAIEmbeddings` without parentheses), so the method calls below ran on
# the class and failed.
embedder = OpenAIEmbeddings()

# Embed a single query string.
embedder.embed_query("Hi")

# Embed several texts at once.
vector = embedder.embed_documents([
    "how",
    "are",
    "you longer"
])

In [None]:
#6.4 Vector Store

from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores.chroma import Chroma
from langchain.storage import LocalFileStore

# Load the document and split it into token-counted chunks.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    chunk_overlap=100,
    chunk_size=600,
    separator="\n",
)
loader = UnstructuredFileLoader("./files/document.docx")
docs = loader.load_and_split(text_splitter=splitter)

# Wrap the embedding model with a cache backed by a local directory.
embeddings = OpenAIEmbeddings()
cache_dir = LocalFileStore("./.cache/")
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings,
    cache_dir,
)

# Build the vector store from the chunks and run a similarity search on it.
vectorstore = Chroma.from_documents(docs, cached_embeddings)
vectorstore.similarity_search("Where does Winston live?")

In [None]:
# 6.6 RetrievalQA: off-the-shelf chain

from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.storage import LocalFileStore
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores.chroma import Chroma

llm = ChatOpenAI()

# Load the document and split it into token-counted chunks.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    chunk_overlap=100,
    chunk_size=600,
    separator="\n",
)
loader = UnstructuredFileLoader("./files/document.docx")
docs = loader.load_and_split(text_splitter=splitter)

# Wrap the embedding model with a cache backed by a local directory.
embeddings = OpenAIEmbeddings()
cache_dir = LocalFileStore("./.cache/")
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings,
    cache_dir,
)

vectorstore = Chroma.from_documents(docs, cached_embeddings)

# Off-the-shelf QA chain; the retriever is derived from the vector store.
chain = RetrievalQA.from_chain_type(
    retriever=vectorstore.as_retriever(),
    chain_type="stuff",
    llm=llm,
)
chain.run("Describe the Victory Mansions.")

In [None]:
# 6.8 Stuff LCEL Chain
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.storage import LocalFileStore
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores.faiss import FAISS
from langchain.schema.runnable import RunnablePassthrough

llm = ChatOpenAI(temperature=0.1)

# Load the document and split it into token-counted chunks.
loader = UnstructuredFileLoader("./files/document.docx")
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=600,
    chunk_overlap=100,
)
docs = loader.load_and_split(text_splitter=splitter)

# Wrap the embedding model with a cache backed by a local directory.
embeddings = OpenAIEmbeddings()
cache_dir = LocalFileStore("./.cache/")
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(embeddings, cache_dir)

# from_documents needs the embedding model as its second argument; the
# original call passed only `docs` and left `cached_embeddings` unused.
vectorstore = FAISS.from_documents(docs, cached_embeddings)

retriever = vectorstore.as_retriever()

prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful assistant. Answer question using only following context. If you don't know the answer, just say you don't know. don't make it up: \n\n{context}"),
    ("human", "{question}")
])

# The invoked string is sent to the retriever (its result fills {context}) and
# passed through unchanged as {question}.
chain = {"context": retriever, "question": RunnablePassthrough()} | prompt | llm
chain.invoke("Describe Victory Mansions")

In [None]:
# 6.9 Map Reduce LCEL Chain
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda

chat = ChatOpenAI(temperature=0.1)
# Every chain in this cell refers to the model as `llm`. Bind it here so the
# cell uses the model it creates instead of an `llm` left by an earlier cell.
llm = chat

# Step 3: prompt applied to each retrieved document to pull out the passages
# relevant to the question.
map_doc_prompt = ChatPromptTemplate.from_messages([
    ("system", """
     Use the following portion of a long document to see if any of the text is relevant to answer the question. Return any relevant text verbatium.
     -----
     {context}
    """),
    ("human", "{question}"),
])

map_doc_chain = map_doc_prompt | llm

def map_docs(inputs):
    """Run ``map_doc_chain`` on every document and join the extracted text.

    Args:
        inputs: dict with "documents" (an iterable of objects exposing
            ``page_content``) and "question" (passed to every chain call).

    Returns:
        The ``content`` of each chain response, joined with blank lines
        ("\\n\\n"). An empty document list gives an empty string.
    """
    # No trailing comma here: `inputs['documents'],` built a 1-tuple, so the
    # loop received the whole list as a single "document" and failed on
    # `.page_content`.
    documents = inputs["documents"]
    question = inputs["question"]

    # Step 3: one chain call per document.
    extracts = []
    for document in documents:
        response = map_doc_chain.invoke({
            "context": document.page_content,
            "question": question,
        })
        extracts.append(response.content)

    # Step 4: merge the per-document extracts into one context string.
    return "\n\n".join(extracts)

                        # 2
map_chain = {"documents": retriever, "question": RunnablePassthrough()} | RunnableLambda(map_docs) | llm

# 5
final_prompt = ChatPromptTemplate.from_messages([
    ("system", 
     """
     Given the following extracted parts of an long document and a question, create a final answer.
     If you don't know the answer, just say you don't know. Don't try to make up an answer.
     ------
     {context}
     """),
     ("human", "{question}")
])

final_chain = {"context": map_chain, "question": RunnablePassthrough()} | final_prompt | llm
# 1
final_chain.invoke("Where does Winston go to work?")