In [1]:
from config import set_environment
set_environment()

# Getting Started

## API model integrations

### Fake LLM


In [2]:
"""
The Fake LLM lets you simulate responses during testing without making actual 
API calls, useful for rapid prototyping and unit testing agents. 
It avoids hitting rate limits and enables mocking responses to validate proper 
agent handling. Fast agent iteration is possible without needing a real LLM. 
For instance, initializing a Fake LLM to return "Hello" can be done as follows:
"""
from langchain.llms import FakeListLLM

fake_llm = FakeListLLM(responses=['Hello'])
"""
We set up an agent using the React strategy (ZERO_SHOT_REACT_DESCRIPTION). 
The agent is run with the text "what's 2 + 2."
"""

In [4]:
"""
We connect a tool, a Python REPL, based on the LLM output. 
FakeListLLM provides two consistent responses 
("Action: Python_REPL\nAction Input: print(2 + 2)" and 
"Final Answer: 4"). The fake LLM output triggers a call to the Python 
interpreter, resulting in a return of 4. Note that the action must match the 
name attribute of the tool, PythonREPLTool.
"""

from langchain.llms.fake import FakeListLLM
from langchain.agents import load_tools, initialize_agent, AgentType

tools = load_tools(['python_repl'])
response = [
    'Action: Python_REPL\nAction Input: print(2 + 2)', 'Final Answer: 4']
llm = FakeListLLM(responses=response)

agent = initialize_agent(
    tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)
agent.run('Whats 2 + 2')

Python REPL can execute arbitrary code. Use with caution.




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: Python_REPL
Action Input: print(2 + 2)[0m
Observation: [36;1m[1;3m4
[0m
Thought:[32;1m[1;3mFinal Answer: 4[0m

[1m> Finished chain.[0m


'4'

In [None]:
class PythonREPLTool(BaseTool):
    """A tool for running python code in a REPL."""
    name = "Python_REPL"
    description = (
        "A Python shell. Use this to execute python commands. "
        "Input should be a valid python command. "
        "If you want to see the output of a value, you should print it out"
        "with `print(...)`."
    )
"""
Tool names and descriptions are provided to the LLM, which then decides an 
action (execution of a tool or planning) based on the information. 
The Python interpreter's output is passed to the fake LLM, which disregards the 
observation and returns 4. If the second response changes to 
"Final Answer: 5," the agent's output won't correspond to the question.
"""

### OpenAI


In [5]:
"""
We can utilize the OpenAI language model class to set up an LLM for interaction. 
Let's create an agent for calculations using this model 
"""
from langchain.llms import OpenAI
from langchain.agents import load_tools, initialize_agent, AgentType

llm = OpenAI(temperature=0., model='gpt-3.5-turbo-instruct')
tools = load_tools(['python_repl'])

agent = initialize_agent(
    tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)
agent.run('whats 4 + 4')

"""
The agent produces the correct solution. Though a simple problem, it's 
fascinating to express questions in natural language. 
"""



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m I should use Python to add 4 and 4
Action: [Python_REPL]
Action Input: 4 + 4[0m
Observation: [Python_REPL] is not a valid tool, try one of [Python_REPL].
Thought:[32;1m[1;3m I should use Python to add 4 and 4
Action: [Python_REPL]
Action Input: 4 + 4[0m
Observation: [Python_REPL] is not a valid tool, try one of [Python_REPL].
Thought:[32;1m[1;3m I should use Python to add 4 and 4
Action: [Python_REPL]
Action Input: 4 + 4[0m
Observation: [Python_REPL] is not a valid tool, try one of [Python_REPL].
Thought:[32;1m[1;3m I should use Python to add 4 and 4
Action: [Python_REPL]
Action Input: 4 + 4[0m
Observation: [Python_REPL] is not a valid tool, try one of [Python_REPL].
Thought:[32;1m[1;3m I should use Python to add 4 and 4
Action: [Python_REPL]
Action Input: 4 + 4[0m
Observation: [Python_REPL] is not a valid tool, try one of [Python_REPL].
Thought:[32;1m[1;3m I should use Python to add 4 and 4
Action: [Python_RE

'8'

### Hugging Face


In [3]:
from langchain.llms import HuggingFaceHub

llm = HuggingFaceHub(
    model_kwargs={"temperature": 0.5, "max_length": 64},
    repo_id='google/flan-t5-xxl'
)

prompt = 'In which country is Hanoi?'
completion = llm(prompt)
print(completion)

"""
The LLM takes a text input, a question, and produces a completion. 
The model possesses extensive knowledge and can generate answers to 
knowledge-based questions.
"""

vietnam


### Google Cloud Platform


### Jina AI


### Replicate


In [2]:
from langchain.llms import Replicate

text2image = Replicate(
  model="stability-ai/stable-diffusion:db21e45d3f7023abc2a46ee38a23973f6dce16bb082a930b0c49861f96d1e5bf",
  input={'image_dimensions': '512x512'}
)
image_url = text2image("a book cover for a book about creating generative ai applications in Python")

## Local models

### Hugging Face Transformers


In [None]:
from transformers import pipeline
import torch
from langchain import PromptTemplate, LLMChain

generate_text = pipeline(
    model = "aisquared/dlite-v1-355m",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map='auto',
    framework='pt'
)
# generate_text('Hello')

template = """Question: {question}
Answer: Let's think step by step."""
prompt = PromptTemplate(template=template, input_variables=['question'])
llm_chain = LLMChain(prompt=prompt, llm=generate_text)

question = "What is electroencephalography?"

print(llm_chain.run(question))
"""
Running the above code downloads all necessary components for the model, 
including the tokenizer and model weights from Hugging Face. 
This relatively performant model, with 355 million parameters, is specifically 
tuned for conversations. We can proceed to perform a text completion for 
inspiration in this chapter.
"""

## Building an application for customer service

In [3]:
"""
I've tasked GPT-3.5 to create a concise customer email complaint about a coffee 
machine. Let's assess the sentiment using our model.
"""

from transformers import pipeline

customer_email = """
I am writing to pour my heart out about the recent unfortunate experience 
I had with one of your coffee machines that arrived broken. I anxiously 
unwrapped the box containing my highly anticipated coffee machine. 
However, what I discovered within broke not only my spirit but also any 
semblance of confidence I had placed in your brand.
Its once elegant exterior was marred by the scars of travel, resembling a 
war-torn soldier who had fought valiantly on the fields of some espresso 
battlefield. This heartbreaking display of negligence shattered my dreams 
of indulging in daily coffee perfection, leaving me emotionally distraught 
and inconsolable
"""

sentiment_model = pipeline(
  task='sentiment-analysis',
  model="cardiffnlp/twitter-roberta-base-sentiment"
)

print(sentiment_model(customer_email))

"""
The sentiment model in use, Twitter-roBERTa-base, trained on tweets, 
may not be the most suitable for this scenario. Besides emotion sentiment 
analysis, it can handle tasks like emotion recognition (anger, joy, sadness, or 
optimism), emoji prediction, irony detection, hate speech detection, offensive 
language identification, and stance detection (favor, neutral, or against).

For sentiment analysis, we receive a rating and a numeric score indicating 
confidence in the label. The labels are:
- 0: Negative
- 1: Neutral
- 2: Positive

For comparison, if the email expresses strong negative emotions like "I am so 
angry and sad, I want to kill myself," we should expect a score close to 0.98 
for the negative label. Experimenting with other models or training improved
models can be considered once we establish metrics to work against.
"""

In [5]:
"""
Let’s execute the summarization model remotely on a server. 
"""

from langchain import HuggingFaceHub

customer_email = """
I am writing to pour my heart out about the recent unfortunate experience 
I had with one of your coffee machines that arrived broken. I anxiously 
unwrapped the box containing my highly anticipated coffee machine. 
However, what I discovered within broke not only my spirit but also any 
semblance of confidence I had placed in your brand.
Its once elegant exterior was marred by the scars of travel, resembling a 
war-torn soldier who had fought valiantly on the fields of some espresso 
battlefield. This heartbreaking display of negligence shattered my dreams 
of indulging in daily coffee perfection, leaving me emotionally distraught 
and inconsolable
"""

summarizer = HuggingFaceHub(
  repo_id='facebook/bart-large-cnn',
  model_kwargs={'temperature': 0, 'max_length': 180}
)

def summarize(llm, text) -> str:
  return llm(f'Summarize this: {text}!')

print(summarize(summarizer, customer_email))
"""
The summary is passable but not very convincing, with some remaining rambling. 
We might explore other models or opt for an LLM with a summarization prompt. 
"""

A coffee machine that arrived broken broke the man's spirit and confidence in the brand. "This heartbreaking display of negligence shattered my dreams of indulging in daily coffee perfection," he writes. "I am emotionally distraught  and inconsolable! I am writing to pour my heart out about the recent unfortunate experience"


In [None]:
"""
Understanding the customer's issue can be valuable. Let's inquire with Vertex AI.
"""

from langchain.llms import VertexAI
from langchain import PromptTemplate, LLMChain

template = """Given this text, decide what is the issue the customer is 
concerned about. Valid categories are these:
* product issues
* delivery problems
* missing or late orders
* wrong product
* cancellation request
* refund or exchange
* bad support experience
* no clear reason to be upset
Text: {email}
Category:
"""

prompt = PromptTemplate(template=template, input_variables=["email"])
llm = VertexAI()
llm_chain = LLMChain(prompt=prompt, llm=llm, verbose=True)
print(llm_chain.run(customer_email))

"""
We get product issues back
Thoughtful implementation of such AI automation can complement human agents by 
addressing common queries, enabling them to focus on complex issues. 
This showcases the potential of generative AI to enhance customer service workflows.

In the next chapter, we'll explore exposing this functionality in a graphical 
interface for customer service agents to interact with.
"""

# Building Capable Assistants



## Mitigating hallucinations through fact-checking


In [7]:
from langchain.chains import LLMCheckerChain
from langchain.llms import OpenAI

llm = OpenAI(temperature=0., model='gpt-3.5-turbo-instruct')
text = "What type of mammal lays the biggest eggs?"
checker_chain = LLMCheckerChain.from_llm(llm, verbose=True)

checker_chain.run(text)

"""
The model may provide varied responses to the question, including incorrect 
answers such as the blue whale, the North American beaver, and the extinct 
Giant Moa when asked, "What type of mammal lays the biggest eggs?"
"""



[1m> Entering new LLMCheckerChain chain...[0m


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


' The platypus, which is one of the five species of mammals that lay eggs, lays the biggest eggs among mammals.'


## Summarizing information



### Basic prompting


In [10]:
"""
For concise summarization, instruct the LLM on the desired length and 
provide the text.
"""

from langchain import OpenAI

prompt = """
Summarize this text in one sentence:

{text}
"""
text = "What type of mammal lays the biggest eggs?"

llm = OpenAI(model='gpt-3.5-turbo-instruct')
summary = llm(prompt.format(text=text))

summary

"""
Utilize LangChain's decorator syntax from the LangChain Decorators library for 
a more Pythonic and streamlined approach to prompt definition and execution. 
This provides a more intuitive interface, allowing multiline definitions and 
facilitating the utilization of LLMs.
"""

'\nThe text asks about the mammal species with the largest eggs.'

In [11]:
from langchain_decorators import llm_prompt

@llm_prompt
def summarize(text: str, length='short') -> str:
  """
  Summarize this text in {length} length:
  
  {text}
  """
  return

summary = summarize(text="let me tell you a boring story from when I was young...")
print(summary)

"""
The @llm_prompt decorator converts docstrings into prompts, managing prompt 
execution. It efficiently handles parameter passing and output parsing,
abstracting complexity. 
"""

The speaker wants to share a boring story from their youth.


### Prompt templates


In [4]:
"""
For dynamic inputs, use prompt templates to insert text into predefined prompts. 
These templates support variable length limits and modular prompt design, 
implemented in LangChain Expression Language (LCEL).
"""

from langchain import PromptTemplate, OpenAI
from langchain.schema import StrOutputParser

llm = OpenAI(model='gpt-3.5-turbo-instruct')
prompt = PromptTemplate.from_template(
"""
Summarize this text: {text}?  
""")

text = "let me tell you a boring story from when I was young..."
runnable = prompt | llm | StrOutputParser()
summary = runnable.invoke({'text': text})
summary

"""
LCEL offers a declarative approach to chain composition, providing intuitive 
and productive benefits such as asynchronous processing, batching, streaming, 
fallbacks, parallelism, and seamless integration with LangSmith tracing. 
Here, a runnable chain connects the prompt template, LLM, and output parser 
in a pipeline.
"""

'\nThe speaker is about to share a dull story from their past.'

### Chain of density


In [None]:
"""
Chain of Density (CoD), a prompt-guided technique for enhancing the information 
density of GPT-4-generated summaries incrementally, while maintaining length 
control. The CoD prompt is as follows:
"""
template = """Article: { text }
You will generate increasingly concise, entity-dense summaries of the 
above article.
Repeat the following 2 steps 5 times.
Step 1. Identify 1-3 informative entities (";" delimited) from the article 
which are missing from the previously generated summary.
Step 2. Write a new, denser summary of identical length which covers every 
entity and detail from the previous summary plus the missing entities.
A missing entity is:
- relevant to the main story,
- specific yet concise (5 words or fewer),
- novel (not in the previous summary),
- faithful (present in the article),
- anywhere (can be located anywhere in the article).
Guidelines:
- The first summary should be long (4-5 sentences, ~80 words) yet highly 
non-specific, containing little information beyond the entities marked 
as missing. Use overly verbose language and fillers (e.g., "this article 
discusses") to reach ~80 words.
- Make every word count: rewrite the previous summary to improve flow and 
make space for additional entities.
- Make space with fusion, compression, and removal of uninformative 
phrases like "the article discusses".
- The summaries should become highly dense and concise yet self-contained, 
i.e., easily understood without the article.
- Missing entities can appear anywhere in the new summary.
- Never drop entities from the previous summary. If space cannot be made, 
add fewer new entities.
Remember, use the exact same number of words for each summary.
Answer in JSON. The JSON should be a list (length 5) of dictionaries whose 
keys are "Missing_Entities" and "Denser_Summary".
"""

### Map-Reduce pipelines


In [None]:
"""
This approach enables parallel processing and use of LLMs for reasoning, 
generating, and analyzing individual documents.

Example of loading a PDF document and summarizing it1
"""

from langchain.chains.summarize import load_summarize_chain
from langchain import OpenAI
from langchain.document_loaders import PyPDFLoader

pdf_file_path = "<pdf_file_path>"
pdf_loader = PyPDFLoader(pdf_file_path)
docs = pdf_loader.load_and_split()

llm = OpenAI(model='gpt-3.5-turbo-instruct')
chain = load_summarize_chain(llm, chain_type='map_reduce')
chain.run(docs)


## Extracting information from documents


In [22]:
from typing import Optional
from pydantic import BaseModel
from pydantic.dataclasses import dataclass


@dataclass(config={"arbitrary_types_allowed": True})
class Experience(BaseModel):
    start_date: Optional[str]
    end_date: Optional[str]
    description: Optional[str]


@dataclass(config={"arbitrary_types_allowed": True})
class Study(Experience):
    degree: Optional[str]
    university: Optional[str]
    country: Optional[str]
    grade: Optional[str]


@dataclass(config={"arbitrary_types_allowed": True})
class WorkExperience(Experience):
    company: str
    job_title: str


@dataclass(config={"arbitrary_types_allowed": True})
class Resume(BaseModel):
    first_name: str
    last_name: str
    linkedin_url: Optional[str]
    email_address: Optional[str]
    nationality: Optional[str]
    skill: Optional[str]
    study: Optional[Study]
    work_experience: Optional[WorkExperience]
    hobby: Optional[str]

In [24]:
"""
Using the create_extraction_chain_pydantic() function in LangChain, 
we can provide a schema as input and receive an instantiated object that 
adheres to it. 
"""

from langchain.chains import create_extraction_chain_pydantic
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader

pdf_file_path = 'CV.pdf'
pdf_loader = PyPDFLoader(pdf_file_path)
docs = pdf_loader.load_and_split()

llm = ChatOpenAI(model_name="gpt-3.5-turbo-0613")
chain = create_extraction_chain_pydantic(pydantic_schema=Resume, llm=llm)
chain.run(docs)
"""
The result is imperfect - only one experience is parsed. 
"""


## Answering questions with tools


In [None]:
from langchain.callbacks import StreamlitCallbackHandler
from langchain.chat_models import ChatOpenAI
from langchain.agents import (AgentExecutor, AgentType, initialize_agent,
                              load_tools)
import streamlit as st
from config import set_environment
set_environment()


# LANGCHAIN ####################################################################

def load_agent() -> AgentExecutor:
    llm = ChatOpenAI(temperature=0, streaming=True)
    # DuckDuckGoSearchRun, wolfram alpha, arxiv search, wikipedia
    tools = load_tools(tool_names=['ddg-search', 'wolfram-alpha', 'arxiv',
                                   'wikipedia'],
                       llm=llm)
    return initialize_agent(tools=tools, llm=llm,
                            agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)


chain = load_agent()
# STREAMLIT ####################################################################

st_callback = StreamlitCallbackHandler(st.container())

if prompt := st.chat_input():
    st.chat_message('user').write(prompt)

    with st.chat_message('assistant'):
        st_callback = StreamlitCallbackHandler(st.container())
        response = chain.run(prompt, callbacks=[st_callback])
        st.write(response)

# conda activate chatbot_env
# streamlit run Information_retrieval_with_tools.py


## Exploring reasoning strategies



# Building a Chatbot like ChatGPT



## What is a chatbot



## Understanding retrieval and vectors



### Embeddings


In [4]:
from langchain.embeddings.openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings()

text = 'This is a sample query.'
query_result = embeddings.embed_query(text)

print(query_result)
print(len(query_result))

[-0.009159999120041161, 0.006975482333563724, -0.006220088443420031, -0.008765288773894297, -0.02689473748072997, 0.027983593286885947, -0.012930162465271547, -0.004750133105109466, -0.027071675734323054, -0.02719417259459222, 0.010575511668043928, 0.01999411392563181, 0.0032801779996295484, -0.006243907018660538, 0.0038382164838691615, 0.0014886699165116318, 0.020524930549056228, -0.003770163112399737, 0.02312457413549959, -0.017775569405681747, 0.00031240803008799706, -0.0032376444969420036, 0.005658647139267026, 0.012106714970827782, -0.003014769455086692, 0.0004525556653297458, 0.007948647384938612, -0.022879582277606437, -0.010874946781906173, -0.0031900071136303416, 0.00272554210247268, 0.0023903786600883788, -0.015393697911792173, -0.034952268583639, -0.012603505723424753, 0.005876418300498221, 0.0017140971315096495, -0.005066582085708025, 0.023219848436461618, 0.002017785828471297, 0.03263844976272691, 0.011984219041881206, -0.002211738181631337, -0.014998987565645307, 0.015243

In [6]:
from langchain.embeddings.openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings()

words = ['cat', 'dog', 'computer', 'animal']
doc_vectors = embeddings.embed_documents(words)

print(len(doc_vectors))
print(len(doc_vectors[0]))

4
1536


### Vector databases


#### Chroma


In [None]:
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings

embeddings = OpenAIEmbeddings()
docs = ['cat', 'dog', 'computer', 'animal']
vectorestore = Chroma.from_documents(documents=docs, embedding=embeddings)

# similar_vectors = vector_store.query(query_vector, k)


## Loading and retrieving in LangChain



### Document loaders


In [3]:
from langchain.document_loaders import TextLoader

loader = TextLoader(file_path='txt/husky.txt')
documents = loader.load()

In [11]:
from langchain.document_loaders import WikipediaLoader

loader = WikipediaLoader("LangChain")
documents = loader.load()

### Retrievers


    
#### kNN retriever


In [13]:
from langchain.retrievers import KNNRetriever
from langchain.embeddings import OpenAIEmbeddings

words = ["cat", "dog", "computer", "animal"]
retriever = KNNRetriever.from_texts(words, OpenAIEmbeddings())

result = retriever.get_relevant_documents('dog')
print(result)

[Document(page_content='dog', metadata={}), Document(page_content='animal', metadata={}), Document(page_content='cat', metadata={}), Document(page_content='computer', metadata={})]


    
#### Custom retrievers


In [None]:
from langchain.schema import Document, BaseRetriever
class MyRetriever(BaseRetriever):
    def get_relevant_documents(self, query: str, **kwargs) -> list[Document]: 
        # Implement your retrieval logic here 
        # Retrieve and process documents based on the query 
        # Return a list of relevant documents 
        relevant_documents = [] 
        # Your retrieval logic goes here… 
        return relevant_documents


    

## Implementing a chatbot


In [4]:
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationChain

# Creating a conversation chain with memory
memory = ConversationBufferMemory()
llm = ChatOpenAI(model_name="gpt-3.5-turbo-0613", temperature=0, streaming=True)
chain = ConversationChain(llm=llm, memory=memory)

# User inputs a message
user_input = 'Hi, how are you?'
# Processing the user input in the conversation chain
response = chain.predict(input=user_input)
print(response)

# User inputs another message
user_input = "What's the weather like today?"
response = chain.predict(input=user_input)
print(response)

print(memory.chat_memory.messages)

Hello! I'm an AI, so I don't have feelings, but I'm here to help you. How can I assist you today?
I'm sorry, but as an AI, I don't have access to real-time information. I suggest checking a weather website or app for the most accurate and up-to-date weather forecast.
[HumanMessage(content='Hi, how are you?', additional_kwargs={}, example=False), AIMessage(content="Hello! I'm an AI, so I don't have feelings, but I'm here to help you. How can I assist you today?", additional_kwargs={}, example=False), HumanMessage(content="What's the weather like today?", additional_kwargs={}, example=False), AIMessage(content="I'm sorry, but as an AI, I don't have access to real-time information. I suggest checking a weather website or app for the most accurate and up-to-date weather forecast.", additional_kwargs={}, example=False)]


In [None]:
from langchain.memory import ConversationBufferWindowMemory
memory = ConversationBufferWindowMemory(k=1)

memory.save_context({'input': 'hi'}, {'output': 'whats up'})
memory.save_context({'input': 'not much you'}, {'output': 'not much'})

In [5]:
from langchain.llms import OpenAI
from langchain.prompts.prompt import PromptTemplate
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationChain


llm = OpenAI(temperature=0., model='gpt-3.5-turbo-instruct')

template = """The following is a friendly conversation between a 
human and an AI. The AI is talkative and provides lots of specific 
details from its context. If the AI does not know the answer to a 
question, it truthfully says it does not know.

Current conversation:
{history}
Human: {input}
AI Assistant:"""
PROMPT = PromptTemplate(input_variables=['history', 'input'], template=template)
conversation = ConversationChain(prompt=PROMPT, llm=llm, verbose=True,
                      memory=ConversationBufferMemory(ai_prefix='AI Assistant'))

#### Remembering conversation summaries


In [6]:
from langchain.memory import ConversationSummaryMemory
from langchain.llms import OpenAI

# Initialize the summary memory and the language model
llm = OpenAI(temperature=0., model='gpt-3.5-turbo-instruct')
memory = ConversationSummaryMemory(llm=llm)
# Save the context of an interaction
memory.save_context({'input': 'hi'}, {'output': 'whatsup'})
# Load the summarize memory
memory.load_memory_variables({})

{'history': '\nThe human greets the AI. The AI responds by asking what is going on.'}

#### Storing knowledge graphs


In [7]:
from langchain.memory import ConversationKGMemory
from langchain.llms import OpenAI

llm = OpenAI(temperature=0., model='gpt-3.5-turbo-instruct')
memory = ConversationKGMemory(llm=llm)

#### Combining several memory mechanisms


In [5]:
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import ConversationChain
from langchain.memory import (ConversationBufferMemory, CombinedMemory,
                              ConversationSummaryBufferMemory)

llm = OpenAI(temperature=0., model='gpt-3.5-turbo-instruct')

conv_memory = ConversationBufferMemory(memory_key='chat_history_lines',
                                       input_key='input')
summary_memory = ConversationSummaryBufferMemory(llm=llm, input_key='input')
memory = CombinedMemory(memories=[conv_memory, summary_memory])

_DEFAULT_TEMPLATE = """The following is a friendly conversation between a human
and an AI. The AI is talkative and provides lots of specific details from its 
context. If the AI does not know the answer to a question, it truthfully says it
does not know.
Summary of conversation:
{history}
Current conversation:
{chat_history_lines}
Human: {input}
AI:"""
PROMPT = PromptTemplate(input_variables=['history', 'input', 'chat_history_lines'],
                        template=_DEFAULT_TEMPLATE)

conversation = ConversationChain(llm=llm, verbose=True, memory=memory,
                                 prompt=PROMPT)
conversation.run('Hi')



[1m> Entering new ConversationChain chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a friendly conversation between a human
and an AI. The AI is talkative and provides lots of specific details from its 
context. If the AI does not know the answer to a question, it truthfully says it
does not know.
Summary of conversation:

Current conversation:

Human: Hi
AI:[0m

[1m> Finished chain.[0m


" Hello there! It's nice to meet you. My name is AI and I am an artificial intelligence designed to assist and communicate with humans. How can I help you today?"


## Moderating responses


In [10]:
from langchain.chains import OpenAIModerationChain
from langchain.prompts import PromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.schema import StrOutputParser

moderation_chain = OpenAIModerationChain()

cot_prompt = PromptTemplate.from_template(
    "{question} \nLet's think step by step!"
)
llm_chain = cot_prompt | ChatOpenAI() | StrOutputParser()

chain = llm_chain | moderation_chain

response = chain.invoke({'question': 'What is the future of programming?'})

response

{'input': "Step 1: Increasing Automation and AI Integration\nThe future of programming is likely to see increasing levels of automation and integration of artificial intelligence (AI) technologies. As AI continues to advance, programming tasks that are repetitive or routine in nature can be automated, allowing programmers to focus on more complex and creative problem-solving.\n\nStep 2: Low-Code and No-Code Development\nThe rise of low-code and no-code development platforms will also shape the future of programming. These platforms enable individuals with limited programming knowledge to create applications using visual interfaces and pre-built components. This trend will democratize software development, allowing non-programmers to participate in creating software solutions.\n\nStep 3: Increased Specialization and Domain-Specific Languages\nProgramming is becoming increasingly specialized, with developers focusing on specific domains or industries. Domain-specific languages (DSLs) are


# Developing Software with Generative AI



## Software development and AI - Code LLMs



## Writing code with LLMs



### StarCoder


### StarChat


### Llama 2


### Small local model



## Automating software development



# LLMs for Data Science



## The impact of generative models on data science



## Automated data science



### Data collection


### Visualization and EDA


### Preprocessing and feature extraction


### AutoML



## Using agents to answer data science questions



## Data exploration with LLMs



# Customizing LLMs and Their Output



## Conditioning LLMs



### Reinforcement learning with human feedback


### Low-rank adaptation


### Inference-time conditioning



## Fine-tuning



### Setup for fine-tuning


### Open-source models


### Commercial models



## Prompt engineering



### Zero-shot prompting


In [3]:
from langchain import PromptTemplate
from langchain.chat_models import ChatOpenAI

model = ChatOpenAI(model_name="gpt-3.5-turbo-0613")
template = """Classify the sentiment of this text: {text}"""
prompt = PromptTemplate(input_variables=['text'], template=template)
chain = prompt | model
result = chain.invoke({'text': 'I hated that movie, it was terrible!'})
print(result)

content='The sentiment of the text is negative.' additional_kwargs={} example=False


### Few-shot learning


In [6]:
from langchain.prompts import FewShotPromptTemplate, PromptTemplate
from langchain.chat_models import ChatOpenAI

model = ChatOpenAI(model_name="gpt-3.5-turbo-0613")

examples = [
  {
    'input': 'I absolutely love the new update! Everything works seamlessly.',
    'output': 'Positive'
  },
  {
    'input': "It's okay, but I think it could use more features.",
    'output': 'Neutral'
  },
  {
    'input': ("I'm disappointed with the service, I expected much better "
              "performance"),
    'output': 'Negative'
  }
]
example_prompt = PromptTemplate(
  template='{input} -> {output}',
  input_variables=['input', 'output']
)
prompt = FewShotPromptTemplate(
  examples=examples,
  example_prompt=example_prompt,
  suffix='Question: {input}',
  input_variables=['input']
)

chain = prompt | model

result = chain.invoke({'input': ('This is an excellent book with high quality'
                                 'explainations.')})
print(result)

content='Positive' additional_kwargs={} example=False


### Chain-of-thought prompting


In [8]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate

reasoning_prompt = "{question}\nLet's think step by step!"
prompt = PromptTemplate(
  template=reasoning_prompt,
  input_variables=['question']
)
model = ChatOpenAI(model_name='gpt-3.5-turbo-0613')
chain = prompt | model
result = chain.invoke({
  'question': ("There were 5 apples originally. I ate 2 apples. My friend gave "
               "me 3 apples. How many apples do I have now?")
})
print(result)

content='Step 1: Start with the original number of apples, which is 5.\nStep 2: Subtract the number of apples you ate, which is 2. So, you have 5 - 2 = 3 apples.\nStep 3: Add the number of apples your friend gave you, which is 3. So, you have 3 + 3 = 6 apples.\nTherefore, you have 6 apples now.' additional_kwargs={} example=False


In [10]:
examples = [
    {
        "input": "I absolutely love the new update! Everything works seamlessly.",
        "output": ("Love and absolute works seamlessly are examples of positive "
                   "sentiment. Therefore, the sentiment is positive"),
    },
    {
        "input": "It's okay, but I think it could use more features.",
        "output": ("It's okay is not an endorsement. The customer further thinks"
                   " it should be extended. Therefore, the sentiment is neutral"),
    },
    {
        "input": ("I'm disappointed with the service, I expected much better "
                  "performance."),
        "output": ("The customer is disappointed and expected more. "
                   "This is negative")
    }
]

### Self-consistency


In [12]:
from langchain import PromptTemplate, LLMChain
from langchain.chains import SequentialChain
from langchain.chat_models import ChatOpenAI

llm = ChatOpenAI(model_name='gpt-3.5-turbo-0613')

solutions_template = """
Generate {num_solutions} distinct answers to this question:
{question}

Solutions:
"""
solutions_prompt = PromptTemplate(
    template=solutions_template,
    input_variables=['question', 'num_solutions']
)
solutions_chain = LLMChain(
    llm=llm,
    prompt=solutions_prompt,
    output_key='solutions'
)

consistency_template = """
For each answer in {solutions}, count the number of times it occurs.
Finally, choose the answer that occurs most.

Most frequent solution:
"""
consistency_prompt = PromptTemplate(
    template=consistency_template,
    input_variables=['solutions']
)
consistency_chain = LLMChain(
    llm=llm,
    prompt=consistency_prompt,
    output_key='best_solution'
)

answer_chain = SequentialChain(
    chains=[solutions_chain, consistency_chain],
    input_variables=['question', 'num_solutions'],
    output_variables=['best_solution']
)

result = answer_chain.run(
    question=('Which year was the Declaration of Independence of the United '
              'States signed?'),
    num_solutions='5'
)

print(result)

The answer that occurs most frequently is "1776 marks the year when the Declaration of Independence of the United States was signed." (5 times)


### Tree-of-thought


In [13]:
from langchain.prompts import PromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.chains.llm import LLMChain
from langchain.chains import SequentialChain

llm = ChatOpenAI(model_name='gpt-3.5-turbo-0613')

solution_template = """
Generate {num_solutions} distince solutions for {problem}. Consider factors like
{factors}.

Solutions:
"""
solutions_prompt = PromptTemplate(
  template=solution_template,
  input_variables=['problem', 'factors', 'num_solutions']
)
solutions_chain = LLMChain(
  llm=llm,
  prompt=solutions_prompt,
  output_key='solutions'
)

evaluation_template = """
Evaluate each solution in {solutions} by analyzing pros, cons, feasibility, and
probability of success.

Evaluations:
"""
evaluation_prompt = PromptTemplate(
  template=evaluation_template,
  input_variables=['solutions']
)
evaluation_chain = LLMChain(
  llm=llm,
  prompt=evaluation_prompt,
  output_key='evaluations'
)

reasoning_template = """
For the most promising solutions in {evaluations}, explain scenarios, 
implementation strategies, partnerships needed, and handling potential 
obstacles.

Enhanced reasoning:
"""
reasoning_prompt = PromptTemplate(
  template=reasoning_template,
  input_variables=['evaluations']
)
reasoning_chain = LLMChain(
  llm=llm,
  prompt=reasoning_prompt,
  output_key='enhanced_reasoning'
)

ranking_template = """
Based on the evaluations and reasoning, rank the solutions in 
{enhanced_reasoning} from most to least promising.

Ranked Solutions:
"""
ranking_prompt = PromptTemplate(
  template=ranking_template,
  input_variables=['enhanced_reasoning']
)
ranking_chain = LLMChain(
  llm=llm,
  prompt=ranking_prompt,
  output_key='ranked_solutions'
)

tot_chain = SequentialChain(
  chains=[solutions_chain, evaluation_chain, reasoning_chain, ranking_chain],
  input_variables=['problem', 'factors', 'num_solutions'],
  output_variables=['ranked_solutions']
)

result = tot_chain.run(
    problem = 'Prompt Engineering',
    factors = ('Requirements for high task performance, low token use, and '
               'few calls to the LLM'),
    num_solutions = 3,
)

print(result)

1. Implementing an efficient task scheduling algorithm
2. Developing a caching mechanism
3. Optimizing network communication



# Generative AI in Production



## Evaluating LLM apps



### Comparing two outputs


In [4]:
from langchain.evaluation import load_evaluator

evaluator = load_evaluator('labeled_pairwise_string')
evaluator.evaluate_string_pairs(
  prediction='there are three dogs',
  prediction_b='4',
  input='how many dogs are in the park?',
  reference='four'
)

{'reasoning': 'Both responses are relevant to the question asked, as they both provide a numerical answer to the question about the number of dogs in the park. However, Response A is incorrect according to the reference answer, which states that there are four dogs in the park. Response B is correct, as it matches the reference answer exactly. Neither response demonstrates particular depth of thought, as they are both simple, straightforward answers to the question. \n\nBased on these criteria, Response B is the better response.\n',
 'value': 'B',
 'score': 0}

### Comparing against criteria


In [5]:
from langchain.evaluation import load_evaluator

custom_criteria = {
    "simplicity": "Is the language straightforward and unpretentious?",
    "clarity": "Are the sentences clear and easy to understand?",
    "precision": "Is the writing precise, with no unnecessary words or details?",
    "truthfulness": "Does the writing feel honest and sincere?",
    "subtext": "Does the writing suggest deeper meanings or themes?",
}
evaluator = load_evaluator("pairwise_string", criteria=custom_criteria)
evaluator.evaluate_string_pairs(
    prediction=("Every cheerful household shares a similar rhythm of joy; "
                "but sorrow, in each household, plays a unique, haunting melody."),
    prediction_b=("Where one finds a symphony of joy, every domicile of "
                  "happiness resounds in harmonious, identical notes; yet, "
                  "every abode of despair conducts a dissonant orchestra, each"
                  " playing an elegy of grief that is peculiar and profound to "
                  "its own existence."),
    input="Write some prose about families.",
)


{'reasoning': 'Response A is simple, clear, and precise. It uses straightforward language to convey a deep and meaningful message about families. The metaphor of joy and sorrow as music is effective and easy to understand. \n\nResponse B, on the other hand, is more complex and uses more sophisticated language. While it conveys a similar message to Response A, it does so in a more convoluted way. The use of words like "domicile", "resounds", "abode", "conducts", "dissonant", "elegy", and "peculiar" make the sentence harder to understand. \n\nBoth responses are truthful and sincere, and both suggest deeper meanings about the nature of family life. However, Response A does a better job of meeting the criteria of simplicity, clarity, and precision. \n\nTherefore, the decision is: [[A]]',
 'value': 'A',
 'score': 1}

### String and semantic comparisons


In [7]:
from langchain.evaluation import load_evaluator

evaluator = load_evaluator('embedding_distance')
evaluator.evaluate_strings(
  prediction='I shall go',
  reference="I shan't go"
)

{'score': 0.09679562397049857}

### Running evaluations against datasets


In [None]:
import os
from langchain.chat_models import ChatOpenAI
from langsmith import Client
from langchain.chat_models import ChatOpenAI
from langchain.chains import LLMChain
from langchain.smith import RunEvalConfig, run_on_dataset

os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_PROJECT"] = "My Project"

model = ChatOpenAI(model_name="gpt-3.5-turbo-0613", temperature=0.0)
llm.predict("Hello, world!")

client = Client()
runs = client.list_runs()
print(runs)

questions = [
    ("A ship's parts are replaced over time until no original parts remain. "
     "Is it still the same ship? Why or why not?"),  # The Ship of Theseus Paradox
    ("If someone lived their whole life chained in a cave seeing only shadows, "
     "how would they react if freed and shown the real world?"),  # Plato's Allegory of the Cave
    ("Is something good because it is natural, or bad because it is unnatural? "
     "Why can this be a faulty argument?"),  # Appeal to Nature Fallacy
    ("If a coin is flipped 8 times and lands on heads each time, what are the "
     "odds it will be tails next flip? Explain your reasoning."),  # Gambler's Fallacy
    ("Present two choices as the only options when others exist. Is the "
     "statement \"You're either with us or against us\" an example of false "
     "dilemma? Why?"),  # False Dilemma
    ("Do people tend to develop a preference for things simply because they are "
     "familiar with them? Does this impact reasoning?"),  # Mere Exposure Effect
    ("Is it surprising that the universe is suitable for intelligent life since "
     "if it weren't, no one would be around to observe it?"),  # Anthropic Principle
    ("If Theseus' ship is restored by replacing each plank, is it still the same "
     "ship? What is identity based on?"),  # Theseus' Paradox
    ("Does doing one thing really mean that a chain of increasingly negative "
     "events will follow? Why is this a problematic argument?"),  # Slippery Slope Fallacy
    ("Is a claim true because it hasn't been proven false? Why could this impede "
     "reasoning?"),  # Appeal to Ignorance
]

shared_dataset_name = "Reasoning and Bias"
ds = client.create_dataset(
    dataset_name=shared_dataset_name,
    description="A few reasoning and cognitive bias questions"
)

for q in questions:
    client.create_example(inputs={"input": q}, dataset_id=ds.id)
    
def construct_chain():
  return LLMChain.from_string(
    llm=llm,
    template='Help out as best you can.\nQuestion: {input}\nResponse:'
  )
  
evaluation_config = RunEvalConfig(
  evaluators=[
    RunEvalConfig.Criteria({'helpfulness': 'Is the response helpful?'}),
    RunEvalConfig.Criteria({'insightful': 'Is the response carefully thought out?'})
  ]
)

results = run_on_dataset(
  client=client,
  dataset_name=shared_dataset_name,
  dataset=ds,
  llm_or_chain_factory=construct_chain,
  evaluation=evaluation_config
)


## Deploying LLM apps



### FastAPI web server



## Observing LLM apps



### Tracking responses


### Observability tools


In [5]:
import subprocess
from urllib.parse import urlparse
from pydantic import HttpUrl

from langchain.tools import StructuredTool
from langchain.chat_models import ChatOpenAI
from langchain.agents import initialize_agent, AgentType

def ping(url: HttpUrl, return_error: bool) -> str:
  """Ping the fully specified url. Must include https:// in the url."""
  hostname = urlparse(str(url)).netloc
  completed_process = subprocess.run(
    ['ping', '-c', '1', hostname], capture_output=True, text=True
  )
  output = completed_process.stdout
  
  if return_error and (completed_process.returncode != 0):
    return completed_process.stderr
  return output

ping_tool = StructuredTool.from_function(ping)

llm = ChatOpenAI(model='gpt-3.5-turbo-0613', temperature=0)
agent = initialize_agent(
  llm=llm,
  tools=[ping_tool],
  agent=AgentType.OPENAI_MULTI_FUNCTIONS,
  return_intermediate_steps=True
)
result = agent("What's the latency like for https://langchain.com?")
print(result)

{'input': "What's the latency like for https://langchain.com?", 'output': 'The latency for https://langchain.com is approximately 5.79 milliseconds.', 'intermediate_steps': [(_FunctionsAgentAction(tool='ping', tool_input={'url': 'https://langchain.com', 'return_error': False}, log="\nInvoking: `ping` with `{'url': 'https://langchain.com', 'return_error': False}`\n\n\n", message_log=[AIMessage(content='', additional_kwargs={'function_call': {'name': 'tool_selection', 'arguments': '{\n  "actions": [\n    {\n      "action_name": "ping",\n      "action": {\n        "url": "https://langchain.com",\n        "return_error": false\n      }\n    }\n  ]\n}'}}, example=False)]), 'PING langchain.com (35.71.142.77) 56(84) bytes of data.\n64 bytes from a0b1d980e1f2226c6.awsglobalaccelerator.com (35.71.142.77): icmp_seq=1 ttl=246 time=5.79 ms\n\n--- langchain.com ping statistics ---\n1 packets transmitted, 1 received, 0% packet loss, time 0ms\nrtt min/avg/max/mdev = 5.785/5.785/5.785/0.000 ms\n')]}


### LangSmith


### PromptWatch


In [None]:
from langchain import LLMChain, OpenAI, PromptTemplate
from promptwatch import PromptWatch

prompt_template = PromptTemplate.from_template(
  'Finish this sentence: {input}'
)
llm = OpenAI(temperature=0., model='gpt-3.5-turbo-instruct')
chain = LLMChain(llm=llm, prompt=prompt_template)

with PromptWatch() as pw:
  chain('The quick brown fox jumped over')