# LangChain Experimentation

Experiment with LangChain and OpenAI API.

In [69]:
# Import Standard Libraries
import os
import openai

from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.output_parsers import ResponseSchema
from langchain.output_parsers import StructuredOutputParser

from langchain.chains import ConversationChain
from langchain.memory import ConversationBufferMemory
from langchain.memory import ConversationBufferWindowMemory
from langchain.memory import ConversationTokenBufferMemory
from langchain.memory import ConversationSummaryBufferMemory

from langchain.chains import LLMChain
from langchain.chains import SimpleSequentialChain
from langchain.chains import SequentialChain

from langchain.chains.router import MultiPromptChain
from langchain.chains.router.llm_router import LLMRouterChain,RouterOutputParser
from langchain.prompts import PromptTemplate

from langchain.chains import RetrievalQA
from langchain.document_loaders import CSVLoader
from langchain.vectorstores import DocArrayInMemorySearch
from langchain.indexes import VectorstoreIndexCreator
from IPython.display import display, Markdown
from langchain.embeddings import OpenAIEmbeddings

from langchain.agents.agent_toolkits import create_python_agent
from langchain.agents import load_tools, initialize_agent
from langchain.agents import AgentType
from langchain.tools.python.tool import PythonREPLTool
from langchain.python import PythonREPL

# OpenAI API

In [2]:
# Set OpenAI API Key
openai.api_key = os.environ['OPENAI_API_KEY']

In [3]:
def get_inference(prompt: str, 
                  model: str = 'gpt-3.5-turbo') -> str:
    """Send a single user message to the OpenAI chat completion endpoint.

    Args:
        prompt: Text sent as the content of the only 'user' message.
        model: Name of the chat model passed to the API call.

    Returns:
        The 'content' field of the message in the first returned choice.
    """
    # The conversation consists of exactly one user turn
    user_turn = {'role': 'user', 'content': prompt}

    # temperature=0 is passed through to the API unchanged
    completion = openai.ChatCompletion.create(
        model=model,
        messages=[user_turn],
        temperature=0,
    )

    first_choice = completion.choices[0]
    return first_choice.message['content']

In [4]:
#get_inference('What is 1+1?')

# Open AI Chat

In [5]:
# Instantiate a ChatOpenAI object
# NOTE: temperature 0.0 makes the output as deterministic as possible, at the cost of less varied responses
# NOTE(review): no API key is passed explicitly here; ChatOpenAI is expected to read it from the
# OPENAI_API_KEY environment variable rather than from 'openai.api_key' - confirm for the installed version
langchain_chat = ChatOpenAI(temperature=0.0)

# Prompt Template

In [6]:
# Template string for the style-translation prompt.
# The {style} and {text} placeholders are filled in when the prompt is formatted.
string_prompt_template = (
    "Translate the text "
    "that is delimited by triple backticks "
    "into a style that is {style}. "
    "text: ```{text}```\n"
)

In [7]:
# Initialise a Prompt Template
langchain_prompt_template = ChatPromptTemplate.from_template(string_prompt_template)

In [8]:
langchain_prompt_template

ChatPromptTemplate(input_variables=['style', 'text'], output_parser=None, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['style', 'text'], output_parser=None, partial_variables={}, template='Translate the text that is delimited by triple backticks into a style that is {style}. text: ```{text}```\n', template_format='f-string', validate_template=True), additional_kwargs={})])

In [9]:
# Retrieve prompt input variables
langchain_prompt_template.messages[0].prompt.input_variables

['style', 'text']

In [10]:
# Define a prompt style
prompt_style = """American english \
in a calm and respectful tone.
"""

In [11]:
# Define the prompt text
prompt_text = """Arr, I be fuming that me blender lid \
flew off and splattered me kitchen walls \
with smoothie! And to make matters worse, \
the warranty don't cover the cost of \
cleaning up me kitchen. I need your help \
right now, mattey!
"""

In [12]:
# Generate the message from the prompt style and text
message = langchain_prompt_template.format_messages(style=prompt_style, text=prompt_text)

In [13]:
message[0]

HumanMessage(content="Translate the text that is delimited by triple backticks into a style that is American english in a calm and respectful tone.\n. text: ```Arr, I be fuming that me blender lid flew off and splattered me kitchen walls with smoothie! And to make matters worse, the warranty don't cover the cost of cleaning up me kitchen. I need your help right now, mattey!\n```\n", additional_kwargs={}, example=False)

In [14]:
# Get the response
#message_response = langchain_chat(message)

# Parser

It is used to extract information from LLM’s output in a machine-readable format (e.g., JSON from LLM’s output text).

In [15]:
# Expected JSON output
{
  "gift": False,
  "delivery_days": 5,
  "price_value": "pretty affordable!"
}

{'gift': False, 'delivery_days': 5, 'price_value': 'pretty affordable!'}

In [16]:
# Input customer review
# Each trailing backslash joins the next source line onto the same line of text,
# so every continued line must end with a space to keep the words separated.
customer_review = """\
This leaf blower is pretty amazing.  It has four settings: \
candle blower, gentle breeze, windy city, and tornado. \
It arrived in two days, just in time for my wife's \
anniversary present. \
I think my wife liked it so much she was speechless. \
So far I've been the only one using it, and I've been \
using it every other morning to clear the leaves on our lawn. \
It's slightly more expensive than the other leaf blowers \
out there, but I think it's worth it for the extra features.
"""

# Prompt template used to extract 'gift', 'delivery_days' and 'price_value' from a review
# NOTE(review): the backslash right after "price," joins it to "and" without a space;
# the printed template shows "price,and output".
review_template = """\
For the following text, extract the following information:

gift: Was the item purchased as a gift for someone else? \
Answer True if yes, False if not or unknown.

delivery_days: How many days did it take for the product \
to arrive? If this information is not found, output -1.

price_value: Extract any sentences about the value or price,\
and output them as a comma separated Python list.

Format the output as JSON with the following keys:
gift
delivery_days
price_value

text: {text}
"""

In [17]:
# Create prompt template
prompt_template = ChatPromptTemplate.from_template(review_template)
print(prompt_template)

input_variables=['text'] output_parser=None partial_variables={} messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['text'], output_parser=None, partial_variables={}, template='For the following text, extract the following information:\n\ngift: Was the item purchased as a gift for someone else? Answer True if yes, False if not or unknown.\n\ndelivery_days: How many days did it take for the product to arrive? If this information is not found, output -1.\n\nprice_value: Extract any sentences about the value or price,and output them as a comma separated Python list.\n\nFormat the output as JSON with the following keys:\ngift\ndelivery_days\nprice_value\n\ntext: {text}\n', template_format='f-string', validate_template=True), additional_kwargs={})]


In [18]:
# Query the LLM
messages = prompt_template.format_messages(text=customer_review)
chat = ChatOpenAI(temperature=0.0)
#response = chat(messages)
#print(response.content)

In [19]:
# You will get an error by running this line of code 
# because'gift' is not a dictionary
# 'gift' is a string
#response.content.get('gift')

In [20]:
# Define a set of schemas to extract the information from the customer review
# NOTE(review): each description is a regular string continued with backslashes, so the
# indentation of the continuation lines becomes part of the description text; the long
# runs of spaces are visible in the printed format instructions.
gift_schema = ResponseSchema(name="gift",
                             description="Was the item purchased\
                             as a gift for someone else? \
                             Answer True if yes,\
                             False if not or unknown.")
delivery_days_schema = ResponseSchema(name="delivery_days",
                                      description="How many days\
                                      did it take for the product\
                                      to arrive? If this \
                                      information is not found,\
                                      output -1.")
price_value_schema = ResponseSchema(name="price_value",
                                    description="Extract any\
                                    sentences about the value or \
                                    price, and output them as a \
                                    comma separated Python list.")

# Collect the three schemas in one list
response_schemas = [gift_schema, 
                    delivery_days_schema,
                    price_value_schema]

In [21]:
# Instantiate the Output Parser from the above define schemas
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)

In [22]:
# Retrieve the instructions that the prompt will pass to the LLM so that its output can be parsed
format_instructions = output_parser.get_format_instructions()
print(format_instructions)

The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```json" and "```":

```json
{
	"gift": string  // Was the item purchased                             as a gift for someone else?                              Answer True if yes,                             False if not or unknown.
	"delivery_days": string  // How many days                                      did it take for the product                                      to arrive? If this                                       information is not found,                                      output -1.
	"price_value": string  // Extract any                                    sentences about the value or                                     price, and output them as a                                     comma separated Python list.
}
```


In [23]:
# Define new prompt
# Same extraction prompt as before, but the output format is injected through
# {format_instructions} instead of being described inline.
# Each backslash-continued line ends with a space so that the joined words stay
# separated ("product to arrive", "price, and output").
review_template_2 = """\
For the following text, extract the following information:

gift: Was the item purchased as a gift for someone else? \
Answer True if yes, False if not or unknown.

delivery_days: How many days did it take for the product \
to arrive? If this information is not found, output -1.

price_value: Extract any sentences about the value or price, \
and output them as a comma separated Python list.

text: {text}

{format_instructions}
"""

# Define new prompt template
prompt = ChatPromptTemplate.from_template(template=review_template_2)

# Create a message with the format instruction
messages = prompt.format_messages(text=customer_review, 
                                format_instructions=format_instructions)

In [24]:
# Query the LLM
#response = langchain_chat(messages)

In [25]:
# Parse the output
#output_dict = output_parser.parse(response.content)

# Memory

## Conversation Buffer Memory

It helps to feed to the LLM the history of the current conversation.

In [26]:
# Define an Open AI Chat
llm = ChatOpenAI(temperature=0.0)

# Create a Conversation Memory
memory = ConversationBufferMemory()

# Create a Conversation that allows to interact with the LLM while keeping the memory of the conversation
conversation = ConversationChain(
    llm=llm, 
    memory=memory,
    verbose=True
)

In [27]:
# First prompt
#conversation.predict(input="Hi, my name is Simone")

In [28]:
# Intermediate prompt
#conversation.predict(input="What is 1+1?")

In [29]:
# Final prompt (it remembers my name)
#conversation.predict(input="What is my name?")

By setting `verbose=True` the ConversationChain prints the full prompt it sends on every call. This makes it visible that the memory feeds the whole conversation history to the LLM as part of each new prompt.

In [30]:
#print(memory.buffer)

The above object is keeping track of the conversation history.

In [31]:
# It is possible to restore the conversation into another ConversationBufferMemory
memory = ConversationBufferMemory()

# Restore
memory.save_context({"input": "Hi"}, 
                    {"output": "What's up"})

print(memory.buffer)

Human: Hi
AI: What's up


In [32]:
# Top up
memory.save_context({"input": "Not much, just hanging"}, 
                    {"output": "Cool"})

In [33]:
print(memory.buffer)

Human: Hi
AI: What's up
Human: Not much, just hanging
AI: Cool


## Conversation Buffer Window Memory

Sending the whole conversation history to the LLM every time makes the prompt grow quite long in terms of tokens (and thus expensive). The Conversation Buffer Window Memory keeps just a subset of the whole conversation: the last `k` interactions.

In [34]:
# Instantiate a Conversation Window of one history
memory = ConversationBufferWindowMemory(k=1)               

In [35]:
# Set history
memory.save_context({"input": "Hi"},
                    {"output": "What's up"})
memory.save_context({"input": "Not much, just hanging"},
                    {"output": "Cool"})

In [36]:
# Only last interaction is kept
memory.load_memory_variables({})

{'history': 'Human: Not much, just hanging\nAI: Cool'}

## Conversation Token Buffer Memory

It keeps only the most recent part of the conversation that fits within a token budget (`max_token_limit`).

In [37]:
# Setup the conversation token buffer
memory = ConversationTokenBufferMemory(llm=llm, max_token_limit=30)

# Set history
memory.save_context({"input": "AI is what?!"},
                    {"output": "Amazing!"})
memory.save_context({"input": "Backpropagation is what?"},
                    {"output": "Beautiful!"})
memory.save_context({"input": "Chatbots are what?"}, 
                    {"output": "Charming!"})

In [38]:
memory.load_memory_variables({})

{'history': 'AI: Beautiful!\nHuman: Chatbots are what?\nAI: Charming!'}

## Conversation Summary Memory

It uses the LLM to write a summary of the conversation so far!

In [42]:
# create a long string
schedule = "There is a meeting at 8am with your product team. \
You will need your powerpoint presentation prepared. \
9am-12pm have time to work on your LangChain \
project which will go quickly because Langchain is such a powerful tool. \
At Noon, lunch at the italian resturant with a customer who is driving \
from over an hour away to meet you to understand the latest in AI. \
Be sure to bring your laptop to show the latest LLM demo."

# Define the Conversation Summary
memory = ConversationSummaryBufferMemory(llm=llm, 
                                         max_token_limit=100)

# Set history
memory.save_context({"input": "Hello"}, {"output": "What's up"})
memory.save_context({"input": "Not much, just hanging"},
                    {"output": "Cool"})
# memory.save_context({"input": "What is on the schedule today?"}, 
#                     {"output": f"{schedule}"})

# Chains

A chain is a combination of a LLM with a generic input prompt. It is possible to combine input/output sequence of LLMs prompts and related outputs.

The idea is to combine multiple chains where the output of one is the input of the next one.


**Types:**
- Simple Sequential Chain: Single input/output
- Sequential Chain: Multiple inputs/outputs

In [43]:
# Define LLM
llm = ChatOpenAI(temperature=0.9)

In [44]:
# Define prompt
# Adjacent string literals are used instead of a backslash continuation, so the
# indentation of the source does not end up inside the prompt text.
prompt = ChatPromptTemplate.from_template(
    "What is the best name to describe "
    "a company that makes {product}?"
)

In [45]:
prompt.input_variables

['product']

In [46]:
# Chain LLM and Prompt
chain = LLMChain(llm=llm, prompt=prompt)

In [47]:
# Execute the chain: pass the prompt to the LLM
product = "Queen Size Sheet Set"
#chain.run(product) # Output: "Royal Comfort Linens"

## Simple Sequential Chains

The chain passes the single output of one Prompt + LLM chain as the single input of the next chain.

In [48]:
# Define LLM
llm = ChatOpenAI(temperature=0.9)

# Define prompt template for first chain
# Adjacent string literals keep the source indentation out of the prompt text
# (a backslash continuation inside the literal would include it).
first_prompt = ChatPromptTemplate.from_template(
    "What is the best name to describe "
    "a company that makes {product}?"
)

# First chain: {product} -> company name
chain_one = LLMChain(llm=llm, prompt=first_prompt)


# Define prompt template for second chain
second_prompt = ChatPromptTemplate.from_template(
    "Write a 20 words description for the following "
    "company:{company_name}"
)

# Chain 2: {company_name} -> short description
chain_two = LLMChain(llm=llm, prompt=second_prompt)

In [49]:
# Define Simple chain of chain_one + chain_two
overall_simple_chain = SimpleSequentialChain(chains=[chain_one, chain_two],
                                             verbose=True)

In [50]:
#overall_simple_chain.run(product)

> Entering new SimpleSequentialChain chain...
One possible name to describe a company that makes Queen Size Sheet Sets could be "RoyalRest Linens" or "MajesticSleep Bedding". These names evoke a sense of luxury and elegance that align with the product being offered.
"RoyalRest Linens" and "MajesticSleep Bedding" are companies specializing in luxurious and elegant Queen Size Sheet Sets.

> Finished chain.
'"RoyalRest Linens" and "MajesticSleep Bedding" are companies specializing in luxurious and elegant Queen Size Sheet Sets.'

## Sequential Chains

It combines multiple chains together.

In [51]:
llm = ChatOpenAI(temperature=0.9)

# prompt template 1: translate to english
first_prompt = ChatPromptTemplate.from_template(
    "Translate the following review to english:"
    "\n\n{Review}"
)
# chain 1: input= Review and output= English_Review
chain_one = LLMChain(llm=llm, prompt=first_prompt, 
                     output_key="English_Review"
                    )


In [52]:
second_prompt = ChatPromptTemplate.from_template(
    "Can you summarize the following review in 1 sentence:"
    "\n\n{English_Review}"
)
# chain 2: input= English_Review and output= summary
chain_two = LLMChain(llm=llm, prompt=second_prompt, 
                     output_key="summary"
                    )


In [53]:
# prompt template 3: ask which language the original review is written in
third_prompt = ChatPromptTemplate.from_template(
    "What language is the following review:"
    "\n\n{Review}"
)
# chain 3: input= Review and output= language
chain_three = LLMChain(
    llm=llm,
    prompt=third_prompt,
    output_key="language",
)


In [54]:

# prompt template 4: follow up message
fourth_prompt = ChatPromptTemplate.from_template(
    "Write a follow up response to the following "
    "summary in the specified language:"
    "\n\nSummary: {summary}\n\nLanguage: {language}"
)
# chain 4: input= summary, language and output= followup_message
chain_four = LLMChain(llm=llm, prompt=fourth_prompt,
                      output_key="followup_message"
                     )


In [55]:
# overall_chain: input= Review 
# and output= English_Review,summary, followup_message
overall_chain = SequentialChain(
    chains=[chain_one, chain_two, chain_three, chain_four],
    input_variables=["Review"],
    output_variables=["English_Review", "summary","followup_message"],
    verbose=True
)

In [56]:
# Some sort of review
# overall_chain(review)

## Router Chains

Depending on the input, it routes the request to the one chain, among several destination chains, that is best suited to handle it.

In [57]:
# Define different prompt templates for different topics
# Each template ends with the {input} placeholder that receives the question.
# A trailing backslash joins the next source line onto the same line of text, so every
# continued line ends with a space to keep the neighbouring words separated.
physics_template = """You are a very smart physics professor. \
You are great at answering questions about physics in a concise \
and easy to understand manner. \
When you don't know the answer to a question you admit \
that you don't know.

Here is a question:
{input}"""


math_template = """You are a very good mathematician. \
You are great at answering math questions. \
You are so good because you are able to break down \
hard problems into their component parts, 
answer the component parts, and then put them together \
to answer the broader question.

Here is a question:
{input}"""

history_template = """You are a very good historian. \
You have an excellent knowledge of and understanding of people, \
events and contexts from a range of historical periods. \
You have the ability to think, reflect, debate, discuss and \
evaluate the past. You have a respect for historical evidence \
and the ability to make use of it to support your explanations \
and judgements.

Here is a question:
{input}"""


computerscience_template = """ You are a successful computer scientist. \
You have a passion for creativity, collaboration, \
forward-thinking, confidence, strong problem-solving capabilities, \
understanding of theories and algorithms, and excellent communication \
skills. You are great at answering coding questions. \
You are so good because you know how to solve a problem by \
describing the solution in imperative steps \
that a machine can easily interpret and you know how to \
choose a solution that has a good balance between \
time complexity and space complexity. 

Here is a question:
{input}"""

In [58]:
# Define the routes
prompt_infos = [
    {
        "name": "physics", 
        "description": "Good for answering questions about physics", 
        "prompt_template": physics_template
    },
    {
        "name": "math", 
        "description": "Good for answering math questions", 
        "prompt_template": math_template
    },
    {
        "name": "History", 
        "description": "Good for answering history questions", 
        "prompt_template": history_template
    },
    {
        "name": "computer science", 
        "description": "Good for answering computer science questions", 
        "prompt_template": computerscience_template
    }
]

In [59]:
# Define the LLM
llm = ChatOpenAI(temperature=0)

In [60]:
# Initialize dictionary for destination chains (route name -> LLMChain)
destination_chains = {}

# Fetch the routes
# NOTE: 'name', 'prompt_template', 'prompt' and 'chain' are module-level names here; they
# replace any earlier variables with the same names and stay bound to the last route's
# values once the loop has finished.
for p_info in prompt_infos:
    
    # Extract name
    name = p_info["name"]
    
    # Extract the prompt template
    prompt_template = p_info["prompt_template"]
    
    # Construct the prompt template
    prompt = ChatPromptTemplate.from_template(template=prompt_template)
    
    # Let's create one destination chain for each prompt
    chain = LLMChain(llm=llm, prompt=prompt)
    
    # Add the destination chain to the dictionary
    destination_chains[name] = chain  
    
# One "name: description" line per route, joined into a single newline-separated string
destinations = [f"{p['name']}: {p['description']}" for p in prompt_infos]
destinations_str = "\n".join(destinations)

In [61]:
destinations

['physics: Good for answering questions about physics',
 'math: Good for answering math questions',
 'History: Good for answering history questions',
 'computer science: Good for answering computer science questions']

In [62]:
# Define the default chain
default_prompt = ChatPromptTemplate.from_template("{input}")
default_chain = LLMChain(llm=llm, prompt=default_prompt)

In [63]:
# Define the prompt template to route the input to the different chains
# The template is formatted twice: str.format() fills {destinations} first, which turns
# '{{{{' / '}}}}' into '{{' / '}}' and '{{input}}' into '{input}' for the prompt template
# built from the result.
# Backslash-continued lines end with a space so the joined words stay separated, and the
# literal backslashes in the JSON sketch are written as '\\' (a bare '\ ' is an invalid
# escape sequence in a non-raw string).
MULTI_PROMPT_ROUTER_TEMPLATE = """Given a raw text input to a \
language model select the model prompt best suited for the input. \
You will be given the names of the available prompts and a \
description of what the prompt is best suited for. \
You may also revise the original input if you think that revising \
it will ultimately lead to a better response from the language model.

<< FORMATTING >>
Return a markdown code snippet with a JSON object formatted to look like:
```json
{{{{
    "destination": string \\ name of the prompt to use or "DEFAULT"
    "next_inputs": string \\ a potentially modified version of the original input
}}}}
```

REMEMBER: "destination" MUST be one of the candidate prompt \
names specified below OR it can be "DEFAULT" if the input is not \
well suited for any of the candidate prompts.
REMEMBER: "next_inputs" can just be the original input \
if you don't think any modifications are needed.

<< CANDIDATE PROMPTS >>
{destinations}

<< INPUT >>
{{input}}

<< OUTPUT (remember to include the ```json)>>"""

In [64]:
# Fill the list of candidate destinations into the routing template
router_template = MULTI_PROMPT_ROUTER_TEMPLATE.format(destinations=destinations_str)

# Create the prompt template for routing; its output is parsed by RouterOutputParser
router_prompt = PromptTemplate(
    input_variables=["input"],
    template=router_template,
    output_parser=RouterOutputParser(),
)

# Create the router chain that will route the input to the best suited chain
router_chain = LLMRouterChain.from_llm(llm=llm, prompt=router_prompt)

In [65]:
# Assemble the multi-prompt chain from the router, the destination chains and the
# default chain. Without this step 'chain' still refers to the last destination
# chain created while building 'destination_chains', so no routing would happen.
chain = MultiPromptChain(
    router_chain=router_chain,
    destination_chains=destination_chains,
    default_chain=default_chain,
    verbose=True,
)

#chain.run("What is black body radiation?") # Output will be routed to Physics

# Question and Answer

## Vector Store Index

It allows implementing a Q&A application on top of an LLM.

In [71]:
# Instantiate the loader for the .CSV file
# loader = CSVLoader(file_path='csv_file_path')

In [72]:
# Define a Vector Store that will store embeddings of the columns inside the .CSV loaded in 'loader'
# NOTE: Embeddings are easier to be queried from a LLM, since there are less tokens that an actual text
# index = VectorstoreIndexCreator(
#     vectorstore_cls=DocArrayInMemorySearch
# ).from_loaders([loader])

In [73]:
# Define the query
query ="Please list all your shirts with sun protection \
in a table in markdown and summarize each one."

In [74]:
# Retrieve LLM response
# response = index.query(query)

## Open AI Embeddings

It returns the embedding of the given text.

In [75]:
# Embedder client
#embeddings = OpenAIEmbeddings()

In [76]:
# Retrieve the embedding
#embed = embeddings.embed_query("Hi my name is Harrison")

### Embeddings DB

In [77]:
# Load all the docs from the .CSV file
#docs = loader.load()

In [None]:
# Create the DB
# db = DocArrayInMemorySearch.from_documents(
#     docs, 
#     embeddings
# )

In [78]:
# Define the query
query = "Please suggest a shirt with sunblocking"

In [79]:
# Look for similar documents
#docs = db.similarity_search(query)

In [80]:
# In order to incorporate the returned docs into a more suitable response, let's use an LLM

In [81]:
# Define a retriever to query the Embeddings DB
# retriever = db.as_retriever()

In [82]:
# Define the LLM
#llm = ChatOpenAI(temperature = 0.0)

In [83]:
# Prepare prompt
#qdocs = "".join([docs[i].page_content for i in range(len(docs))])

In [84]:
# Query the LLM
# response = llm.call_as_llm(f"{qdocs} Question: Please list all your \
# shirts with sun protection in a table in markdown and summarize each one.") 

In [85]:
# Create a chain from the retriever to the LLM
# qa_stuff = RetrievalQA.from_chain_type(
#     llm=llm, 
#     chain_type="stuff", 
#     retriever=retriever, 
#     verbose=True
# )

In [86]:
query =  "Please list all your shirts with sun protection in a table \
in markdown and summarize each one."

In [87]:
#response = qa_stuff.run(query)

# Agents

## Zero Shot

First we need to install wikipedia as the source of information

``` bash
pip install -U wikipedia
```

Let's define the LLM (the reasoning engine that will guide the Agent) and the tools the agent will use

``` python
# Define the LLM
llm = ChatOpenAI(temperature=0)

# Load the tools the Agent will use
tools = load_tools(["llm-math","wikipedia"], llm=llm)
```

Finally define the Agent with the LLM and the Tools:

``` python
# Define the agent
agent= initialize_agent(
    tools, 
    llm, 
    agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION,
    handle_parsing_errors=True,
    verbose = True)

# Query the agent
agent("What is the 25% of 300?")
```

The LLM should eventually recognize that it does not know the answer and needs to rely on one of its tools (llm-math).

It is also possible to define your own tools or source of information used by the Agent.

## Python

``` python
# Define agent
agent = create_python_agent(
    llm,
    tool=PythonREPLTool(),
    verbose=True
)

customer_list = [["Harrison", "Chase"], 
                 ["Lang", "Chain"],
                 ["Dolly", "Too"],
                 ["Elle", "Elem"], 
                 ["Geoff","Fusion"], 
                 ["Trance","Former"],
                 ["Jen","Ayai"]
                ]
agent.run(f"""Sort these customers by \
last name and then first name \
and print the output: {customer_list}""") 
```