In [1]:
# https://python.langchain.com/docs/use_cases/sql/agents

# Prepare SQL

In [1]:
%%writefile create_db.sql

CREATE TABLE Orders (
    Id INT,
    ProductId INT NOT NULL,
    CustomerId INT NOT NULL
);

CREATE TABLE Products (
    Id INT,
    Name CHAR(255)
);

CREATE TABLE Customers (
    Id INT,
    Name CHAR(255),
    CountryId INT NOT NULL
);

CREATE TABLE Countries (
    Id INT,
    Name CHAR(255)
);

INSERT INTO Orders(Id, ProductId, CustomerId) VALUES(1, 1, 1);
INSERT INTO Orders(Id, ProductId, CustomerId) VALUES(2, 1, 2);
INSERT INTO Orders(Id, ProductId, CustomerId) VALUES(3, 1, 3);
INSERT INTO Orders(Id, ProductId, CustomerId) VALUES(4, 2, 1);
INSERT INTO Orders(Id, ProductId, CustomerId) VALUES(5, 2, 3);
INSERT INTO Orders(Id, ProductId, CustomerId) VALUES(6, 2, 4);
INSERT INTO Orders(Id, ProductId, CustomerId) VALUES(7, 3, 3);
INSERT INTO Orders(Id, ProductId, CustomerId) VALUES(8, 3, 4);

INSERT INTO Products(Id, Name) VALUES(1, 'pixel6a');
INSERT INTO Products(Id, Name) VALUES(2, 'iphone16');
INSERT INTO Products(Id, Name) VALUES(3, 'xperia1');

INSERT INTO Countries(Id, Name) VALUES(1, 'Japan');
INSERT INTO Countries(Id, Name) VALUES(2, 'US');
INSERT INTO Countries(Id, Name) VALUES(3, 'China');

INSERT INTO Customers(Id, Name, CountryId) VALUES(1, 'John', 2);
INSERT INTO Customers(Id, Name, CountryId) VALUES(2, 'Taro', 1);
INSERT INTO Customers(Id, Name, CountryId) VALUES(3, 'Ben', 2);
INSERT INTO Customers(Id, Name, CountryId) VALUES(4, 'Jing', 3);
INSERT INTO Customers(Id, Name, CountryId) VALUES(5, 'Takashi', 1);

Overwriting create_db.sql


In [2]:
# https://database.guide/2-sample-databases-sqlite/
!rm orders.sqlite
!sqlite3 orders.sqlite ".read create_db.sql"

In [3]:
import sqlite3

with sqlite3.connect('orders.sqlite') as con:
    response = con.execute("""
    SELECT o.Id, p.Name, c.Name, co.Name from Orders as o
    INNER JOIN Products as p ON o.ProductId == p.Id
    INNER JOIN Customers as c ON o.CustomerId == c.Id
    INNER JOIN Countries as co ON c.CountryId == co.Id
    """)
    results = response.fetchall()
results

[(1, 'pixel6a', 'John', 'US'),
 (2, 'pixel6a', 'Taro', 'Japan'),
 (3, 'pixel6a', 'Ben', 'US'),
 (4, 'iphone16', 'John', 'US'),
 (5, 'iphone16', 'Ben', 'US'),
 (6, 'iphone16', 'Jing', 'China'),
 (7, 'xperia1', 'Ben', 'US'),
 (8, 'xperia1', 'Jing', 'China')]

# Prepare LLM model

In [4]:
import phoenix as px

session = px.launch_app()

from phoenix.trace.langchain import LangChainInstrumentor
LangChainInstrumentor().instrument()

  from .autonotebook import tqdm as notebook_tqdm


🌍 To view the Phoenix app in your browser, visit http://localhost:6006/
📺 To view the Phoenix app in a notebook, run `px.active_session().view()`
📖 For more information on how to use Phoenix, check out https://docs.arize.com/phoenix


In [5]:
from langchain_community.agent_toolkits import create_sql_agent
from langchain_community.llms import Ollama

from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

llm = Ollama(
    # model="llama2:13b-chat",
    model="codellama:7b",
    # callback_manager=callback_manager,
    verbose=True,
)

# Construct agent tool

In [45]:
from langchain.tools import tool
import json

@tool
def get_order_by_product_names(product_names: str) -> str:
    """
    This tool is to fetch the orders with customer name by one or more product names.
    Inputs should be comma-separated.
    """
    product_names = [name.strip() for name in product_names.split(',')]
    product_names_quoted = [f"'{name}'" for name in product_names]
    product_names_for_in_clause = f"({', '.join(product_names_quoted)})"
    with sqlite3.connect('orders.sqlite') as con:
        response = con.execute(f"""
            SELECT o.Id, p.Name, c.Name from Orders as o
            INNER JOIN Products as p ON o.ProductId == p.Id
            INNER JOIN Customers as c ON o.CustomerId == c.Id
            WHERE p.Name in {product_names_for_in_clause}
        """)
        results = response.fetchall()
        print(f'function called: {results}')
        if results:
            return f"""
{'\n'.join([f'* OrderId: {r[0]}, ProductName: {r[1]}, CustomerName: {r[2]}' for r in results])}
"""
        return "no data"

@tool
def get_customers_by_country_names(country_names: str) -> str:
    """
    This tool is to fetch the customers by one or more country names
    Inputs should be comma-separated.
    """
    country_names = [name.strip() for name in country_names.split(',')]
    country_names_quoted = [f"'{name}'" for name in country_names]
    country_names_for_in_clause = f"({', '.join(country_names_quoted)})"
    with sqlite3.connect('orders.sqlite') as con:
        response = con.execute(f"""
            SELECT c.Name, co.Name from Customers as c
            INNER JOIN Countries as co ON c.CountryId == co.Id
            WHERE co.Name in {country_names_for_in_clause}
        """)
        results = response.fetchall()
        print(f'function called: {results}')
        if results:
            return f"""
{'\n'.join([f'* CustomerName: {r[0]}, CountryName: {r[1]}' for r in results])}
"""
        return "no data"

for attr in ['name', 'description', 'args']:
    print(getattr(get_order_by_product_names, attr))

print(get_order_by_product_names.invoke({'product_names': 'pixel6a, iphone16'}))
print(get_customers_by_country_names.invoke({'country_names': 'Japan, US'}))

tools = [get_order_by_product_names, get_customers_by_country_names]

get_order_by_product_names
get_order_by_product_names(product_names: str) -> str - This tool is to fetch the orders with customer name by one or more product names.
    Inputs should be comma-separated.
{'product_names': {'title': 'Product Names', 'type': 'string'}}
function called: [(1, 'pixel6a', 'John'), (2, 'pixel6a', 'Taro'), (3, 'pixel6a', 'Ben'), (4, 'iphone16', 'John'), (5, 'iphone16', 'Ben'), (6, 'iphone16', 'Jing')]

* OrderId: 1, ProductName: pixel6a, CustomerName: John
* OrderId: 2, ProductName: pixel6a, CustomerName: Taro
* OrderId: 3, ProductName: pixel6a, CustomerName: Ben
* OrderId: 4, ProductName: iphone16, CustomerName: John
* OrderId: 5, ProductName: iphone16, CustomerName: Ben
* OrderId: 6, ProductName: iphone16, CustomerName: Jing

function called: [('Takashi', 'Japan'), ('Taro', 'Japan'), ('Ben', 'US'), ('John', 'US')]

* CustomerName: Takashi, CountryName: Japan
* CustomerName: Taro, CountryName: Japan
* CustomerName: Ben, CountryName: US
* CustomerName: John, Co

# Construct agent

In [13]:
from langchain.agents import initialize_agent, AgentType, structured_chat, AgentExecutor

FORMAT_INSTRUCTIONS = structured_chat.prompt.FORMAT_INSTRUCTIONS + '''
You can refer to the following example of the previous conversation with human.

Example 1:
Human: What is the square root of 4?
Thought: I need to calculate the square root of 4
Action:
```
{{{{
    "action": "calculate_square_root",
    "action_input": {{{{"number": 4}}}}
    
}}}}
```
Observation: {{{{"answer": 2}}}}
Thought: I know what to respond
Action:
```
{{{{
  "action": "Final Answer",
  "action_input": "The square root of 4 is 2"
}}}}
```
<</SYS>>
'''

PREFIX = "[INST]<<SYS>> You are smart that selects a function from list of functions based on user queries.\
Run only one function tool at a time or in one query.<</SYS>>\n"

agent_executor = initialize_agent(
    tools=tools,
    llm=llm,
    agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
    # handle_parsing_errors=True,
    # memory=memory,
    agent_kwargs={
        'prefix': PREFIX, 
    #     'prefix': '[INST]<<SYS>>' + structured_chat.prompt.PREFIX, # + ' DO NOT modify a quoted text in a question.',
        'suffix': structured_chat.prompt.SUFFIX + '\n\n[/INST]',
        'human_message_template': '{input}\n\n{agent_scratchpad}',
        'format_instructions': FORMAT_INSTRUCTIONS
    }
)

  warn_deprecated(


- https://medium.com/@sandyshah1990/langchain-agents-and-function-calling-using-llama-2-locally-29ce057e4789
- https://github.com/pinecone-io/examples/blob/master/learn/generation/llm-field-guide/llama-2/llama-2-70b-chat-agent.ipynb

In [39]:
agent_response = agent_executor.invoke("Which customers ordered the product 'pixel6a' and 'iphone16'?")
agent_response



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: {
"action": "get_order_by_product_names",
"action_input": {"product_names": ["pixel6a", "iphone16"]}
}

[0m

[1m> Finished chain.[0m


{'input': "Which customers ordered the product 'pixel6a' and 'iphone16'?",
 'output': 'Action: {\n"action": "get_order_by_product_names",\n"action_input": {"product_names": ["pixel6a", "iphone16"]}\n}\n\n'}

# Agent with memory

In [126]:
from langchain.memory import ConversationBufferWindowMemory
memory = ConversationBufferWindowMemory(
    memory_key="chat_history", k=5, return_messages=True, output_key="output"
)

agent_executor_with_memory = initialize_agent(
    tools=tools,
    llm=llm,
    agent=AgentType.CHAT_CONVERSATIONAL_REACT_DESCRIPTION, # .STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION,
    verbose=False,
    handle_parsing_errors=True,
    memory=memory,
    agent_kwargs={
        'prefix': PREFIX,
    #     'prefix': '[INST]<<SYS>>' + structured_chat.prompt.PREFIX, # + ' DO NOT modify a quoted text in a question.',
        'suffix': structured_chat.prompt.SUFFIX + '\n\n[/INST]',
        'human_message_template': '{input}\n\n{agent_scratchpad}',
        'format_instructions': FORMAT_INSTRUCTIONS
    }
)

In [127]:
agent_executor_with_memory.invoke("Which customers are from 'Japan'?")



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Assistant is a large language model trained by OpenAI.

Assistant is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, Assistant is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.

Assistant is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. Additionally, Assistant is able to generate its own text based on the input it receives, allowing it to engage in discussions and provide explanations and descriptions on a wide range of

KeyboardInterrupt: 

In [121]:
agent_executor_with_memory.invoke("What products did those customers order?")



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: [INST]<<SYS>> You are smart that selects a function from list of functions based on user queries.Run only one function tool at a time or in one query.<</SYS>>


get_order_by_product_name: get_order_by_product_name(product_names: list[str]) -> str - This tool is to fetch the orders with customer name by product name, args: {'product_names': {'title': 'Product Names', 'type': 'array', 'items': {'type': 'string'}}}
get_customers_by_country_names: get_customers_by_country_names(country_names: list[str]) -> str - This tool is to fetch the customers by one or more country names, args: {'country_names': {'title': 'Country Names', 'type': 'array', 'items': {'type': 'string'}}}

Use a json blob to specify a tool by providing an action key (tool name) and an action_input key (tool input).

Valid "action" values: "Final Answer" or get_order_by_product_name, get_customers_by_country_names

Provide only ONE act

{'input': 'What products did those customers order?',
 'chat_history': [HumanMessage(content="Which customers are from 'Japan'?"),
  AIMessage(content='The customers from Japan are Takashi and Taro.')],
 'output': 'Action:\n{\n"action": "get_order_by_product_name",\n"action_input": { "product_names": ["Product 1", "Product 2"] }\n}\n\n'}

In [46]:
from langchain.agents import AgentExecutor, ZeroShotAgent, create_react_agent
from langchain.memory import ConversationBufferMemory
from langchain.chains import LLMChain

prefix = """[INST]<<SYS>>Have a conversation with a human, answering the following questions as best you can. You have access to the following tools:"""
suffix = """<</SYS>>
Begin!"

{chat_history}
Question: {input}
{agent_scratchpad}[/INST]"""

# prompt = ZeroShotAgent.create_prompt(
#     tools,
#     prefix=prefix,
#     suffix=suffix,
#     input_variables=["input", "chat_history", "agent_scratchpad"],
# )
# llm_chain = LLMChain(llm=llm, prompt=prompt)
# agent = ZeroShotAgent(llm_chain=llm_chain, tools=tools, verbose=True)

memory = ConversationBufferMemory(memory_key="chat_history")

from langchain_core.prompts import PromptTemplate

template = '''[INST]<<SYS>>Answer the following questions as best you can. You have access to the following tools:

{tools}

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question
<</SYS>>

Begin!

Question: {input}
Thought:{agent_scratchpad}[/INST/'''

prompt = PromptTemplate.from_template(template)
agent = create_react_agent(llm, tools, prompt)
agent_chain = AgentExecutor.from_agent_and_tools(
    agent=agent, tools=tools, verbose=True, memory=memory
)

In [48]:
agent_chain.invoke({"input": "who are from 'Japan'?"})



[1m> Entering new AgentExecutor chain...[0m
tool="get_customers_by_country_names('Japan')" tool_input="'Japan'" log="\nThought: Let's use the tool `get_customers_by_country_names` to fetch customers from Japan.\nAction: get_customers_by_country_names('Japan')\nAction Input: 'Japan'"
[32;1m[1;3m
Thought: Let's use the tool `get_customers_by_country_names` to fetch customers from Japan.
Action: get_customers_by_country_names('Japan')
Action Input: 'Japan'[0mget_customers_by_country_names('Japan') is not a valid tool, try one of [get_order_by_product_names, get_customers_by_country_names].tool="get_customers_by_country_names('Japan')" tool_input="'Japan'" log="I apologize for the confusion. Let me try again with a different approach.\n\nQuestion: who are from 'Japan'?\nThought: Since we have a tool `get_customers_by_country_names`, I will use it to fetch customers from Japan.\nAction: get_customers_by_country_names('Japan')\nAction Input: 'Japan'"
[32;1m[1;3mI apologize for the c

KeyboardInterrupt: 

# SQL Chain
https://github.com/langchain-ai/langchain/blob/master/cookbook/LLaMA2_sql_chat.ipynb

In [68]:
from langchain_community.utilities import SQLDatabase

from langchain.globals import set_debug
set_debug(False)

db = SQLDatabase.from_uri("sqlite:///orders.sqlite")

def get_schema(_):
    return db.get_table_info()


def run_query(query):
    return db.run(query)

print(db.dialect)
print(db.get_usable_table_names())
db.run("SELECT * FROM Products LIMIT 10;")

db.run("""SELECT c.Name 
FROM Customers AS c 
INNER JOIN Orders AS o ON c.Id = o.CustomerId 
INNER JOIN Products AS p ON p.Id = o.ProductId 
WHERE p.Name = 'pixel6a';
""")

sqlite
['Customers', 'Orders', 'Products']


"[('John',), ('Taro',), ('Ben',)]"

In [32]:
# Prompt
from langchain_core.prompts import ChatPromptTemplate

# Update the template based on the type of SQL Database like MySQL, Microsoft SQL Server and so on
template = """Based on the table schema below, write a SQL query that would answer the user's question:
{schema}

Question: {question}
SQL Query:"""
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "Given an input question, convert it to a SQL query without triple backticks. No pre-amble. Do not be ambiguous column name. Preserve table and column names."
        ),
        ("human", template),
    ]
)

# Chain to query
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

sql_response = (
    RunnablePassthrough.assign(schema=get_schema)
    | prompt
    | llm.bind(stop=["\nSQLResult:"])
    | StrOutputParser()
)

sql_response.invoke({"question": "Which customers ordered the product name 'pixel6a'?"})

'```\nSELECT c.Name AS "Customer Name" \nFROM Customers c, Orders o, Products p \nWHERE c.Id = o.CustomerId AND o.ProductId = p.Id AND p.Name LIKE \'%pixel6a%\';\n```'

In [44]:
# Chain to answer
template = """Based on the table schema below, question, sql query, and sql response, write a natural language response:
{schema}

Question: {question}
SQL Query: {query}
SQL Response: {response}"""
prompt_response = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "Given an input question, convert it to a SQL query without triple backticks. No pre-amble. Do not be ambiguous column name. Preserve table and column names."
        ),
        ("human", template),
    ]
)

def execute_query(query):
    result = db.run(query["query"].replace("```", ""))
    print(f"""
{query['query']}
{result}
""")
    return result

full_chain = (
    RunnablePassthrough.assign(query=sql_response)
    | RunnablePassthrough.assign(
        schema=get_schema,
        response=execute_query,  #lambda x: db.run(x["query"].replace("```", "")),
    )
    | prompt_response
    | llm
)

# res = full_chain.invoke({"question": "who (customer names) ordered the product 'pixel6a'?"})
res = full_chain.invoke({"question": "what product did John buy?"})
print(res)


SELECT p.Name
FROM Customers c
INNER JOIN Orders o ON c.Id = o.CustomerId
INNER JOIN Products p ON p.Id = o.ProductId
WHERE c.Name = 'John';
[('pixel6a',), ('iphone16',)]


Based on the provided table schema and SQL query, it appears that a customer with the name "John" has purchased two different products from the Orders table. The SQL response is a list of tuples containing the names of the products that John bought, which are 'pixel6a' and 'iphone16'.


Use pre-defined func

In [46]:
from langchain.chains.sql_database.query import create_sql_query_chain

chain = create_sql_query_chain(llm, db)
response = chain.invoke({"question": "what product did John buy?"})
response

'What product did John buy?\n\nSQLQuery:\nSELECT Products.Name\nFROM Customers, Orders, Products\nWHERE Customers.Id = Orders.CustomerId AND Orders.ProductId = Products.Id AND Customers.Name = "John";'

# LangGraph
- https://python.langchain.com/docs/langgraph
- https://gist.github.com/virattt/ba0b660cdcaf4161ca1e6e5d8b5de4f8

In [55]:
from langgraph.graph import END, MessageGraph


graph = MessageGraph()

graph.add_node("agent", llm)
graph.add_node("tools", execute_tools)

graph.add_edge("agent", END)

graph.set_entry_point("agent")

runnable = graph.compile()
runnable.get_graph().print_ascii()

+-----------+  
| __start__ |  
+-----------+  
      *        
      *        
      *        
  +-------+    
  | agent |    
  +-------+    
      *        
      *        
      *        
 +---------+   
 | __end__ |   
 +---------+   
