In [21]:
import sys
print(sys.executable)

/Users/arashkhajeh/.pyenv/versions/3.12.0/bin/python


# Trying a test database

In [82]:
from langchain_community.utilities import SQLDatabase
from pyprojroot import here
import warnings
warnings.filterwarnings("ignore")

In [83]:
db_path = str(here("Data")) + "/sqldb.db"
db = SQLDatabase.from_uri(f"sqlite:///{db_path}")

In [84]:
db

<langchain_community.utilities.sql_database.SQLDatabase at 0x1320f9280>

In [85]:
# validate the connection to the vectordb
print(db.dialect)
print(db.get_usable_table_names())
db.run("SELECT * FROM Artist LIMIT 10;")

sqlite
['Album', 'Artist', 'Customer', 'Employee', 'Genre', 'Invoice', 'InvoiceLine', 'MediaType', 'Playlist', 'PlaylistTrack', 'Track']


"[(1, 'AC/DC'), (2, 'Accept'), (3, 'Aerosmith'), (4, 'Alanis Morissette'), (5, 'Alice In Chains'), (6, 'Antônio Carlos Jobim'), (7, 'Apocalyptica'), (8, 'Audioslave'), (9, 'BackBeat'), (10, 'Billy Cobham')]"

In [86]:
from dotenv import load_dotenv
import os
print("Environment variables are loaded:", load_dotenv())

# Optional: print model name if you set it
print("Model name:", os.getenv("gpt_deployment_name"))  # e.g., "gpt-3.5-turbo"

Environment variables are loaded: True
Model name: gpt-3.5-turbo


In [87]:
from openai import OpenAI

messages = [
    {"role": "system", "content": str(
        "You are a helpful assistant"
    )},
    {"role": "user", "content": str("hello")}
]

client = OpenAI()

response = client.chat.completions.create(
    model=os.getenv("gpt_deployment_name"),
    messages=messages
)

print(response.choices[0].message.content)

Hello! How can I assist you today?


In [88]:
# SQL Query chain

# Load the LLM
from langchain.chat_models import ChatOpenAI

model_name = os.getenv("gpt_deployment_name")
openai_api_key = os.environ["OPENAI_API_KEY"]
llm = ChatOpenAI(model_name=model_name,
                 temperature=0,
                 openai_api_key = openai_api_key)

In [None]:
from langchain.chains import create_sql_query_chain

chain = create_sql_query_chain(llm, db)
response = chain.invoke({"question": "How many employees are there"})
print(response)

SELECT COUNT("EmployeeId") AS "TotalEmployees" FROM "Employee"


In [90]:
# execute the query
db.run(response)

'[(8,)]'

In [91]:
chain.get_prompts()[0].pretty_print()

You are a SQLite expert. Given an input question, first create a syntactically correct SQLite query to run, then look at the results of the query and return the answer to the input question.
Unless the user specifies in the question a specific number of examples to obtain, query for at most 5 results using the LIMIT clause as per SQLite. You can order the results to return the most informative data in the database.
Never query for all columns from a table. You must query only the columns that are needed to answer the question. Wrap each column name in double quotes (") to denote them as delimited identifiers.
Pay attention to use only the column names you can see in the tables below. Be careful to not query for columns that do not exist. Also, pay attention to which column is in which table.
Pay attention to use date('now') function to get the current date, if the question involves "today".

Use the following format:

Question: Question here
SQLQuery: SQL Query to run
SQLResult: Result

In [92]:
### **Add QuerySQLDataBaseTool to the chain**

In [93]:
from langchain_community.tools.sql_database.tool import QuerySQLDataBaseTool

write_query = create_sql_query_chain(llm, db)
execute_query = QuerySQLDataBaseTool(db=db)

chain = write_query | execute_query

chain.invoke({"question": "How many employees are there"})

'[(8,)]'

In [94]:
# Answer the question in a user friendly manner
from operator import itemgetter
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough

answer_prompt = PromptTemplate.from_template(
    """Given the following user question, corresponding SQL query, and SQL result, answer the user question.

Question: {question}
SQL Query: {query}
SQL Result: {result}
Answer: """
)

answer = answer_prompt | llm | StrOutputParser()
chain = (
    RunnablePassthrough.assign(query=write_query).assign(
        result=itemgetter("query") | execute_query
    )
    | answer
)

chain.invoke({"question": "How many employees are there"})


'There are a total of 8 employees.'

In [95]:
# Using SQL Agent
from langchain_community.agent_toolkits import create_sql_agent

agent_executor = create_sql_agent(llm, db=db, agent_type="openai-tools", verbose=True)

In [96]:
agent_executor.invoke(
    {
        "input": "List the total sales per country. Which country's customers spent the most?"
    }
)



[1m> Entering new SQL Agent Executor chain...[0m
[32;1m[1;3m
Invoking: `sql_db_list_tables` with `{}`


[0m[38;5;200m[1;3mAlbum, Artist, Customer, Employee, Genre, Invoice, InvoiceLine, MediaType, Playlist, PlaylistTrack, Track[0m[32;1m[1;3m
Invoking: `sql_db_schema` with `{'table_names': 'Customer, Invoice, InvoiceLine'}`


[0m[33;1m[1;3m
CREATE TABLE "Customer" (
	"CustomerId" INTEGER NOT NULL, 
	"FirstName" NVARCHAR(40) NOT NULL, 
	"LastName" NVARCHAR(20) NOT NULL, 
	"Company" NVARCHAR(80), 
	"Address" NVARCHAR(70), 
	"City" NVARCHAR(40), 
	"State" NVARCHAR(40), 
	"Country" NVARCHAR(40), 
	"PostalCode" NVARCHAR(10), 
	"Phone" NVARCHAR(24), 
	"Fax" NVARCHAR(24), 
	"Email" NVARCHAR(60) NOT NULL, 
	"SupportRepId" INTEGER, 
	PRIMARY KEY ("CustomerId"), 
	FOREIGN KEY("SupportRepId") REFERENCES "Employee" ("EmployeeId")
)

/*
3 rows from Customer table:
CustomerId	FirstName	LastName	Company	Address	City	State	Country	PostalCode	Phone	Fax	Email	SupportRepId
1	Luís	Gonçalves	

{'input': "List the total sales per country. Which country's customers spent the most?",
 'output': 'The total sales per country are as follows:\n\n1. USA: $523.06\n2. Canada: $303.96\n3. France: $195.10\n4. Brazil: $190.10\n5. Germany: $156.48\n6. United Kingdom: $112.86\n7. Czech Republic: $90.24\n8. Portugal: $77.24\n9. India: $75.26\n10. Chile: $46.62\n\nThe country whose customers spent the most is the USA with a total sales of $523.06.'}

In [97]:
agent_executor.invoke({"input": "Describe the playlisttrack table"})



[1m> Entering new SQL Agent Executor chain...[0m
[32;1m[1;3m
Invoking: `sql_db_list_tables` with `{}`


[0m[38;5;200m[1;3mAlbum, Artist, Customer, Employee, Genre, Invoice, InvoiceLine, MediaType, Playlist, PlaylistTrack, Track[0m[32;1m[1;3m
Invoking: `sql_db_schema` with `{'table_names': 'PlaylistTrack'}`


[0m[33;1m[1;3m
CREATE TABLE "PlaylistTrack" (
	"PlaylistId" INTEGER NOT NULL, 
	"TrackId" INTEGER NOT NULL, 
	PRIMARY KEY ("PlaylistId", "TrackId"), 
	FOREIGN KEY("TrackId") REFERENCES "Track" ("TrackId"), 
	FOREIGN KEY("PlaylistId") REFERENCES "Playlist" ("PlaylistId")
)

/*
3 rows from PlaylistTrack table:
PlaylistId	TrackId
1	3402
1	3389
1	3390
*/[0m[32;1m[1;3mThe `PlaylistTrack` table has the following columns:
- PlaylistId (INTEGER, NOT NULL)
- TrackId (INTEGER, NOT NULL)

It has a composite primary key on the columns PlaylistId and TrackId. Additionally, there are foreign key constraints on TrackId referencing the Track table's TrackId column, and on Playlis

{'input': 'Describe the playlisttrack table',
 'output': "The `PlaylistTrack` table has the following columns:\n- PlaylistId (INTEGER, NOT NULL)\n- TrackId (INTEGER, NOT NULL)\n\nIt has a composite primary key on the columns PlaylistId and TrackId. Additionally, there are foreign key constraints on TrackId referencing the Track table's TrackId column, and on PlaylistId referencing the Playlist table's PlaylistId column.\n\nHere are 3 sample rows from the `PlaylistTrack` table:\n- PlaylistId: 1, TrackId: 3402\n- PlaylistId: 1, TrackId: 3389\n- PlaylistId: 1, TrackId: 3390"}

# Try the comments_sampled database

In [142]:
from pyprojroot import here
from langchain_community.utilities import SQLDatabase
from sqlalchemy import create_engine
import pandas as pd

csv_file_path = str(here("Data")) + "/comments_sampled.csv"
df = pd.read_csv(csv_file_path)


db_path = str(here("Data")) + "/comments.db"
db_path = f"sqlite:///{db_path}"

engine = create_engine(db_path)
df.to_sql("comments", engine, index=False)


100

In [11]:
# validate the connection to the vectordb
from langchain_community.utilities import SQLDatabase
from pyprojroot import here

db_path = str(here("Data")) + "/comments.db"
db = SQLDatabase.from_uri(f"sqlite:///{db_path}")

print(db.dialect)
print(db.get_usable_table_names())
db.run("SELECT * FROM comments LIMIT 10;")

sqlite
['comments']


"[(0, 40203804, None, 'Crystal structures and magnetic properties of the BaAl4-type structure derivatives in Gd-Al-Ga system'), (1, 40017624, None, 'Structural Features and Magnetic Properties of Solid Solutions in the CuFe2O4-Li0.5Fe2.5O4-Mn3O4 System'), (2, 40231825, None, 'Crystal structure, thermal expansion and high-temperature electrical conductivity of A-site deficient La2-zCo1+y(MgxNb1-x)1-yO6 double perovskites'), (3, 40236382, None, 'New rutile solid solutions, Ti1-4xLixM3xO2: M= Nb, Ta, Sb'), (4, 640506, None, 'Saltonseaite, K3NaMn2+Cl6, the Mn analogue of rinneite from the Salton Sea, California'), (5, 40232664, None, 'Cesium salts of niobo-tungstate isopolyanions with intermediate group V-group VI character'), (6, 40286218, None, 'Heterocyclic nitrilimines and their use in the synthesis of complex high-nitrogen materials'), (7, 40084534, None, 'Magnetic and Electrical Properties of LayAxMnuO3 (A = Na, K, Rb, and Sr) with Perovskite-Type Structure'), (8, 50090860, '516-517 

In [12]:
from dotenv import load_dotenv
import os
print("Environment variables are loaded:", load_dotenv())

# Optional: print model name if you set it
print("Model name:", os.getenv("gpt_deployment_name"))  # e.g., "gpt-3.5-turbo"

Environment variables are loaded: True
Model name: gpt-3.5-turbo


In [13]:
from openai import OpenAI

messages = [
    {"role": "system", "content": str(
        "You are a helpful assistant"
    )},
    {"role": "user", "content": str("hello")}
]

client = OpenAI()

response = client.chat.completions.create(
    model=os.getenv("gpt_deployment_name"),
    messages=messages
)

print(response.choices[0].message.content)

Hello! How can I assist you today?


In [14]:
# SQL Query chain

# Load the LLM
from langchain.chat_models import ChatOpenAI

model_name = os.getenv("gpt_deployment_name")
openai_api_key = os.environ["OPENAI_API_KEY"]
llm = ChatOpenAI(model_name=model_name,
                 temperature=0,
                 openai_api_key = openai_api_key)

In [15]:
from langchain.chains import create_sql_query_chain

chain = create_sql_query_chain(llm, db)
response = chain.invoke({"question": "How many entries are there in comments table?"})
print(response)

SELECT COUNT("Unnamed: 0") AS TotalEntries
FROM comments;


In [16]:
from langchain.chains import create_sql_query_chain

chain = create_sql_query_chain(llm, db)
response = chain.invoke({"question": "How many entries have melting point?"})
print(response)

SELECT COUNT(*) 
FROM comments 
WHERE "Melting Point" IS NOT NULL


In [17]:
# execute the query
db.run(response)

'[(2,)]'

In [21]:
# Using Chain

from langchain_community.tools.sql_database.tool import QuerySQLDataBaseTool

write_query = create_sql_query_chain(llm, db)
execute_query = QuerySQLDataBaseTool(db=db)

chain = write_query | execute_query

chain.invoke({"question": "How many entries have melting point?"})

'[(2,)]'

In [22]:
# Answer the question in a user friendly manner
from operator import itemgetter
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough

answer_prompt = PromptTemplate.from_template(
    """Given the following user question, corresponding SQL query, and SQL result, answer the user question.

Question: {question}
SQL Query: {query}
SQL Result: {result}
Answer: """
)

answer = answer_prompt | llm | StrOutputParser()
chain = (
    RunnablePassthrough.assign(query=write_query).assign(
        result=itemgetter("query") | execute_query
    )
    | answer
)

chain.invoke({"question": "How many entries have melting point?"})

'There are 2 entries in the comments table that have a melting point value.'

# Using SQL Agent

In [28]:
import os
from dotenv import load_dotenv
import warnings
warnings.filterwarnings("ignore")
print("Environment variables are loaded:", load_dotenv())
print("test by reading a variable:", os.getenv("gpt_deployment_name"))

Environment variables are loaded: True
test by reading a variable: gpt-3.5-turbo


In [29]:
# Load the LLM
from langchain.chat_models import ChatOpenAI

model_name = os.getenv("gpt_deployment_name")
openai_api_key = os.environ["OPENAI_API_KEY"]
llm = ChatOpenAI(model_name=model_name,
                 temperature=0,
                 openai_api_key = openai_api_key)

In [30]:
from langchain_community.agent_toolkits import create_sql_agent
agent_executor = create_sql_agent(llm, db=db, agent_type="openai-tools", verbose=True)

In [32]:
response = agent_executor.invoke({"input": "Describe the comments table."})



[1m> Entering new SQL Agent Executor chain...[0m
[32;1m[1;3m
Invoking: `sql_db_list_tables` with `{}`


[0m[38;5;200m[1;3mcomments[0m[32;1m[1;3m
Invoking: `sql_db_schema` with `{'table_names': 'comments'}`


[0m[33;1m[1;3m
CREATE TABLE comments (
	"Unnamed: 0" BIGINT, 
	"ProductID" BIGINT, 
	"Melting Point" TEXT, 
	"Article Title" TEXT
)

/*
3 rows from comments table:
Unnamed: 0	ProductID	Melting Point	Article Title
0	40203804	None	Crystal structures and magnetic properties of the BaAl4-type structure derivatives in Gd-Al-Ga syste
1	40017624	None	Structural Features and Magnetic Properties of Solid Solutions in the CuFe2O4-Li0.5Fe2.5O4-Mn3O4 Sys
2	40231825	None	Crystal structure, thermal expansion and high-temperature electrical conductivity of A-site deficien
*/[0m[32;1m[1;3mThe comments table has the following columns:
1. Unnamed: 0 (BIGINT)
2. ProductID (BIGINT)
3. Melting Point (TEXT)
4. Article Title (TEXT)

Here are 3 sample rows from the comments table:
1. Unn

In [35]:
print(response["output"])

The comments table has the following columns:
1. Unnamed: 0 (BIGINT)
2. ProductID (BIGINT)
3. Melting Point (TEXT)
4. Article Title (TEXT)

Here are 3 sample rows from the comments table:
1. Unnamed: 0: 40203804, ProductID: 40203804, Melting Point: None, Article Title: Crystal structures and magnetic properties of the BaAl4-type structure derivatives in Gd-Al-Ga system
2. Unnamed: 0: 40017624, ProductID: 40017624, Melting Point: None, Article Title: Structural Features and Magnetic Properties of Solid Solutions in the CuFe2O4-Li0.5Fe2.5O4-Mn3O4 Sys
3. Unnamed: 0: 40231825, ProductID: 40231825, Melting Point: None, Article Title: Crystal structure, thermal expansion and high-temperature electrical conductivity of A-site deficien


In [39]:
response = agent_executor.invoke({"input": "How many entries have melting points in the comments table?"})



[1m> Entering new SQL Agent Executor chain...[0m
[32;1m[1;3m
Invoking: `sql_db_list_tables` with `{}`


[0m[38;5;200m[1;3mcomments[0m[32;1m[1;3m
Invoking: `sql_db_schema` with `{'table_names': 'comments'}`


[0m[33;1m[1;3m
CREATE TABLE comments (
	"Unnamed: 0" BIGINT, 
	"ProductID" BIGINT, 
	"Melting Point" TEXT, 
	"Article Title" TEXT
)

/*
3 rows from comments table:
Unnamed: 0	ProductID	Melting Point	Article Title
0	40203804	None	Crystal structures and magnetic properties of the BaAl4-type structure derivatives in Gd-Al-Ga syste
1	40017624	None	Structural Features and Magnetic Properties of Solid Solutions in the CuFe2O4-Li0.5Fe2.5O4-Mn3O4 Sys
2	40231825	None	Crystal structure, thermal expansion and high-temperature electrical conductivity of A-site deficien
*/[0m[32;1m[1;3m
Invoking: `sql_db_query` with `{'query': 'SELECT COUNT(*) FROM comments WHERE "Melting Point" IS NOT NULL'}`


[0m[36;1m[1;3m[(2,)][0m[32;1m[1;3mThere are 2 entries in the comments table 

In [42]:
response = agent_executor.invoke({"input": "Can you tell me in what article in the comments table CuFe2O4 is appeared? Also show me th product ID of this entry"})



[1m> Entering new SQL Agent Executor chain...[0m
[32;1m[1;3m
Invoking: `sql_db_list_tables` with `{}`


[0m[38;5;200m[1;3mcomments[0m[32;1m[1;3m
Invoking: `sql_db_schema` with `{'table_names': 'comments'}`


[0m[33;1m[1;3m
CREATE TABLE comments (
	"Unnamed: 0" BIGINT, 
	"ProductID" BIGINT, 
	"Melting Point" TEXT, 
	"Article Title" TEXT
)

/*
3 rows from comments table:
Unnamed: 0	ProductID	Melting Point	Article Title
0	40203804	None	Crystal structures and magnetic properties of the BaAl4-type structure derivatives in Gd-Al-Ga syste
1	40017624	None	Structural Features and Magnetic Properties of Solid Solutions in the CuFe2O4-Li0.5Fe2.5O4-Mn3O4 Sys
2	40231825	None	Crystal structure, thermal expansion and high-temperature electrical conductivity of A-site deficien
*/[0m[32;1m[1;3m
Invoking: `sql_db_query` with `{'query': 'SELECT * FROM comments WHERE "Article Title" LIKE \'%CuFe2O4%\''}`


[0m[36;1m[1;3m[(1, 40017624, None, 'Structural Features and Magnetic Properties

In [44]:
print(response["output"])

The article "Structural Features and Magnetic Properties of Solid Solutions in the CuFe2O4-Li0.5Fe2.5O4-Mn3O4 System" contains the mention of CuFe2O4 in the comments table. The product ID of this entry is 40017624.


# Testing on the entire comments dataset

In [45]:
from pyprojroot import here
from langchain_community.utilities import SQLDatabase
from sqlalchemy import create_engine
import pandas as pd

csv_file_path = str(here("Data")) + "/comments.csv"
df = pd.read_csv(csv_file_path)


db_path = str(here("Data")) + "/comments.db"
db_path = f"sqlite:///{db_path}"

engine = create_engine(db_path)
df.to_sql("comments", engine, index=False)

1104137

In [46]:
# validate the connection to the vectordb
from langchain_community.utilities import SQLDatabase
from pyprojroot import here

db_path = str(here("Data")) + "/comments.db"
db = SQLDatabase.from_uri(f"sqlite:///{db_path}")

print(db.dialect)
print(db.get_usable_table_names())
db.run("SELECT * FROM comments LIMIT 10;")

sqlite
['comments']




In [47]:
from dotenv import load_dotenv
import os
print("Environment variables are loaded:", load_dotenv())

# Optional: print model name if you set it
print("Model name:", os.getenv("gpt_deployment_name"))  # e.g., "gpt-3.5-turbo"

Environment variables are loaded: True
Model name: gpt-3.5-turbo


In [48]:
from openai import OpenAI

messages = [
    {"role": "system", "content": str(
        "You are a helpful assistant"
    )},
    {"role": "user", "content": str("hello")}
]

client = OpenAI()

response = client.chat.completions.create(
    model=os.getenv("gpt_deployment_name"),
    messages=messages
)

print(response.choices[0].message.content)

Hello! How can I assist you today?


In [None]:
# SQL Query chain

# Load the LLM
from langchain.chat_models import ChatOpenAI

model_name = os.getenv("gpt_deployment_name")
openai_api_key = os.environ["OPENAI_API_KEY"]
llm = ChatOpenAI(model_name=model_name,
                 temperature=0,
                 openai_api_key = openai_api_key)

In [50]:
from langchain.chains import create_sql_query_chain

chain = create_sql_query_chain(llm, db)
response = chain.invoke({"question": "How many entries are there in comments table?"})
print(response)

SELECT


⚠️ Problem: The Chain Can't Handle Large Schemas Well
The reason you're seeing just SELECT when there are many columns is likely because:

create_sql_query_chain internally injects table schema info into the prompt, and

When the schema (i.e., list of columns) is too long, the total token length exceeds the model's context limit or the prompt becomes too noisy,

So the LLM returns an incomplete SQL query, like just SELECT.



✅ Solutions
✅ Option 1: Use a Narrowed Schema (Only Include Relevant Tables)
LangChain allows you to limit which tables are included in the prompt:

python
Copy
Edit
db = SQLDatabase.from_uri(..., include_tables=["comments"])
chain = create_sql_query_chain(llm, db)
This helps cut down schema size sent to the model.

✅ Option 2: Use a More Capable Model (Like gpt-4 or gpt-3.5-turbo-16k)
Larger models with more context length can handle long table schemas better.

python
Copy
Edit
from langchain.chat_models import ChatOpenAI

llm = ChatOpenAI(model="gpt-3.5-turbo-16k", temperature=0)
Or if you’re using OpenAI tools agent with auto-function-calling, that often performs better for large schemas:

python
Copy
Edit
from langchain.agents import create_sql_agent
from langchain.agents.agent_toolkits import SQLDatabaseToolkit

agent = create_sql_agent(
    llm=llm,
    toolkit=SQLDatabaseToolkit(db=db, llm=llm),
    verbose=True,

In [68]:
from langchain.chat_models import ChatOpenAI
from langchain.agents import create_sql_agent
from langchain.agents.agent_toolkits import SQLDatabaseToolkit
from langchain_community.utilities import SQLDatabase  # this might vary depending on your LangChain version

# Step 1: Initialize your LLM
llm = ChatOpenAI(temperature=0, model=model_name)  # or "gpt-4"


# Step 2: Create the SQL agent
toolkit = SQLDatabaseToolkit(db=db, llm=llm)
agent = create_sql_agent(llm=llm, toolkit=toolkit, verbose=True, handle_parsing_errors=True)

# Step 3: Ask a question
response = agent.invoke({"input": "How many entries have melting points in the comments table?"})
print(response)




[1m> Entering new SQL Agent Executor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: [0m[38;5;200m[1;3mcomments[0m[32;1m[1;3mI should query the schema of the comments table to see if there is a column for melting points.
Action: sql_db_schema
Action Input: comments[0m[33;1m[1;3m
CREATE TABLE comments (
	"Unnamed: 0" BIGINT, 
	"ProductID" BIGINT, 
	"FullComment" TEXT, 
	"Unit Cell Data Source" TEXT, 
	"General Comments" TEXT, 
	"Reason O Quality Was Assigned" TEXT, 
	"Deleted Or Rejected By" TEXT, 
	"Color" TEXT, 
	"Melting Point" TEXT, 
	"Water Loss" TEXT, 
	"Structures" TEXT, 
	"Optical Data" TEXT, 
	"Additional Patterns" TEXT, 
	"Unit Cell" TEXT, 
	"Sample Preparation" TEXT, 
	"Sublimation Point" TEXT, 
	"Analysis" TEXT, 
	"Deleted" TEXT, 
	"Boiling Point" TEXT, 
	"Article Title" TEXT, 
	"Note" TEXT, 
	"Additional Diffraction Lines" TEXT, 
	"Opaque Optical Data" TEXT, 
	"Delete" TEXT, 
	"Reflectance" TEXT, 
	"Vickers Hardness Number" TEXT, 
	"Compositio

ValueError: An output parsing error occurred. In order to pass this error back to the agent and have it try again, pass `handle_parsing_errors=True` to the AgentExecutor. This is the error: Could not parse LLM output: `The`
For troubleshooting, visit: https://python.langchain.com/docs/troubleshooting/errors/OUTPUT_PARSING_FAILURE 

### Switch to GPT-4

In [71]:
from langchain.chat_models import ChatOpenAI
from langchain.agents import create_sql_agent
from langchain.agents.agent_toolkits import SQLDatabaseToolkit
from langchain_community.utilities import SQLDatabase  # this might vary depending on your LangChain version

# Step 1: Initialize your LLM
llm = ChatOpenAI(temperature=0, model="gpt-4")  # or "gpt-4"


# Step 2: Create the SQL agent
toolkit = SQLDatabaseToolkit(db=db, llm=llm)
agent = create_sql_agent(llm=llm, toolkit=toolkit, verbose=True, handle_parsing_errors=True)

# Step 3: Ask a question
response = agent.invoke({"input": "How many entries are there in the comments table?"})
print(response)




[1m> Entering new SQL Agent Executor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: ""[0m[38;5;200m[1;3mcomments[0m[32;1m[1;3mThe comments table exists in the database. Now I need to construct a query to count the number of entries in the comments table.
Action: sql_db_query_checker
Action Input: "SELECT COUNT(*) FROM comments"[0m[36;1m[1;3mSELECT COUNT(*) FROM comments[0m[32;1m[1;3mThe query is correct. Now I can execute it to get the number of entries in the comments table.
Action: sql_db_query
Action Input: "SELECT COUNT(*) FROM comments"[0m[36;1m[1;3m[(1104137,)][0m[32;1m[1;3mI now know the final answer
Final Answer: There are 1,104,137 entries in the comments table.[0m

[1m> Finished chain.[0m
{'input': 'How many entries are there in the comments table?', 'output': 'There are 1,104,137 entries in the comments table.'}


In [72]:
response = agent.invoke({"input": "How many entries have melting points in the comments table?"})
print(response)



[1m> Entering new SQL Agent Executor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: ""[0m[38;5;200m[1;3mcomments[0m[32;1m[1;3mThe only table in the database is 'comments'. I should check the schema of this table to see if there is a column related to melting points.
Action: sql_db_schema
Action Input: comments[0m[33;1m[1;3m
CREATE TABLE comments (
	"Unnamed: 0" BIGINT, 
	"ProductID" BIGINT, 
	"FullComment" TEXT, 
	"Unit Cell Data Source" TEXT, 
	"General Comments" TEXT, 
	"Reason O Quality Was Assigned" TEXT, 
	"Deleted Or Rejected By" TEXT, 
	"Color" TEXT, 
	"Melting Point" TEXT, 
	"Water Loss" TEXT, 
	"Structures" TEXT, 
	"Optical Data" TEXT, 
	"Additional Patterns" TEXT, 
	"Unit Cell" TEXT, 
	"Sample Preparation" TEXT, 
	"Sublimation Point" TEXT, 
	"Analysis" TEXT, 
	"Deleted" TEXT, 
	"Boiling Point" TEXT, 
	"Article Title" TEXT, 
	"Note" TEXT, 
	"Additional Diffraction Lines" TEXT, 
	"Opaque Optical Data" TEXT, 
	"Delete" TEXT, 
	"Reflectance" TEXT, 