In [1]:
from dotenv import load_dotenv
import os

from langchain_openai import ChatOpenAI
from langchain_groq import ChatGroq
from langchain_anthropic import ChatAnthropic
from langchain_google_genai import ChatGoogleGenerativeAI

In [2]:
# Load the variables defined in the local .env file into the process environment.
load_dotenv(dotenv_path=".env", override=True)

# Provider credentials read from the environment; each one is None when the
# variable is not set. The URL next to each key is where it can be created.
openai_api_key = os.environ.get("OPENAI_API_KEY")        # https://platform.openai.com/api-keys
groq_api_key = os.environ.get("GROQ_API_KEY")            # https://console.groq.com/keys
anthropic_api_key = os.environ.get("ANTHROPIC_API_KEY")  # https://console.anthropic.com/dashboard
gemini_api_key = os.environ.get("GEMINI_API_KEY")        # https://aistudio.google.com/app/apikey

In [3]:
# Alternative provider, kept for reference:
# llm = ChatOpenAI(temperature=0.0, model="gpt-4o-mini", max_tokens=256, openai_api_key=openai_api_key)

# Groq-hosted chat model. No API key is passed explicitly here, so the client
# presumably reads GROQ_API_KEY from the environment -- TODO confirm.
llm = ChatGroq(
    model="llama3-8b-8192",
    temperature=0.0,
    max_tokens=256,
)

# Smoke test: send a single user question (in Portuguese) and print the reply text.
user_question = "Qual a capital do Brasil ?"
messages = [
    {"role": "user", "content": user_question},
]
response = llm.invoke(messages)
print(response.content)

A capital do Brasil é Brasília!


In [4]:
from langchain_community.utilities import SQLDatabase

# Wrap the Chinook.db SQLite file (resolved relative to the working directory).
db = SQLDatabase.from_uri(
    "sqlite:///Chinook.db",
)

# Print the schema description the wrapper exposes: CREATE TABLE statements
# followed by a few sample rows per table.
print(db.get_table_info())


CREATE TABLE album (
	album_id INTEGER NOT NULL, 
	title NVARCHAR(160) NOT NULL, 
	artist_id INTEGER NOT NULL, 
	PRIMARY KEY (album_id), 
	FOREIGN KEY(artist_id) REFERENCES artist (artist_id)
)

/*
3 rows from album table:
album_id	title	artist_id
1	For Those About To Rock We Salute You	1
2	Balls to the Wall	2
3	Restless and Wild	2
*/


CREATE TABLE artist (
	artist_id INTEGER NOT NULL, 
	name NVARCHAR(120), 
	PRIMARY KEY (artist_id)
)

/*
3 rows from artist table:
artist_id	name
1	AC/DC
2	Accept
3	Aerosmith
*/


CREATE TABLE customer (
	customer_id INTEGER NOT NULL, 
	first_name NVARCHAR(40) NOT NULL, 
	last_name NVARCHAR(20) NOT NULL, 
	company NVARCHAR(80), 
	address NVARCHAR(70), 
	city NVARCHAR(40), 
	state NVARCHAR(40), 
	country NVARCHAR(40), 
	postal_code NVARCHAR(10), 
	phone NVARCHAR(24), 
	fax NVARCHAR(24), 
	email NVARCHAR(60) NOT NULL, 
	support_rep_id INTEGER, 
	PRIMARY KEY (customer_id), 
	FOREIGN KEY(support_rep_id) REFERENCES employee (employee_id)
)

/*
3 rows from c

In [6]:
# Requires the langchain-experimental package:
#   pip install langchain-experimental
from langchain_experimental.sql import SQLDatabaseChain

# Chain that turns a natural-language question into SQL and runs it against `db`.
# verbose=True prints the intermediate steps (generated query, raw SQL result).
db_chain = SQLDatabaseChain.from_llm(llm=llm, db=db, verbose=True)

In [7]:
# Ask a question in natural language. The chain returns a dict with the
# original question under 'query' and the model's text under 'result'.
ai_msg = db_chain.invoke("How many employees are there?")

# Only the last expression of a cell is displayed, so the former bare `ai_msg`
# line above this one had no effect and has been dropped.
ai_msg['result']



[1m> Entering new SQLDatabaseChain chain...[0m
How many employees are there?
[32;1m[1;3mQuestion: How many employees are there?
SQLQuery: SELECT COUNT(*) FROM employee;[0m
SQLResult: [33;1m[1;3m[(8,)][0m
[32;1m[1;3mQuestion: How many employees are there?
SQLQuery: SELECT COUNT(*) FROM employee;[0m
[1m> Finished chain.[0m


'Question: How many employees are there?\nSQLQuery: SELECT COUNT(*) FROM employee;'

In [8]:
# Pull the generated SQL out of the chain's text result, which is expected to
# look like "Question: ...\nSQLQuery: <sql>".
_parts = ai_msg['result'].split("SQLQuery: ")
if len(_parts) < 2:
    # Without this guard a result lacking the marker fails with an opaque
    # IndexError; report what was actually returned instead.
    raise ValueError(
        "Chain result does not contain a 'SQLQuery: ' section: "
        f"{ai_msg['result']!r}"
    )
sql_query = _parts[1].strip()
print(sql_query)

SELECT COUNT(*) FROM employee;


In [9]:
from sqlalchemy import create_engine, text

db_uri = "sqlite:///Chinook.db"  # change this to point at your own database if needed
engine = create_engine(db_uri)

# Run the extracted SQL directly against the database, bypassing the chain.
with engine.connect() as connection:
    statement = text(sql_query)
    result = connection.execute(statement).fetchall()
    first_row = result[0]
    # First column of the first row (the COUNT(*) value for the query above).
    print("SQL Result: ", first_row[0])

SQL Result:  8


In [10]:
# Question that needs a lookup across the album and artist tables.
ai_msg = db_chain.invoke("How many albums by Aerosmith?")

# Only the last expression of a cell is displayed, so the former bare `ai_msg`
# line above this one had no effect and has been dropped.
ai_msg['result']



[1m> Entering new SQLDatabaseChain chain...[0m
How many albums by Aerosmith?
[32;1m[1;3mQuestion: How many albums by Aerosmith?
SQLQuery: SELECT COUNT(*) FROM album WHERE artist_id IN (SELECT artist_id FROM artist WHERE name = 'Aerosmith');[0m
SQLResult: [33;1m[1;3m[(1,)][0m
[32;1m[1;3mQuestion: How many albums by Aerosmith?
SQLQuery: SELECT COUNT(*) FROM album WHERE artist_id IN (SELECT artist_id FROM artist WHERE name = 'Aerosmith');[0m
[1m> Finished chain.[0m


"Question: How many albums by Aerosmith?\nSQLQuery: SELECT COUNT(*) FROM album WHERE artist_id IN (SELECT artist_id FROM artist WHERE name = 'Aerosmith');"

In [11]:
from langchain.prompts.prompt import PromptTemplate

# Custom prompt: fixes the response format and teaches the model a domain
# alias ("the general table" means the employee table). {table_info} and
# {input} are filled in by the chain at call time.
template = """
Given an input question, first create a syntactically correct query to run, then look at the results of the query and return the answer.
Use the following format:

Question: "Question here"
SQLQuery: "SQL Query to run"
SQLResult: "Result of the SQLQuery"
Answer: "Final answer here"

Only use the following tables:
{table_info}

If someone asks for the general table, they really mean the employee table.
Question: {input}"""

PROMPT = PromptTemplate(
    template=template,
    input_variables=["input", "table_info"],
)

# Rebuild the chain with the custom prompt and the query checker enabled.
db_chain = SQLDatabaseChain.from_llm(
    llm,
    db,
    prompt=PROMPT,
    use_query_checker=True,
    verbose=True,
)
ai_msg = db_chain.invoke("How many employees are there in the general table?")

# Only the last expression of a cell is displayed, so the former bare `ai_msg`
# line above this one had no effect and has been dropped.
ai_msg['result']



[1m> Entering new SQLDatabaseChain chain...[0m
How many employees are there in the general table?
[32;1m[1;3mSELECT COUNT(*) FROM employee;[0m
SQLResult: [33;1m[1;3m[(8,)][0m
[32;1m[1;3mQuestion: How many employees are there in the general table?
SQLQuery: SELECT COUNT(*) FROM employee;[0m
[1m> Finished chain.[0m


'Question: How many employees are there in the general table?\nSQLQuery: SELECT COUNT(*) FROM employee;'

In [17]:
# Rebuild the chain without a custom prompt, then ask for the table list.
db_chain = SQLDatabaseChain.from_llm(llm=llm, db=db, verbose=True)
ai_msg = db_chain.invoke("list the tables of all database")

print(ai_msg)             # full response dict: 'query' and 'result'
print(ai_msg["result"])   # just the model's text



[1m> Entering new SQLDatabaseChain chain...[0m
list the tables of all database
[32;1m[1;3mSQLQuery: SELECT "name" FROM sqlite_master WHERE "type"='table';[0m
SQLResult: [33;1m[1;3m[('album',), ('artist',), ('customer',), ('employee',), ('genre',), ('invoice',), ('invoice_line',), ('media_type',), ('playlist',), ('playlist_track',), ('track',)][0m
[32;1m[1;3mThe database contains the following tables: album, artist, customer, employee, genre, invoice, invoice_line, media_type, playlist, playlist_track, and track.[0m
[1m> Finished chain.[0m
{'query': 'list the tables of all database', 'result': 'The database contains the following tables: album, artist, customer, employee, genre, invoice, invoice_line, media_type, playlist, playlist_track, and track.'}
The database contains the following tables: album, artist, customer, employee, genre, invoice, invoice_line, media_type, playlist, playlist_track, and track.


In [13]:
# NOTE: in the recorded run the model generated `SELECT "name" FROM genre`,
# i.e. it returned the genre names (rows) rather than the table's columns.
ai_msg = db_chain.invoke("list the columns in genre table")

print(ai_msg)             # full response dict: 'query' and 'result'
print(ai_msg["result"])   # just the model's text



[1m> Entering new SQLDatabaseChain chain...[0m
list the columns in genre table
[32;1m[1;3mQuestion: list the columns in genre table
SQLQuery: SELECT "name" FROM genre[0m
SQLResult: [33;1m[1;3m[('Rock',), ('Jazz',), ('Metal',), ('Alternative & Punk',), ('Rock And Roll',), ('Blues',), ('Latin',), ('Reggae',), ('Pop',), ('Soundtrack',), ('Bossa Nova',), ('Easy Listening',), ('Heavy Metal',), ('R&B/Soul',), ('Electronica/Dance',), ('World',), ('Hip Hop/Rap',), ('Science Fiction',), ('TV Shows',), ('Sci Fi & Fantasy',), ('Drama',), ('Comedy',), ('Alternative',), ('Classical',), ('Opera',)][0m
[32;1m[1;3mQuestion: list the columns in genre table
SQLQuery: SELECT "name" FROM genre[0m
[1m> Finished chain.[0m
{'query': 'list the columns in genre table', 'result': 'Question: list the columns in genre table\nSQLQuery: SELECT "name" FROM genre'}
Question: list the columns in genre table
SQLQuery: SELECT "name" FROM genre


In [18]:
# Question text is kept verbatim (including its spacing) because it is sent to the model as-is.
query = "list all the names of unique media types  of the media_type table?"

# The response dict is printed whole; later cells extract the SQL from its 'result'.
ai_msg = db_chain.invoke(query)
print(ai_msg)



[1m> Entering new SQLDatabaseChain chain...[0m
list all the names of unique media types  of the media_type table?
[32;1m[1;3mSQLQuery: SELECT DISTINCT "name" FROM "media_type" LIMIT 5;[0m
SQLResult: [33;1m[1;3m[('MPEG audio file',), ('Protected AAC audio file',), ('Protected MPEG-4 video file',), ('Purchased AAC audio file',), ('AAC audio file',)][0m
[32;1m[1;3mQuestion: list all the names of unique media types of the media_type table?
SQLQuery: SELECT DISTINCT "name" FROM "media_type" LIMIT 5;[0m
[1m> Finished chain.[0m
{'query': 'list all the names of unique media types  of the media_type table?', 'result': 'Question: list all the names of unique media types of the media_type table?\nSQLQuery: SELECT DISTINCT "name" FROM "media_type" LIMIT 5;'}


In [20]:
# 'result' holds the model's text; in the recorded run this is the
# "Question: ... / SQLQuery: ..." block, not a natural-language answer.
print(ai_msg["result"])
# 'query' holds the original user question passed to invoke(), not the
# generated SQL and not the fetched rows.
print(ai_msg["query"])

Question: list all the names of unique media types of the media_type table?
SQLQuery: SELECT DISTINCT "name" FROM "media_type" LIMIT 5;
list all the names of unique media types  of the media_type table?


In [24]:
import re

sql_code = ai_msg["result"]

# Capture everything after "SQLQuery:" up to and including the first ';'.
# The match is non-greedy, and DOTALL lets the query span several lines.
match = re.search(r'SQLQuery:\s*(.*?;)', sql_code, re.DOTALL)
if match is None:
    # Previously a missing match fell through to print(sql_query), which either
    # showed a stale query from an earlier cell or raised NameError.
    raise ValueError(
        f"No 'SQLQuery: ...;' section found in chain result: {sql_code!r}"
    )

sql_query = match.group(1).strip()
print(sql_query)

SELECT DISTINCT "name" FROM "media_type" LIMIT 5;


In [27]:
# Run the extracted SQL by hand through LangChain's SQLDatabase wrapper.
# Imported from langchain_community, the same location the notebook uses when
# it first opens the database, instead of the legacy langchain.sql_database path.
from langchain_community.utilities import SQLDatabase

db = SQLDatabase.from_uri("sqlite:///Chinook.db")
# Per the original author's note, run() already returns the rows rendered as a string.
results = db.run(sql_query)
print(results)

[('MPEG audio file',), ('Protected AAC audio file',), ('Protected MPEG-4 video file',), ('Purchased AAC audio file',), ('AAC audio file',)]


In [28]:
from sqlalchemy import create_engine, text

# Same query once more, this time through a plain SQLAlchemy connection.
engine = create_engine("sqlite:///Chinook.db")
with engine.connect() as conn:
    result = conn.execute(text(sql_query))
    # Materialise every row while the connection is still open.
    rows = list(result)
    print(rows)

[('MPEG audio file',), ('Protected AAC audio file',), ('Protected MPEG-4 video file',), ('Purchased AAC audio file',), ('AAC audio file',)]
