# DuckDB + LangChain SQLDatabase

[Unlocking ChatGPT's Potential: LangChain Tutorial with DuckDB - YouTube](https://www.youtube.com/watch?v=FoGm7d0paIo)

## Troubleshooting and Issues

- [Issue: db.get_usable_table_names() return nothing · Issue #4565 · langchain-ai/langchain](https://github.com/langchain-ai/langchain/issues/4565#issuecomment-1833391840)
- [feat: parquet file support for SQL agent · Issue #2002 · langchain-ai/langchain](https://github.com/langchain-ai/langchain/issues/2002)

In [1]:
import duckdb

In [34]:
from sqlalchemy import create_engine

In [3]:
# SQLAlchemy URI of the DuckDB database; `uri` is passed to SQLDatabase.from_uri below.
# File-backed alternative, kept for reference:
# uri = 'duckdb:///../demo/demo.duckdb'
# (sql_engine := create_engine(uri))

# In-memory database.
uri = 'duckdb:///:memory:'
# (sql_engine := create_engine(uri))

## DuckDB Connect

- [Python API - DuckDB](https://duckdb.org/docs/api/python/overview.html#using-an-in-memory-database)
- [Connect - DuckDB](https://duckdb.org/docs/connect.html)

In [4]:
# Open an in-memory DuckDB connection; the file-backed variant is kept for reference.
# conn = duckdb.connect('../demo/demo.duckdb')
conn = duckdb.connect(':memory:')
conn

<duckdb.duckdb.DuckDBPyConnection at 0x18d9c8543f0>

In [5]:
# Load the demo CSV through the DuckDB connection and show it as the cell output.
df = conn.read_csv('../demo/demo.csv')
df

┌─────────┬─────────────┬──────────────────────┬──────────────────┬───────────┬───┬────────────┬──────────┬──────────────┬────────────┐
│ Trade # │    Type     │        Signal        │    Date/Time     │ Price USD │ … │ Run-up USD │ Run-up % │ Drawdown USD │ Drawdown % │
│  int64  │   varchar   │       varchar        │     varchar      │  double   │   │   double   │  double  │    double    │   double   │
├─────────┼─────────────┼──────────────────────┼──────────────────┼───────────┼───┼────────────┼──────────┼──────────────┼────────────┤
│      23 │ Exit Long   │ Open                 │ NULL             │      NULL │ … │       NULL │     NULL │         NULL │       NULL │
│      23 │ Entry Long  │ {'take_profit': 1.…  │ 2023-11-13 16:30 │   1.06932 │ … │       NULL │     NULL │         NULL │       NULL │
│      22 │ Exit Long   │ exit                 │ 2023-11-10 22:50 │   1.06728 │ … │     203.72 │     0.01 │      2000.16 │        0.1 │
│      22 │ Entry Long  │ {'take_profit': 1.…  │

In [6]:
# Register `df` on the connection under the name 'tbl' so SQL statements can refer to it.
conn.register('tbl', df)

<duckdb.duckdb.DuckDBPyConnection at 0x18d9c8543f0>

In [7]:
# Show the column names and types of `tbl`, converted to a DataFrame for display.
conn.execute('DESCRIBE tbl;').df()

Unnamed: 0,column_name,column_type,null,key,default,extra
0,Trade #,BIGINT,YES,,,
1,Type,VARCHAR,YES,,,
2,Signal,VARCHAR,YES,,,
3,Date/Time,VARCHAR,YES,,,
4,Price USD,DOUBLE,YES,,,
5,Contracts,BIGINT,YES,,,
6,Profit USD,DOUBLE,YES,,,
7,Profit %,DOUBLE,YES,,,
8,Cum. Profit USD,DOUBLE,YES,,,
9,Cum. Profit %,DOUBLE,YES,,,


In [8]:
# List the tables visible on this DuckDB connection.
conn.execute('SHOW TABLES;').df()

Unnamed: 0,name
0,tbl


## LangChain SQLDatabase

- [SQL Database | 🦜️🔗 Langchain](https://python.langchain.com/docs/integrations/toolkits/sql_database)

In [9]:
from langchain.sql_database import SQLDatabase

In [10]:
# SQLDatabase.from_uri calls sqlalchemy's create_engine under the hood.
# Variants tried earlier, kept for reference:
# db = SQLDatabase.from_uri(uri, include_tables=['tbl'])
# db = SQLDatabase.from_uri('duckdb:///:memory:')
# NOTE(review): `conn` was opened separately via duckdb.connect(':memory:'); this engine
# presumably gets its own in-memory database, which would explain the empty table list
# in the cells that follow. Confirm.
db = SQLDatabase.from_uri(uri, sample_rows_in_table_info=3)
db

<langchain.utilities.sql_database.SQLDatabase at 0x18dad21dfd0>

In [11]:
# Ask LangChain which tables it can see; the recorded output is an empty list.
db.get_usable_table_names()

[]

In [12]:
# conn.read_csv('../demo/demo.csv')

In [13]:
# Re-check the visible tables; the recorded output is still an empty list.
db.get_usable_table_names()

[]

In [14]:
# Querying by the Python variable name `df` returns rows in the recorded run, whereas
# 'tbl' (registered on `conn`) is not visible through `db`.
# NOTE(review): presumably DuckDB resolves `df` via its Python replacement scan. Confirm.
db.run('SELECT * FROM df;')
# db.run('SELECT * FROM tbl;') # this will fail

'[(23, \'Exit Long\', \'Open\', None, None, None, None, None, None, None, None, None, None, None), (23, \'Entry Long\', "{\'take_profit\': 1.0703532, \'stop_loss\': 1.06848, \'lots\': 23.81}", \'2023-11-13 16:30\', 1.06932, None, None, None, None, None, None, None, None, None), (22, \'Exit Long\', \'exit\', \'2023-11-10 22:50\', 1.06728, 1852000, -2000.16, -0.1, 9729.93, -0.2, 203.72, 0.01, 2000.16, 0.1), (22, \'Entry Long\', "{\'take_profit\': 1.0696884, \'stop_loss\': 1.06728, \'lots\': 18.52}", \'2023-11-10 22:45\', 1.06836, 1852000, -2000.16, -0.1, 9729.93, -0.2, 203.72, 0.01, 2000.16, 0.1), (21, \'Exit Long\', \'exit\', \'2023-11-10 17:45\', 1.06769, 3390000, 2474.7, 0.07, 11730.09, 0.25, 2474.7, 0.07, 271.2, 0.01), (21, \'Entry Long\', "{\'take_profit\': 1.0676857, \'stop_loss\': 1.06637, \'lots\': 33.9}", \'2023-11-10 17:40\', 1.06696, 3390000, 2474.7, 0.07, 11730.09, 0.25, 2474.7, 0.07, 271.2, 0.01), (20, \'Exit Short\', \'exit\', \'2023-11-08 22:30\', 1.06803, 2312000, -2011.4

In [15]:
# Table info as seen by LangChain; the recorded output is an empty string.
db.get_table_info()

''

## Try DuckDBLoader

- [DuckDB | 🦜️🔗 Langchain](https://python.langchain.com/docs/integrations/document_loaders/duckdb)

In [16]:
from langchain.document_loaders import DuckDBLoader

In [17]:
# Build a document loader whose query reads the demo CSV via DuckDB's read_csv_auto.
loader = DuckDBLoader("SELECT * FROM read_csv_auto('../demo/demo.csv')")
loader

<langchain.document_loaders.duckdb_loader.DuckDBLoader at 0x18dadb2ee10>

In [18]:
# Run the loader's query and show the resulting documents.
data = loader.load()
data

[Document(page_content='Trade #: 23\nType: Exit Long\nSignal: Open\nDate/Time: None\nPrice USD: None\nContracts: None\nProfit USD: None\nProfit %: None\nCum. Profit USD: None\nCum. Profit %: None\nRun-up USD: None\nRun-up %: None\nDrawdown USD: None\nDrawdown %: None'),
 Document(page_content="Trade #: 23\nType: Entry Long\nSignal: {'take_profit': 1.0703532, 'stop_loss': 1.06848, 'lots': 23.81}\nDate/Time: 2023-11-13 16:30\nPrice USD: 1.06932\nContracts: None\nProfit USD: None\nProfit %: None\nCum. Profit USD: None\nCum. Profit %: None\nRun-up USD: None\nRun-up %: None\nDrawdown USD: None\nDrawdown %: None"),
 Document(page_content='Trade #: 22\nType: Exit Long\nSignal: exit\nDate/Time: 2023-11-10 22:50\nPrice USD: 1.06728\nContracts: 1852000\nProfit USD: -2000.16\nProfit %: -0.1\nCum. Profit USD: 9729.93\nCum. Profit %: -0.2\nRun-up USD: 203.72\nRun-up %: 0.01\nDrawdown USD: 2000.16\nDrawdown %: 0.1'),
 Document(page_content="Trade #: 22\nType: Entry Long\nSignal: {'take_profit': 1.06

## Workarounds (without using DuckDB directly)

### Workaround: DuckDB => SQLite => SQLDatabase

- [feat: parquet file support for SQL agent · Issue #2002 · langchain-ai/langchain](https://github.com/langchain-ai/langchain/issues/2002)

In [19]:
# Copy `tbl` into a SQLite database file through DuckDB's SQLite attachment.
#
# Create-from-scratch variant, kept for reference:
# conn.execute("""
# ATTACH '../demo/sqlite_database.db' AS sqlite_db (TYPE SQLITE);
# CREATE TABLE sqlite_db.tbl(id INTEGER, name VARCHAR);
# INSERT INTO sqlite_db.tbl VALUES (42, 'DuckDB');
# """)

# The SQLite file persists on disk between runs, so a plain ATTACH + CREATE TABLE fails
# on any re-run. IF NOT EXISTS tolerates an already-attached `sqlite_db`, and
# DROP TABLE IF EXISTS removes a copy left over from a previous run before recreating it.
conn.execute("""
ATTACH IF NOT EXISTS '../demo/sqlite_database.db' AS sqlite_db (TYPE SQLITE);
DROP TABLE IF EXISTS sqlite_db.tbl;
CREATE TABLE sqlite_db.tbl AS SELECT * FROM tbl;
""")

<duckdb.duckdb.DuckDBPyConnection at 0x18d9c8543f0>

In [20]:
# Read the copied table back through DuckDB to confirm the write into the SQLite file.
conn.execute("SELECT * FROM sqlite_db.tbl;").df()

Unnamed: 0,Trade #,Type,Signal,Date/Time,Price USD,Contracts,Profit USD,Profit %,Cum. Profit USD,Cum. Profit %,Run-up USD,Run-up %,Drawdown USD,Drawdown %
0,23,Exit Long,Open,,,,,,,,,,,
1,23,Entry Long,"{'take_profit': 1.0703532, 'stop_loss': 1.0684...",2023-11-13 16:30,1.06932,,,,,,,,,
2,22,Exit Long,exit,2023-11-10 22:50,1.06728,1852000.0,-2000.16,-0.1,9729.93,-0.2,203.72,0.01,2000.16,0.1
3,22,Entry Long,"{'take_profit': 1.0696884, 'stop_loss': 1.0672...",2023-11-10 22:45,1.06836,1852000.0,-2000.16,-0.1,9729.93,-0.2,203.72,0.01,2000.16,0.1
4,21,Exit Long,exit,2023-11-10 17:45,1.06769,3390000.0,2474.7,0.07,11730.09,0.25,2474.7,0.07,271.2,0.01
5,21,Entry Long,"{'take_profit': 1.0676857, 'stop_loss': 1.0663...",2023-11-10 17:40,1.06696,3390000.0,2474.7,0.07,11730.09,0.25,2474.7,0.07,271.2,0.01
6,20,Exit Short,exit,2023-11-08 22:30,1.06803,2312000.0,-2011.44,-0.08,9255.39,-0.2,138.72,0.01,2011.44,0.08
7,20,Entry Short,"{'take_profit': 1.06609605, 'stop_loss': 1.068...",2023-11-08 22:25,1.06716,2312000.0,-2011.44,-0.08,9255.39,-0.2,138.72,0.01,2011.44,0.08
8,19,Exit Long,exit,2023-11-08 21:25,1.06646,3030000.0,-1999.8,-0.06,11266.83,-0.2,424.2,0.01,1999.8,0.06
9,19,Entry Long,"{'take_profit': 1.0679318, 'stop_loss': 1.0664...",2023-11-08 21:15,1.06712,3030000.0,-1999.8,-0.06,11266.83,-0.2,424.2,0.01,1999.8,0.06


In [21]:
import sqlite3

In [22]:
# Open the same SQLite file with the standard-library sqlite3 driver.
sconn = sqlite3.connect('../demo/sqlite_database.db')
sconn

<sqlite3.Connection at 0x18dadb266b0>

In [23]:
# Fetch every row of `tbl` through sqlite3, independent of the DuckDB connection.
sconn.execute('SELECT * FROM tbl;').fetchall()

[(23,
  'Exit Long',
  'Open',
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None),
 (23,
  'Entry Long',
  "{'take_profit': 1.0703532, 'stop_loss': 1.06848, 'lots': 23.81}",
  '2023-11-13 16:30',
  1.06932,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None),
 (22,
  'Exit Long',
  'exit',
  '2023-11-10 22:50',
  1.06728,
  1852000,
  -2000.16,
  -0.1,
  9729.93,
  -0.2,
  203.72,
  0.01,
  2000.16,
  0.1),
 (22,
  'Entry Long',
  "{'take_profit': 1.0696884, 'stop_loss': 1.06728, 'lots': 18.52}",
  '2023-11-10 22:45',
  1.06836,
  1852000,
  -2000.16,
  -0.1,
  9729.93,
  -0.2,
  203.72,
  0.01,
  2000.16,
  0.1),
 (21,
  'Exit Long',
  'exit',
  '2023-11-10 17:45',
  1.06769,
  3390000,
  2474.7,
  0.07,
  11730.09,
  0.25,
  2474.7,
  0.07,
  271.2,
  0.01),
 (21,
  'Entry Long',
  "{'take_profit': 1.0676857, 'stop_loss': 1.06637, 'lots': 33.9}",
  '2023-11-10 17:40',
  1.06696,
  3390000,
  2474.7,
  0.07,
  11730.09,
  0.25,


In [31]:
# Point LangChain at the SQLite file instead of DuckDB, with 3 sample rows in table info.
sdb = SQLDatabase.from_uri('sqlite:///../demo/sqlite_database.db', sample_rows_in_table_info=3)
sdb

<langchain.utilities.sql_database.SQLDatabase at 0x18dacc553d0>

In [32]:
# With the SQLite-backed database, LangChain lists 'tbl' in the recorded output.
sdb.get_usable_table_names()

['tbl']

In [33]:
# print() so the multi-line schema and sample rows render with real line breaks.
print(sdb.get_table_info())


CREATE TABLE tbl (
	"Trade #" BIGINT, 
	"Type" VARCHAR, 
	"Signal" VARCHAR, 
	"Date/Time" VARCHAR, 
	"Price USD" FLOAT, 
	"Contracts" BIGINT, 
	"Profit USD" FLOAT, 
	"Profit %" FLOAT, 
	"Cum. Profit USD" FLOAT, 
	"Cum. Profit %" FLOAT, 
	"Run-up USD" FLOAT, 
	"Run-up %" FLOAT, 
	"Drawdown USD" FLOAT, 
	"Drawdown %" FLOAT
)

/*
3 rows from tbl table:
Trade #	Type	Signal	Date/Time	Price USD	Contracts	Profit USD	Profit %	Cum. Profit USD	Cum. Profit %	Run-up USD	Run-up %	Drawdown USD	Drawdown %
23	Exit Long	Open	None	None	None	None	None	None	None	None	None	None	None
23	Entry Long	{'take_profit': 1.0703532, 'stop_loss': 1.06848, 'lots': 23.81}	2023-11-13 16:30	1.06932	None	None	None	None	None	None	None	None	None
22	Exit Long	exit	2023-11-10 22:50	1.06728	1852000	-2000.16	-0.1	9729.93	-0.2	203.72	0.01	2000.16	0.1
*/


In [27]:
# Optional cleanup (uncomment to run): close the sqlite3 connection and delete the
# SQLite file created by the DuckDB => SQLite workaround.
# import os
# sconn.close()
# os.remove('../demo/sqlite_database.db')

### Workaround: Pandas => SQLAlchemy engine => SQLDatabase

- [pandas.DataFrame.to_sql — pandas 2.1.3 documentation](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_sql.html)

In [37]:
import pandas as pd

In [35]:
# SQLAlchemy engine for a separate SQLite file used by the pandas workaround.
sqlite_engine = create_engine('sqlite:///../demo/test.db')
sqlite_engine

Engine(sqlite:///../demo/test.db)

In [38]:
# Load the demo CSV with pandas. Note: this rebinds `df`, which earlier in the notebook
# held the result of conn.read_csv on the DuckDB connection.
df = pd.read_csv('../demo/demo.csv')
df

Unnamed: 0,Trade #,Type,Signal,Date/Time,Price USD,Contracts,Profit USD,Profit %,Cum. Profit USD,Cum. Profit %,Run-up USD,Run-up %,Drawdown USD,Drawdown %
0,23,Exit Long,Open,,,,,,,,,,,
1,23,Entry Long,"{'take_profit': 1.0703532, 'stop_loss': 1.0684...",2023-11-13 16:30,1.06932,,,,,,,,,
2,22,Exit Long,exit,2023-11-10 22:50,1.06728,1852000.0,-2000.16,-0.1,9729.93,-0.2,203.72,0.01,2000.16,0.1
3,22,Entry Long,"{'take_profit': 1.0696884, 'stop_loss': 1.0672...",2023-11-10 22:45,1.06836,1852000.0,-2000.16,-0.1,9729.93,-0.2,203.72,0.01,2000.16,0.1
4,21,Exit Long,exit,2023-11-10 17:45,1.06769,3390000.0,2474.7,0.07,11730.09,0.25,2474.7,0.07,271.2,0.01
5,21,Entry Long,"{'take_profit': 1.0676857, 'stop_loss': 1.0663...",2023-11-10 17:40,1.06696,3390000.0,2474.7,0.07,11730.09,0.25,2474.7,0.07,271.2,0.01
6,20,Exit Short,exit,2023-11-08 22:30,1.06803,2312000.0,-2011.44,-0.08,9255.39,-0.2,138.72,0.01,2011.44,0.08
7,20,Entry Short,"{'take_profit': 1.06609605, 'stop_loss': 1.068...",2023-11-08 22:25,1.06716,2312000.0,-2011.44,-0.08,9255.39,-0.2,138.72,0.01,2011.44,0.08
8,19,Exit Long,exit,2023-11-08 21:25,1.06646,3030000.0,-1999.8,-0.06,11266.83,-0.2,424.2,0.01,1999.8,0.06
9,19,Entry Long,"{'take_profit': 1.0679318, 'stop_loss': 1.0664...",2023-11-08 21:15,1.06712,3030000.0,-1999.8,-0.06,11266.83,-0.2,424.2,0.01,1999.8,0.06


In [39]:
# Write the DataFrame to the SQLite engine as a table named 'table'.
# if_exists='replace' keeps the cell re-runnable: the default ('fail') raises ValueError
# once ../demo/test.db already contains the table from a previous run.
df.to_sql('table', sqlite_engine, if_exists='replace')

46

In [41]:
# Wrap the existing SQLAlchemy engine directly instead of building one from a URI.
sdb2 = SQLDatabase(sqlite_engine)
sdb2

<langchain.utilities.sql_database.SQLDatabase at 0x18db0b3ded0>

In [42]:
# The table written by pandas is visible to LangChain ('table' in the recorded output).
sdb2.get_usable_table_names()

['table']

In [45]:
# print() so the multi-line schema and sample rows render with real line breaks.
print(sdb2.get_table_info())


CREATE TABLE "table" (
	"index" BIGINT, 
	"Trade #" BIGINT, 
	"Type" TEXT, 
	"Signal" TEXT, 
	"Date/Time" TEXT, 
	"Price USD" FLOAT, 
	"Contracts" FLOAT, 
	"Profit USD" FLOAT, 
	"Profit %" FLOAT, 
	"Cum. Profit USD" FLOAT, 
	"Cum. Profit %" FLOAT, 
	"Run-up USD" FLOAT, 
	"Run-up %" FLOAT, 
	"Drawdown USD" FLOAT, 
	"Drawdown %" FLOAT
)

/*
3 rows from table table:
index	Trade #	Type	Signal	Date/Time	Price USD	Contracts	Profit USD	Profit %	Cum. Profit USD	Cum. Profit %	Run-up USD	Run-up %	Drawdown USD	Drawdown %
0	23	Exit Long	Open	None	None	None	None	None	None	None	None	None	None	None
1	23	Entry Long	{'take_profit': 1.0703532, 'stop_loss': 1.06848, 'lots': 23.81}	2023-11-13 16:30	1.06932	None	None	None	None	None	None	None	None	None
2	22	Exit Long	exit	2023-11-10 22:50	1.06728	1852000.0	-2000.16	-0.1	9729.93	-0.2	203.72	0.01	2000.16	0.1
*/
