# DuckDB

In [1]:
import duckdb
import pandas as pd

In [2]:
# Load the JupySQL extension that provides the %sql / %%sql / %sqlplot magics.
%load_ext sql
# No database path is passed, so DuckDB opens its default database
# (in-memory per the DuckDB docs; nothing is persisted between kernel restarts).
conn = duckdb.connect()
# Hand the existing connection object to JupySQL and register it under the alias "duckdb".
%sql conn --alias duckdb

In [3]:
# Configure JupySQL so results are returned as pandas DataFrames and the
# text printed to the notebook alongside each query is kept to a minimum.
# autopandas: return query results as pandas DataFrames.
%config SqlMagic.autopandas = True
# feedback: switch off the status messages printed after statements run.
%config SqlMagic.feedback = False
# displaycon: do not print the active connection with every query.
%config SqlMagic.displaycon = False

## Most Basic Usage

In [4]:
# Line magic: run a single SQL statement and display its result inline.
%sql SELECT 'Off and flying!' AS a_duckdb_column

Unnamed: 0,a_duckdb_column
0,Off and flying!


In [5]:
# `res <<` stores the query result in the Python variable `res` instead of displaying it.
%sql res << SELECT 'Off and flying!' AS a_duckdb_column

In [6]:
# Display the result that the previous query captured in `res`.
res

Unnamed: 0,a_duckdb_column
0,Off and flying!


In [7]:
# Check the Python type of the captured result (a pandas DataFrame with autopandas on).
type(res)

pandas.core.frame.DataFrame

## Load Existing Data

In [8]:
%%sql
CREATE OR REPLACE TABLE csv_tbl AS /* OR REPLACE lets this cell be re-run without a table-already-exists error */
SELECT *
FROM read_csv('../demo/demo.csv', AUTO_DETECT=True)

Unnamed: 0,Count
0,46


In [9]:
%%sql
SELECT * /* the CSV file is queried directly by its path */
FROM '../demo/demo.csv';

Unnamed: 0,Trade #,Type,Signal,Date/Time,Price USD,Contracts,Profit USD,Profit %,Cum. Profit USD,Cum. Profit %,Run-up USD,Run-up %,Drawdown USD,Drawdown %
0,23,Exit Long,Open,,,,,,,,,,,
1,23,Entry Long,"{'take_profit': 1.0703532, 'stop_loss': 1.0684...",2023-11-13 16:30,1.06932,,,,,,,,,
2,22,Exit Long,exit,2023-11-10 22:50,1.06728,1852000.0,-2000.16,-0.1,9729.93,-0.2,203.72,0.01,2000.16,0.1
3,22,Entry Long,"{'take_profit': 1.0696884, 'stop_loss': 1.0672...",2023-11-10 22:45,1.06836,1852000.0,-2000.16,-0.1,9729.93,-0.2,203.72,0.01,2000.16,0.1
4,21,Exit Long,exit,2023-11-10 17:45,1.06769,3390000.0,2474.7,0.07,11730.09,0.25,2474.7,0.07,271.2,0.01
5,21,Entry Long,"{'take_profit': 1.0676857, 'stop_loss': 1.0663...",2023-11-10 17:40,1.06696,3390000.0,2474.7,0.07,11730.09,0.25,2474.7,0.07,271.2,0.01
6,20,Exit Short,exit,2023-11-08 22:30,1.06803,2312000.0,-2011.44,-0.08,9255.39,-0.2,138.72,0.01,2011.44,0.08
7,20,Entry Short,"{'take_profit': 1.06609605, 'stop_loss': 1.068...",2023-11-08 22:25,1.06716,2312000.0,-2011.44,-0.08,9255.39,-0.2,138.72,0.01,2011.44,0.08
8,19,Exit Long,exit,2023-11-08 21:25,1.06646,3030000.0,-1999.8,-0.06,11266.83,-0.2,424.2,0.01,1999.8,0.06
9,19,Entry Long,"{'take_profit': 1.0679318, 'stop_loss': 1.0664...",2023-11-08 21:15,1.06712,3030000.0,-1999.8,-0.06,11266.83,-0.2,424.2,0.01,1999.8,0.06


In [10]:
%%sql
SELECT *
FROM csv_tbl /* preview only the first three rows */
LIMIT 3;

Unnamed: 0,Trade #,Type,Signal,Date/Time,Price USD,Contracts,Profit USD,Profit %,Cum. Profit USD,Cum. Profit %,Run-up USD,Run-up %,Drawdown USD,Drawdown %
0,23,Exit Long,Open,,,,,,,,,,,
1,23,Entry Long,"{'take_profit': 1.0703532, 'stop_loss': 1.0684...",2023-11-13 16:30,1.06932,,,,,,,,,
2,22,Exit Long,exit,2023-11-10 22:50,1.06728,1852000.0,-2000.16,-0.1,9729.93,-0.2,203.72,0.01,2000.16,0.1


In [None]:
%%sql
DESCRIBE csv_tbl /* list the columns of the table and their types */;

## Plotting

- [Plotting — JupySQL documentation](https://jupysql.ploomber.io/en/latest/plot.html)

In [None]:
# Box plot of the "Price USD" column of csv_tbl.
%sqlplot boxplot --table csv_tbl --column 'Price USD'

In [None]:
%%sql --save exit_trade --no-execute
SELECT *
FROM csv_tbl /* stored as a named snippet for later cells, nothing is run here */
WHERE Signal = 'exit'

In [None]:
# Histogram of "Drawdown USD" over the saved exit_trade snippet, using 10 bins.
%sqlplot histogram --table exit_trade --column "Drawdown USD" --bins 10

In [None]:
ax = %sqlplot histogram --table exit_trade --column "Run-up USD" --bins 50
ax.grid()
ax.set_title("Run-up Histogram")
_ = ax.set_xlabel("USD")

## Casting

### Datetime Related

- [Date Part Functions - DuckDB](https://duckdb.org/docs/sql/functions/datepart)
- [Time Functions - DuckDB](https://duckdb.org/docs/sql/functions/time)
- [Timestamp Functions - DuckDB](https://duckdb.org/docs/sql/functions/timestamp.html)
- [Date Format Functions - DuckDB](https://duckdb.org/docs/sql/functions/dateformat.html)

Type

- [Time Types - DuckDB](https://duckdb.org/docs/sql/data_types/time)
    - The TIME type should only be used in rare cases, where the date part of the timestamp can be disregarded. Most applications should use the TIMESTAMP types to represent their timestamps.
- [Timestamp Types - DuckDB](https://duckdb.org/docs/sql/data_types/timestamp.html)

[**Casting - DuckDB**](https://duckdb.org/docs/sql/expressions/cast.html)

In [None]:
%%sql
FROM csv_tbl /* FROM-first form of a SELECT, as supported by DuckDB */
SELECT "Date/Time"

In [None]:
%%sql
SELECT
    strptime("Date/Time", '%Y-%m-%d %H:%M') AS datetime /* parse the text with the given format string */
FROM csv_tbl

In [None]:
%%sql
SELECT
    CAST(strptime("Date/Time", '%Y-%m-%d %H:%M') AS TIME) AS time /* parse first, then keep only the time of day */
FROM csv_tbl

## Modify Table (ALTER)

- [ALTER TABLE Statement - DuckDB](https://duckdb.org/docs/sql/statements/alter_table)

### Add Column

- [UPDATE Statement - DuckDB](https://duckdb.org/docs/sql/statements/update)

In [None]:
%%sql
ALTER TABLE csv_tbl
    /* TIMESTAMPTZ keeps the zone-aware value that AT TIME ZONE produces, whereas a plain
       TIMESTAMP column would cast it back to a naive timestamp.
       IF NOT EXISTS lets the cell be re-run after the column has been added. */
    ADD COLUMN IF NOT EXISTS time_with_timezone TIMESTAMPTZ;
UPDATE csv_tbl
    /* parse the text, then interpret the parsed value as local time in the named zone */
    SET time_with_timezone = strptime("Date/Time", '%Y-%m-%d %H:%M') AT TIME ZONE 'Asia/Taipei';

In [None]:
%%sql
SELECT *
FROM csv_tbl

### Remove (DROP) Column

In [None]:
%%sql
ALTER TABLE csv_tbl
    DROP COLUMN IF EXISTS time_with_timezone /* IF EXISTS keeps the cell safe to re-run or to run when the column was never added */;

## Group By

In [None]:
%%sql
SELECT
    Type,
    COUNT(*) AS Count /* number of rows for each Type value */
FROM csv_tbl
GROUP BY Type
ORDER BY Count DESC;