Skip to content

Latest commit

 

History

History
305 lines (240 loc) · 17.3 KB

20231202_01.md

File metadata and controls

305 lines (240 loc) · 17.3 KB

PostgreSQL, MySQL, DuckDB, DuckDB+mysqlscanner 1000万记录的简单分析SQL对比

作者

digoal

日期

2023-12-02

标签

PostgreSQL , DuckDB , duckdb mysqlscanner , MySQL , 分析SQL , 性能对比


背景

DuckDB发布了MySQL scanner, 支持在DuckDB内直接读写mysql table, 本文对比了PostgreSQL, MySQL, DuckDB, DuckDB+mysqlscanner 1000万记录的简单分析SQL. 结果当然是DuckDB完胜. 但是DuckDB+mysqlscanner给了MySQL一个不错的选择, 0成本加速MySQL分析.

本例使用以下docker images测试:

x86_64

# 拉取镜像, 第一次拉取一次即可. 或者需要的时候执行, 将更新到最新镜像版本.      
docker pull registry.cn-hangzhou.aliyuncs.com/digoal/opensource_database:pg14_with_exts      
      
# 启动容器      
docker run --platform linux/amd64 -d -it -P --cap-add=SYS_PTRACE --cap-add SYS_ADMIN --privileged=true --name pg --shm-size=1g registry.cn-hangzhou.aliyuncs.com/digoal/opensource_database:pg14_with_exts    
    
# 进入容器      
docker exec -ti pg bash       

arm64

# 拉取镜像, 第一次拉取一次即可. 或者需要的时候执行, 将更新到最新镜像版本.      
docker pull registry.cn-hangzhou.aliyuncs.com/digoal/opensource_database:pg14_with_exts_arm64      
      
# 启动容器      
docker run -d -it -P --cap-add=SYS_PTRACE --cap-add SYS_ADMIN --privileged=true --name pg --shm-size=1g registry.cn-hangzhou.aliyuncs.com/digoal/opensource_database:pg14_with_exts_arm64    
    
# 进入容器      
docker exec -ti pg bash      

例子

(本步骤可以忽略, 以上x86的docker image已经集成mysql8) 首先在容器中部署mysql8最新版本, 下次把mysql直接打包到容器中.

https://dev.mysql.com/downloads/repo/apt/

cd /tmp    
    
wget https://repo.mysql.com//mysql-apt-config_0.8.28-1_all.deb    
dpkg -i mysql-apt-config_0.8.28-1_all.deb    
apt-get update    
apt-get install -y mysql-server    
    
    
root@502e0ed09ea5:/tmp# mysql --version    
mysql  Ver 8.0.35 for Linux on x86_64 (MySQL Community Server - GPL)    

配置my.cnf

vi /etc/mysql/my.cnf    
    
# add by digoal    
[mysqld]    
secure_file_priv = /tmp    
socket = /tmp/mysql.sock    

启动MySQL

root@502e0ed09ea5:~# mysqld -u root -D    
mysqld will log errors to /var/log/mysql/error.log    
mysqld is running as pid 1721    

在PostgreSQL中测试1000万数据简单分析SQL, 21.4秒.

root@502e0ed09ea5:~# psql    
psql (14.10 (Debian 14.10-1.pgdg110+1))    
Type "help" for help.    
    
postgres=# \timing    
Timing is on.    
postgres=# create unlogged table tbl (id int, info text, ts timestamp, c1 int);    
CREATE TABLE    
Time: 51.401 ms    
postgres=# insert into tbl select id,md5(random()::text) as info,clock_timestamp()::timestamp(0) as ts, (random()*10000)::int as c1 from generate_series(1,10000000) id;    
INSERT 0 10000000    
Time: 15152.684 ms (00:15.153)    
postgres=# show max_parallel_workers_per_gather ;    
 max_parallel_workers_per_gather     
---------------------------------    
 0    
(1 row)    
    
Time: 0.506 ms    
postgres=# select count(*), count(distinct id), count(distinct info), count(distinct ts), count(distinct c1) from tbl;    
    
  count   |  count   |  count   | count | count     
----------+----------+----------+-------+-------    
 10000000 | 10000000 | 10000000 |    15 | 10001    
(1 row)    
Time: 21414.875 ms (00:21.415)    

使用PostgreSQL导出csv, 以便写入MySQL

psql -c "copy (select id,md5(random()::text) as info,clock_timestamp()::timestamp(0) as ts, (random()*10000)::int as c1 from generate_series(1,10000000) id) to '/tmp/1.csv' with (format csv, header on)"    
    
COPY 10000000    

将数据导入MySQL, 直接使用mysql 1000万记录简单分析SQL 61秒.

mysql -S /tmp/mysql.sock    
    
mysql>  use  mysql    
mysql> create table tbl (id int(10), info varchar(100), ts timestamp, c1 int(10));    
mysql> load data infile '/tmp/1.csv' into table tbl fields terminated by ',' ignore 1 lines;    
    
mysql> CREATE   USER 'digoal'@'localhost' IDENTIFIED BY '123456';    
mysql> grant all on *.* to 'digoal'@'localhost';    
mysql> grant all on mysql to 'digoal'@'localhost';    
    
mysql> select count(*), count(distinct id), count(distinct info), count(distinct ts), count(distinct c1) from tbl;    
+----------+--------------------+----------------------+--------------------+--------------------+    
| count(*) | count(distinct id) | count(distinct info) | count(distinct ts) | count(distinct c1) |    
+----------+--------------------+----------------------+--------------------+--------------------+    
| 10000000 |           10000000 |             10000000 |                 18 |              10001 |    
+----------+--------------------+----------------------+--------------------+--------------------+    
1 row in set (1 min 0.97 sec)    
    
    
mysql> select * from tbl limit 10;    
+------+----------------------------------+---------------------+------+    
| id   | info                             | ts                  | c1   |    
+------+----------------------------------+---------------------+------+    
|    1 | 2a9758cafc8fb25c1d960e770b7a8463 | 2023-12-02 01:50:23 |  518 |    
|    2 | 02145c7be91aadae3bcc868fea3198d5 | 2023-12-02 01:50:23 | 9623 |    
|    3 | 98c1ea502f3f58c900ccc88886a09515 | 2023-12-02 01:50:23 | 2143 |    
|    4 | 413bb65f65606b3bf9b10ac03336f407 | 2023-12-02 01:50:23 | 3321 |    
|    5 | d8fd444cfbcd82c3e2b6e5622ee0e439 | 2023-12-02 01:50:23 | 3750 |    
|    6 | ac8709aacb3dca453e88d09dca58b83b | 2023-12-02 01:50:23 |  616 |    
|    7 | 7f55f7d483a7b86e68c1f9fff7ab68a5 | 2023-12-02 01:50:23 | 1976 |    
|    8 | b2ec10593c1c780c83c3801bf306d12f | 2023-12-02 01:50:23 | 5812 |    
|    9 | 1c31fc5c2bcdd6fa113b589e22fb0285 | 2023-12-02 01:50:23 | 7636 |    
|   10 | b05c40989b9c6fadfc84330937390052 | 2023-12-02 01:50:23 | 9923 |    
+------+----------------------------------+---------------------+------+    
10 rows in set (0.00 sec)    

使用DuckDB mysqlscanner, 1000万记录简单分析SQL 41秒.

su - postgres    
./duckdb    
    
    
install 'mysql';    
load 'mysql';    
.timer on    
    
ATTACH 'host=localhost   user=digoal password=123456 port=0 database=mysql' AS my (TYPE mysql_scanner, READ_ONLY) ;    
    
    
D  use  my;    
Run Time (s): real 0.002   user 0.000000 sys 0.000860    
    
D show tables;    
┌──────────────────────────────────────────────────────┐    
│                         name                         │    
│                       varchar                        │    
├──────────────────────────────────────────────────────┤    
│ columns_priv                                         │    
│ component                                            │    
│ db                                                   │    
│ default_roles                                        │    
│ engine_cost                                          │    
│ func                                                 │    
│ general_log                                          │    
│ global_grants                                        │    
│ gtid_executed                                        │    
│ help_category                                        │    
│ help_keyword                                         │    
│ help_relation                                        │    
│ help_topic                                           │    
│ innodb_index_stats                                   │    
│ innodb_table_stats                                   │    
│ ndb_binlog_index                                     │    
│ password_history                                     │    
│ plugin                                               │    
│ procs_priv                                           │    
│ proxies_priv                                         │    
│ replication_asynchronous_connection_failover         │    
│ replication_asynchronous_connection_failover_managed │    
│ replication_group_configuration_version              │    
│ replication_group_member_actions                     │    
│ role_edges                                           │    
│ server_cost                                          │    
│ servers                                              │    
│ slave_master_info                                    │    
│ slave_relay_log_info                                 │    
│ slave_worker_info                                    │    
│ slow_log                                             │    
│ tables_priv                                          │    
│ tbl                                                  │    
│ time_zone                                            │    
│ time_zone_leap_second                                │    
│ time_zone_name                                       │    
│ time_zone_transition                                 │    
│ time_zone_transition_type                            │    
│   r                                                 │    
├──────────────────────────────────────────────────────┤    
│                       39 rows                        │    
└──────────────────────────────────────────────────────┘    
Run Time (s): real 0.011   user 0.018249 sys 0.004298    
    
D select count(*) from tbl;    
100% ▕████████████████████████████████████████████████████████████▏     
┌──────────────┐    
│ count_star() │    
│    int64     │    
├──────────────┤    
│     10000000 │    
└──────────────┘    
Run Time (s): real 6.570   user 0.788166 sys 0.183505    
    
D select count(*), count(distinct id), count(distinct info), count(distinct ts), count(distinct c1) from tbl;    
100% ▕████████████████████████████████████████████████████████████▏     
┌──────────────┬────────────────────┬──────────────────────┬────────────────────┬────────────────────┐    
│ count_star() │ count(DISTINCT id) │ count(DISTINCT info) │ count(DISTINCT ts) │ count(DISTINCT c1) │    
│    int64     │       int64        │        int64         │       int64        │       int64        │    
├──────────────┼────────────────────┼──────────────────────┼────────────────────┼────────────────────┤    
│     10000000 │           10000000 │             10000000 │                 18 │              10001 │    
└──────────────┴────────────────────┴──────────────────────┴────────────────────┴────────────────────┘    
Run Time (s): real 40.862   user 17.440941 sys 1.945406    

将数据导入DuckDB inmemory db, 1000万记录简单分析SQL 1.4秒.

D show databases;    
┌───────────────┐    
│ database_name │    
│    varchar    │    
├───────────────┤    
│ memory        │    
│ my            │    
└───────────────┘    
Run Time (s): real 0.003   user 0.001269 sys 0.002332    
D   use memory;    
Run Time (s): real 0.002   user 0.000797 sys 0.000000    
D create table t1 as select * from my.tbl;    
100% ▕████████████████████████████████████████████████████████████▏     
Run Time (s): real 35.903   user 12.694567 sys 1.437991    
D DESCRIBE t1;    
┌─────────────┬──────────────────────────┬─────────┬─────────┬─────────┬───────┐    
│ column_name │       column_type        │  null   │   key   │ default │ extra │    
│   varchar   │         varchar          │ varchar │ varchar │ varchar │ int32 │    
├─────────────┼──────────────────────────┼─────────┼─────────┼─────────┼───────┤    
│ id          │ INTEGER                  │ YES     │         │         │       │    
│ info        │ VARCHAR                  │ YES     │         │         │       │    
│ ts          │ TIMESTAMP WITH TIME ZONE │ YES     │         │         │       │    
│ c1          │ INTEGER                  │ YES     │         │         │       │    
└─────────────┴──────────────────────────┴─────────┴─────────┴─────────┴───────┘    
Run Time (s): real 0.050   user 0.075262 sys 0.024439    
    
    
D select count(*), count(distinct id), count(distinct info), count(distinct ts), count(distinct c1) from t1;    
┌──────────────┬────────────────────┬──────────────────────┬────────────────────┬────────────────────┐    
│ count_star() │ count(DISTINCT id) │ count(DISTINCT info) │ count(DISTINCT ts) │ count(DISTINCT c1) │    
│    int64     │       int64        │        int64         │       int64        │       int64        │    
├──────────────┼────────────────────┼──────────────────────┼────────────────────┼────────────────────┤    
│     10000000 │           10000000 │             10000000 │                 18 │              10001 │    
└──────────────┴────────────────────┴──────────────────────┴────────────────────┴────────────────────┘    
Run Time (s): real 1.404   user 6.835519 sys 0.699056    

digoal's wechat