Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .licenserc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ header:
- "docker/thirdparties/docker-compose/hive/scripts/create_tpch1_orc.hql"
- "docker/thirdparties/docker-compose/hive/scripts/create_tpch1_parquet.hql"
- "docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/"
- "docker/thirdparties/docker-compose/hive/scripts/data/**"
- "docker/thirdparties/docker-compose/iceberg/spark-defaults.conf.tpl"
- "conf/mysql_ssl_default_certificate/*"
- "conf/mysql_ssl_default_certificate/client_certificate/ca.pem"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
-- Hive fixture DDL for multi_catalog.datev2_csv.
-- Declares an external text table (LazySimpleSerDe / TextInputFormat) with
-- columns id (int) and day (date) stored under
-- /user/doris/suites/multi_catalog/datev2_csv.
-- IF NOT EXISTS is used on the table as well as the database so re-running
-- this script does not fail with a table-already-exists error.
-- NOTE(review): no field delimiter is declared, so the SerDe default applies.
-- Confirm the data files were written with that delimiter.
CREATE DATABASE IF NOT EXISTS multi_catalog;

USE multi_catalog;

CREATE EXTERNAL TABLE IF NOT EXISTS `datev2_csv`(
  `id` int,
  `day` date)
ROW FORMAT SERDE
  'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
STORED AS INPUTFORMAT
  'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT
  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION
  '/user/doris/suites/multi_catalog/datev2_csv'
TBLPROPERTIES (
  'transient_lastDdlTime'='1688118691');

-- NOTE(review): the table declares no partitions, so this repair presumably
-- has nothing to discover. Confirm it is still wanted.
MSCK REPAIR TABLE datev2_csv;

Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/bin/bash
# Loads the fixture stored next to this script into HDFS and registers it in Hive:
#   1. unpack data.tar.gz beside this script (replacing any previous data/),
#   2. upload the unpacked data/* into /user/doris/suites/multi_catalog/,
#   3. run create_table.hql with the hive CLI.
set -x

# Directory containing this script. BASH_SOURCE is used so the path is also
# correct when the script is sourced rather than executed.
CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"

## mkdir and put data to hdfs
# Stop if the archive cannot be unpacked: continuing would run the upload and
# the table creation without the fixture data. `return` handles the sourced
# case, `exit` the executed one.
if ! { cd "${CUR_DIR}" && rm -rf data/ && tar xzf data.tar.gz; }; then
    echo "failed to extract ${CUR_DIR}/data.tar.gz" >&2
    return 1 2>/dev/null || exit 1
fi
hadoop fs -mkdir -p /user/doris/suites/multi_catalog/
# -f overwrites files left by a previous run so the script can be re-run.
hadoop fs -put -f "${CUR_DIR}"/data/* /user/doris/suites/multi_catalog/

# create table
hive -f "${CUR_DIR}/create_table.hql"

Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
-- Hive fixture DDL for multi_catalog.datev2_orc.
-- Declares an external ORC table with columns id (int) and day (date) stored
-- under /user/doris/suites/multi_catalog/datev2_orc.
-- IF NOT EXISTS is used on the table as well as the database so re-running
-- this script does not fail with a table-already-exists error.
CREATE DATABASE IF NOT EXISTS multi_catalog;

USE multi_catalog;

CREATE EXTERNAL TABLE IF NOT EXISTS `datev2_orc`(
  `id` int,
  `day` date)
ROW FORMAT SERDE
  'org.apache.hadoop.hive.ql.io.orc.OrcSerde'
STORED AS INPUTFORMAT
  'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'
OUTPUTFORMAT
  'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
LOCATION
  '/user/doris/suites/multi_catalog/datev2_orc'
TBLPROPERTIES (
  'transient_lastDdlTime'='1688118707');

-- NOTE(review): the table declares no partitions, so this repair presumably
-- has nothing to discover. Confirm it is still wanted.
MSCK REPAIR TABLE datev2_orc;

Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/bin/bash
# Loads the fixture stored next to this script into HDFS and registers it in Hive:
#   1. unpack data.tar.gz beside this script (replacing any previous data/),
#   2. upload the unpacked data/* into /user/doris/suites/multi_catalog/,
#   3. run create_table.hql with the hive CLI.
set -x

# Directory containing this script. BASH_SOURCE is used so the path is also
# correct when the script is sourced rather than executed.
CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"

## mkdir and put data to hdfs
# Stop if the archive cannot be unpacked: continuing would run the upload and
# the table creation without the fixture data. `return` handles the sourced
# case, `exit` the executed one.
if ! { cd "${CUR_DIR}" && rm -rf data/ && tar xzf data.tar.gz; }; then
    echo "failed to extract ${CUR_DIR}/data.tar.gz" >&2
    return 1 2>/dev/null || exit 1
fi
hadoop fs -mkdir -p /user/doris/suites/multi_catalog/
# -f overwrites files left by a previous run so the script can be re-run.
hadoop fs -put -f "${CUR_DIR}"/data/* /user/doris/suites/multi_catalog/

# create table
hive -f "${CUR_DIR}/create_table.hql"

Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
-- Hive fixture DDL for multi_catalog.datev2_parquet.
-- Declares an external Parquet table with columns id (int) and day (date)
-- stored under /user/doris/suites/multi_catalog/datev2_parquet.
-- IF NOT EXISTS is used on the table as well as the database so re-running
-- this script does not fail with a table-already-exists error.
CREATE DATABASE IF NOT EXISTS multi_catalog;

USE multi_catalog;

CREATE EXTERNAL TABLE IF NOT EXISTS `datev2_parquet`(
  `id` int,
  `day` date)
ROW FORMAT SERDE
  'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS INPUTFORMAT
  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
OUTPUTFORMAT
  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION
  '/user/doris/suites/multi_catalog/datev2_parquet'
TBLPROPERTIES (
  'transient_lastDdlTime'='1688118725');

-- NOTE(review): the table declares no partitions, so this repair presumably
-- has nothing to discover. Confirm it is still wanted.
MSCK REPAIR TABLE datev2_parquet;

Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/bin/bash
# Loads the fixture stored next to this script into HDFS and registers it in Hive:
#   1. unpack data.tar.gz beside this script (replacing any previous data/),
#   2. upload the unpacked data/* into /user/doris/suites/multi_catalog/,
#   3. run create_table.hql with the hive CLI.
set -x

# Directory containing this script. BASH_SOURCE is used so the path is also
# correct when the script is sourced rather than executed.
CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"

## mkdir and put data to hdfs
# Stop if the archive cannot be unpacked: continuing would run the upload and
# the table creation without the fixture data. `return` handles the sourced
# case, `exit` the executed one.
if ! { cd "${CUR_DIR}" && rm -rf data/ && tar xzf data.tar.gz; }; then
    echo "failed to extract ${CUR_DIR}/data.tar.gz" >&2
    return 1 2>/dev/null || exit 1
fi
hadoop fs -mkdir -p /user/doris/suites/multi_catalog/
# -f overwrites files left by a previous run so the script can be re-run.
hadoop fs -put -f "${CUR_DIR}"/data/* /user/doris/suites/multi_catalog/

# create table
hive -f "${CUR_DIR}/create_table.hql"

Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
-- Hive fixture DDL for multi_catalog.hive_upper_case_orc.
-- Declares an ORC table with lower-case Hive columns id (int) and name (string)
-- stored under /user/doris/suites/multi_catalog/hive_upper_case_orc.
-- The spark.sql.sources.schema property records the same two fields with
-- upper-case names (ID, NAME).
-- IF NOT EXISTS is used on the table as well as the database so re-running
-- this script does not fail with a table-already-exists error.
-- NOTE(review): the table is created without EXTERNAL although an explicit
-- LOCATION is given. Confirm a managed table is intended.
CREATE DATABASE IF NOT EXISTS multi_catalog;
USE multi_catalog;

CREATE TABLE IF NOT EXISTS `hive_upper_case_orc`(
  `id` int,
  `name` string)
ROW FORMAT SERDE
  'org.apache.hadoop.hive.ql.io.orc.OrcSerde'
STORED AS INPUTFORMAT
  'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'
OUTPUTFORMAT
  'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
LOCATION
  '/user/doris/suites/multi_catalog/hive_upper_case_orc'
TBLPROPERTIES (
  'spark.sql.create.version'='3.2.1',
  'spark.sql.sources.schema'='{"type":"struct","fields":[{"name":"ID","type":"integer","nullable":true,"metadata":{}},{"name":"NAME","type":"string","nullable":true,"metadata":{}}]}',
  'transient_lastDdlTime'='1674189057');

-- NOTE(review): the table declares no partitions, so this repair presumably
-- has nothing to discover. Confirm it is still wanted.
MSCK REPAIR TABLE hive_upper_case_orc;

Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/bin/bash
# Loads the fixture stored next to this script into HDFS and registers it in Hive:
#   1. unpack data.tar.gz beside this script (replacing any previous data/),
#   2. upload the unpacked data/* into /user/doris/suites/multi_catalog/,
#   3. run create_table.hql with the hive CLI.
set -x

# Directory containing this script. BASH_SOURCE is used so the path is also
# correct when the script is sourced rather than executed.
CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"

## mkdir and put data to hdfs
# Stop if the archive cannot be unpacked: continuing would run the upload and
# the table creation without the fixture data. `return` handles the sourced
# case, `exit` the executed one.
if ! { cd "${CUR_DIR}" && rm -rf data/ && tar xzf data.tar.gz; }; then
    echo "failed to extract ${CUR_DIR}/data.tar.gz" >&2
    return 1 2>/dev/null || exit 1
fi
hadoop fs -mkdir -p /user/doris/suites/multi_catalog/
# -f overwrites files left by a previous run so the script can be re-run.
hadoop fs -put -f "${CUR_DIR}"/data/* /user/doris/suites/multi_catalog/

# create table
hive -f "${CUR_DIR}/create_table.hql"

Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
-- Hive fixture DDL for multi_catalog.hive_upper_case_parquet.
-- Declares a Parquet table with lower-case Hive columns id (int) and
-- name (string) stored under
-- /user/doris/suites/multi_catalog/hive_upper_case_parquet.
-- The spark.sql.sources.schema property records the same two fields with
-- upper-case names (ID, NAME).
-- IF NOT EXISTS is used on the table as well as the database so re-running
-- this script does not fail with a table-already-exists error.
-- NOTE(review): the table is created without EXTERNAL although an explicit
-- LOCATION is given. Confirm a managed table is intended.
CREATE DATABASE IF NOT EXISTS multi_catalog;
USE multi_catalog;

CREATE TABLE IF NOT EXISTS `hive_upper_case_parquet`(
  `id` int,
  `name` string)
ROW FORMAT SERDE
  'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS INPUTFORMAT
  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
OUTPUTFORMAT
  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION
  '/user/doris/suites/multi_catalog/hive_upper_case_parquet'
TBLPROPERTIES (
  'spark.sql.create.version'='3.2.1',
  'spark.sql.sources.schema'='{"type":"struct","fields":[{"name":"ID","type":"integer","nullable":true,"metadata":{}},{"name":"NAME","type":"string","nullable":true,"metadata":{}}]}',
  'transient_lastDdlTime'='1674189051');

-- NOTE(review): the table declares no partitions, so this repair presumably
-- has nothing to discover. Confirm it is still wanted.
MSCK REPAIR TABLE hive_upper_case_parquet;

Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/bin/bash
# Loads the fixture stored next to this script into HDFS and registers it in Hive:
#   1. unpack data.tar.gz beside this script (replacing any previous data/),
#   2. upload the unpacked data/* into /user/doris/suites/multi_catalog/,
#   3. run create_table.hql with the hive CLI.
set -x

# Directory containing this script. BASH_SOURCE is used so the path is also
# correct when the script is sourced rather than executed.
CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"

## mkdir and put data to hdfs
# Stop if the archive cannot be unpacked: continuing would run the upload and
# the table creation without the fixture data. `return` handles the sourced
# case, `exit` the executed one.
if ! { cd "${CUR_DIR}" && rm -rf data/ && tar xzf data.tar.gz; }; then
    echo "failed to extract ${CUR_DIR}/data.tar.gz" >&2
    return 1 2>/dev/null || exit 1
fi
hadoop fs -mkdir -p /user/doris/suites/multi_catalog/
# -f overwrites files left by a previous run so the script can be re-run.
hadoop fs -put -f "${CUR_DIR}"/data/* /user/doris/suites/multi_catalog/

# create table
hive -f "${CUR_DIR}/create_table.hql"

Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
-- Hive fixture DDL for multi_catalog.parquet_lz4_compression.
-- Declares a Parquet table with one column per listed Hive type, stored under
-- /user/doris/suites/multi_catalog/parquet_lz4_compression, with the table
-- property parquet.compression set to LZ4.
-- IF NOT EXISTS is used on the table as well as the database so re-running
-- this script does not fail with a table-already-exists error.
-- NOTE(review): the table is created without EXTERNAL although an explicit
-- LOCATION is given. Confirm a managed table is intended.
CREATE DATABASE IF NOT EXISTS multi_catalog;
USE multi_catalog;

CREATE TABLE IF NOT EXISTS `parquet_lz4_compression`(
  `col_int` int,
  `col_smallint` smallint,
  `col_tinyint` tinyint,
  `col_bigint` bigint,
  `col_float` float,
  `col_double` double,
  `col_boolean` boolean,
  `col_string` string,
  `col_char` char(10),
  `col_varchar` varchar(25),
  `col_date` date,
  `col_timestamp` timestamp,
  `col_decimal` decimal(10,2))
ROW FORMAT SERDE
  'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS INPUTFORMAT
  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
OUTPUTFORMAT
  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION
  '/user/doris/suites/multi_catalog/parquet_lz4_compression'
TBLPROPERTIES (
  'parquet.compression'='LZ4',
  'transient_lastDdlTime'='1700723950');

-- NOTE(review): the table declares no partitions, so this repair presumably
-- has nothing to discover. Confirm it is still wanted.
MSCK REPAIR TABLE parquet_lz4_compression;

Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/bin/bash
# Loads the fixture stored next to this script into HDFS and registers it in Hive:
#   1. unpack data.tar.gz beside this script (replacing any previous data/),
#   2. upload the unpacked data/* into /user/doris/suites/multi_catalog/,
#   3. run create_table.hql with the hive CLI.
set -x

# Directory containing this script. BASH_SOURCE is used so the path is also
# correct when the script is sourced rather than executed.
CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"

## mkdir and put data to hdfs
# Stop if the archive cannot be unpacked: continuing would run the upload and
# the table creation without the fixture data. `return` handles the sourced
# case, `exit` the executed one.
if ! { cd "${CUR_DIR}" && rm -rf data/ && tar xzf data.tar.gz; }; then
    echo "failed to extract ${CUR_DIR}/data.tar.gz" >&2
    return 1 2>/dev/null || exit 1
fi
hadoop fs -mkdir -p /user/doris/suites/multi_catalog/
# -f overwrites files left by a previous run so the script can be re-run.
hadoop fs -put -f "${CUR_DIR}"/data/* /user/doris/suites/multi_catalog/

# create table
hive -f "${CUR_DIR}/create_table.hql"

Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
-- Hive fixture DDL for multi_catalog.parquet_lzo_compression.
-- Declares a Parquet table with one column per listed Hive type, stored under
-- /user/doris/suites/multi_catalog/parquet_lzo_compression, with the table
-- property parquet.compression set to LZO.
-- IF NOT EXISTS is used on the table as well as the database so re-running
-- this script does not fail with a table-already-exists error.
-- NOTE(review): the table is created without EXTERNAL although an explicit
-- LOCATION is given. Confirm a managed table is intended.
CREATE DATABASE IF NOT EXISTS multi_catalog;
USE multi_catalog;

CREATE TABLE IF NOT EXISTS `parquet_lzo_compression`(
  `col_int` int,
  `col_smallint` smallint,
  `col_tinyint` tinyint,
  `col_bigint` bigint,
  `col_float` float,
  `col_double` double,
  `col_boolean` boolean,
  `col_string` string,
  `col_char` char(10),
  `col_varchar` varchar(25),
  `col_date` date,
  `col_timestamp` timestamp,
  `col_decimal` decimal(10,2))
ROW FORMAT SERDE
  'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS INPUTFORMAT
  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
OUTPUTFORMAT
  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION
  '/user/doris/suites/multi_catalog/parquet_lzo_compression'
TBLPROPERTIES (
  'parquet.compression'='LZO',
  'transient_lastDdlTime'='1701173147');

-- NOTE(review): the table declares no partitions, so this repair presumably
-- has nothing to discover. Confirm it is still wanted.
MSCK REPAIR TABLE parquet_lzo_compression;

Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/bin/bash
# Loads the fixture stored next to this script into HDFS and registers it in Hive:
#   1. unpack data.tar.gz beside this script (replacing any previous data/),
#   2. upload the unpacked data/* into /user/doris/suites/multi_catalog/,
#   3. run create_table.hql with the hive CLI.
set -x

# Directory containing this script. BASH_SOURCE is used so the path is also
# correct when the script is sourced rather than executed.
CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"

## mkdir and put data to hdfs
# Stop if the archive cannot be unpacked: continuing would run the upload and
# the table creation without the fixture data. `return` handles the sourced
# case, `exit` the executed one.
if ! { cd "${CUR_DIR}" && rm -rf data/ && tar xzf data.tar.gz; }; then
    echo "failed to extract ${CUR_DIR}/data.tar.gz" >&2
    return 1 2>/dev/null || exit 1
fi
hadoop fs -mkdir -p /user/doris/suites/multi_catalog/
# -f overwrites files left by a previous run so the script can be re-run.
hadoop fs -put -f "${CUR_DIR}"/data/* /user/doris/suites/multi_catalog/

# create table
hive -f "${CUR_DIR}/create_table.hql"

Loading