diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index 34a173cd984..113235daa9d 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -362,8 +362,6 @@ jobs: java-version: ${{ matrix.java }} distribution: 'temurin' cache: 'maven' - - name: free disk space - run: tools/github/free_disk_space.sh - name: run updated modules integration test (part-3) if: needs.changes.outputs.api == 'false' && needs.changes.outputs.it-modules != '' run: | diff --git a/config/plugin_config b/config/plugin_config index 42fc280a65a..76a7254b378 100644 --- a/config/plugin_config +++ b/config/plugin_config @@ -76,5 +76,4 @@ connector-tablestore connector-selectdb-cloud connector-hbase connector-amazonsqs -connector-easysearch --end-- \ No newline at end of file diff --git a/docs/en/connector-v2/sink/CosFile.md b/docs/en/connector-v2/sink/CosFile.md index 6c88e922947..f0d6517a055 100644 --- a/docs/en/connector-v2/sink/CosFile.md +++ b/docs/en/connector-v2/sink/CosFile.md @@ -29,7 +29,6 @@ By default, we use 2PC commit to ensure `exactly-once` - [x] orc - [x] json - [x] excel - - [x] xml ## Options @@ -58,9 +57,6 @@ By default, we use 2PC commit to ensure `exactly-once` | common-options | object | no | - | | | max_rows_in_memory | int | no | - | Only used when file_format is excel. | | sheet_name | string | no | Sheet${Random number} | Only used when file_format is excel. | -| xml_root_tag | string | no | RECORDS | Only used when file_format is xml. | -| xml_row_tag | string | no | RECORD | Only used when file_format is xml. | -| xml_use_attr_format | boolean | no | - | Only used when file_format is xml. | ### path [string] @@ -114,7 +110,7 @@ When the format in the `file_name_expression` parameter is `xxxx-${now}` , `file We supported as the following file types: -`text` `json` `csv` `orc` `parquet` `excel` `xml` +`text` `json` `csv` `orc` `parquet` `excel` Please note that, The final file name will end with the file_format's suffix, the suffix of the text file is `txt`. @@ -193,18 +189,6 @@ When File Format is Excel,The maximum number of data items that can be cached in Writer the sheet of the workbook -### xml_root_tag [string] - -Specifies the tag name of the root element within the XML file. - -### xml_row_tag [string] - -Specifies the tag name of the data rows within the XML file. - -### xml_use_attr_format [boolean] - -Specifies Whether to process data using the tag attribute format. - ## Example For text file format with `have_partition` and `custom_filename` and `sink_columns` diff --git a/docs/en/connector-v2/sink/Easysearch.md b/docs/en/connector-v2/sink/Easysearch.md deleted file mode 100644 index f474735082d..00000000000 --- a/docs/en/connector-v2/sink/Easysearch.md +++ /dev/null @@ -1,202 +0,0 @@ -# INFINI Easysearch - -## Support Those Engines - -> Spark
-> Flink
-> SeaTunnel Zeta
- -## Description - -A sink plugin which use send data to `INFINI Easysearch`. - -## Using Dependency - -> Depenndency [easysearch-client](https://central.sonatype.com/artifact/com.infinilabs/easysearch-client) -> - ## Key features - -- [ ] [exactly-once](../../concept/connector-v2-features.md) -- [x] [cdc](../../concept/connector-v2-features.md) - -:::tip - -Engine Supported - -* Supported all versions released by [INFINI Easysearch](https://www.infini.com/download/?product=easysearch). - -::: - -## Data Type Mapping - -| Easysearch Data Type | SeaTunnel Data Type | -|-----------------------------|----------------------| -| STRING
KEYWORD
TEXT | STRING | -| BOOLEAN | BOOLEAN | -| BYTE | BYTE | -| SHORT | SHORT | -| INTEGER | INT | -| LONG | LONG | -| FLOAT
HALF_FLOAT | FLOAT | -| DOUBLE | DOUBLE | -| Date | LOCAL_DATE_TIME_TYPE | - -## Sink Options - -| name | type | required | default value | -|-------------------------|---------|----------|---------------| -| hosts | array | yes | - | -| index | string | yes | - | -| primary_keys | list | no | | -| key_delimiter | string | no | `_` | -| username | string | no | | -| password | string | no | | -| max_retry_count | int | no | 3 | -| max_batch_size | int | no | 10 | -| tls_verify_certificate | boolean | no | true | -| tls_verify_hostnames | boolean | no | true | -| tls_keystore_path | string | no | - | -| tls_keystore_password | string | no | - | -| tls_truststore_path | string | no | - | -| tls_truststore_password | string | no | - | -| common-options | | no | - | - -### hosts [array] - -`INFINI Easysearch` cluster http address, the format is `host:port` , allowing multiple hosts to be specified. Such as `["host1:9200", "host2:9200"]`. - -### index [string] - -`INFINI Easysearch` `index` name.Index support contains variables of field name,such as `seatunnel_${age}`,and the field must appear at seatunnel row. -If not, we will treat it as a normal index. - -### primary_keys [list] - -Primary key fields used to generate the document `_id`, this is cdc required options. - -### key_delimiter [string] - -Delimiter for composite keys ("_" by default), e.g., "$" would result in document `_id` "KEY1$KEY2$KEY3". - -### username [string] - -security username - -### password [string] - -security password - -### max_retry_count [int] - -one bulk request max try size - -### max_batch_size [int] - -batch bulk doc max size - -### tls_verify_certificate [boolean] - -Enable certificates validation for HTTPS endpoints - -### tls_verify_hostname [boolean] - -Enable hostname validation for HTTPS endpoints - -### tls_keystore_path [string] - -The path to the PEM or JKS key store. This file must be readable by the operating system user running SeaTunnel. - -### tls_keystore_password [string] - -The key password for the key store specified - -### tls_truststore_path [string] - -The path to PEM or JKS trust store. This file must be readable by the operating system user running SeaTunnel. - -### tls_truststore_password [string] - -The key password for the trust store specified - -### common options - -Sink plugin common parameters, please refer to [Sink Common Options](common-options.md) for details - -## Examples - -Simple - -```bash -sink { - Easysearch { - hosts = ["localhost:9200"] - index = "seatunnel-${age}" - } -} -``` - -CDC(Change data capture) event - -```bash -sink { - Easysearch { - hosts = ["localhost:9200"] - index = "seatunnel-${age}" - - # cdc required options - primary_keys = ["key1", "key2", ...] 
- } -} -``` - -SSL (Disable certificates validation) - -```hocon -sink { - Easysearch { - hosts = ["https://localhost:9200"] - username = "admin" - password = "admin" - - tls_verify_certificate = false - } -} -``` - -SSL (Disable hostname validation) - -```hocon -sink { - Easysearch { - hosts = ["https://localhost:9200"] - username = "admin" - password = "admin" - - tls_verify_hostname = false - } -} -``` - -SSL (Enable certificates validation) - -```hocon -sink { - Easysearch { - hosts = ["https://localhost:9200"] - username = "admin" - password = "admin" - - tls_keystore_path = "${your Easysearch home}/config/certs/http.p12" - tls_keystore_password = "${your password}" - } -} -``` - -## Changelog - -### 2.3.4 2023-11-16 - -- Add Easysearch Sink Connector -- Support http/https protocol -- Support CDC write DELETE/UPDATE/INSERT events - diff --git a/docs/en/connector-v2/sink/FtpFile.md b/docs/en/connector-v2/sink/FtpFile.md index 9a3af0e744c..cdc3512485e 100644 --- a/docs/en/connector-v2/sink/FtpFile.md +++ b/docs/en/connector-v2/sink/FtpFile.md @@ -27,7 +27,6 @@ By default, we use 2PC commit to ensure `exactly-once` - [x] orc - [x] json - [x] excel - - [x] xml ## Options @@ -57,9 +56,6 @@ By default, we use 2PC commit to ensure `exactly-once` | common-options | object | no | - | | | max_rows_in_memory | int | no | - | Only used when file_format_type is excel. | | sheet_name | string | no | Sheet${Random number} | Only used when file_format_type is excel. | -| xml_root_tag | string | no | RECORDS | Only used when file_format is xml. | -| xml_row_tag | string | no | RECORD | Only used when file_format is xml. | -| xml_use_attr_format | boolean | no | - | Only used when file_format is xml. | ### host [string] @@ -119,7 +115,7 @@ When the format in the `file_name_expression` parameter is `xxxx-${now}` , `file We supported as the following file types: -`text` `json` `csv` `orc` `parquet` `excel` `xml` +`text` `json` `csv` `orc` `parquet` `excel` Please note that, The final file name will end with the file_format_type's suffix, the suffix of the text file is `txt`. @@ -198,18 +194,6 @@ When File Format is Excel,The maximum number of data items that can be cached in Writer the sheet of the workbook -### xml_root_tag [string] - -Specifies the tag name of the root element within the XML file. - -### xml_row_tag [string] - -Specifies the tag name of the data rows within the XML file. - -### xml_use_attr_format [boolean] - -Specifies Whether to process data using the tag attribute format. - ## Example For text file format simple config diff --git a/docs/en/connector-v2/sink/HdfsFile.md b/docs/en/connector-v2/sink/HdfsFile.md index 4df905ff439..535b4fc6cda 100644 --- a/docs/en/connector-v2/sink/HdfsFile.md +++ b/docs/en/connector-v2/sink/HdfsFile.md @@ -21,7 +21,6 @@ By default, we use 2PC commit to ensure `exactly-once` - [x] orc - [x] json - [x] excel - - [x] xml - [x] compress codec - [x] lzo @@ -46,7 +45,7 @@ Output data to hdfs file | custom_filename | boolean | no | false | Whether you need custom the filename | | file_name_expression | string | no | "${transactionId}" | Only used when `custom_filename` is `true`.`file_name_expression` describes the file expression which will be created into the `path`. 
We can add the variable `${now}` or `${uuid}` in the `file_name_expression`, like `test_${uuid}_${now}`,`${now}` represents the current time, and its format can be defined by specifying the option `filename_time_format`.Please note that, If `is_enable_transaction` is `true`, we will auto add `${transactionId}_` in the head of the file. | | filename_time_format | string | no | "yyyy.MM.dd" | Only used when `custom_filename` is `true`.When the format in the `file_name_expression` parameter is `xxxx-${now}` , `filename_time_format` can specify the time format of the path, and the default value is `yyyy.MM.dd` . The commonly used time formats are listed as follows:[y:Year,M:Month,d:Day of month,H:Hour in day (0-23),m:Minute in hour,s:Second in minute] | -| file_format_type | string | no | "csv" | We supported as the following file types:`text` `json` `csv` `orc` `parquet` `excel` `xml`.Please note that, The final file name will end with the file_format's suffix, the suffix of the text file is `txt`. | +| file_format_type | string | no | "csv" | We supported as the following file types:`text` `json` `csv` `orc` `parquet` `excel`.Please note that, The final file name will end with the file_format's suffix, the suffix of the text file is `txt`. | | field_delimiter | string | no | '\001' | Only used when file_format is text,The separator between columns in a row of data. Only needed by `text` file format. | | row_delimiter | string | no | "\n" | Only used when file_format is text,The separator between rows in a file. Only needed by `text` file format. | | have_partition | boolean | no | false | Whether you need processing partitions. | @@ -64,9 +63,6 @@ Output data to hdfs file | common-options | object | no | - | Sink plugin common parameters, please refer to [Sink Common Options](common-options.md) for details | | max_rows_in_memory | int | no | - | Only used when file_format is excel.When File Format is Excel,The maximum number of data items that can be cached in the memory. | | sheet_name | string | no | Sheet${Random number} | Only used when file_format is excel.Writer the sheet of the workbook | -| xml_root_tag | string | no | RECORDS | Only used when file_format is xml, specifies the tag name of the root element within the XML file. | -| xml_row_tag | string | no | RECORD | Only used when file_format is xml, specifies the tag name of the data rows within the XML file | -| xml_use_attr_format | boolean | no | - | Only used when file_format is xml, specifies Whether to process data using the tag attribute format. 
| ### Tips diff --git a/docs/en/connector-v2/sink/Hive.md b/docs/en/connector-v2/sink/Hive.md index eec92b46b1b..2ede5d07893 100644 --- a/docs/en/connector-v2/sink/Hive.md +++ b/docs/en/connector-v2/sink/Hive.md @@ -30,18 +30,17 @@ By default, we use 2PC commit to ensure `exactly-once` ## Options -| name | type | required | default value | -|-------------------------------|---------|----------|----------------| -| table_name | string | yes | - | -| metastore_uri | string | yes | - | -| compress_codec | string | no | none | -| hdfs_site_path | string | no | - | -| hive_site_path | string | no | - | -| krb5_path | string | no | /etc/krb5.conf | -| kerberos_principal | string | no | - | -| kerberos_keytab_path | string | no | - | -| abort_drop_partition_metadata | boolean | no | true | -| common-options | | no | - | +| name | type | required | default value | +|----------------------|--------|----------|----------------| +| table_name | string | yes | - | +| metastore_uri | string | yes | - | +| compress_codec | string | no | none | +| hdfs_site_path | string | no | - | +| hive_site_path | string | no | - | +| krb5_path | string | no | /etc/krb5.conf | +| kerberos_principal | string | no | - | +| kerberos_keytab_path | string | no | - | +| common-options | | no | - | ### table_name [string] @@ -71,10 +70,6 @@ The principal of kerberos The keytab path of kerberos -### abort_drop_partition_metadata [list] - -Flag to decide whether to drop partition metadata from Hive Metastore during an abort operation. Note: this only affects the metadata in the metastore, the data in the partition will always be deleted(data generated during the synchronization process). - ### common options Sink plugin common parameters, please refer to [Sink Common Options](common-options.md) for details diff --git a/docs/en/connector-v2/sink/LocalFile.md b/docs/en/connector-v2/sink/LocalFile.md index e16c81c3f3a..2f88f0fe720 100644 --- a/docs/en/connector-v2/sink/LocalFile.md +++ b/docs/en/connector-v2/sink/LocalFile.md @@ -27,7 +27,6 @@ By default, we use 2PC commit to ensure `exactly-once` - [x] orc - [x] json - [x] excel - - [x] xml ## Options @@ -52,9 +51,6 @@ By default, we use 2PC commit to ensure `exactly-once` | common-options | object | no | - | | | max_rows_in_memory | int | no | - | Only used when file_format_type is excel. | | sheet_name | string | no | Sheet${Random number} | Only used when file_format_type is excel. | -| xml_root_tag | string | no | RECORDS | Only used when file_format is xml. | -| xml_row_tag | string | no | RECORD | Only used when file_format is xml. | -| xml_use_attr_format | boolean | no | - | Only used when file_format is xml. | | enable_header_write | boolean | no | false | Only used when file_format_type is text,csv.
false:don't write header,true:write header. | ### path [string] @@ -93,7 +89,7 @@ When the format in the `file_name_expression` parameter is `xxxx-${now}` , `file We supported as the following file types: -`text` `json` `csv` `orc` `parquet` `excel` `xml` +`text` `json` `csv` `orc` `parquet` `excel` Please note that, The final file name will end with the file_format_type's suffix, the suffix of the text file is `txt`. @@ -172,18 +168,6 @@ When File Format is Excel,The maximum number of data items that can be cached in Writer the sheet of the workbook -### xml_root_tag [string] - -Specifies the tag name of the root element within the XML file. - -### xml_row_tag [string] - -Specifies the tag name of the data rows within the XML file. - -### xml_use_attr_format [boolean] - -Specifies Whether to process data using the tag attribute format. - ### enable_header_write [boolean] Only used when file_format_type is text,csv.false:don't write header,true:write header. diff --git a/docs/en/connector-v2/sink/OssFile.md b/docs/en/connector-v2/sink/OssFile.md index 4c85121c20c..7cbab4347de 100644 --- a/docs/en/connector-v2/sink/OssFile.md +++ b/docs/en/connector-v2/sink/OssFile.md @@ -32,7 +32,6 @@ By default, we use 2PC commit to ensure `exactly-once` - [x] orc - [x] json - [x] excel - - [x] xml ## Data Type Mapping @@ -109,9 +108,6 @@ If write to `csv`, `text` file type, All column will be string. | common-options | object | no | - | | | max_rows_in_memory | int | no | - | Only used when file_format_type is excel. | | sheet_name | string | no | Sheet${Random number} | Only used when file_format_type is excel. | -| xml_root_tag | string | no | RECORDS | Only used when file_format is xml. | -| xml_row_tag | string | no | RECORD | Only used when file_format is xml. | -| xml_use_attr_format | boolean | no | - | Only used when file_format is xml. | ### path [string] @@ -165,7 +161,7 @@ When the format in the `file_name_expression` parameter is `xxxx-${Now}` , `file We supported as the following file types: -`text` `json` `csv` `orc` `parquet` `excel` `xml` +`text` `json` `csv` `orc` `parquet` `excel` Please note that, The final file name will end with the file_format_type's suffix, the suffix of the text file is `txt`. @@ -244,18 +240,6 @@ When File Format is Excel,The maximum number of data items that can be cached in Writer the sheet of the workbook -### xml_root_tag [string] - -Specifies the tag name of the root element within the XML file. - -### xml_row_tag [string] - -Specifies the tag name of the data rows within the XML file. - -### xml_use_attr_format [boolean] - -Specifies Whether to process data using the tag attribute format. - ## How to Create an Oss Data Synchronization Jobs The following example demonstrates how to create a data synchronization job that reads data from Fake Source and writes it to the Oss: diff --git a/docs/en/connector-v2/sink/OssJindoFile.md b/docs/en/connector-v2/sink/OssJindoFile.md index 1a55c319704..40441ea83ec 100644 --- a/docs/en/connector-v2/sink/OssJindoFile.md +++ b/docs/en/connector-v2/sink/OssJindoFile.md @@ -33,7 +33,6 @@ By default, we use 2PC commit to ensure `exactly-once` - [x] orc - [x] json - [x] excel - - [x] xml ## Options @@ -62,9 +61,6 @@ By default, we use 2PC commit to ensure `exactly-once` | common-options | object | no | - | | | max_rows_in_memory | int | no | - | Only used when file_format_type is excel. | | sheet_name | string | no | Sheet${Random number} | Only used when file_format_type is excel. 
| -| xml_root_tag | string | no | RECORDS | Only used when file_format is xml. | -| xml_row_tag | string | no | RECORD | Only used when file_format is xml. | -| xml_use_attr_format | boolean | no | - | Only used when file_format is xml. | ### path [string] @@ -118,7 +114,7 @@ When the format in the `file_name_expression` parameter is `xxxx-${now}` , `file We supported as the following file types: -`text` `json` `csv` `orc` `parquet` `excel` `xml` +`text` `json` `csv` `orc` `parquet` `excel` Please note that, The final file name will end with the file_format_type's suffix, the suffix of the text file is `txt`. @@ -197,18 +193,6 @@ When File Format is Excel,The maximum number of data items that can be cached in Writer the sheet of the workbook -### xml_root_tag [string] - -Specifies the tag name of the root element within the XML file. - -### xml_row_tag [string] - -Specifies the tag name of the data rows within the XML file. - -### xml_use_attr_format [boolean] - -Specifies Whether to process data using the tag attribute format. - ## Example For text file format with `have_partition` and `custom_filename` and `sink_columns` diff --git a/docs/en/connector-v2/sink/S3File.md b/docs/en/connector-v2/sink/S3File.md index a3811ea34ac..84bca3cb80c 100644 --- a/docs/en/connector-v2/sink/S3File.md +++ b/docs/en/connector-v2/sink/S3File.md @@ -22,7 +22,6 @@ By default, we use 2PC commit to ensure `exactly-once` - [x] orc - [x] json - [x] excel - - [x] xml ## Description @@ -117,9 +116,6 @@ If write to `csv`, `text` file type, All column will be string. | common-options | object | no | - | | | max_rows_in_memory | int | no | - | Only used when file_format is excel. | | sheet_name | string | no | Sheet${Random number} | Only used when file_format is excel. | -| xml_root_tag | string | no | RECORDS | Only used when file_format is xml, specifies the tag name of the root element within the XML file. | -| xml_row_tag | string | no | RECORD | Only used when file_format is xml, specifies the tag name of the data rows within the XML file | -| xml_use_attr_format | boolean | no | - | Only used when file_format is xml, specifies Whether to process data using the tag attribute format. | | hadoop_s3_properties | map | no | | If you need to add a other option, you could add it here and refer to this [link](https://hadoop.apache.org/docs/stable/hadoop-aws/tools/hadoop-aws/index.html) | | schema_save_mode | Enum | no | CREATE_SCHEMA_WHEN_NOT_EXIST | Before turning on the synchronous task, do different treatment of the target path | | data_save_mode | Enum | no | APPEND_DATA | Before opening the synchronous task, the data file in the target path is differently processed | @@ -171,7 +167,7 @@ When the format in the `file_name_expression` parameter is `xxxx-${now}` , `file We supported as the following file types: -`text` `json` `csv` `orc` `parquet` `excel` `xml` +`text` `json` `csv` `orc` `parquet` `excel` Please note that, The final file name will end with the file_format_type's suffix, the suffix of the text file is `txt`. @@ -250,18 +246,6 @@ When File Format is Excel,The maximum number of data items that can be cached in Writer the sheet of the workbook -### xml_root_tag [string] - -Specifies the tag name of the root element within the XML file. - -### xml_row_tag [string] - -Specifies the tag name of the data rows within the XML file. - -### xml_use_attr_format [boolean] - -Specifies Whether to process data using the tag attribute format. 
- ### schema_save_mode[Enum] Before turning on the synchronous task, do different treatment of the target path. diff --git a/docs/en/connector-v2/sink/SftpFile.md b/docs/en/connector-v2/sink/SftpFile.md index 448d1dd050d..7bb3f12559b 100644 --- a/docs/en/connector-v2/sink/SftpFile.md +++ b/docs/en/connector-v2/sink/SftpFile.md @@ -27,7 +27,6 @@ By default, we use 2PC commit to ensure `exactly-once` - [x] orc - [x] json - [x] excel - - [x] xml ## Options @@ -56,9 +55,6 @@ By default, we use 2PC commit to ensure `exactly-once` | common-options | object | no | - | | | max_rows_in_memory | int | no | - | Only used when file_format_type is excel. | | sheet_name | string | no | Sheet${Random number} | Only used when file_format_type is excel. | -| xml_root_tag | string | no | RECORDS | Only used when file_format is xml. | -| xml_row_tag | string | no | RECORD | Only used when file_format is xml. | -| xml_use_attr_format | boolean | no | - | Only used when file_format is xml. | ### host [string] @@ -112,7 +108,7 @@ When the format in the `file_name_expression` parameter is `xxxx-${now}` , `file We supported as the following file types: -`text` `json` `csv` `orc` `parquet` `excel` `xml` +`text` `json` `csv` `orc` `parquet` `excel` Please note that, The final file name will end with the file_format_type's suffix, the suffix of the text file is `txt`. @@ -191,18 +187,6 @@ When File Format is Excel,The maximum number of data items that can be cached in Writer the sheet of the workbook -### xml_root_tag [string] - -Specifies the tag name of the root element within the XML file. - -### xml_row_tag [string] - -Specifies the tag name of the data rows within the XML file. - -### xml_use_attr_format [boolean] - -Specifies Whether to process data using the tag attribute format. - ## Example For text file format with `have_partition` and `custom_filename` and `sink_columns` diff --git a/docs/en/connector-v2/source/CosFile.md b/docs/en/connector-v2/source/CosFile.md index 7f0d6020800..406c86fab5b 100644 --- a/docs/en/connector-v2/source/CosFile.md +++ b/docs/en/connector-v2/source/CosFile.md @@ -26,7 +26,6 @@ Read all the data in a split in a pollNext call. What splits are read will be sa - [x] orc - [x] json - [x] excel - - [x] xml ## Description @@ -61,8 +60,6 @@ To use this connector you need put hadoop-cos-{hadoop.version}-{version}.jar and | time_format | string | no | HH:mm:ss | | schema | config | no | - | | sheet_name | string | no | - | -| xml_row_tag | string | no | - | -| xml_use_attr_format | boolean | no | - | | file_filter_pattern | string | no | - | | compress_codec | string | no | none | | common-options | | no | - | @@ -75,7 +72,7 @@ The source file path. File type, supported as the following file types: -`text` `csv` `parquet` `orc` `json` `excel` `xml` +`text` `csv` `parquet` `orc` `json` `excel` If you assign file type to `json`, you should also assign schema option to tell connector how to parse data to the row you want. @@ -239,7 +236,7 @@ default `HH:mm:ss` ### schema [config] -Only need to be configured when the file_format_type are text, json, excel, xml or csv ( Or other format we can't read the schema from metadata). +Only need to be configured when the file_format_type are text, json, excel or csv ( Or other format we can't read the schema from metadata). #### fields [Config] @@ -251,18 +248,6 @@ Only need to be configured when file_format is excel. Reader the sheet of the workbook. -### xml_row_tag [string] - -Only need to be configured when file_format is xml. 
- -Specifies the tag name of the data rows within the XML file. - -### xml_use_attr_format [boolean] - -Only need to be configured when file_format is xml. - -Specifies Whether to process data using the tag attribute format. - ### file_filter_pattern [string] Filter pattern, which used for filtering files. diff --git a/docs/en/connector-v2/source/Easysearch.md b/docs/en/connector-v2/source/Easysearch.md deleted file mode 100644 index d94609c7723..00000000000 --- a/docs/en/connector-v2/source/Easysearch.md +++ /dev/null @@ -1,209 +0,0 @@ -# Easysearch - -> Easysearch source connector - -## Support Those Engines - -> Spark
-> Flink
-> SeaTunnel Zeta
- -## Description - -Used to read data from INFINI Easysearch. - -## Using Dependency - -> Depenndency [easysearch-client](https://central.sonatype.com/artifact/com.infinilabs/easysearch-client) - -## Key features - -- [x] [batch](../../concept/connector-v2-features.md) -- [ ] [stream](../../concept/connector-v2-features.md) -- [ ] [exactly-once](../../concept/connector-v2-features.md) -- [x] [column projection](../../concept/connector-v2-features.md) -- [ ] [parallelism](../../concept/connector-v2-features.md) -- [ ] [support user-defined split](../../concept/connector-v2-features.md) - -:::tip - -Engine Supported - -* Supported all versions released by [INFINI Easysearch](https://www.infini.com/download/?product=easysearch). - -::: - -## Data Type Mapping - -| Easysearch Data Type | SeaTunnel Data Type | -|-----------------------------|----------------------| -| STRING
KEYWORD
TEXT | STRING | -| BOOLEAN | BOOLEAN | -| BYTE | BYTE | -| SHORT | SHORT | -| INTEGER | INT | -| LONG | LONG | -| FLOAT
HALF_FLOAT | FLOAT | -| DOUBLE | DOUBLE | -| Date | LOCAL_DATE_TIME_TYPE | - -### hosts [array] - -Easysearch cluster http address, the format is `host:port`, allowing multiple hosts to be specified. Such as `["host1:9200", "host2:9200"]`. - -### username [string] - -security username. - -### password [string] - -security password. - -### index [string] - -Easysearch index name, support * fuzzy matching. - -### source [array] - -The fields of index. -You can get the document id by specifying the field `_id`.If sink _id to other index,you need specify an alias for _id due to the Easysearch limit. -If you don't config source, you must config `schema`. - -### query [json] - -Easysearch DSL. -You can control the range of data read. - -### scroll_time [String] - -Amount of time Easysearch will keep the search context alive for scroll requests. - -### scroll_size [int] - -Maximum number of hits to be returned with each Easysearch scroll request. - -### schema - -The structure of the data, including field names and field types. -If you don't config schema, you must config `source`. - -### tls_verify_certificate [boolean] - -Enable certificates validation for HTTPS endpoints - -### tls_verify_hostname [boolean] - -Enable hostname validation for HTTPS endpoints - -### tls_keystore_path [string] - -The path to the PEM or JKS key store. This file must be readable by the operating system user running SeaTunnel. - -### tls_keystore_password [string] - -The key password for the key store specified - -### tls_truststore_path [string] - -The path to PEM or JKS trust store. This file must be readable by the operating system user running SeaTunnel. - -### tls_truststore_password [string] - -The key password for the trust store specified - -### common options - -Source plugin common parameters, please refer to [Source Common Options](common-options.md) for details - -## Examples - -simple - -```hocon -Easysearch { - hosts = ["localhost:9200"] - index = "seatunnel-*" - source = ["_id","name","age"] - query = {"range":{"firstPacket":{"gte":1700407367588,"lte":1700407367588}}} -} -``` - -complex - -```hocon -Easysearch { - hosts = ["Easysearch:9200"] - index = "st_index" - schema = { - fields { - c_map = "map" - c_array = "array" - c_string = string - c_boolean = boolean - c_tinyint = tinyint - c_smallint = smallint - c_int = int - c_bigint = bigint - c_float = float - c_double = double - c_decimal = "decimal(2, 1)" - c_bytes = bytes - c_date = date - c_timestamp = timestamp - } - } - query = {"range":{"firstPacket":{"gte":1700407367588,"lte":1700407367588}}} -} -``` - -SSL (Disable certificates validation) - -```hocon -source { - Easysearch { - hosts = ["https://localhost:9200"] - username = "admin" - password = "admin" - - tls_verify_certificate = false - } -} -``` - -SSL (Disable hostname validation) - -```hocon -source { - Easysearch { - hosts = ["https://localhost:9200"] - username = "admin" - password = "admin" - - tls_verify_hostname = false - } -} -``` - -SSL (Enable certificates validation) - -```hocon -source { - Easysearch { - hosts = ["https://localhost:9200"] - username = "admin" - password = "admin" - - tls_keystore_path = "${your Easysearch home}/config/certs/http.p12" - tls_keystore_password = "${your password}" - } -} -``` - -## Changelog - -### next version - -- Add Easysearch Source Connector -- Support https protocol -- Support DSL - diff --git a/docs/en/connector-v2/source/FtpFile.md b/docs/en/connector-v2/source/FtpFile.md index e103c14a9ae..ee231bb087b 100644 --- 
a/docs/en/connector-v2/source/FtpFile.md +++ b/docs/en/connector-v2/source/FtpFile.md @@ -21,7 +21,6 @@ - [x] csv - [x] json - [x] excel - - [x] xml ## Description @@ -55,8 +54,6 @@ If you use SeaTunnel Engine, It automatically integrated the hadoop jar when you | skip_header_row_number | long | no | 0 | | schema | config | no | - | | sheet_name | string | no | - | -| xml_row_tag | string | no | - | -| xml_use_attr_format | boolean | no | - | | file_filter_pattern | string | no | - | | compress_codec | string | no | none | | common-options | | no | - | @@ -85,7 +82,7 @@ The source file path. File type, supported as the following file types: -`text` `csv` `parquet` `orc` `json` `excel` `xml` +`text` `csv` `parquet` `orc` `json` `excel` If you assign file type to `json` , you should also assign schema option to tell connector how to parse data to the row you want. @@ -224,7 +221,7 @@ then SeaTunnel will skip the first 2 lines from source files ### schema [config] -Only need to be configured when the file_format_type are text, json, excel, xml or csv ( Or other format we can't read the schema from metadata). +Only need to be configured when the file_format_type are text, json, excel or csv ( Or other format we can't read the schema from metadata). The schema information of upstream data. @@ -236,18 +233,6 @@ The read column list of the data source, user can use it to implement field proj Reader the sheet of the workbook,Only used when file_format_type is excel. -### xml_row_tag [string] - -Only need to be configured when file_format is xml. - -Specifies the tag name of the data rows within the XML file. - -### xml_use_attr_format [boolean] - -Only need to be configured when file_format is xml. - -Specifies Whether to process data using the tag attribute format. - ### compress_codec [string] The compress codec of files and the details that supported as the following shown: diff --git a/docs/en/connector-v2/source/HdfsFile.md b/docs/en/connector-v2/source/HdfsFile.md index 5534dcd9653..ffcb0b68678 100644 --- a/docs/en/connector-v2/source/HdfsFile.md +++ b/docs/en/connector-v2/source/HdfsFile.md @@ -26,7 +26,6 @@ Read all the data in a split in a pollNext call. What splits are read will be sa - [x] orc - [x] json - [x] excel - - [x] xml ## Description @@ -43,9 +42,9 @@ Read data from hdfs file system. | Name | Type | Required | Default | Description | |---------------------------|---------|----------|---------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | path | string | yes | - | The source file path. | -| file_format_type | string | yes | - | We supported as the following file types:`text` `json` `csv` `orc` `parquet` `excel` `xml`.Please note that, The final file name will end with the file_format's suffix, the suffix of the text file is `txt`. | +| file_format_type | string | yes | - | We supported as the following file types:`text` `json` `csv` `orc` `parquet` `excel`.Please note that, The final file name will end with the file_format's suffix, the suffix of the text file is `txt`. 
| | fs.defaultFS | string | yes | - | The hadoop cluster address that start with `hdfs://`, for example: `hdfs://hadoopcluster` | -| read_columns | list | yes | - | The read column list of the data source, user can use it to implement field projection.The file type supported column projection as the following shown:[text,json,csv,orc,parquet,excel,xml].Tips: If the user wants to use this feature when reading `text` `json` `csv` files, the schema option must be configured. | +| read_columns | list | yes | - | The read column list of the data source, user can use it to implement field projection.The file type supported column projection as the following shown:[text,json,csv,orc,parquet,excel].Tips: If the user wants to use this feature when reading `text` `json` `csv` files, the schema option must be configured. | | hdfs_site_path | string | no | - | The path of `hdfs-site.xml`, used to load ha configuration of namenodes | | delimiter/field_delimiter | string | no | \001 | Field delimiter, used to tell connector how to slice and dice fields when reading text files. default `\001`, the same as hive's default delimiter | | parse_partition_from_path | boolean | no | true | Control whether parse the partition keys and values from file path. For example if you read a file from path `hdfs://hadoop-cluster/tmp/seatunnel/parquet/name=tyrantlucifer/age=26`. Every record data from file will be added these two fields:[name:tyrantlucifer,age:26].Tips:Do not define partition fields in schema option. | @@ -59,8 +58,6 @@ Read data from hdfs file system. | skip_header_row_number | long | no | 0 | Skip the first few lines, but only for the txt and csv.For example, set like following:`skip_header_row_number = 2`.then Seatunnel will skip the first 2 lines from source files | | schema | config | no | - | the schema fields of upstream data | | sheet_name | string | no | - | Reader the sheet of the workbook,Only used when file_format is excel. | -| xml_row_tag | string | no | - | Specifies the tag name of the data rows within the XML file, only used when file_format is xml. | -| xml_use_attr_format | boolean | no | - | Specifies whether to process data using the tag attribute format, only used when file_format is xml. | | compress_codec | string | no | none | The compress codec of files | | common-options | | no | - | Source plugin common parameters, please refer to [Source Common Options](common-options.md) for details. | diff --git a/docs/en/connector-v2/source/Hive.md b/docs/en/connector-v2/source/Hive.md index 5d51a19f89c..14306ef953d 100644 --- a/docs/en/connector-v2/source/Hive.md +++ b/docs/en/connector-v2/source/Hive.md @@ -33,19 +33,20 @@ Read all the data in a split in a pollNext call. 
What splits are read will be sa ## Options
-| name | type | required | default value |
-|----------------------|--------|----------|----------------|
-| table_name | string | yes | - |
-| metastore_uri | string | yes | - |
-| krb5_path | string | no | /etc/krb5.conf |
-| kerberos_principal | string | no | - |
-| kerberos_keytab_path | string | no | - |
-| hdfs_site_path | string | no | - |
-| hive_site_path | string | no | - |
-| read_partitions | list | no | - |
-| read_columns | list | no | - |
-| compress_codec | string | no | none |
-| common-options | | no | - |
+| name | type | required | default value |
+|-------------------------------|---------|----------|----------------|
+| table_name | string | yes | - |
+| metastore_uri | string | yes | - |
+| krb5_path | string | no | /etc/krb5.conf |
+| kerberos_principal | string | no | - |
+| kerberos_keytab_path | string | no | - |
+| hdfs_site_path | string | no | - |
+| hive_site_path | string | no | - |
+| read_partitions | list | no | - |
+| read_columns | list | no | - |
+| abort_drop_partition_metadata | boolean | no | true |
+| compress_codec | string | no | none |
+| common-options | | no | - |

### table_name [string]

@@ -86,6 +87,10 @@ The keytab file path of kerberos authentication

The read column list of the data source, user can use it to implement field projection.

+### abort_drop_partition_metadata [boolean]
+
+Flag to decide whether to drop partition metadata from the Hive Metastore during an abort operation. Note: this only affects the metadata in the metastore; the data in the partition (data generated during the synchronization process) is always deleted.
+
### compress_codec [string]

The compress codec of files and the details that supported as the following shown:

diff --git a/docs/en/connector-v2/source/LocalFile.md b/docs/en/connector-v2/source/LocalFile.md
index 172049498cc..4d20ca532d1 100644
--- a/docs/en/connector-v2/source/LocalFile.md
+++ b/docs/en/connector-v2/source/LocalFile.md
@@ -26,7 +26,6 @@ Read all the data in a split in a pollNext call. What splits are read will be sa
- [x] orc
- [x] json
- [x] excel
- - [x] xml

## Description

@@ -55,8 +54,6 @@ If you use SeaTunnel Engine, It automatically integrated the hadoop jar when you
| skip_header_row_number | long | no | 0 |
| schema | config | no | - |
| sheet_name | string | no | - |
-| xml_row_tag | string | no | - |
-| xml_use_attr_format | boolean | no | - |
| file_filter_pattern | string | no | - |
| compress_codec | string | no | none |
| common-options | | no | - |

@@ -70,7 +67,7 @@ The source file path.

File type, supported as the following file types:

-`text` `csv` `parquet` `orc` `json` `excel` `xml`
+`text` `csv` `parquet` `orc` `json` `excel`

If you assign file type to `json`, you should also assign schema option to tell connector how to parse data to the row you want.

@@ -218,7 +215,7 @@ then SeaTunnel will skip the first 2 lines from source files

### schema [config]

-Only need to be configured when the file_format_type are text, json, excel, xml or csv ( Or other format we can't read the schema from metadata).
+Only need to be configured when the file_format_type are text, json, excel or csv ( Or other format we can't read the schema from metadata).

#### fields [Config]

@@ -230,18 +227,6 @@ Only need to be configured when file_format is excel.

Reader the sheet of the workbook.

-### xml_row_tag [string]
-
-Only need to be configured when file_format is xml.
-
-Specifies the tag name of the data rows within the XML file.
- -### xml_use_attr_format [boolean] - -Only need to be configured when file_format is xml. - -Specifies Whether to process data using the tag attribute format. - ### file_filter_pattern [string] Filter pattern, which used for filtering files. diff --git a/docs/en/connector-v2/source/OssFile.md b/docs/en/connector-v2/source/OssFile.md index 85d922644de..233eb76800f 100644 --- a/docs/en/connector-v2/source/OssFile.md +++ b/docs/en/connector-v2/source/OssFile.md @@ -37,13 +37,12 @@ Read all the data in a split in a pollNext call. What splits are read will be sa - [x] orc - [x] json - [x] excel - - [x] xml ## Data Type Mapping Data type mapping is related to the type of file being read, We supported as the following file types: -`text` `csv` `parquet` `orc` `json` `excel` `xml` +`text` `csv` `parquet` `orc` `json` `excel` ### JSON File Type @@ -189,28 +188,26 @@ If you assign file type to `parquet` `orc`, schema option not required, connecto ## Options -| name | type | required | default value | Description | -|---------------------------|---------|----------|---------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| path | string | yes | - | The Oss path that needs to be read can have sub paths, but the sub paths need to meet certain format requirements. Specific requirements can be referred to "parse_partition_from_path" option | -| file_format_type | string | yes | - | File type, supported as the following file types: `text` `csv` `parquet` `orc` `json` `excel` `xml` | -| bucket | string | yes | - | The bucket address of oss file system, for example: `oss://seatunnel-test`. | -| endpoint | string | yes | - | fs oss endpoint | -| read_columns | list | no | - | The read column list of the data source, user can use it to implement field projection. The file type supported column projection as the following shown: `text` `csv` `parquet` `orc` `json` `excel` `xml` . If the user wants to use this feature when reading `text` `json` `csv` files, the "schema" option must be configured. | -| access_key | string | no | - | | -| access_secret | string | no | - | | -| delimiter | string | no | \001 | Field delimiter, used to tell connector how to slice and dice fields when reading text files. Default `\001`, the same as hive's default delimiter. | -| parse_partition_from_path | boolean | no | true | Control whether parse the partition keys and values from file path. For example if you read a file from path `oss://hadoop-cluster/tmp/seatunnel/parquet/name=tyrantlucifer/age=26`. Every record data from file will be added these two fields: name="tyrantlucifer", age=16 | -| date_format | string | no | yyyy-MM-dd | Date type format, used to tell connector how to convert string to date, supported as the following formats:`yyyy-MM-dd` `yyyy.MM.dd` `yyyy/MM/dd`. 
default `yyyy-MM-dd` | -| datetime_format | string | no | yyyy-MM-dd HH:mm:ss | Datetime type format, used to tell connector how to convert string to datetime, supported as the following formats:`yyyy-MM-dd HH:mm:ss` `yyyy.MM.dd HH:mm:ss` `yyyy/MM/dd HH:mm:ss` `yyyyMMddHHmmss` | -| time_format | string | no | HH:mm:ss | Time type format, used to tell connector how to convert string to time, supported as the following formats:`HH:mm:ss` `HH:mm:ss.SSS` | -| skip_header_row_number | long | no | 0 | Skip the first few lines, but only for the txt and csv. For example, set like following:`skip_header_row_number = 2`. Then SeaTunnel will skip the first 2 lines from source files | -| schema | config | no | - | The schema of upstream data. | -| sheet_name | string | no | - | Reader the sheet of the workbook,Only used when file_format is excel. | -| xml_row_tag | string | no | - | Specifies the tag name of the data rows within the XML file, only used when file_format is xml. | -| xml_use_attr_format | boolean | no | - | Specifies whether to process data using the tag attribute format, only used when file_format is xml. | -| compress_codec | string | no | none | Which compress codec the files used. | -| file_filter_pattern | string | no | | `*.txt` means you only need read the files end with `.txt` | -| common-options | config | no | - | Source plugin common parameters, please refer to [Source Common Options](common-options.md) for details. | +| name | type | required | default value | Description | +|---------------------------|---------|----------|---------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| path | string | yes | - | The Oss path that needs to be read can have sub paths, but the sub paths need to meet certain format requirements. Specific requirements can be referred to "parse_partition_from_path" option | +| file_format_type | string | yes | - | File type, supported as the following file types: `text` `csv` `parquet` `orc` `json` `excel` | +| bucket | string | yes | - | The bucket address of oss file system, for example: `oss://seatunnel-test`. | +| endpoint | string | yes | - | fs oss endpoint | +| read_columns | list | no | - | The read column list of the data source, user can use it to implement field projection. The file type supported column projection as the following shown: `text` `csv` `parquet` `orc` `json` `excel` . If the user wants to use this feature when reading `text` `json` `csv` files, the "schema" option must be configured. | +| access_key | string | no | - | | +| access_secret | string | no | - | | +| delimiter | string | no | \001 | Field delimiter, used to tell connector how to slice and dice fields when reading text files. Default `\001`, the same as hive's default delimiter. | +| parse_partition_from_path | boolean | no | true | Control whether parse the partition keys and values from file path. For example if you read a file from path `oss://hadoop-cluster/tmp/seatunnel/parquet/name=tyrantlucifer/age=26`. 
Every record data from file will be added these two fields: name="tyrantlucifer", age=26 |
+| date_format | string | no | yyyy-MM-dd | Date type format, used to tell connector how to convert string to date, supported as the following formats:`yyyy-MM-dd` `yyyy.MM.dd` `yyyy/MM/dd`. default `yyyy-MM-dd` |
+| datetime_format | string | no | yyyy-MM-dd HH:mm:ss | Datetime type format, used to tell connector how to convert string to datetime, supported as the following formats:`yyyy-MM-dd HH:mm:ss` `yyyy.MM.dd HH:mm:ss` `yyyy/MM/dd HH:mm:ss` `yyyyMMddHHmmss` |
+| time_format | string | no | HH:mm:ss | Time type format, used to tell connector how to convert string to time, supported as the following formats:`HH:mm:ss` `HH:mm:ss.SSS` |
+| skip_header_row_number | long | no | 0 | Skip the first few lines, but only for the txt and csv. For example, set like following:`skip_header_row_number = 2`. Then SeaTunnel will skip the first 2 lines from source files |
+| schema | config | no | - | The schema of upstream data. |
+| sheet_name | string | no | - | Reader the sheet of the workbook,Only used when file_format is excel. |
+| compress_codec | string | no | none | Which compress codec the files used. |
+| file_filter_pattern | string | no | | `*.txt` means you only need read the files end with `.txt` |
+| common-options | config | no | - | Source plugin common parameters, please refer to [Source Common Options](common-options.md) for details. |

### compress_codec [string]

The compress codec of files and the details that supported as the following shown:

@@ -228,7 +225,7 @@ Filter pattern, which used for filtering files.

### schema [config]

-Only need to be configured when the file_format_type are text, json, excel, xml or csv ( Or other format we can't read the schema from metadata).
+Only need to be configured when the file_format_type are text, json, excel or csv ( Or other format we can't read the schema from metadata).

#### fields [Config]

diff --git a/docs/en/connector-v2/source/OssJindoFile.md b/docs/en/connector-v2/source/OssJindoFile.md
index d1a28265539..27b710cfb8a 100644
--- a/docs/en/connector-v2/source/OssJindoFile.md
+++ b/docs/en/connector-v2/source/OssJindoFile.md
@@ -26,7 +26,6 @@ Read all the data in a split in a pollNext call. What splits are read will be sa
- [x] orc
- [x] json
- [x] excel
- - [x] xml

## Description

@@ -65,8 +64,6 @@ It only supports hadoop version **2.9.X+**.
| skip_header_row_number | long | no | 0 |
| schema | config | no | - |
| sheet_name | string | no | - |
-| xml_row_tag | string | no | - |
-| xml_use_attr_format | boolean | no | - |
| file_filter_pattern | string | no | - |
| compress_codec | string | no | none |
| common-options | | no | - |

@@ -79,7 +76,7 @@ The source file path.

File type, supported as the following file types:

-`text` `csv` `parquet` `orc` `json` `excel` `xml`
+`text` `csv` `parquet` `orc` `json` `excel`

If you assign file type to `json`, you should also assign schema option to tell connector how to parse data to the row you want.

@@ -243,7 +240,7 @@ then SeaTunnel will skip the first 2 lines from source files

### schema [config]

-Only need to be configured when the file_format_type are text, json, excel, xml or csv ( Or other format we can't read the schema from metadata).
+Only need to be configured when the file_format_type are text, json, excel or csv ( Or other format we can't read the schema from metadata).
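+
+For example, a minimal `schema` block for one of these formats might look like the following sketch (the field names and types here are purely illustrative, not required by the connector):
+
+```hocon
+schema = {
+  fields {
+    # illustrative columns; replace with the real layout of your files
+    name = "string"
+    age = "int"
+  }
+}
+```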
#### fields [Config] diff --git a/docs/en/connector-v2/source/S3File.md b/docs/en/connector-v2/source/S3File.md index 0387af044d6..7ad6f5735cc 100644 --- a/docs/en/connector-v2/source/S3File.md +++ b/docs/en/connector-v2/source/S3File.md @@ -26,7 +26,6 @@ Read all the data in a split in a pollNext call. What splits are read will be sa - [x] orc - [x] json - [x] excel - - [x] xml ## Description @@ -49,7 +48,7 @@ Read data from aws s3 file system. Data type mapping is related to the type of file being read, We supported as the following file types: -`text` `csv` `parquet` `orc` `json` `excel` `xml` +`text` `csv` `parquet` `orc` `json` `excel` ### JSON File Type @@ -198,11 +197,11 @@ If you assign file type to `parquet` `orc`, schema option not required, connecto | name | type | required | default value | Description | |---------------------------------|---------|----------|-------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | path | string | yes | - | The s3 path that needs to be read can have sub paths, but the sub paths need to meet certain format requirements. Specific requirements can be referred to "parse_partition_from_path" option | -| file_format_type | string | yes | - | File type, supported as the following file types: `text` `csv` `parquet` `orc` `json` `excel` `xml` | +| file_format_type | string | yes | - | File type, supported as the following file types: `text` `csv` `parquet` `orc` `json` `excel` | | bucket | string | yes | - | The bucket address of s3 file system, for example: `s3n://seatunnel-test`, if you use `s3a` protocol, this parameter should be `s3a://seatunnel-test`. | | fs.s3a.endpoint | string | yes | - | fs s3a endpoint | | fs.s3a.aws.credentials.provider | string | yes | com.amazonaws.auth.InstanceProfileCredentialsProvider | The way to authenticate s3a. We only support `org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider` and `com.amazonaws.auth.InstanceProfileCredentialsProvider` now. More information about the credential provider you can see [Hadoop AWS Document](https://hadoop.apache.org/docs/stable/hadoop-aws/tools/hadoop-aws/index.html#Simple_name.2Fsecret_credentials_with_SimpleAWSCredentialsProvider.2A) | -| read_columns | list | no | - | The read column list of the data source, user can use it to implement field projection. The file type supported column projection as the following shown: `text` `csv` `parquet` `orc` `json` `excel` `xml` . If the user wants to use this feature when reading `text` `json` `csv` files, the "schema" option must be configured. | +| read_columns | list | no | - | The read column list of the data source, user can use it to implement field projection. The file type supported column projection as the following shown: `text` `csv` `parquet` `orc` `json` `excel` . If the user wants to use this feature when reading `text` `json` `csv` files, the "schema" option must be configured. 
| | access_key | string | no | - | Only used when `fs.s3a.aws.credentials.provider = org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider ` | | access_secret | string | no | - | Only used when `fs.s3a.aws.credentials.provider = org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider ` | | hadoop_s3_properties | map | no | - | If you need to add other option, you could add it here and refer to this [link](https://hadoop.apache.org/docs/stable/hadoop-aws/tools/hadoop-aws/index.html) | @@ -214,8 +213,6 @@ If you assign file type to `parquet` `orc`, schema option not required, connecto | skip_header_row_number | long | no | 0 | Skip the first few lines, but only for the txt and csv. For example, set like following:`skip_header_row_number = 2`. Then SeaTunnel will skip the first 2 lines from source files | | schema | config | no | - | The schema of upstream data. | | sheet_name | string | no | - | Reader the sheet of the workbook,Only used when file_format is excel. | -| xml_row_tag | string | no | - | Specifies the tag name of the data rows within the XML file, only valid for XML files. | -| xml_use_attr_format | boolean | no | - | Specifies whether to process data using the tag attribute format, only valid for XML files. | | compress_codec | string | no | none | | common-options | | no | - | Source plugin common parameters, please refer to [Source Common Options](common-options.md) for details. | diff --git a/docs/en/connector-v2/source/SftpFile.md b/docs/en/connector-v2/source/SftpFile.md index 0f179749fbc..4f6e9af44bc 100644 --- a/docs/en/connector-v2/source/SftpFile.md +++ b/docs/en/connector-v2/source/SftpFile.md @@ -21,7 +21,6 @@ - [x] csv - [x] json - [x] excel - - [x] xml ## Description @@ -87,8 +86,6 @@ The File does not have a specific type list, and we can indicate which SeaTunnel | skip_header_row_number | Long | No | 0 | Skip the first few lines, but only for the txt and csv.
For example, set like following:
`skip_header_row_number = 2`
then SeaTunnel will skip the first 2 lines from source files | | read_columns | list | no | - | The read column list of the data source, user can use it to implement field projection. | | sheet_name | String | No | - | Reader the sheet of the workbook,Only used when file_format is excel. | -| xml_row_tag | string | no | - | Specifies the tag name of the data rows within the XML file, only used when file_format is xml. | -| xml_use_attr_format | boolean | no | - | Specifies whether to process data using the tag attribute format, only used when file_format is xml. | | schema | Config | No | - | Please check #schema below | | compress_codec | String | No | None | The compress codec of files and the details that supported as the following shown:
- txt: `lzo` `None`
- json: `lzo` `None`
- csv: `lzo` `None`
- orc: `lzo` `snappy` `lz4` `zlib` `None`
- parquet: `lzo` `snappy` `lz4` `gzip` `brotli` `zstd` `None`
Tips: excel type does Not support any compression format |
| common-options | | No | - | Source plugin common parameters, please refer to [Source Common Options](common-options.md) for details. |

@@ -96,7 +93,7 @@

### file_format_type [string]

File type, supported as the following file types:
-`text` `csv` `parquet` `orc` `json` `excel` `xml`
+`text` `csv` `parquet` `orc` `json` `excel`

If you assign file type to `json`, you should also assign schema option to tell connector how to parse data to the row you want.
For example:
upstream data is the following:

diff --git a/docs/en/other-engine/flink.md b/docs/en/other-engine/flink.md
index 567bfb7ca10..f2d45383744 100644
--- a/docs/en/other-engine/flink.md
+++ b/docs/en/other-engine/flink.md
@@ -1,6 +1,6 @@
# Seatunnel runs on Flink

-Flink is a powerful high-performance distributed stream processing engine,More information about it you can,You can search for `Apacke Flink`
+Flink is a powerful high-performance distributed stream processing engine. For more information, you can search for `Apache Flink`.

### Set Flink configuration information in the job

diff --git a/docs/en/seatunnel-engine/rest-api.md b/docs/en/seatunnel-engine/rest-api.md
index 4a56c7da7e2..6c4a4064fcb 100644
--- a/docs/en/seatunnel-engine/rest-api.md
+++ b/docs/en/seatunnel-engine/rest-api.md
@@ -111,14 +111,6 @@ network:
}
```
-When we can't get the job info, the response will be:
-
-```json
-{
- "jobId" : ""
-}
-```
-
------------------------------------------------------------------------------------------

diff --git a/docs/en/start-v2/kubernetes/kubernetes.mdx b/docs/en/start-v2/kubernetes/kubernetes.mdx
index 15dd1f503a1..dc913478ab4 100644
--- a/docs/en/start-v2/kubernetes/kubernetes.mdx
+++ b/docs/en/start-v2/kubernetes/kubernetes.mdx
@@ -51,7 +51,7 @@ RUN wget https://dlcdn.apache.org/seatunnel/${SEATUNNEL_VERSION}/apache-seatunne
RUN tar -xzvf apache-seatunnel-${SEATUNNEL_VERSION}-bin.tar.gz
RUN mv apache-seatunnel-${SEATUNNEL_VERSION} ${SEATUNNEL_HOME}

-RUN cd ${SEATUNNEL_HOME}||sh bin/install-plugin.sh ${SEATUNNEL_VERSION}
+RUN cd ${SEATUNNEL_HOME} && sh bin/install-plugin.sh ${SEATUNNEL_VERSION}
```

Then run the following commands to build the image:
@@ -79,7 +79,7 @@ RUN wget https://dlcdn.apache.org/seatunnel/${SEATUNNEL_VERSION}/apache-seatunne
RUN tar -xzvf apache-seatunnel-${SEATUNNEL_VERSION}-bin.tar.gz
RUN mv apache-seatunnel-${SEATUNNEL_VERSION} ${SEATUNNEL_HOME}

-RUN cd ${SEATUNNEL_HOME}||sh bin/install-plugin.sh ${SEATUNNEL_VERSION}
+RUN cd ${SEATUNNEL_HOME} && sh bin/install-plugin.sh ${SEATUNNEL_VERSION}
```

Then run the following commands to build the image:
@@ -107,7 +107,7 @@ RUN wget https://dlcdn.apache.org/seatunnel/${SEATUNNEL_VERSION}/apache-seatunne
RUN tar -xzvf apache-seatunnel-${SEATUNNEL_VERSION}-bin.tar.gz
RUN mv apache-seatunnel-${SEATUNNEL_VERSION} ${SEATUNNEL_HOME}
RUN mkdir -p $SEATUNNEL_HOME/logs
-RUN cd ${SEATUNNEL_HOME}||sh bin/install-plugin.sh ${SEATUNNEL_VERSION}
+RUN cd ${SEATUNNEL_HOME} && sh bin/install-plugin.sh ${SEATUNNEL_VERSION}
```

Then run the following commands to build the image:

diff --git a/docs/zh/concept/JobEnvConfig.md b/docs/zh/concept/JobEnvConfig.md
deleted file mode 100644
index c9f3cd9fda6..00000000000
--- a/docs/zh/concept/JobEnvConfig.md
+++ /dev/null
@@ -1,52 +0,0 @@
-# JobEnvConfig
-
-本文档描述了env的配置信息,公共参数可以在所有引擎中使用。为了更好的区分引擎参数,其他引擎的附加参数需要携带前缀。
-在flink引擎中,我们使用`flink.`作为前缀。在spark引擎中,我们不使用任何前缀来修改参数,因为官方的spark参数本身就是以`spark.`开头。 - -## 公共参数 - -以下配置参数对所有引擎通用: - -### job.name - -该参数配置任务名称。 - -### jars - -第三方包可以通过`jars`加载,例如:`jars="file://local/jar1.jar;file://local/jar2.jar"` - -### job.mode - -通过`job.mode`你可以配置任务是在批处理模式还是流处理模式。例如:`job.mode = "BATCH"` 或者 `job.mode = "STREAMING"` - -### checkpoint.interval - -获取定时调度检查点的时间间隔。 - -在`STREAMING`模式下,检查点是必须的,如果不设置,将从应用程序配置文件`seatunnel.yaml`中获取。 在`BATCH`模式下,您可以通过不设置此参数来禁用检查点。 - -### parallelism - -该参数配置source和sink的并行度。 - -### shade.identifier - -指定加密方式,如果您没有加密或解密配置文件的需求,此选项可以忽略。 - -更多详细信息,您可以参考文档 [config-encryption-decryption](../../en/connector-v2/Config-Encryption-Decryption.md) - -## Flink 引擎参数 - -这里列出了一些与 Flink 中名称相对应的 SeaTunnel 参数名称,并非全部,更多内容请参考官方 [flink documentation](https://flink.apache.org/) for more. - -| Flink 配置名称 | SeaTunnel 配置名称 | -|---------------------------------|---------------------------------------| -| pipeline.max-parallelism | flink.pipeline.max-parallelism | -| execution.checkpointing.mode | flink.execution.checkpointing.mode | -| execution.checkpointing.timeout | flink.execution.checkpointing.timeout | -| ... | ... | - -## Spark 引擎参数 - -由于spark配置项并无调整,这里就不列出来了,请参考官方 [spark documentation](https://spark.apache.org/). - diff --git a/docs/zh/concept/config.md b/docs/zh/concept/config.md deleted file mode 100644 index c00425ca030..00000000000 --- a/docs/zh/concept/config.md +++ /dev/null @@ -1,191 +0,0 @@ ---- - -sidebar_position: 2 -------------------- - -# 配置文件简介 - -In SeaTunnel, the most important thing is the Config file, through which users can customize their own data -synchronization requirements to maximize the potential of SeaTunnel. So next, I will introduce you how to -configure the Config file. - -在SeaTunnel中,最重要的事情就是配置文件,尽管用户可以自定义他们自己的数据同步需求以发挥SeaTunnel最大的潜力。那么接下来, -我将会向你介绍如何设置配置文件。 - -The main format of the Config file is `hocon`, for more details of this format type you can refer to [HOCON-GUIDE](https://github.com/lightbend/config/blob/main/HOCON.md), -BTW, we also support the `json` format, but you should know that the name of the config file should end with `.json` - -配置文件的主要格式是 `hocon`, 有关该格式类型的更多信息你可以参考[HOCON-GUIDE](https://github.com/lightbend/config/blob/main/HOCON.md), -顺便提一下,我们也支持 `json`格式,但你应该知道配置文件的名称应该是以 `.json`结尾。 - -## 例子 - -在你阅读之前,你可以在发布包中的config目录[这里](https://github.com/apache/seatunnel/tree/dev/config)找到配置文件的例子。 - -## 配置文件结构 - -配置文件类似下面。 - -### hocon - -```hocon -env { - job.mode = "BATCH" -} - -source { - FakeSource { - result_table_name = "fake" - row.num = 100 - schema = { - fields { - name = "string" - age = "int" - card = "int" - } - } - } -} - -transform { - Filter { - source_table_name = "fake" - result_table_name = "fake1" - fields = [name, card] - } -} - -sink { - Clickhouse { - host = "clickhouse:8123" - database = "default" - table = "seatunnel_console" - fields = ["name", "card"] - username = "default" - password = "" - source_table_name = "fake1" - } -} -``` - -### json - -```json - -{ - "env": { - "job.mode": "batch" - }, - "source": [ - { - "plugin_name": "FakeSource", - "result_table_name": "fake", - "row.num": 100, - "schema": { - "fields": { - "name": "string", - "age": "int", - "card": "int" - } - } - } - ], - "transform": [ - { - "plugin_name": "Filter", - "source_table_name": "fake", - "result_table_name": "fake1", - "fields": ["name", "card"] - } - ], - "sink": [ - { - "plugin_name": "Clickhouse", - "host": "clickhouse:8123", - "database": "default", - "table": "seatunnel_console", - "fields": ["name", "card"], - 
"username": "default", - "password": "", - "source_table_name": "fake1" - } - ] -} - -``` - -正如你看到的,配置文件包括几个部分:env, source, transform, sink。不同的模块有不同的功能。 -当你了解了这些模块后,你就会懂得SeaTunnel如何工作。 - -### env - -用于添加引擎可选的参数,不管是什么引擎(Spark 或者 Flink),对应的可选参数应该在这里填写。 - -注意,我们按照引擎分离了参数,对于公共参数,我们可以像以前一样配置。对于Flink和Spark引擎,其参数的具体配置规则可以参考[JobEnvConfig](./JobEnvConfig.md)。 - - - -### source - -source用于定义SeaTunnel在哪儿检索数据,并将检索的数据用于下一步。 -可以同时定义多个source。目前支持的source请看[Source of SeaTunnel](../../en/connector-v2/source)。每种source都有自己特定的参数用来 -定义如何检索数据,SeaTunnel也抽象了每种source所使用的参数,例如 `result_table_name` 参数,用于指定当前source生成的数据的名称, -方便后续其他模块使用。 - -### transform - -当我们有了数据源之后,我们可能需要对数据进行进一步的处理,所以我们就有了transform模块。当然,这里使用了“可能”这个词, -这意味着我们也可以直接将transform视为不存在,直接从source到sink。像下面这样。 - -```hocon -env { - job.mode = "BATCH" -} - -source { - FakeSource { - result_table_name = "fake" - row.num = 100 - schema = { - fields { - name = "string" - age = "int" - card = "int" - } - } - } -} - -sink { - Clickhouse { - host = "clickhouse:8123" - database = "default" - table = "seatunnel_console" - fields = ["name", "age", "card"] - username = "default" - password = "" - source_table_name = "fake1" - } -} -``` - -与source类似, transform也有属于每个模块的特定参数。目前支持的source请看。目前支持的transform请看 [Transform V2 of SeaTunnel](../../en/transform-v2) - - - -### sink - -我们使用SeaTunnel的作用是将数据从一个地方同步到其它地方,所以定义数据如何写入,写入到哪里是至关重要的。通过SeaTunnel提供的 -sink模块,你可以快速高效地完成这个操作。Sink和source非常相似,区别在于读取和写入。所以去看看我们[支持的sink](../../en/connector-v2/sink)吧。 - -### 其它 - -你会疑惑当定义了多个source和多个sink时,每个sink读取哪些数据,每个transform读取哪些数据?我们使用`result_table_name` 和 -`source_table_name` 两个键配置。每个source模块都会配置一个`result_table_name`来指示数据源生成的数据源名称,其它transform和sink -模块可以使用`source_table_name` 引用相应的数据源名称,表示要读取数据进行处理。然后transform,作为一个中间的处理模块,可以同时使用 -`result_table_name` 和 `source_table_name` 配置。但你会发现在上面的配置例子中,不是每个模块都配置了这些参数,因为在SeaTunnel中, -有一个默认的约定,如果这两个参数没有配置,则使用上一个节点的最后一个模块生成的数据。当只有一个source时这是非常方便的。 - -## 此外 - -如果你想了解更多关于格式配置的详细信息,请查看 [HOCON](https://github.com/lightbend/config/blob/main/HOCON.md)。 diff --git a/docs/zh/concept/connector-v2-features.md b/docs/zh/concept/connector-v2-features.md deleted file mode 100644 index 9708eb373d1..00000000000 --- a/docs/zh/concept/connector-v2-features.md +++ /dev/null @@ -1,70 +0,0 @@ -# Connector V2 功能简介 - -## Connector V2 和 Connector V1 之间的不同 - -从 https://github.com/apache/seatunnel/issues/1608 我们添加了 Connector V2 特性。 -Connector V2 是基于SeaTunnel Connector API接口定义的连接器。不像Connector V1,Connector V2 支持如下特性: - -* **多引擎支持** SeaTunnel Connector API 是引擎独立的API。基于这个API开发的连接器可以在多个引擎上运行。目前支持Flink和Spark引擎,后续我们会支持其它的引擎。 -* **多引擎版本支持** 通过翻译层将连接器与引擎解耦,解决了大多数连接器需要修改代码才能支持新版本底层引擎的问题。 -* **流批一体** Connector V2 可以支持批处理和流处理。我们不需要为批和流分别开发连接器。 -* **多路复用JDBC/Log连接。** Connector V2支持JDBC资源复用和共享数据库日志解析。 - -## Source Connector 特性 - -Source connector有一些公共的核心特性,每个source connector在不同程度上支持它们。 - -### 精确一次(exactly-once) - -如果数据源中的每条数据仅由源向下游发送一次,我们认为该source connector支持精确一次(exactly-once)。 - -在SeaTunnel中, 我们可以保存读取的 **Split** 和 它的 **offset**(当时读取的数据被分割时的位置,例如行号, 字节大小, 偏移量等) 作为检查点时的 **StateSnapshot** 。 如果任务重新启动, 我们会得到最后的 **StateSnapshot** -然后定位到上次读取的 **Split** 和 **offset**,继续向下游发送数据。 - -例如 `File`, `Kafka`。 - -### 列投影(column projection) - -如果连接器支持仅从数据源读取指定列(请注意,如果先读取所有列,然后通过元数据(schema)过滤不需要的列,则此方法不是真正的列投影) - -例如 `JDBCSource` 可以使用sql定义读取列。 - -`KafkaSource` 从主题中读取所有内容然后使用`schema`过滤不必要的列, 这不是真正的`列投影`。 - -### 批(batch) - -批处理作业模式,读取的数据是有界的,当所有数据读取完成后作业将停止。 - -### 流(stream) - -流式作业模式,数据读取无界,作业永不停止。 - -### 并行性(parallelism) - -并行执行的Source Connector支持配置 `parallelism`,每个并发会创建一个任务来读取数据。 -在**Parallelism Source 
Connector**中,source会被分割成多个split,然后枚举器会将 split 分配给 SourceReader 进行处理。 - -### 支持用户自定义split - -用户可以配置分割规则。 - -### 支持多表读取 - -支持在一个 SeaTunnel 作业中读取多个表 - -## Sink Connector 的特性 - -Sink connector有一些公共的核心特性,每个sink connector在不同程度上支持它们。 - -### 精确一次(exactly-once) - -当任意一条数据流入分布式系统时,如果系统在整个处理过程中仅准确处理任意一条数据一次,且处理结果正确,则认为系统满足精确一次一致性。 - -对于sink connector,如果任何数据只写入目标一次,则sink connector支持精确一次。 通常有两种方法可以实现这一目标: - -* 目标数据库支持key去重。例如 `MySQL`, `Kudu`。 -* 目标支持 **XA 事务**(事务可以跨会话使用。即使创建事务的程序已经结束,新启动的程序也只需要知道最后一个事务的ID就可以重新提交或回滚事务)。 然后我们可以使用 **两阶段提交** 来确保 * 精确一次**。 例如:`File`, `MySQL`. - -### cdc(更改数据捕获,change data capture) - -如果sink connector支持基于主键写入行类型(INSERT/UPDATE_BEFORE/UPDATE_AFTER/DELETE),我们认为它支持cdc(更改数据捕获,change data capture)。 diff --git a/docs/zh/concept/schema-feature.md b/docs/zh/concept/schema-feature.md deleted file mode 100644 index cc69b6d83ea..00000000000 --- a/docs/zh/concept/schema-feature.md +++ /dev/null @@ -1,263 +0,0 @@ -# Schema 特性简介 - -## 为什么我们需要Schema - -某些NoSQL数据库或消息队列没有严格限制schema,因此无法通过api获取schema。 -这时需要定义一个schema来转换为TableSchema并获取数据。 - -## SchemaOptions - -我们可以使用SchemaOptions定义schema, SchemaOptions包含了一些定义schema的配置。 例如:columns, primaryKey, constraintKeys。 - -``` -schema = { - table = "database.schema.table" - schema_first = false - comment = "comment" - columns = [ - ... - ] - primaryKey { - ... - } - - constraintKeys { - ... - } -} -``` - -### table - -schema所属的表标识符的表全名,包含数据库、schema、表名。 例如 `database.schema.table`、`database.table`、`table`。 - -### schema_first - -默认是false。 - -如果schema_first是true, schema会优先使用, 这意味着如果我们设置 `table = "a.b"`, `a` 会被解析为schema而不是数据库, 那么我们可以支持写入 `table = "schema.table"`. - -### comment - -schema所属的 CatalogTable 的注释。 - -### Columns - -Columns 是用于定义模式中的列的配置列表,每列可以包含名称(name)、类型(type)、是否可空(nullable)、默认值(defaultValue)、注释(comment)字段。 - -``` -columns = [ - { - name = id - type = bigint - nullable = false - columnLength = 20 - defaultValue = 0 - comment = "primary key id" - } -] -``` - -| 字段 | 是否必须 | 默认值 | 描述 | -|:-------------|:-----|:-----|--------------------| -| name | Yes | - | 列的名称 | -| type | Yes | - | 列的数据类型 | -| nullable | No | true | 列是否可空 | -| columnLength | No | 0 | 列的长度,当您需要定义长度时将很有用 | -| defaultValue | No | null | 列的默认值 | -| comment | No | null | 列的注释 | - -#### 目前支持哪些类型 - -| 数据类型 | Java中的值类型 | 描述 | -|:----------|:---------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| string | `java.lang.String` | 字符串 | -| boolean | `java.lang.Boolean` | 布尔 | -| tinyint | `java.lang.Byte` | 常规-128 至 127 。 0 到 255 无符号*。 指定括号中的最大位数。 | -| smallint | `java.lang.Short` | 常规-32768 至 32767。 0 到 65535 无符号*。 指定括号中的最大位数。 | -| int | `java.lang.Integer` | 允许从 -2,147,483,648 到 2,147,483,647 的所有数字。 | -| bigint | `java.lang.Long` | 允许 -9,223,372,036,854,775,808 和 9,223,372,036,854,775,807 之间的所有数字。 | -| float | `java.lang.Float` | 从-1.79E+308 到 1.79E+308浮点精度数值数据。 | -| double | `java.lang.Double` | 双精度浮点。 处理大多数小数。 | -| decimal | `java.math.BigDecimal` | DOUBLE 类型存储为字符串,允许固定小数点。 | -| null | `java.lang.Void` | null | -| bytes | `byte[]` | 字节。 | -| date | `java.time.LocalDate` | 仅存储日期。从0001年1月1日到9999 年 12 月 31 日。 | -| time | `java.time.LocalTime` | 仅存储时间。精度为 100 纳秒。 | -| timestamp | `java.time.LocalDateTime` | 存储一个唯一的编号,每当创建或修改行时都会更新该编号。 时间戳基于内部时钟,与实际时间不对应。 每个表只能有一个时间戳变量。 | -| row | 
`org.apache.seatunnel.api.table.type.SeaTunnelRow` | 行类型,可以嵌套。 | -| map | `java.util.Map` | Map 是将键映射到值的对象。 键类型包括: `int` `string` `boolean` `tinyint` `smallint` `bigint` `float` `double` `decimal` `date` `time` `timestamp` `null` , and the value type includes `int` `string` `boolean` `tinyint` `smallint` `bigint` `float` `double` `decimal` `date` `time` `timestamp` `null` `array` `map` `row`. | -| array | `ValueType[]` | 数组是一种表示元素集合的数据类型。 元素类型包括: `int` `string` `boolean` `tinyint` `smallint` `bigint` `float` `double`. | - -#### 如何声明支持的类型 - -SeaTunnel 提供了一种简单直接的方式来声明基本类型。基本类型的关键字包括:`string`, `boolean`, `tinyint`, `smallint`, `int`, `bigint`, `float`, `double`, `date`, `time`, `timestamp`, 和 `null`。基本类型的关键字名称可以直接用作类型声明,并且SeaTunnel对类型关键字不区分大小写。 例如,如果您需要声明一个整数类型的字段,您可以简单地将字段定义为`int`或`"int"`。 - -> null 类型声明必须用双引号引起来, 例如:`"null"`。 这种方法有助于避免与 [HOCON](https://github.com/lightbend/config/blob/main/HOCON.md) 中表示未定义的对象的 `null` 类型混淆。 - -声明复杂类型(例如 **decimal**、**array**、**map** 和 **row**)时,请注意具体注意事项。 -- 声明decimal类型时,需要设置精度(precision)和小数位数(scale),类型定义遵循“decimal(precision, scale)”格式。 需要强调的是,十进制类型的声明必须用 `"` 括起来;不能像基本类型一样直接使用类型名称。例如,当声明精度为 10、小数位数为 2 的十进制字段时,您可以指定字段类型为`"decimal(10,2)"`。 -- 声明array类型时,需要指定元素类型,类型定义遵循 `array` 格式,其中 `T` 代表元素类型。元素类型包括`int`,`string`,`boolean`,`tinyint`,`smallint`,`bigint`,`float` 和 `double`。与十进制类型声明类似,它也用 `"` 括起来。例如,在声明具有整数数组的字段时,将字段类型指定为 `"array"`。 -- 声明map类型时,需要指定键和值类型。map类型定义遵循`map`格式,其中`K`表示键类型,`V`表示值类型。 `K`可以是任何基本类型和十进制类型,`V`可以是 SeaTunnel 支持的任何类型。 与之前的类型声明类似,map类型声明必须用双引号引起来。 例如,当声明一个map类型的字段时,键类型为字符串,值类型为整数,则可以将该字段声明为`"map"`。 -- 声明row类型时,需要定义一个 [HOCON](https://github.com/lightbend/config/blob/main/HOCON.md) 对象来描述字段及其类型。 字段类型可以是 SeaTunnel 支持的任何类型。 例如,当声明包含整数字段“a”和字符串字段“b”的行类型时,可以将其声明为“{a = int, b = string}”。 将定义作为字符串括在 `"` 中也是可以接受的,因此 `"{a = int, b = string}"` 相当于 `{a = int, c = string}`。由于 HOCON 与 JSON 兼容, `"{\"a\":\"int\", \"b\":\"string\"}"` 等价于 `"{a = int, b = string}"`。 - -以下是复杂类型声明的示例: - -```hocon -schema { - fields { - c_decimal = "decimal(10, 2)" - c_array = "array" - c_row = { - c_int = int - c_string = string - c_row = { - c_int = int - } - } - # 在泛型中Hocon风格声明行类型 - map0 = "map" - # 在泛型中Json风格声明行类型 - map1 = "map" - } -} -``` - -### 主键(PrimaryKey) - -主键是用于定义模式中主键的配置,它包含name、columns字段。 - -``` -primaryKey { - name = id - columns = [id] -} -``` - -| 字段 | 是否必须 | 默认值 | 描述 | -|:--------|:-----|:----|---------| -| name | 是 | - | 主键名称 | -| columns | 是 | - | 主键中的列列表 | - -### 约束键(constraintKeys) - -约束键是用于定义模式中约束键的配置列表,它包含constraintName,constraintType,constraintColumns字段。 - -``` -constraintKeys = [ - { - constraintName = "id_index" - constraintType = KEY - constraintColumns = [ - { - columnName = "id" - sortType = ASC - } - ] - }, - ] -``` - -| 字段 | 是否必须 | 默认值 | 描述 | -|:------------------|:-----|:----|------------------------------------------------------------------------| -| constraintName | 是 | - | 约束键的名称 | -| constraintType | 否 | KEY | 约束键的类型 | -| constraintColumns | 是 | - | PrimaryKey中的列列表,每列应包含constraintType和sortType,sortType支持ASC和DESC,默认为ASC | - -#### 目前支持哪些约束类型 - -| 约束类型 | 描述 | -|:-----------|:----| -| INDEX_KEY | 键 | -| UNIQUE_KEY | 唯一键 | - -## 如何使用schema - -### 推荐 - -``` -source { - FakeSource { - parallelism = 2 - result_table_name = "fake" - row.num = 16 - schema { - table = "FakeDatabase.FakeTable" - columns = [ - { - name = id - type = bigint - nullable = false - defaultValue = 0 - comment = "primary key id" - }, - { - name = name - type = "string" - nullable = true - comment = "name" - }, - { - name = age - type = int - nullable = true - comment = "age" - } - ] 
- primaryKey { - name = "id" - columnNames = [id] - } - constraintKeys = [ - { - constraintName = "unique_name" - constraintType = UNIQUE_KEY - constraintColumns = [ - { - columnName = "name" - sortType = ASC - } - ] - }, - ] - } - } -} -``` - -### 已弃用 - -如果你只需要定义列,你可以使用字段来定义列,这是一种简单的方式,但将来会被删除。 - -``` -source { - FakeSource { - parallelism = 2 - result_table_name = "fake" - row.num = 16 - schema = { - fields { - id = bigint - c_map = "map" - c_array = "array" - c_string = string - c_boolean = boolean - c_tinyint = tinyint - c_smallint = smallint - c_int = int - c_bigint = bigint - c_float = float - c_double = double - c_decimal = "decimal(2, 1)" - c_bytes = bytes - c_date = date - c_timestamp = timestamp - } - } - } -} -``` - -## 我们什么时候应该使用它,什么时候不应该使用它 - -如果选项中有`schema`配置项目,则连接器可以自定义schema。 比如 `Fake` `Pulsar` `Http` 源连接器等。 diff --git a/docs/zh/concept/speed-limit.md b/docs/zh/concept/speed-limit.md deleted file mode 100644 index cab8fc8bff8..00000000000 --- a/docs/zh/concept/speed-limit.md +++ /dev/null @@ -1,43 +0,0 @@ -# 速度控制 - -## 介绍 - -SeaTunnel提供了强大的速度控制功能允许你管理数据同步的速率。当你需要确保在系统之间数据传输的高效和可控这个功能是至关重要的。 -速度控制主要由两个关键参数控制:`read_limit.rows_per_second` 和 `read_limit.bytes_per_second`。 -本文档将指导您如何使用这些参数以及如何有效地利用它们。 - -## 支持这些引擎 - -> SeaTunnel Zeta
-> Flink<br/>
-> Spark<br/>
- -## 配置 - -要使用速度控制功能,你需要在job配置中设置`read_limit.rows_per_second` 或 `read_limit.bytes_per_second`参数。 - -配置文件中env配置示例: - -```hocon -env { - job.mode=STREAMING - job.name=SeaTunnel_Job - read_limit.bytes_per_second=7000000 - read_limit.rows_per_second=400 -} -source { - MySQL-CDC { - // ignore... - } -} -transform { -} -sink { - Console { - } -} -``` - -我们在`env`参数中放了`read_limit.bytes_per_second` 和 `read_limit.rows_per_second`来完成速度控制的配置。 -你可以同时配置这两个参数,或者只配置其中一个。每个`value`的值代表每个线程被限制的最大速率。 -因此,在配置各个值时,请考虑你任务的并行性。 diff --git a/docs/zh/contribution/coding-guide.md b/docs/zh/contribution/coding-guide.md deleted file mode 100644 index f102eb68554..00000000000 --- a/docs/zh/contribution/coding-guide.md +++ /dev/null @@ -1,116 +0,0 @@ -# 编码指南 - -本指南整体介绍了当前 Apache SeaTunnel 的模块和提交一个高质量 pull request 的最佳实践。 - -## 模块概述 - -| 模块名 | 介绍 | -|----------------------------------------|---------------------------------------------------------------------------------------------------| -| seatunnel-api | SeaTunnel connector V2 API 模块 | -| seatunnel-apis | SeaTunnel connector V1 API 模块 | -| seatunnel-common | SeaTunnel 通用模块 | -| seatunnel-connectors | SeaTunnel connector V1 模块, 当前 connector V1 处在稳定状态, 社区会持续维护,但不会有大的特性更新 | -| seatunnel-connectors-v2 | SeaTunnel connector V2 模块, connector V2 处于社区重点开发中 | -| seatunnel-core/seatunnel-spark | SeaTunnel connector V1 的 spark 引擎核心启动模块 | -| seatunnel-core/seatunnel-flink | SeaTunnel connector V1 的 flink 引擎核心启动模块 | -| seatunnel-core/seatunnel-flink-sql | SeaTunnel connector V1 的 flink-sql 引擎核心启动模块 | -| seatunnel-core/seatunnel-spark-starter | SeaTunnel connector V2 的 Spark 引擎核心启动模块 | -| seatunnel-core/seatunnel-flink-starter | SeaTunnel connector V2 的 Flink 引擎核心启动模块 | -| seatunnel-core/seatunnel-starter | SeaTunnel connector V2 的 SeaTunnel 引擎核心启动模块 | -| seatunnel-e2e | SeaTunnel 端到端测试模块 | -| seatunnel-examples | SeaTunnel 本地案例模块, 开发者可以用来单元测试和集成测试 | -| seatunnel-engine | SeaTunnel 引擎模块, seatunnel-engine 是 SeaTunnel 社区新开发的计算引擎,用来实现数据同步 | -| seatunnel-formats | SeaTunnel 格式化模块,用来提供格式化数据的能力 | -| seatunnel-plugin-discovery | SeaTunnel 插件发现模块,用来加载类路径中的SPI插件 | -| seatunnel-transforms-v2 | SeaTunnel transform V2 模块, transform V2 处于社区重点开发中 | -| seatunnel-translation | SeaTunnel translation 模块, 用来适配Connector V2 和其他计算引擎, 例如Spark、Flink等 | - -## 如何提交一个高质量的 pull request - -1. 创建实体类的时候使用 `lombok` 插件的注解(`@Data` `@Getter` `@Setter` `@NonNull` 等)来减少代码量。在编码过程中优先使用 lombok 插件是一个很好的习惯。 - -2. 如果你需要在类中使用 log4j 打印日志, 优先使用 `lombok` 中的 `@Slf4j` 注解。 - -3. SeaTunnel 使用 Github issue 来跟踪代码问题,包括 bugs 和 改进, 并且使用 Github pull request 来管理代码的审查和合并。所以创建一个清晰的 issue 或者 pull request 能让社区更好的理解开发者的意图,最佳实践如下: - - > [目的] [模块名称] [子模块名称] 描述 - - 1. Pull request 目的包含: `Hotfix`, `Feature`, `Improve`, `Docs`, `WIP`。 请注意如果选择 `WIP`, 你需要使用 github 的 draft pull request。 - 2. Issue 目的包含: `Feature`, `Bug`, `Docs`, `Discuss`。 - 3. 模块名称: 当前 pull request 或 issue 所涉及的模块名称, 例如: `Core`, `Connector-V2`, `Connector-V1`等。 - 4. 子模块名称: 当前 pull request 或 issue 所涉及的子模块名称, 例如:`File` `Redis` `Hbase`等。 - 5. 描述: 高度概括下当前 pull request 和 issue 要做的事情,尽量见名知意。 - - 提示:**更多内容, 可以参考 [issue guide](https://seatunnel.apache.org/community/contribution_guide/contribute#issue) 和 [pull request guide](https://seatunnel.apache.org/community/contribution_guide/contribute#pull-request)** - -4. 代码片段不要重复。 如果一段代码被使用多次,定义多次不是好的选择,最佳实践是把它公共独立出来让其他模块使用。 - -5. 
当抛出一个异常时, 需要一起带上提示信息并且使异常的范围尽可能地小。抛出过于广泛的异常会让错误处理变得复杂并且容易包含安全问题。例如,如果你的 connector 在读数据的时候遇到 `IOException`, 合理的做法如下: - - ```java - try { - // read logic - } catch (IOException e) { - throw SeaTunnelORCFormatException("This orc file is corrupted, please check it", e); - } - ``` - -6. Apache 项目的 license 要求很严格, 每个 Apache 项目文件都应该包含一个 license 声明。 在提交 pull request 之前请检查每个新文件都包含 `Apache License Header`。 - - ```java - /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - ``` - -7. Apache SeaTunnel 使用 `Spotless` 管理代码风格和格式检查。你可以使用下面的命令来自动修复代码风格问题和格式。 - - ```shell - ./mvnw spotless:apply - ``` - -8. 提交 pull request 之前,确保修改后项目编译正常,使用下面命令打包整个项目: - - ```shell - # 多线程编译 - ./mvnw -T 1C clean package - ``` - - ```shell - # 单线程编译 - ./mvnw clean package - ``` - -9. 提交 pull request 之前,在本地用完整的单元测试和集成测试来检查你的功能性是否正确,最佳实践是用 `seatunnel-examples` 模块的例子去检查多引擎是否正确运行并且结果正确。 - -10. 如果提交的 pull request 是一个新的特性, 请记得更新文档。 - -12. 提交 connector 相关的 pull request, 可以通过写 e2e 测试保证鲁棒性,e2e 测试需要包含所有的数据类型,并且初始化尽可能小的 docker 镜像,sink 和 source 的测试用例可以写在一起减少资源的损耗。 可以参考这个不错的例子: [MongodbIT.java](https://github.com/apache/seatunnel/blob/dev/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-mongodb-e2e/src/test/java/org/apache/seatunnel/e2e/connector/v2/mongodb/MongodbIT.java) - -12. 类中默认的权限需要使用 `private`, 不可修改的需要设置 `final`, 特殊场景除外。 - -13. 类中的属性和方法参数倾向于使用基本数据类型(int boolean double float...), 而不是包装类型(Integer Boolean Double Float...), 特殊情况除外。 - -14. 开发一个 sink connector 的时候你需要知道 sink 需要被序列化,如果有不能被序列化的属性, 需要包装到一个类中,并且使用单例模式。 - -15. 如果代码中有多个 `if` 流程判断, 尽量简化为多个 if 而不是 if-else-if。 - -16. Pull request 具有单一职责的特点, 不允许在 pull request 包含与该功能无关的代码, 如果有这种情况, 需要在提交 pull request 之前单独处理好, 否则 Apache SeaTunnel 社区会主动关闭 pull request。 - -17. 贡献者需要对自己的 pull request 负责。 如果 pull request 包含新的特性, 或者修改了老的特性,增加测试用例或者 e2e 用例来证明合理性和保护完整性是一个很好的做法。 - -18. 
如果你认为社区当前某部分代码不合理(尤其是核心的 `core` 和 `api` 模块),有函数需要更新修改,优先使用 `discuss issue` 和 `email` 与社区讨论是否有必要修改,社区同意后再提交 pull request, 请不要不经讨论直接提交 pull request, 社区会认为无效并且关闭。 - diff --git a/docs/zh/contribution/contribute-plugin.md b/docs/zh/contribution/contribute-plugin.md deleted file mode 100644 index 514355840d0..00000000000 --- a/docs/zh/contribution/contribute-plugin.md +++ /dev/null @@ -1,5 +0,0 @@ -# 贡献 Connector-v2 插件 - -如果你想要贡献 Connector-V2, 可以参考下面的 Connector-V2 贡献指南。 可以帮你快速进入开发。 - -[Connector-v2 贡献指南](https://github.com/apache/seatunnel/blob/dev/seatunnel-connectors-v2/README.md) diff --git a/docs/zh/contribution/contribute-transform-v2-guide.md b/docs/zh/contribution/contribute-transform-v2-guide.md deleted file mode 100644 index b9abe5da492..00000000000 --- a/docs/zh/contribution/contribute-transform-v2-guide.md +++ /dev/null @@ -1,321 +0,0 @@ -# 贡献 Transform 指南 - -本文描述了如何理解、开发和贡献一个 transform。 - -我们也提供了 [transform e2e test](../../../seatunnel-e2e/seatunnel-transforms-v2-e2e) -来验证 transform 的数据输入和输出。 - -## 概念 - -在 SeaTunnel 中你可以通过 connector 读写数据, 但如果你需要在读取数据后或者写入数据前处理数据, 你需要使用 transform。 - -使用 transform 可以简单修改数据行和字段, 例如拆分字段、修改字段的值或者删除字段。 - -### 类型转换 - -Transform 从上游(source 或者 transform)获取类型输入,然后给下游(sink 或者 transform)输出新的类型,这个过程就是类型转换。 - -案例 1:删除字段 - -```shell -| A | B | C | -|-----------|-----------|-----------| -| STRING | INT | BOOLEAN | - -| A | B | -|-----------|-----------| -| STRING | INT | -``` - -案例 2:字段排序 - -```shell -| B | C | A | -|-----------|-----------|-----------| -| INT | BOOLEAN | STRING | - -| A | B | C | -|-----------|-----------|-----------| -| STRING | INT | BOOLEAN | -``` - -案例 3:修改字段类型 - -```shell -| A | B | C | -|-----------|-----------|-----------| -| STRING | INT | BOOLEAN | - - -| A | B | C | -|-----------|-----------|-----------| -| STRING | STRING | STRING | -``` - -案例 4:添加新的字段 - -```shell -| A | B | C | -|-----------|-----------|-----------| -| STRING | INT | BOOLEAN | - - -| A | B | C | D | -|-----------|-----------|-----------|-----------| -| STRING | INT | BOOLEAN | DOUBLE | -``` - -### 数据转换 - -转换类型后,Transform 会从上游(source 或者 transform)获取数据行, 使用[新的数据类型](#类型转换)编辑数据后输出到下游(sink 或者 transform)。这个过程叫数据转换。 - -### 翻译 - -Transform 已经从 execution engine 中解耦, 任何 transform 实现可以不需要修改和配置的适用所有引擎, 这就需要翻译层来做 transform 和 execution engine 的适配。 - -案例:翻译数据类型和数据 - -```shell -原始数据: - -| A | B | C | -|-----------|-----------|-----------| -| STRING | INT | BOOLEAN | - -类型转换: - -| A | B | C | -|-------------------|-------------------|-------------------| -| ENGINE | ENGINE | ENGINE | - -数据转换: - -| A | B | C | -|-------------------|-------------------|-------------------| -| ENGINE<"test"> | ENGINE<1> | ENGINE | -``` - -## 核心 APIs - -### SeaTunnelTransform - -`SeaTunnelTransform` 提供了所有主要的 API, 你可以继承它实现任何转换。 - -1. 从上游获取数据类型。 - -```java -/** - * Set the data type info of input data. - * - * @param inputDataType The data type info of upstream input. - */ - void setTypeInfo(SeaTunnelDataType inputDataType); -``` - -2. 输出新的数据类型给下游。 - -```java -/** - * Get the data type of the records produced by this transform. - * - * @return Produced data type. - */ -SeaTunnelDataType getProducedType(); -``` - -3. 修改输入数据并且输出新的数据到下游。 - -```java -/** - * Transform input data to {@link this#getProducedType()} types data. - * - * @param row the data need be transform. - * @return transformed data. - */ -T map(T row); -``` - -### SingleFieldOutputTransform - -`SingleFieldOutputTransform` 抽象了一个单字段修改操作 - -1. 
定义输出字段 - -```java -/** - * Outputs new field - * - * @return - */ -protected abstract String getOutputFieldName(); -``` - -2. 定义输出字段类型 - -```java -/** - * Outputs new field datatype - * - * @return - */ -protected abstract SeaTunnelDataType getOutputFieldDataType(); -``` - -3. 定义输出字段值 - -```java -/** - * Outputs new field value - * - * @param inputRow The inputRow of upstream input. - * @return - */ -protected abstract Object getOutputFieldValue(SeaTunnelRowAccessor inputRow); -``` - -### MultipleFieldOutputTransform - -`MultipleFieldOutputTransform` 抽象了多字段修改操作 - -1. 定义多个输出的字段 - -```java -/** - * Outputs new fields - * - * @return - */ -protected abstract String[] getOutputFieldNames(); -``` - -2. 定义输出字段的类型 - -```java -/** - * Outputs new fields datatype - * - * @return - */ -protected abstract SeaTunnelDataType[] getOutputFieldDataTypes(); -``` - -3. 定义输出字段的值 - -```java -/** - * Outputs new fields value - * - * @param inputRow The inputRow of upstream input. - * @return - */ -protected abstract Object[] getOutputFieldValues(SeaTunnelRowAccessor inputRow); -``` - -### AbstractSeaTunnelTransform - -`AbstractSeaTunnelTransform` 抽象了数据类型和字段的修改操作 - -1. 转换输入的行类型到新的行类型 - -```java -/** - * Outputs transformed row type. - * - * @param inputRowType upstream input row type - * @return - */ -protected abstract SeaTunnelRowType transformRowType(SeaTunnelRowType inputRowType); -``` - -2. 转换输入的行数据到新的行数据 - -```java -/** - * Outputs transformed row data. - * - * @param inputRow upstream input row data - * @return - */ -protected abstract SeaTunnelRow transformRow(SeaTunnelRow inputRow); -``` - -## 开发一个 Transform - -Transform 必须实现下面其中一个 API: -- SeaTunnelTransform -- AbstractSeaTunnelTransform -- SingleFieldOutputTransform -- MultipleFieldOutputTransform - -将实现类放入模块 `seatunnel-transforms-v2`。 - -### 案例: 拷贝字段到一个新的字段 - -```java -@AutoService(SeaTunnelTransform.class) -public class CopyFieldTransform extends SingleFieldOutputTransform { - - private String srcField; - private int srcFieldIndex; - private SeaTunnelDataType srcFieldDataType; - private String destField; - - @Override - public String getPluginName() { - return "Copy"; - } - - @Override - protected void setConfig(Config pluginConfig) { - this.srcField = pluginConfig.getString("src_field"); - this.destField = pluginConfig.getString("dest_fields"); - } - - @Override - protected void setInputRowType(SeaTunnelRowType inputRowType) { - srcFieldIndex = inputRowType.indexOf(srcField); - srcFieldDataType = inputRowType.getFieldType(srcFieldIndex); - } - - @Override - protected String getOutputFieldName() { - return destField; - } - - @Override - protected SeaTunnelDataType getOutputFieldDataType() { - return srcFieldDataType; - } - - @Override - protected Object getOutputFieldValue(SeaTunnelRowAccessor inputRow) { - return inputRow.getField(srcFieldIndex); - } -} -``` - -1. `getPluginName` 方法用来定义 transform 的名字。 -2. @AutoService 注解用来自动生成 `META-INF/services/org.apache.seatunnel.api.transform.SeaTunnelTransform` 文件 -3. 
`setConfig` 方法用来注入用户配置。 - -## Transform 测试工具 - -当你添加了一个新的插件, 推荐添加一个 e2e 测试用例来测试。 -我们有 `seatunnel-e2e/seatunnel-transforms-v2-e2e` 来帮助你实现。 - -例如, 如果你想要添加一个 `CopyFieldTransform` 的测试用例, 你可以在 `seatunnel-e2e/seatunnel-transforms-v2-e2e` -模块中添加一个新的测试用例, 并且在用例中继承 `TestSuiteBase` 类。 - -```java -public class TestCopyFieldTransformIT extends TestSuiteBase { - - @TestTemplate - public void testCopyFieldTransform(TestContainer container) { - Container.ExecResult execResult = container.executeJob("/copy_transform.conf"); - Assertions.assertEquals(0, execResult.getExitCode()); - } -} -``` - -一旦你的测试用例实现了 `TestSuiteBase` 接口, 并且添加 `@TestTemplate` 注解,它会在所有引擎运行作业,你只需要用你自己的 SeaTunnel 配置文件执行 executeJob 方法, -它会提交 SeaTunnel 作业。 diff --git a/docs/zh/contribution/how-to-create-your-connector.md b/docs/zh/contribution/how-to-create-your-connector.md deleted file mode 100644 index 3aef1b140c2..00000000000 --- a/docs/zh/contribution/how-to-create-your-connector.md +++ /dev/null @@ -1,4 +0,0 @@ -## 开发自己的Connector - -如果你想针对SeaTunnel新的连接器API开发自己的连接器(Connector V2),请查看[这里](https://github.com/apache/seatunnel/blob/dev/seatunnel-connectors-v2/README.zh.md) 。 - diff --git a/docs/zh/contribution/new-license.md b/docs/zh/contribution/new-license.md deleted file mode 100644 index d39019f25b7..00000000000 --- a/docs/zh/contribution/new-license.md +++ /dev/null @@ -1,53 +0,0 @@ -# 如何添加新的 License - -### ASF 第三方许可政策 - -如果您打算向SeaTunnel(或其他Apache项目)添加新功能,并且该功能涉及到其他开源软件引用的时候,请注意目前 Apache 项目支持遵从以下协议的开源软件。 - -[ASF 第三方许可政策](https://apache.org/legal/resolved.html) - -如果您所使用的第三方软件并不在以上协议之中,那么很抱歉,您的代码将无法通过审核,建议您找寻其他替代方案。 - -### 如何在 SeaTunnel 中合法使用第三方开源软件 - -当我们想要引入一个新的第三方软件(包含但不限于第三方的 jar、文本、CSS、js、图片、图标、音视频等及在第三方基础上做的修改)至我们的项目中的时候,除了他们所遵从的协议是 Apache 允许的,另外一点很重要,就是合法的使用。您可以参考以下文章 - -* [COMMUNITY-LED DEVELOPMENT "THE APACHE WAY"](https://apache.org/dev/licensing-howto.html) - -举个例子,当我们使用了 ZooKeeper,那么我们项目就必须包含 ZooKeeper 的 NOTICE 文件(每个开源项目都会有 NOTICE 文件,一般位于根目录),用Apache的话来讲,就是 "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work. - -关于具体的各个开源协议使用协议,在此不做过多篇幅一一介绍,有兴趣可以自行查询了解。 - -### SeaTunnel-License 检测规则 - -通常情况下, 我们会为项目添加 License-check 脚本。 跟其他开源项目略有不同,SeaTunnel 使用 [SkyWalking](https://github.com/apache/skywalking) 提供的 SeaTunnel-License-Check。 总之,我们试图第一时间避免 License 问题。 - -当我们需要添加新的 jar 包或者使用外部资源时, 我们需要按照以下步骤进行操作: - -* 在 known-dependencies.txt 文件中添加 jar 的名称和版本 -* 在 'seatunnel-dist/release-docs/LICENSE' 目录下添加相关 maven 仓库地址 -* 在 'seatunnel-dist/release-docs/NOTICE' 目录下添加相关的 NOTICE 文件, 并确保他们跟原来的仓库中的文件没有区别 -* 在 'seatunnel-dist/release-docs/licenses' 目录下添加相关源码协议文件, 并且文件命令遵守 license-filename.txt 规则。 例:license-zk.txt -* 检查依赖的 license 是否出错 - -``` ---- /dev/fd/63 2020-12-03 03:08:57.191579482 +0000 -+++ /dev/fd/62 2020-12-03 03:08:57.191579482 +0000 -@@ -1,0 +2 @@ -+HikariCP-java6-2.3.13.jar -@@ -16,0 +18 @@ -+c3p0-0.9.5.2.jar -@@ -149,0 +152 @@ -+mchange-commons-java-0.2.11.jar - -- commons-lang-2.1.3.jar -Error: Process completed with exit code 1. 
-``` - -一般来说,添加一个 jar 的工作通常不是很容易,因为 jar 通常依赖其他各种 jar, 我们还需要为这些 jar 添加相应的许可证。 在这种情况下, 我们会收到检查 license 失败的错误信息。像上面的例子,我们缺少 `HikariCP-java6-2.3.13`, `c3p0` 等的 license 声明(`+` 表示新添加,`-` 表示需要删除), 按照步骤添加 jar。 - -### 参考 - -* [COMMUNITY-LED DEVELOPMENT "THE APACHE WAY"](https://apache.org/dev/licensing-howto.html) -* [ASF 第三方许可政策](https://apache.org/legal/resolved.html) - diff --git a/docs/zh/contribution/setup.md b/docs/zh/contribution/setup.md deleted file mode 100644 index b94c971d75e..00000000000 --- a/docs/zh/contribution/setup.md +++ /dev/null @@ -1,113 +0,0 @@ -# 搭建开发环境 - -在这个章节, 我们会向你展示如何搭建 SeaTunnel 的开发环境, 然后用 JetBrains IntelliJ IDEA 跑一个简单的示例。 - -> 你可以用任何你喜欢的开发环境进行开发和测试,我们只是用 [JetBrains IDEA](https://www.jetbrains.com/idea/) -> 作为示例来展示如何一步步设置环境。 - -## 准备 - -在设置开发环境之前, 需要做一些准备工作, 确保你安装了以下软件: - -* 安装 [Git](https://git-scm.com/book/en/v2/Getting-Started-Installing-Git)。 -* 安装 [Java](https://www.java.com/en/download/) (目前只支持 JDK8/JDK11) 并且设置 `JAVA_HOME` 环境变量。 -* 安装 [Scala](https://www.scala-lang.org/download/2.11.12.html) (目前只支持 scala 2.11.12)。 -* 安装 [JetBrains IDEA](https://www.jetbrains.com/idea/)。 - -## 设置 - -### 克隆源码 - -首先使用以下命令从 [GitHub](https://github.com/apache/seatunnel) 克隆 SeaTunnel 源代码。 - -```shell -git clone git@github.com:apache/seatunnel.git -``` - -### 本地安装子项目 - -在克隆好源代码以后, 运行 `./mvnw` 命令安装子项目到 maven 本地仓库目录。 否则你的代码无法在 IDEA 中正常启动。 - -```shell -./mvnw install -Dmaven.test.skip -``` - -### 源码编译 - -在安装 maven 以后, 可以使用下面命令进行编译和打包。 - -``` -mvn clean package -pl seatunnel-dist -am -Dmaven.test.skip=true -``` - -### 编译子模块 - -如果要单独编译子模块, 可以使用下面的命令进行编译和打包。 - -```ssh -# 这是一个单独构建 redis connector 的示例 - - mvn clean package -pl seatunnel-connectors-v2/connector-redis -am -DskipTests -T 1C -``` - -### 安装 JetBrains IDEA Scala 插件 - -用 JetBrains IntelliJ IDEA 打开你的源码,如果有 Scala 的代码,则需要安装 JetBrains IntelliJ IDEA's [Scala plugin](https://plugins.jetbrains.com/plugin/1347-scala)。 -可以参考 [install plugins for IDEA](https://www.jetbrains.com/help/idea/managing-plugins.html#install-plugins) 。 - -### 安装 JetBrains IDEA Lombok 插件 - -在运行示例之前, 安装 JetBrains IntelliJ IDEA 的 [Lombok plugin](https://plugins.jetbrains.com/plugin/6317-lombok)。 -可以参考 [install plugins for IDEA](https://www.jetbrains.com/help/idea/managing-plugins.html#install-plugins) 。 - -### 代码风格 - -Apache SeaTunnel 使用 `Spotless` 来统一代码风格和格式检查。可以运行下面 `Spotless` 命令自动格式化。 - -```shell -./mvnw spotless:apply -``` - -拷贝 `pre-commit hook` 文件 `/tools/spotless_check/pre-commit.sh` 到你项目的 `.git/hooks/` 目录, 这样每次你使用 `git commit` 提交代码的时候会自动调用 `Spotless` 修复格式问题。 - -## 运行一个简单的示例 - -完成上面所有的工作后,环境搭建已经完成, 可以直接运行我们的示例了。 所有的示例在 `seatunnel-examples` 模块里, 你可以随意选择进行编译和调试,参考 [running or debugging -it in IDEA](https://www.jetbrains.com/help/idea/run-debug-configuration.html)。 - -我们使用 `seatunnel-examples/seatunnel-flink-connector-v2-example/src/main/java/org/apache/seatunnel/example/flink/v2/SeaTunnelApiExample.java` -作为示例, 运行成功后的输出如下: - -```log -+I[Ricky Huo, 71] -+I[Gary, 12] -+I[Ricky Huo, 93] -... -... 
-+I[Ricky Huo, 83] -``` - -## 更多信息 - -所有的实例都用了简单的 source 和 sink, 这样可以使得运行更独立和更简单。 -你可以修改 `resources/examples` 中的示例的配置。 例如下面的配置使用 PostgreSQL 作为源,并且输出到控制台。 - -```conf -env { - parallelism = 1 -} - -source { - JdbcSource { - driver = org.postgresql.Driver - url = "jdbc:postgresql://host:port/database" - username = postgres - query = "select * from test" - } -} - -sink { - ConsoleSink {} -} -``` - diff --git a/docs/zh/other-engine/flink.md b/docs/zh/other-engine/flink.md deleted file mode 100644 index a9aa7055a2e..00000000000 --- a/docs/zh/other-engine/flink.md +++ /dev/null @@ -1,83 +0,0 @@ -# Seatunnel runs on Flink - -Flink是一个强大的高性能分布式流处理引擎,更多关于它的信息,你可以搜索 `Apache Flink`。 - -### 在Job中设置Flink的配置信息 - -从 `flink` 开始: - -例子: 我对这个项目设置一个精确的检查点 - -``` -env { - parallelism = 1 - flink.execution.checkpointing.unaligned.enabled=true -} -``` - -枚举类型当前还不支持,你需要在Flink的配置文件中指定它们,暂时只有这些类型的设置受支持:
-Integer/Boolean/String/Duration - -### 如何设置一个简单的Flink job - -这是一个运行在Flink中随机生成数据打印到控制台的简单job - -``` -env { - # 公共参数 - parallelism = 1 - checkpoint.interval = 5000 - - # flink特殊参数 - flink.execution.checkpointing.mode = "EXACTLY_ONCE" - flink.execution.checkpointing.timeout = 600000 -} - -source { - FakeSource { - row.num = 16 - result_table_name = "fake_table" - schema = { - fields { - c_map = "map" - c_array = "array" - c_string = string - c_boolean = boolean - c_int = int - c_bigint = bigint - c_double = double - c_bytes = bytes - c_date = date - c_decimal = "decimal(33, 18)" - c_timestamp = timestamp - c_row = { - c_map = "map" - c_array = "array" - c_string = string - c_boolean = boolean - c_int = int - c_bigint = bigint - c_double = double - c_bytes = bytes - c_date = date - c_decimal = "decimal(33, 18)" - c_timestamp = timestamp - } - } - } - } -} - -transform { - # 如果你想知道更多关于如何配置seatunnel的信息和查看完整的transform插件, - # 请访问:https://seatunnel.apache.org/docs/transform-v2/sql -} - -sink{ - Console{} -} -``` - -### 如何在项目中运行job - -当你将代码拉到本地后,转到 `seatunnel-examples/seatunnel-flink-connector-v2-example` 模块,查找 `org.apache.seatunnel.example.flink.v2.SeaTunnelApiExample` 即可完成job的操作 diff --git a/docs/zh/seatunnel-engine/checkpoint-storage.md b/docs/zh/seatunnel-engine/checkpoint-storage.md deleted file mode 100644 index 795e7bf63b5..00000000000 --- a/docs/zh/seatunnel-engine/checkpoint-storage.md +++ /dev/null @@ -1,187 +0,0 @@ ---- - -sidebar_position: 7 -------------------- - -# 检查点存储 - -## 简介 - -检查点是一种容错恢复机制。这种机制确保程序在运行时,即使突然遇到异常,也能自行恢复。 - -### 检查点存储 - -检查点存储是一种存储检查点数据的存储机制。 - -SeaTunnel Engine支持以下检查点存储类型: - -- HDFS (OSS,S3,HDFS,LocalFile) -- LocalFile (本地),(已弃用: 使用Hdfs(LocalFile)替代). - -我们使用微内核设计模式将检查点存储模块从引擎中分离出来。这允许用户实现他们自己的检查点存储模块。 - -`checkpoint-storage-api`是检查点存储模块API,它定义了检查点存储模块的接口。 - -如果你想实现你自己的检查点存储模块,你需要实现`CheckpointStorage`并提供相应的`CheckpointStorageFactory`实现。 - -### 检查点存储配置 - -`seatunnel-server`模块的配置在`seatunnel.yaml`文件中。 - -```yaml - -seatunnel: - engine: - checkpoint: - storage: - type: hdfs #检查点存储的插件名称,支持hdfs(S3, local, hdfs), 默认为localfile (本地文件), 但这种方式已弃用 - # 插件配置 - plugin-config: - namespace: #检查点存储父路径,默认值为/seatunnel/checkpoint/ - K1: V1 # 插件其它配置 - K2: V2 # 插件其它配置 -``` - -注意: namespace必须以"/"结尾。 - -#### OSS - -阿里云oss是基于hdfs-file,所以你可以参考[hadoop oss文档](https://hadoop.apache.org/docs/stable/hadoop-aliyun/tools/hadoop-aliyun/index.html)来配置oss. - -除了与oss buckets交互外,oss客户端需要与buckets交互所需的凭据。 -客户端支持多种身份验证机制,并且可以配置使用哪种机制及其使用顺序。也可以使用of org.apache.hadoop.fs.aliyun.oss.AliyunCredentialsProvider的自定义实现。 -如果您使用AliyunCredentialsProvider(可以从阿里云访问密钥管理中获得),它们包括一个access key和一个secret key。 -你可以这样配置: - -```yaml -seatunnel: - engine: - checkpoint: - interval: 6000 - timeout: 7000 - storage: - type: hdfs - max-retained: 3 - plugin-config: - storage.type: oss - oss.bucket: your-bucket - fs.oss.accessKeyId: your-access-key - fs.oss.accessKeySecret: your-secret-key - fs.oss.endpoint: endpoint address - fs.oss.credentials.provider: org.apache.hadoop.fs.aliyun.oss.AliyunCredentialsProvider -``` - -有关Hadoop Credential Provider API的更多信息,请参见: [Credential Provider API](https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/CredentialProviderAPI.html). 
- -阿里云oss凭证提供程序实现见: [验证凭证提供](https://github.com/aliyun/aliyun-oss-java-sdk/tree/master/src/main/java/com/aliyun/oss/common/auth) - -#### S3 - -S3基于hdfs-file,所以你可以参考[hadoop s3文档](https://hadoop.apache.org/docs/stable/hadoop-aws/tools/hadoop-aws/index.html)来配置s3。 - -除了与公共S3 buckets交互之外,S3A客户端需要与buckets交互所需的凭据。 -客户端支持多种身份验证机制,并且可以配置使用哪种机制及其使用顺序。也可以使用com.amazonaws.auth.AWSCredentialsProvider的自定义实现。 -如果您使用SimpleAWSCredentialsProvider(可以从Amazon Security Token服务中获得),它们包括一个access key和一个secret key。 -您可以这样配置: - -```yaml -``` yaml - -seatunnel: - engine: - checkpoint: - interval: 6000 - timeout: 7000 - storage: - type: hdfs - max-retained: 3 - plugin-config: - storage.type: s3 - s3.bucket: your-bucket - fs.s3a.access.key: your-access-key - fs.s3a.secret.key: your-secret-key - fs.s3a.aws.credentials.provider: org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider - - -``` - -如果您使用`InstanceProfileCredentialsProvider`,它支持在EC2 VM中运行时使用实例配置文件凭据,您可以检查[iam-roles-for-amazon-ec2](https://docs.aws.amazon.com/zh_cn/AWSEC2/latest/UserGuide/iam-roles-for-amazon-ec2.html). -您可以这样配置: - -```yaml - -seatunnel: - engine: - checkpoint: - interval: 6000 - timeout: 7000 - storage: - type: hdfs - max-retained: 3 - plugin-config: - storage.type: s3 - s3.bucket: your-bucket - fs.s3a.endpoint: your-endpoint - fs.s3a.aws.credentials.provider: org.apache.hadoop.fs.s3a.InstanceProfileCredentialsProvider -``` - -有关Hadoop Credential Provider API的更多信息,请参见: [Credential Provider API](https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/CredentialProviderAPI.html). - -#### HDFS - -如果您使用HDFS,您可以这样配置: - -```yaml -seatunnel: - engine: - checkpoint: - storage: - type: hdfs - max-retained: 3 - plugin-config: - storage.type: hdfs - fs.defaultFS: hdfs://localhost:9000 - // 如果您使用kerberos,您可以这样配置: - kerberosPrincipal: your-kerberos-principal - kerberosKeytabFilePath: your-kerberos-keytab -``` - -如果HDFS是HA模式,您可以这样配置: - -```yaml -seatunnel: - engine: - checkpoint: - storage: - type: hdfs - max-retained: 3 - plugin-config: - storage.type: hdfs - fs.defaultFS: hdfs://usdp-bing - seatunnel.hadoop.dfs.nameservices: usdp-bing - seatunnel.hadoop.dfs.ha.namenodes.usdp-bing: nn1,nn2 - seatunnel.hadoop.dfs.namenode.rpc-address.usdp-bing.nn1: usdp-bing-nn1:8020 - seatunnel.hadoop.dfs.namenode.rpc-address.usdp-bing.nn2: usdp-bing-nn2:8020 - seatunnel.hadoop.dfs.client.failover.proxy.provider.usdp-bing: org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider - -``` - -如果HDFS在`hdfs-site.xml`或`core-site.xml`中有其他配置,只需使用`seatunnel.hadoop.`前缀设置HDFS配置即可。 - -#### 本地文件 - -```yaml -seatunnel: - engine: - checkpoint: - interval: 6000 - timeout: 7000 - storage: - type: hdfs - max-retained: 3 - plugin-config: - storage.type: hdfs - fs.defaultFS: file:/// # 请确保该目录具有写权限 - -``` - diff --git a/docs/zh/seatunnel-engine/cluster-mode.md b/docs/zh/seatunnel-engine/cluster-mode.md deleted file mode 100644 index a0b11cd1dfa..00000000000 --- a/docs/zh/seatunnel-engine/cluster-mode.md +++ /dev/null @@ -1,21 +0,0 @@ ---- - -sidebar_position: 3 -------------------- - -# 以集群模式运行作业 - -这是最推荐的在生产环境中使用SeaTunnel Engine的方法。此模式支持SeaTunnel Engine的全部功能,集群模式将具有更好的性能和稳定性。 - -在集群模式下,首先需要部署SeaTunnel Engine集群,然后客户端将作业提交给SeaTunnel Engine集群运行。 - -## 部署SeaTunnel Engine集群 - -部署SeaTunnel Engine集群参考[SeaTunnel Engine集群部署](../../en/seatunnel-engine/deployment.md) - -## 提交作业 - -```shell -$SEATUNNEL_HOME/bin/seatunnel.sh --config $SEATUNNEL_HOME/config/v2.batch.config.template -``` - diff --git a/docs/zh/seatunnel-engine/local-mode.md 
b/docs/zh/seatunnel-engine/local-mode.md deleted file mode 100644 index 3738721fa79..00000000000 --- a/docs/zh/seatunnel-engine/local-mode.md +++ /dev/null @@ -1,25 +0,0 @@ ---- - -sidebar_position: 2 -------------------- - -# 以本地模式运行作业 - -仅用于测试。 - -最推荐在生产环境中使用SeaTunnel Engine的方式为[集群模式](cluster-mode.md). - -## 本地模式部署SeaTunnel Engine - -[部署SeaTunnel Engine本地模式参考](../../en/start-v2/locally/deployment.md) - -## 修改SeaTunnel Engine配置 - -将$SEATUNNEL_HOME/config/hazelcast.yaml中的自动增量更新为true - -## 提交作业 - -```shell -$SEATUNNEL_HOME/bin/seatunnel.sh --config $SEATUNNEL_HOME/config/v2.batch.config.template -e local -``` - diff --git a/docs/zh/seatunnel-engine/rest-api.md b/docs/zh/seatunnel-engine/rest-api.md deleted file mode 100644 index a3f8d10d190..00000000000 --- a/docs/zh/seatunnel-engine/rest-api.md +++ /dev/null @@ -1,384 +0,0 @@ ---- - -sidebar_position: 7 -------------------- - -# REST API - -SeaTunnel有一个用于监控的API,可用于查询运行作业的状态和统计信息,以及最近完成的作业。监控API是REST-ful风格的,它接受HTTP请求并使用JSON数据格式进行响应。 - -## 概述 - -监控API是由运行的web服务提供的,它是节点运行的一部分,每个节点成员都可以提供rest API功能。 -默认情况下,该服务监听端口为5801,该端口可以在hazelcast.yaml中配置,如下所示: - -```yaml -network: - rest-api: - enabled: true - endpoint-groups: - CLUSTER_WRITE: - enabled: true - DATA: - enabled: true - join: - tcp-ip: - enabled: true - member-list: - - localhost - port: - auto-increment: true - port-count: 100 - port: 5801 -``` - -## API参考 - -### 返回所有作业及其当前状态的概览。 - -
- GET /hazelcast/rest/maps/running-jobs (返回所有作业及其当前状态的概览。) - -#### 参数 - -#### 响应 - -```json -[ - { - "jobId": "", - "jobName": "", - "jobStatus": "", - "envOptions": { - }, - "createTime": "", - "jobDag": { - "vertices": [ - ], - "edges": [ - ] - }, - "pluginJarsUrls": [ - ], - "isStartWithSavePoint": false, - "metrics": { - "sourceReceivedCount": "", - "sinkWriteCount": "" - } - } -] -``` - -
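For quick reference, a hypothetical invocation of this endpoint might look like the following, assuming a node running locally on the default REST port 5801 configured above:

```shell
# List all running jobs from a local SeaTunnel Engine node.
# localhost:5801 assumes the default rest-api port from hazelcast.yaml.
curl "http://localhost:5801/hazelcast/rest/maps/running-jobs"
```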
- ------------------------------------------------------------------------------------------- - -### 返回作业的详细信息。 - -
- GET /hazelcast/rest/maps/running-job/:jobId (返回作业的详细信息。) - -#### 参数 - -> | name | type | data type | description | -> |-------|----------|-----------|-------------| -> | jobId | required | long | job id | - -#### 响应 - -```json -{ - "jobId": "", - "jobName": "", - "jobStatus": "", - "envOptions": { - }, - "createTime": "", - "jobDag": { - "vertices": [ - ], - "edges": [ - ] - }, - "pluginJarsUrls": [ - ], - "isStartWithSavePoint": false, - "metrics": { - "sourceReceivedCount": "", - "sinkWriteCount": "" - } -} -``` - -
- ------------------------------------------------------------------------------------------- - -### 返回所有已完成的作业信息。 - -
- GET /hazelcast/rest/maps/finished-jobs/:state (返回所有已完成的作业信息。) - -#### 参数 - -> | name | type | data type | description | -> |-------|----------|-----------|------------------------------------------------------------------| -> | state | optional | string | finished job status. `FINISHED`,`CANCELED`,`FAILED`,`UNKNOWABLE` | - -#### 响应 - -```json -[ - { - "jobId": "", - "jobName": "", - "jobStatus": "", - "errorMsg": null, - "createTime": "", - "finishTime": "", - "jobDag": "", - "metrics": "" - } -] -``` - -
- ------------------------------------------------------------------------------------------- - -### 返回系统监控信息。 - -
- GET /hazelcast/rest/maps/system-monitoring-information (返回系统监控信息。) - -#### 参数 - -#### 响应 - -```json -[ - { - "processors":"8", - "physical.memory.total":"16.0G", - "physical.memory.free":"16.3M", - "swap.space.total":"0", - "swap.space.free":"0", - "heap.memory.used":"135.7M", - "heap.memory.free":"440.8M", - "heap.memory.total":"576.5M", - "heap.memory.max":"3.6G", - "heap.memory.used/total":"23.54%", - "heap.memory.used/max":"3.73%", - "minor.gc.count":"6", - "minor.gc.time":"110ms", - "major.gc.count":"2", - "major.gc.time":"73ms", - "load.process":"24.78%", - "load.system":"60.00%", - "load.systemAverage":"2.07", - "thread.count":"117", - "thread.peakCount":"118", - "cluster.timeDiff":"0", - "event.q.size":"0", - "executor.q.async.size":"0", - "executor.q.client.size":"0", - "executor.q.client.query.size":"0", - "executor.q.client.blocking.size":"0", - "executor.q.query.size":"0", - "executor.q.scheduled.size":"0", - "executor.q.io.size":"0", - "executor.q.system.size":"0", - "executor.q.operations.size":"0", - "executor.q.priorityOperation.size":"0", - "operations.completed.count":"10", - "executor.q.mapLoad.size":"0", - "executor.q.mapLoadAllKeys.size":"0", - "executor.q.cluster.size":"0", - "executor.q.response.size":"0", - "operations.running.count":"0", - "operations.pending.invocations.percentage":"0.00%", - "operations.pending.invocations.count":"0", - "proxy.count":"8", - "clientEndpoint.count":"0", - "connection.active.count":"2", - "client.connection.count":"0", - "connection.count":"0" - } -] -``` - -
- ------------------------------------------------------------------------------------------- - -### 提交作业。 - -
-POST /hazelcast/rest/maps/submit-job (如果作业提交成功,返回jobId和jobName。) - -#### 参数 - -> | name | type | data type | description | -> |----------------------|----------|-----------|-----------------------------------| -> | jobId | optional | string | job id | -> | jobName | optional | string | job name | -> | isStartWithSavePoint | optional | string | if job is started with save point | - -#### 请求体 - -```json -{ - "env": { - "job.mode": "batch" - }, - "source": [ - { - "plugin_name": "FakeSource", - "result_table_name": "fake", - "row.num": 100, - "schema": { - "fields": { - "name": "string", - "age": "int", - "card": "int" - } - } - } - ], - "transform": [ - ], - "sink": [ - { - "plugin_name": "Console", - "source_table_name": ["fake"] - } - ] -} -``` - -#### 响应 - -```json -{ - "jobId": 733584788375666689, - "jobName": "rest_api_test" -} -``` - -
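As a sketch of how the request body and query parameters fit together, the job config above could be submitted with curl, assuming a local node on the default REST port 5801; `job.json` is a hypothetical file holding the request body shown above:

```shell
# Submit the job config stored in job.json; the optional jobName query
# parameter matches the jobName returned in the response above.
curl -X POST \
  -H "Content-Type: application/json" \
  -d @job.json \
  "http://localhost:5801/hazelcast/rest/maps/submit-job?jobName=rest_api_test"
```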
- ------------------------------------------------------------------------------------------- - -### 停止作业。 - -
-POST /hazelcast/rest/maps/stop-job (如果作业成功停止,返回jobId。) - -#### 请求体 - -```json -{ - "jobId": 733584788375666689, - "isStopWithSavePoint": false # if job is stopped with save point -} -``` - -#### 响应 - -```json -{ -"jobId": 733584788375666689 -} -``` - -
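Similarly, a hypothetical stop request could look like the following, again assuming a local node on the default port 5801; set `isStopWithSavePoint` to `true` to stop the job with a savepoint:

```shell
# Stop job 733584788375666689 without taking a savepoint.
curl -X POST \
  -H "Content-Type: application/json" \
  -d '{"jobId": 733584788375666689, "isStopWithSavePoint": false}' \
  "http://localhost:5801/hazelcast/rest/maps/stop-job"
```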
- ------------------------------------------------------------------------------------------- - -### 加密配置。 - -
-POST /hazelcast/rest/maps/encrypt-config (如果配置加密成功,则返回加密后的配置。) -有关自定义加密的更多信息,请参阅文档[配置-加密-解密](../connector-v2/Config-Encryption-Decryption.md). - -#### 请求体 - -```json -{ - "env": { - "parallelism": 1, - "shade.identifier":"base64" - }, - "source": [ - { - "plugin_name": "MySQL-CDC", - "schema" : { - "fields": { - "name": "string", - "age": "int" - } - }, - "result_table_name": "fake", - "parallelism": 1, - "hostname": "127.0.0.1", - "username": "seatunnel", - "password": "seatunnel_password", - "table-name": "inventory_vwyw0n" - } - ], - "transform": [ - ], - "sink": [ - { - "plugin_name": "Clickhouse", - "host": "localhost:8123", - "database": "default", - "table": "fake_all", - "username": "seatunnel", - "password": "seatunnel_password" - } - ] -} -``` - -#### 响应 - -```json -{ - "env": { - "parallelism": 1, - "shade.identifier": "base64" - }, - "source": [ - { - "plugin_name": "MySQL-CDC", - "schema": { - "fields": { - "name": "string", - "age": "int" - } - }, - "result_table_name": "fake", - "parallelism": 1, - "hostname": "127.0.0.1", - "username": "c2VhdHVubmVs", - "password": "c2VhdHVubmVsX3Bhc3N3b3Jk", - "table-name": "inventory_vwyw0n" - } - ], - "transform": [], - "sink": [ - { - "plugin_name": "Clickhouse", - "host": "localhost:8123", - "database": "default", - "table": "fake_all", - "username": "c2VhdHVubmVs", - "password": "c2VhdHVubmVsX3Bhc3N3b3Jk" - } - ] -} -``` - -
- diff --git a/docs/zh/transform-v2/common-options.md b/docs/zh/transform-v2/common-options.md deleted file mode 100644 index 9a756760f2c..00000000000 --- a/docs/zh/transform-v2/common-options.md +++ /dev/null @@ -1,23 +0,0 @@ -# 转换常见选项 - -> 源端连接器的常见参数 - -| 参数名称 | 参数类型 | 是否必须 | 默认值 | -|-------------------|--------|------|-----| -| result_table_name | string | no | - | -| source_table_name | string | no | - | - -### source_table_name [string] - -当未指定 `source_table_name` 时,当前插件在配置文件中处理由前一个插件输出的数据集 `(dataset)` ; - -当指定了 `source_table_name` 时,当前插件正在处理与该参数对应的数据集 - -### result_table_name [string] - -当未指定 `result_table_name` 时,此插件处理的数据不会被注册为其他插件可以直接访问的数据集,也不会被称为临时表 `(table)`; - -当指定了 `result_table_name` 时,此插件处理的数据将被注册为其他插件可以直接访问的数据集 `(dataset)`,或者被称为临时表 `(table)`。在这里注册的数据集可以通过指定 `source_table_name` 被其他插件直接访问。 - -## 示例 - diff --git a/docs/zh/transform-v2/copy.md b/docs/zh/transform-v2/copy.md deleted file mode 100644 index a4ca5c613a7..00000000000 --- a/docs/zh/transform-v2/copy.md +++ /dev/null @@ -1,65 +0,0 @@ -# 复制 - -> 复制转换插件 - -## 描述 - -将字段复制到一个新字段。 - -## 属性 - -| 名称 | 类型 | 是否必须 | 默认值 | -|--------|--------|------|-----| -| fields | Object | yes | | - -### fields [config] - -指定输入和输出之间的字段复制关系 - -### 常见选项 [string] - -转换插件的常见参数, 请参考 [Transform Plugin](common-options.md) 了解详情。 - -## 示例 - -从源读取的数据是这样的一个表: - -| name | age | card | -|----------|-----|------| -| Joy Ding | 20 | 123 | -| May Ding | 20 | 123 | -| Kin Dom | 20 | 123 | -| Joy Dom | 20 | 123 | - -想要将字段 `name`、`age` 复制到新的字段 `name1`、`name2`、`age1`,我们可以像这样添加 `Copy` 转换: - -``` -transform { - Copy { - source_table_name = "fake" - result_table_name = "fake1" - fields { - name1 = name - name2 = name - age1 = age - } - } -} -``` - -那么结果表 `fake1` 中的数据将会像这样: - -| name | age | card | name1 | name2 | age1 | -|----------|-----|------|----------|----------|------| -| Joy Ding | 20 | 123 | Joy Ding | Joy Ding | 20 | -| May Ding | 20 | 123 | May Ding | May Ding | 20 | -| Kin Dom | 20 | 123 | Kin Dom | Kin Dom | 20 | -| Joy Dom | 20 | 123 | Joy Dom | Joy Dom | 20 | - -## 更新日志 - -### 新版本 - -- 添加复制转换连接器 -- 支持将字段复制到新字段 - diff --git a/docs/zh/transform-v2/field-mapper.md b/docs/zh/transform-v2/field-mapper.md deleted file mode 100644 index 298d3fa72c9..00000000000 --- a/docs/zh/transform-v2/field-mapper.md +++ /dev/null @@ -1,64 +0,0 @@ -# 字段映射 - -> 字段映射转换插件 - -## 描述 - -添加输入模式和输出模式映射 - -## 属性 - -| 名称 | 类型 | 是否必须 | 默认值 | -|--------------|--------|------|-----| -| field_mapper | Object | yes | | - -### field_mapper [config] - -指定输入和输出之间的字段映射关系 - -### common options [config] - -转换插件的常见参数, 请参考 [Transform Plugin](common-options.md) 了解详情 - -## 示例 - -源端数据读取的表格如下: - -| id | name | age | card | -|----|----------|-----|------| -| 1 | Joy Ding | 20 | 123 | -| 2 | May Ding | 20 | 123 | -| 3 | Kin Dom | 20 | 123 | -| 4 | Joy Dom | 20 | 123 | - -我们想要删除 `age` 字段,并更新字段顺序为 `id`、`card`、`name`,同时将 `name` 重命名为 `new_name`。我们可以像这样添加 `FieldMapper` 转换: - -``` -transform { - FieldMapper { - source_table_name = "fake" - result_table_name = "fake1" - field_mapper = { - id = id - card = card - name = new_name - } - } -} -``` - -那么结果表 `fake1` 中的数据将会像这样: - -| id | card | new_name | -|----|------|----------| -| 1 | 123 | Joy Ding | -| 2 | 123 | May Ding | -| 3 | 123 | Kin Dom | -| 4 | 123 | Joy Dom | - -## 更新日志 - -### 新版本 - -- 添加复制转换连接器 - diff --git a/docs/zh/transform-v2/filter-rowkind.md b/docs/zh/transform-v2/filter-rowkind.md deleted file mode 100644 index 74d2b2d5b1e..00000000000 --- a/docs/zh/transform-v2/filter-rowkind.md +++ /dev/null @@ -1,68 +0,0 @@ -# 行类型过滤 - -> 行类型转换插件 - -## 描述 - 
-按行类型过滤数据 - -## 操作 - -| 名称 | 类型 | 是否必须 | 默认值 | -|---------------|-------|------|-----| -| include_kinds | array | yes | | -| exclude_kinds | array | yes | | - -### include_kinds [array] - -要包含的行类型 - -### exclude_kinds [array] - -要排除的行类型。 - -您只能配置 `include_kinds` 和 `exclude_kinds` 中的一个。 - -### common options [string] - -转换插件的常见参数, 请参考 [Transform Plugin](common-options.md) 了解详情 - -## 示例 - -FakeSource 生成的数据的行类型是 `INSERT`。如果我们使用 `FilterRowKink` 转换并排除 `INSERT` 数据,我们将不会向接收器写入任何行。 - -```yaml - -env { - job.mode = "BATCH" -} - -source { - FakeSource { - result_table_name = "fake" - row.num = 100 - schema = { - fields { - id = "int" - name = "string" - age = "int" - } - } - } -} - -transform { - FilterRowKind { - source_table_name = "fake" - result_table_name = "fake1" - exclude_kinds = ["INSERT"] - } -} - -sink { - Console { - source_table_name = "fake1" - } -} -``` - diff --git a/docs/zh/transform-v2/filter.md b/docs/zh/transform-v2/filter.md deleted file mode 100644 index 706a72ead12..00000000000 --- a/docs/zh/transform-v2/filter.md +++ /dev/null @@ -1,60 +0,0 @@ -# 过滤器 - -> 过滤器转换插件 - -## 描述 - -过滤字段 - -## 属性 - -| 名称 | 类型 | 是否必须 | 默认值 | -|--------|-------|------|-----| -| fields | array | yes | | - -### fields [array] - -需要保留的字段列表。不在列表中的字段将被删除。 - -### common options [string] - -转换插件的常见参数, 请参考 [Transform Plugin](common-options.md) 了解详情 - -## 示例 - -源端数据读取的表格如下: - -| name | age | card | -|----------|-----|------| -| Joy Ding | 20 | 123 | -| May Ding | 20 | 123 | -| Kin Dom | 20 | 123 | -| Joy Dom | 20 | 123 | - -我们想要删除字段 `age`,我们可以像这样添加 `Filter` 转换 - -``` -transform { - Filter { - source_table_name = "fake" - result_table_name = "fake1" - fields = [name, card] - } -} -``` - -那么结果表 `fake1` 中的数据将会像这样: - -| name | card | -|----------|------| -| Joy Ding | 123 | -| May Ding | 123 | -| Kin Dom | 123 | -| Joy Dom | 123 | - -## 更新日志 - -### 新版本 - -- 添加过滤转器换连接器 - diff --git a/docs/zh/transform-v2/jsonpath.md b/docs/zh/transform-v2/jsonpath.md deleted file mode 100644 index 449f0f6a77f..00000000000 --- a/docs/zh/transform-v2/jsonpath.md +++ /dev/null @@ -1,190 +0,0 @@ -# JsonPath - -> JSONPath 转换插件 - -## 描述 - -> 支持使用 JSONPath 选择数据 - -## 属性 - -| 名称 | 类型 | 是否必须 | 默认值 | -|---------|-------|------|-----| -| Columns | Array | Yes | | - -### common options [string] - -转换插件的常见参数, 请参考 [Transform Plugin](common-options.md) 了解详情 - -### fields[array] - -#### 属性 - -| 名称 | 类型 | 是否必须 | 默认值 | -|------------|--------|------|--------| -| src_field | String | Yes | | -| dest_field | String | Yes | | -| path | String | Yes | | -| dest_type | String | No | String | - -#### src_field - -> 要解析的 JSON 源字段 - -支持的Seatunnel数据类型 - -* STRING -* BYTES -* ARRAY -* MAP -* ROW - -#### dest_field - -> 使用 JSONPath 后的输出字段 - -#### dest_type - -> 目标字段的类型 - -#### path - -> Jsonpath - -## 读取 JSON 示例 - -从源读取的数据是像这样的 JSON - -```json -{ - "data": { - "c_string": "this is a string", - "c_boolean": true, - "c_integer": 42, - "c_float": 3.14, - "c_double": 3.14, - "c_decimal": 10.55, - "c_date": "2023-10-29", - "c_datetime": "16:12:43.459", - "c_array":["item1", "item2", "item3"] - } -} -``` - -假设我们想要使用 JsonPath 提取属性。 - -```json -transform { - JsonPath { - source_table_name = "fake" - result_table_name = "fake1" - columns = [ - { - "src_field" = "data" - "path" = "$.data.c_string" - "dest_field" = "c1_string" - }, - { - "src_field" = "data" - "path" = "$.data.c_boolean" - "dest_field" = "c1_boolean" - "dest_type" = "boolean" - }, - { - "src_field" = "data" - "path" = "$.data.c_integer" - "dest_field" = "c1_integer" - "dest_type" = "int" - }, - { - 
"src_field" = "data" - "path" = "$.data.c_float" - "dest_field" = "c1_float" - "dest_type" = "float" - }, - { - "src_field" = "data" - "path" = "$.data.c_double" - "dest_field" = "c1_double" - "dest_type" = "double" - }, - { - "src_field" = "data" - "path" = "$.data.c_decimal" - "dest_field" = "c1_decimal" - "dest_type" = "decimal(4,2)" - }, - { - "src_field" = "data" - "path" = "$.data.c_date" - "dest_field" = "c1_date" - "dest_type" = "date" - }, - { - "src_field" = "data" - "path" = "$.data.c_datetime" - "dest_field" = "c1_datetime" - "dest_type" = "time" - }, - { - "src_field" = "data" - "path" = "$.data.c_array" - "dest_field" = "c1_array" - "dest_type" = "array" - } - ] - } -} -``` - -那么数据结果表 `fake1` 将会像这样 - -| data | c1_string | c1_boolean | c1_integer | c1_float | c1_double | c1_decimal | c1_date | c1_datetime | c1_array | -|------------------------------|------------------|------------|------------|----------|-----------|------------|------------|--------------|-----------------------------| -| too much content not to show | this is a string | true | 42 | 3.14 | 3.14 | 10.55 | 2023-10-29 | 16:12:43.459 | ["item1", "item2", "item3"] | - -## 读取 SeatunnelRow 示例 - -假设数据行中的一列的类型是 SeatunnelRow,列的名称为 col - - - - - -
-<table>
-    <tr><th colspan="2">SeatunnelRow(col)</th><th>other</th></tr>
-    <tr><td>name</td><td>age</td><td>....</td></tr>
-    <tr><td>a</td><td>18</td><td>....</td></tr>
-</table>
-
-JsonPath 转换会将该 SeaTunnelRow 类型的值当作一个数组来处理。
-
-```json
-transform {
-  JsonPath {
-    source_table_name = "fake"
-    result_table_name = "fake1"
-    columns = [
-      {
-        "src_field" = "col"
-        "path" = "$[0]"
-        "dest_field" = "name"
-        "dest_type" = "string"
-      },
-      {
-        "src_field" = "col"
-        "path" = "$[1]"
-        "dest_field" = "age"
-        "dest_type" = "int"
-      }
-    ]
-  }
-}
-```
-
-那么数据结果表 `fake1` 将会像这样:
-
-| name | age | col      | other |
-|------|-----|----------|-------|
-| a    | 18  | ["a",18] | ...   |
-
-## 更新日志
-
-* 添加 JsonPath 转换
-
diff --git a/docs/zh/transform-v2/replace.md b/docs/zh/transform-v2/replace.md
deleted file mode 100644
index 99eef89a1ab..00000000000
--- a/docs/zh/transform-v2/replace.md
+++ /dev/null
@@ -1,121 +0,0 @@
-# 替换
-
-> 替换转换插件
-
-## 描述
-
-检查给定字段中的字符串值,并用给定的替换项替换与给定字符串字面量或正则表达式匹配的字符串值的子字符串。
-
-## 属性
-
-| 名称          | 类型    | 是否必须 | 默认值 |
-|---------------|---------|------|-------|
-| replace_field | string  | yes  |       |
-| pattern       | string  | yes  | -     |
-| replacement   | string  | yes  | -     |
-| is_regex      | boolean | no   | false |
-| replace_first | boolean | no   | false |
-
-### replace_field [string]
-
-需要替换的字段
-
-### pattern [string]
-
-将被替换的旧字符串
-
-### replacement [string]
-
-用于替换的新字符串
-
-### is_regex [boolean]
-
-使用正则表达式进行字符串匹配
-
-### replace_first [boolean]
-
-是否只替换第一个匹配字符串。仅在 `is_regex = true` 时使用。
-
-### common options [string]
-
-转换插件的常见参数, 请参考 [Transform Plugin](common-options.md) 了解详情
-
-## 示例
-
-源端数据读取的表格如下:
-
-| name     | age | card |
-|----------|-----|------|
-| Joy Ding | 20  | 123  |
-| May Ding | 20  | 123  |
-| Kin Dom  | 20  | 123  |
-| Joy Dom  | 20  | 123  |
-
-我们想要将 `name` 字段中的字符 ` `(空格)替换为 `_`。然后我们可以添加一个 `Replace` 转换,像这样:
-
-```
-transform {
-  Replace {
-    source_table_name = "fake"
-    result_table_name = "fake1"
-    replace_field = "name"
-    pattern = " "
-    replacement = "_"
-    is_regex = true
-  }
-}
-```
-
-那么结果表 `fake1` 中的数据将会更新为:
-
-| name     | age | card |
-|----------|-----|------|
-| Joy_Ding | 20  | 123  |
-| May_Ding | 20  | 123  |
-| Kin_Dom  | 20  | 123  |
-| Joy_Dom  | 20  | 123  |
-
-## 作业配置示例
-
-```
-env {
-  job.mode = "BATCH"
-}
-
-source {
-  FakeSource {
-    result_table_name = "fake"
-    row.num = 100
-    schema = {
-      fields {
-        id = "int"
-        name = "string"
-      }
-    }
-  }
-}
-
-transform {
-  Replace {
-    source_table_name = "fake"
-    result_table_name = "fake1"
-    replace_field = "name"
-    pattern = ".+"
-    replacement = "b"
-    is_regex = true
-  }
-}
-
-sink {
-  Console {
-    source_table_name = "fake1"
-  }
-}
-```
-
-## 更新日志
-
-### 新版本
-
-- 添加替换转换连接器
-
diff --git a/docs/zh/transform-v2/split.md b/docs/zh/transform-v2/split.md
deleted file mode 100644
index ef8c3f58540..00000000000
--- a/docs/zh/transform-v2/split.md
+++ /dev/null
@@ -1,72 +0,0 @@
-# 拆分
-
-> 拆分转换插件
-
-## 描述
-
-拆分一个字段为多个字段。
-
-## 属性
-
-| 名称          | 类型   | 是否必须 | 默认值 |
-|---------------|--------|------|-----|
-| separator     | string | yes  |     |
-| split_field   | string | yes  |     |
-| output_fields | array  | yes  |     |
-
-### separator [string]
-
-拆分内容的分隔符
-
-### split_field[string]
-
-需要拆分的字段
-
-### output_fields[array]
-
-拆分后的结果字段
-
-### common options [string]
-
-转换插件的常见参数, 请参考 [Transform Plugin](common-options.md) 了解详情
-
-## 示例
-
-源端数据读取的表格如下:
-
-| name     | age | card |
-|----------|-----|------|
-| Joy Ding | 20  | 123  |
-| May Ding | 20  | 123  |
-| Kin Dom  | 20  | 123  |
-| Joy Dom  | 20  | 123  |
-
-我们想要将 `name` 字段拆分为 `first_name` 和 `second_name`,我们可以像这样添加 `Split` 转换:
-
-```
-transform {
-  Split {
-    source_table_name = "fake"
-    result_table_name = "fake1"
-    separator = " "
-    split_field = "name"
-    output_fields = [first_name, second_name]
-  }
-}
-```
-
-那么结果表 `fake1` 中的数据将会像这样:
-
-| name     | age | card | first_name | second_name |
-|----------|-----|------|------------|-------------|
-| Joy Ding | 20  | 123  | Joy        | Ding        |
-| May Ding | 20  | 123  | May        | Ding        |
-| Kin Dom  | 20  | 123  | Kin        | Dom         |
-| Joy Dom  | 20  | 123  | Joy        | Dom         |
-
-## 更新日志
-
-### 新版本
-
-- 添加拆分转换连接器
-
diff --git a/docs/zh/transform-v2/sql-functions.md b/docs/zh/transform-v2/sql-functions.md
deleted file mode 100644
index cd90b948674..00000000000
--- a/docs/zh/transform-v2/sql-functions.md
+++ /dev/null
@@ -1,966 +0,0 @@
-# SQL函数
-
-> SQL函数转换插件功能
-
-## 字符串函数
-
-### ASCII
-
-```ASCII(string)```
-
-返回字符串中第一个字符的ASCII值。此方法返回一个整数。
-
-示例:
-
-ASCII('Hi')
-
-### BIT_LENGTH
-
-```BIT_LENGTH(bytes)```
-
-返回二进制字符串中的位数。该方法返回一个长整型
-
-示例:
-
-BIT_LENGTH(NAME)
-
-### CHAR_LENGTH / LENGTH
-
-```CHAR_LENGTH | LENGTH (string)```
-
-这个方法返回一个字符串中字符的数量,返回类型为 long。
-
-示例:
-
-CHAR_LENGTH(NAME)
-
-### OCTET_LENGTH
-
-```OCTET_LENGTH(bytes)```
-
-返回二进制字符串中字节的数量。此方法返回一个 long 类型的值。
-
-示例:
-
-OCTET_LENGTH(NAME)
-
-### CHAR / CHR
-
-```CHAR | CHR (int)```
-
-返回表示ASCII值的字符。该方法返回一个字符串。
-
-示例:
-
-CHAR(65)
-
-### CONCAT
-
-```CONCAT(string, string[, string ...] )```
-
-组合字符串。与运算符 `||` 不同,**NULL** 参数会被忽略,不会导致结果变为 **NULL**。如果所有参数都是 NULL,则结果是一个空字符串。该方法返回一个字符串。
-
-示例:
-
-CONCAT(NAME, '_')
-
-### CONCAT_WS
-
-```CONCAT_WS(separatorString, string, string[, string ...] )```
-
-使用分隔符组合字符串。如果分隔符为 **NULL**,则会被视为空字符串。其他 **NULL** 参数会被忽略。剩余的 **非NULL** 参数(如果有)将用指定的分隔符连接起来。如果没有剩余参数,则结果是一个空字符串。该方法返回一个字符串。
-
-示例:
-
-CONCAT_WS(',', NAME, '_')
-
-### HEXTORAW
-
-```HEXTORAW(string)```
-
-将字符串的十六进制表示转换为字符串。每个字符串字符使用4个十六进制字符。
-
-示例:
-
-HEXTORAW(DATA)
-
-### RAWTOHEX
-
-```RAWTOHEX(string)```
-
-```RAWTOHEX(bytes)```
-
-将字符串或字节转换为十六进制表示。每个字符串字符使用4个十六进制字符。该方法返回一个字符串。
-
-示例:
-
-RAWTOHEX(DATA)
-
-### INSERT
-
-```INSERT(originalString, startInt, lengthInt, addString)```
-
-在原始字符串的指定起始位置插入额外的字符串。长度参数指定在原始字符串的起始位置删除的字符数。该方法返回一个字符串。
-
-示例:
-
-INSERT(NAME, 1, 1, ' ')
-
-### LOWER / LCASE
-
-```LOWER | LCASE (string)```
-
-将字符串转换为小写形式。
-
-示例:
-
-LOWER(NAME)
-
-### UPPER / UCASE
-
-```UPPER | UCASE (string)```
-
-将字符串转换为大写形式。
-
-示例:
-
-UPPER(NAME)
-
-### LEFT
-
-```LEFT(string, int)```
-
-返回最左边的一定数量的字符。
-
-示例:
-
-LEFT(NAME, 3)
-
-### RIGHT
-
-```RIGHT(string, int)```
-
-返回最右边的一定数量的字符。
-
-示例:
-
-RIGHT(NAME, 3)
-
-### LOCATE / INSTR / POSITION
-
-```LOCATE(searchString, string[, startInit])```
-
-```INSTR(string, searchString[, startInit])```
-
-```POSITION(searchString, string)```
-
-返回字符串中搜索字符串的位置。如果使用了起始位置参数,则忽略它之前的字符。如果位置参数是负数,则返回最右边的位置。如果未找到搜索字符串,则返回 0。请注意,即使参数不区分大小写,此函数也区分大小写。
-
-示例:
-
-LOCATE('.', NAME)
-
-### LPAD
-
-```LPAD(string ,int[, string])```
-
-将字符串左侧填充到指定的长度。如果长度比字符串短,则字符串将在末尾被截断。如果未设置填充字符串,则使用空格填充。
-
-示例:
-
-LPAD(AMOUNT, 10, '*')
-
-### RPAD
-
-```RPAD(string, int[, string])```
-
-将字符串右侧填充到指定的长度。如果长度比字符串短,则字符串将被截断。如果未设置填充字符串,则使用空格填充。
-
-示例:
-
-RPAD(TEXT, 10, '-')
-
-### LTRIM
-
-```LTRIM(string[, characterToTrimString])```
-
-移除字符串中所有前导空格或其他指定的字符。
-
-此函数已被弃用,请使用 TRIM 替代。
-
-示例:
-
-LTRIM(NAME)
-
-### RTRIM
-
-```RTRIM(string[, characterToTrimString])```
-
-移除字符串中所有尾随空格或其他指定的字符。
-
-此函数已被弃用,请使用 TRIM 替代。
-
-示例:
-
-RTRIM(NAME)
-
-### TRIM
-
-```TRIM(string[, characterToTrimString])```
-
-移除字符串中所有前导和尾随空格或其他指定的字符。
-
-示例:
-
-TRIM(NAME)
-
-### REGEXP_REPLACE
-
-```REGEXP_REPLACE(inputString, regexString, replacementString[, flagsString])```
-
-替换与正则表达式匹配的每个子字符串。详情请参阅 Java String.replaceAll() 方法。如果任何参数为 null(除了可选的 flagsString 参数),则结果为 null。
-
-标志值限于 'i'、'c'、'n'、'm'。其他符号会引发异常。可以在一个 flagsString 参数中使用多个符号(例如 'im')。后面的标志会覆盖前面的标志,例如 'ic' 等同于区分大小写匹配 'c'。
- -'i' 启用不区分大小写匹配(Pattern.CASE_INSENSITIVE) - -'c' 禁用不区分大小写匹配(Pattern.CASE_INSENSITIVE) - -'n' 允许句点匹配换行符(Pattern.DOTALL) - -'m' 启用多行模式(Pattern.MULTILINE) - -示例: - -REGEXP_REPLACE('Hello World', ' +', ' ') -REGEXP_REPLACE('Hello WWWWorld', 'w+', 'W', 'i') - -### REGEXP_LIKE - -```REGEXP_LIKE(inputString, regexString[, flagsString])``` - -将字符串与正则表达式匹配。详情请参阅 Java Matcher.find() 方法。如果任何参数为 null(除了可选的 flagsString 参数),则结果为 null。 - -标志值限于 'i'、'c'、'n'、'm'。其他符号会引发异常。可以在一个 flagsString 参数中使用多个符号(例如 'im')。后面的标志会覆盖前面的标志,例如 'ic' 等同于区分大小写匹配 'c'。 - -'i' 启用不区分大小写匹配(Pattern.CASE_INSENSITIVE) - -'c' 禁用不区分大小写匹配(Pattern.CASE_INSENSITIVE) - -'n' 允许句点匹配换行符(Pattern.DOTALL) - -'m' 启用多行模式(Pattern.MULTILINE) - -示例: - -REGEXP_LIKE('Hello World', '[A-Z ]*', 'i') - -### REGEXP_SUBSTR - -```REGEXP_SUBSTR(inputString, regexString[, positionInt, occurrenceInt, flagsString, groupInt])``` - -将字符串与正则表达式匹配,并返回匹配的子字符串。详情请参阅 java.util.regex.Pattern 和相关功能。 - -参数 position 指定匹配应该从 inputString 的哪里开始。Occurrence 指示在 inputString 中搜索 pattern 的哪个出现。 - -标志值限于 'i'、'c'、'n'、'm'。其他符号会引发异常。可以在一个 flagsString 参数中使用多个符号(例如 'im')。后面的标志会覆盖前面的标志,例如 'ic' 等同于区分大小写匹配 'c'。 - -'i' 启用不区分大小写匹配(Pattern.CASE_INSENSITIVE) - -'c' 禁用不区分大小写匹配(Pattern.CASE_INSENSITIVE) - -'n' 允许句点匹配换行符(Pattern.DOTALL) - -'m' 启用多行模式(Pattern.MULTILINE) - -如果模式具有组,则可以使用 group 参数指定要返回的组。 - -示例: - -REGEXP_SUBSTR('2020-10-01', '\d{4}') -REGEXP_SUBSTR('2020-10-01', '(\d{4})-(\d{2})-(\d{2})', 1, 1, NULL, 2) - -### REPEAT - -```REPEAT(string, int)``` - -Returns a string repeated some number of times. - -示例: - -REPEAT(NAME || ' ', 10) - -### REPLACE - -```REPLACE(string, searchString[, replacementString])``` - -在文本中替换所有出现的搜索字符串为另一个字符串。如果没有指定替换字符串,则从原始字符串中移除搜索字符串。如果任何参数为 null,则结果为 null。 - -示例: - -REPLACE(NAME, ' ') - -### SOUNDEX - -```SOUNDEX(string)``` - -表示字符串发音。此方法返回一个字符串,如果参数为 null,则返回 null。有关更多信息,请参阅 https://en.wikipedia.org/wiki/Soundex 。 - -示例: - -SOUNDEX(NAME) - -### SPACE - -```SPACE(int)``` - -返回由一定数量的空格组成的字符串。 - -示例: - -SPACE(80) - -### SUBSTRING / SUBSTR - -```SUBSTRING | SUBSTR (string, startInt[, lengthInt ])``` - -返回从指定位置开始的字符串的子串。如果起始索引为负数,则相对于字符串的末尾计算起始索引。长度是可选的。 - -示例: - -CALL SUBSTRING('[Hello]', 2); -CALL SUBSTRING('hour', 3, 2); - -### TO_CHAR - -```TO_CHAR(value[, formatString])``` - -Oracle 兼容的 TO_CHAR 函数可用于格式化时间戳、数字或文本。 - -示例: - -CALL TO_CHAR(SYS_TIME, 'yyyy-MM-dd HH:mm:ss') - -### TRANSLATE - -```TRANSLATE(value, searchString, replacementString)``` - -Oracle 兼容的 TRANSLATE 函数用于将字符串中的一系列字符替换为另一组字符。 - -示例: - -CALL TRANSLATE('Hello world', 'eo', 'EO') - -## Numeric Functions - -### ABS - -```ABS(numeric)``` - -返回指定值的绝对值。返回的值与参数的数据类型相同。 - -请注意,TINYINT、SMALLINT、INT 和 BIGINT 数据类型无法表示它们的最小负值的绝对值,因为它们的负值比正值多。例如,对于 INT 数据类型,允许的值范围是从 -2147483648 到 2147483647。ABS(-2147483648) 应该是 2147483648,但是这个值对于这个数据类型是不允许的。这会导致异常。为了避免这种情况,请将此函数的参数转换为更高的数据类型。 - -示例: - -ABS(I) - -### ACOS - -```ACOS(numeric)``` - -计算反余弦值。另请参阅 Java Math.acos。该方法返回一个双精度浮点数。 - -示例: - -ACOS(D) - -### ASIN - -```ASIN(numeric)``` - -计算反正弦值。另请参阅 Java Math.asin。该方法返回一个双精度浮点数。 - -示例: - -ASIN(D) - -### ATAN - -```ATAN(numeric)``` - -计算反正切值。另请参阅 Java Math.atan。该方法返回一个双精度浮点数。 - -示例: - -ATAN(D) - -### COS - -```COS(numeric)``` - -计算三角余弦值。另请参阅 Java Math.cos。该方法返回一个双精度浮点数。 - -示例: - -COS(ANGLE) - -### COSH - -```COSH(numeric)``` - -计算双曲余弦值。另请参阅 Java Math.cosh。该方法返回一个双精度浮点数。 - -示例: - -COSH(X) - -### COT - -```COT(numeric)``` - -计算三角余切值(1/TAN(角度))。另请参阅 Java Math.* 函数。该方法返回一个双精度浮点数。 - -示例: - -COT(ANGLE) - -### SIN - -```SIN(numeric)``` - -计算三角正弦值。另请参阅 Java Math.sin。该方法返回一个双精度浮点数。 - -示例: - -SIN(ANGLE) - -### SINH - 
-```SINH(numeric)``` - -计算双曲正弦值。另请参阅 Java Math.sinh。该方法返回一个双精度浮点数。 - -示例: - -SINH(ANGLE) - -### TAN - -```TAN(numeric)``` - -计算三角正切值。另请参阅 Java Math.tan。该方法返回一个双精度浮点数。 - -示例: - -TAN(ANGLE) - -### TANH - -```TANH(numeric)``` - -计算双曲正切值。另请参阅 Java Math.tanh。该方法返回一个双精度浮点数。 - -示例: - -TANH(X) - -### MOD - -```MOD(dividendNumeric, divisorNumeric )``` - -取模运算表达式。 - -结果与除数的类型相同。如果任一参数为 NULL,则结果为 NULL。如果除数为 0,则会引发异常。结果与被除数的符号相同,或者等于 0。 - -通常情况下,参数应具有标度 0,但 H2 并不要求。 - -示例: - -MOD(A, B) - -### CEIL / CEILING - -```CEIL | CEILING (numeric)``` - -返回大于或等于参数的最小整数值。该方法返回与参数相同类型的值,但标度设置为 0,并且如果适用,则调整精度。 - -示例: - -CEIL(A) - -### EXP - -```EXP(numeric)``` - -请参阅 Java Math.exp。该方法返回一个双精度浮点数。 - -示例: - -EXP(A) - -### FLOOR - -```FLOOR(numeric)``` - -返回小于或等于参数的最大整数值。该方法返回与参数相同类型的值,但标度设置为 0,并且如果适用,则调整精度。 - -示例: - -FLOOR(A) - -### LN - -```LN(numeric)``` - -计算自然对数(以 e 为底)的双精度浮点数值。参数必须是一个正数值。 - -示例: - -LN(A) - -### LOG - -```LOG(baseNumeric, numeric)``` - -计算以指定底数的对数,返回一个双精度浮点数。参数和底数必须是正数值。底数不能等于1。 - -默认底数是 e(自然对数),在 PostgreSQL 模式下,默认底数是 10。在 MSSQLServer 模式下,可选的底数在参数之后指定。 - -LOG 函数的单参数变体已被弃用,请使用 LN 或 LOG10 替代。 - -示例: - -LOG(2, A) - -### LOG10 - -```LOG10(numeric)``` - -计算以 10 为底的对数,返回一个双精度浮点数。参数必须是一个正数值。 - -示例: - -LOG10(A) - -### RADIANS - -```RADIANS(numeric)``` - -请参阅 Java Math.toRadians。该方法返回一个双精度浮点数。 - -示例: - -RADIANS(A) - -### SQRT - -```SQRT(numeric)``` - -请参阅 Java Math.sqrt。该方法返回一个双精度浮点数。 - -示例: - -SQRT(A) - -### PI - -```PI()``` - -请参阅 Java Math.PI。该方法返回一个双精度浮点数。 - -示例: - -PI() - -### POWER - -```POWER(numeric, numeric)``` - -请参阅 Java Math.pow。该方法返回一个双精度浮点数。 - -示例: - -POWER(A, B) - -### RAND / RANDOM - -```RAND | RANDOM([ int ])``` - -如果不带参数调用该函数,则返回下一个伪随机数。如果带有参数调用,则将会给该会话的随机数生成器设定种子。该方法返回一个介于 0(包括)和 1(不包括)之间的双精度浮点数。 - -示例: - -RAND() - -### ROUND - -```ROUND(numeric[, digitsInt])``` - -四舍五入到指定的小数位数。该方法返回与参数相同类型的值,但如果适用,则调整精度和标度。 - -示例: - -ROUND(N, 2) - -### SIGN - -```SIGN(numeric)``` - -如果值小于 0,则返回 -1;如果值为零或 NaN,则返回 0;否则返回 1。 - -示例: - -SIGN(N) - -### TRUNC - -```TRUNC | TRUNCATE(numeric[, digitsInt])``` - -当指定了一个数值参数时,将其截断为指定的数字位数(接近0的下一个值),并返回与参数相同类型的值,但如果适用,则调整精度和标度。 - -示例: - -TRUNC(N, 2) - -## Time and Date Functions - -### CURRENT_DATE - -```CURRENT_DATE [()]``` - -返回当前日期。 - -这些函数在事务(默认)或命令内部返回相同的值,具体取决于数据库模式。 - -示例: - -CURRENT_DATE - -### CURRENT_TIME - -```CURRENT_TIME [()]``` - -返回带有系统时区的当前时间。实际可用的最大精度取决于操作系统和 JVM,可以是 3(毫秒)或更高。在 Java 9 之前不支持更高的精度。 - -示例: - -CURRENT_TIME - -### CURRENT_TIMESTAMP / NOW - -```CURRENT_TIMESTAMP[()] | NOW()``` - -返回带有系统时区的当前时间戳。实际可用的最大精度取决于操作系统和 JVM,可以是 3(毫秒)或更高。在 Java 9 之前不支持更高的精度。 - -示例: - -CURRENT_TIMESTAMP - -### DATEADD / TIMESTAMPADD - -```DATEADD| TIMESTAMPADD(dateAndTime, addIntLong, datetimeFieldString)``` - -将单位添加到日期时间值中。datetimeFieldString 表示单位。使用负值来减去单位。当操作毫秒、微秒或纳秒时,addIntLong 可能是一个 long 值,否则其范围被限制为 int。如果单位与指定值兼容,则此方法返回与指定值相同类型的值。如果指定的字段是 HOUR、MINUTE、SECOND、MILLISECOND 等,而值是 DATE 值,DATEADD 返回组合的 TIMESTAMP。对于 TIME 值,不允许使用 DAY、MONTH、YEAR、WEEK 等字段。 - -示例: - -DATEADD(CREATED, 1, 'MONTH') - -### DATEDIFF - -```DATEDIFF(aDateAndTime, bDateAndTime, datetimeFieldString)``` - -返回两个日期时间值之间跨越的单位边界数。此方法返回一个 long 值。datetimeField 表示单位。 - -示例: - -DATEDIFF(T1.CREATED, T2.CREATED, 'MONTH') - -### DATE_TRUNC - -```DATE_TRUNC (dateAndTime, datetimeFieldString)``` - -将指定的日期时间值截断到指定的字段。 - -示例: - -DATE_TRUNC(CREATED, 'DAY'); - -### DAYNAME - -```DAYNAME(dateAndTime)``` - -返回星期几的名称(英文)。 - -示例: - -DAYNAME(CREATED) - -### DAY_OF_MONTH - -```DAY_OF_MONTH(dateAndTime)``` - -返回月份中的日期(1-31)。 - -示例: - -DAY_OF_MONTH(CREATED) - -### DAY_OF_WEEK - -```DAY_OF_WEEK(dateAndTime)``` - 
-返回星期几的数值(1-7)(星期一至星期日),根据本地化设置。
-
-示例:
-
-DAY_OF_WEEK(CREATED)
-
-### DAY_OF_YEAR
-
-```DAY_OF_YEAR(dateAndTime)```
-
-返回一年中的日期(1-366)。
-
-示例:
-
-DAY_OF_YEAR(CREATED)
-
-### EXTRACT
-
-```EXTRACT ( datetimeField FROM dateAndTime)```
-
-从日期/时间值中返回特定时间单位的值。该方法对于 EPOCH 字段返回一个数值,对于其他字段返回一个整数。
-
-示例:
-
-EXTRACT(SECOND FROM CURRENT_TIMESTAMP)
-
-### FORMATDATETIME
-
-```FORMATDATETIME (dateAndTime, formatString)```
-
-将日期、时间或时间戳格式化为字符串。最重要的格式字符包括:y(年)、M(月)、d(日)、H(时)、m(分)、s(秒)。有关格式的详细信息,请参阅 java.time.format.DateTimeFormatter。
-
-该方法返回一个字符串。
-
-示例:
-
-CALL FORMATDATETIME(CREATED, 'yyyy-MM-dd HH:mm:ss')
-
-### HOUR
-
-```HOUR(dateAndTime)```
-
-从日期/时间值中返回小时(0-23)。
-
-示例:
-
-HOUR(CREATED)
-
-### MINUTE
-
-```MINUTE(dateAndTime)```
-
-从日期/时间值中返回分钟(0-59)。
-
-该函数已经被弃用,请使用 EXTRACT 替代。
-
-示例:
-
-MINUTE(CREATED)
-
-### MONTH
-
-```MONTH(dateAndTime)```
-
-从日期/时间值中返回月份(1-12)。
-
-该函数已经被弃用,请使用 EXTRACT 替代。
-
-示例:
-
-MONTH(CREATED)
-
-### MONTHNAME
-
-```MONTHNAME(dateAndTime)```
-
-返回月份的名称(英文)。
-
-示例:
-
-MONTHNAME(CREATED)
-
-### PARSEDATETIME / TO_DATE
-
-```PARSEDATETIME | TO_DATE(string, formatString)```
-
-解析一个字符串并返回一个 TIMESTAMP WITH TIME ZONE 值。最重要的格式字符包括:y(年)、M(月)、d(日)、H(时)、m(分)、s(秒)。有关格式的详细信息,请参阅 java.time.format.DateTimeFormatter。
-
-示例:
-
-CALL PARSEDATETIME('2021-04-08 13:34:45','yyyy-MM-dd HH:mm:ss')
-
-### QUARTER
-
-```QUARTER(dateAndTime)```
-
-从日期/时间值中返回季度(1-4)。
-
-示例:
-
-QUARTER(CREATED)
-
-### SECOND
-
-```SECOND(dateAndTime)```
-
-从日期/时间值中返回秒数(0-59)。
-
-该函数已经被弃用,请使用 EXTRACT 替代。
-
-示例:
-
-SECOND(CREATED)
-
-### WEEK
-
-```WEEK(dateAndTime)```
-
-返回日期/时间值中的周数(1-53)。
-
-该函数使用当前系统的区域设置。
-
-示例:
-
-WEEK(CREATED)
-
-### YEAR
-
-```YEAR(dateAndTime)```
-
-返回日期/时间值中的年份。
-
-示例:
-
-YEAR(CREATED)
-
-### FROM_UNIXTIME
-
-```FROM_UNIXTIME (unixtime, formatString,timeZone)```
-
-将从 UNIX 纪元(1970-01-01 00:00:00 UTC)开始的秒数转换为表示该时刻时间戳的字符串。
-
-最重要的格式字符包括:y(年)、M(月)、d(日)、H(时)、m(分)、s(秒)。有关格式的详细信息,请参阅 `java.time.format.DateTimeFormatter`。
-
-`timeZone` 是可选的,默认值为系统的时区。`timezone` 的值可以是一个 `UTC+ 时区偏移`,例如,`UTC+8` 表示亚洲/上海时区,请参阅 `java.time.ZoneId`。
-
-该方法返回一个字符串。
-
-示例:
-
-// 使用默认时区
-
-CALL FROM_UNIXTIME(1672502400, 'yyyy-MM-dd HH:mm:ss')
-
-or
-
-// 使用指定时区
-
-CALL FROM_UNIXTIME(1672502400, 'yyyy-MM-dd HH:mm:ss','UTC+6')
-
-## System Functions
-
-### CAST
-
-```CAST(value as dataType)```
-
-将一个值转换为另一个数据类型。
-
-支持的数据类型有:STRING | VARCHAR,INT | INTEGER,LONG | BIGINT,BYTE,FLOAT,DOUBLE,DECIMAL(p,s),TIMESTAMP,DATE,TIME
-
-示例:
-
-CAST(NAME AS INT)
-
-### COALESCE
-
-```COALESCE(aValue, bValue [,...])```
-
-返回第一个非空值。
-
-示例:
-
-COALESCE(A, B, C)
-
-### IFNULL
-
-```IFNULL(aValue, bValue)```
-
-返回第一个非空值。
-
-示例:
-
-IFNULL(A, B)
-
-### NULLIF
-
-```NULLIF(aValue, bValue)```
-
-如果 'a' 等于 'b',则返回 NULL,否则返回 'a'。
-
-示例:
-
-NULLIF(A, B)
-
-### CASE WHEN
-
-```
-select
-    case
-        when c_string in ('c_string') then 1
-        else 0
-    end as c_string_1,
-    case
-        when c_string not in ('c_string') then 1
-        else 0
-    end as c_string_0,
-    case
-        when c_tinyint = 117
-        and TO_CHAR(c_boolean) = 'true' then 1
-        else 0
-    end as c_tinyint_boolean_1,
-    case
-        when c_tinyint != 117
-        and TO_CHAR(c_boolean) = 'true' then 1
-        else 0
-    end as c_tinyint_boolean_0,
-    case
-        when c_tinyint != 117
-        or TO_CHAR(c_boolean) = 'true' then 1
-        else 0
-    end as c_tinyint_boolean_or_1,
-    case
-        when c_int > 1
-        and c_bigint > 1
-        and c_float > 1
-        and c_double > 1
-        and c_decimal > 1 then 1
-        else 0
-    end as c_number_1,
-    case
-        when c_tinyint <> 117 then 1
-        else 0
-    end as c_number_0
-from
-    fake
-```
-
-用于确定条件是否有效,并根据不同的判断返回不同的值。
-
-示例:
-
-case when c_string in ('c_string') then 1 else 0 end
diff --git a/docs/zh/transform-v2/sql-udf.md b/docs/zh/transform-v2/sql-udf.md
deleted file mode 100644
index 4c1a3777408..00000000000
--- a/docs/zh/transform-v2/sql-udf.md
+++ /dev/null
@@ -1,133 +0,0 @@
-# SQL用户定义函数
-
-> SQL 转换插件的用户定义函数 (UDF)
-
-## 描述
-
-使用 UDF SPI 扩展 SQL 转换函数库。
-
-## UDF API
-
-```java
-package org.apache.seatunnel.transform.sql.zeta;
-
-public interface ZetaUDF {
-    /**
-     * Function name
-     *
-     * @return function name
-     */
-    String functionName();
-
-    /**
-     * The type of function result
-     *
-     * @param argsType input arguments type
-     * @return result type
-     */
-    SeaTunnelDataType<?> resultType(List<SeaTunnelDataType<?>> argsType);
-
-    /**
-     * Evaluate
-     *
-     * @param args input arguments
-     * @return result value
-     */
-    Object evaluate(List<Object> args);
-}
-```
-
-## UDF 实现示例
-
-将这些依赖项添加到您的 Maven 项目,并使用 provided 作用域。
-
-```xml
-<dependencies>
-    <dependency>
-        <groupId>org.apache.seatunnel</groupId>
-        <artifactId>seatunnel-transforms-v2</artifactId>
-        <version>2.3.2</version>
-        <scope>provided</scope>
-    </dependency>
-    <dependency>
-        <groupId>org.apache.seatunnel</groupId>
-        <artifactId>seatunnel-api</artifactId>
-        <version>2.3.2</version>
-        <scope>provided</scope>
-    </dependency>
-    <dependency>
-        <groupId>com.google.auto.service</groupId>
-        <artifactId>auto-service</artifactId>
-        <version>1.0.1</version>
-        <scope>provided</scope>
-    </dependency>
-</dependencies>
-```
-
-添加一个 Java 类来实现 ZetaUDF,类似于以下的方式:
-
-```java
-
-@AutoService(ZetaUDF.class)
-public class ExampleUDF implements ZetaUDF {
-    @Override
-    public String functionName() {
-        return "EXAMPLE";
-    }
-
-    @Override
-    public SeaTunnelDataType<?> resultType(List<SeaTunnelDataType<?>> argsType) {
-        return BasicType.STRING_TYPE;
-    }
-
-    @Override
-    public Object evaluate(List<Object> args) {
-        String arg = (String) args.get(0);
-        if (arg == null) return null;
-        return "UDF: " + arg;
-    }
-}
-```
-
-打包 UDF 项目并将 jar 文件复制到路径:${SEATUNNEL_HOME}/lib
-
-## 示例
-
-源端数据读取的表格如下:
-
-| id | name     | age |
-|----|----------|-----|
-| 1  | Joy Ding | 20  |
-| 2  | May Ding | 21  |
-| 3  | Kin Dom  | 24  |
-| 4  | Joy Dom  | 22  |
-
-我们使用 SQL 查询中的 UDF 来转换源数据,类似于以下方式:
-
-```
-transform {
-  Sql {
-    source_table_name = "fake"
-    result_table_name = "fake1"
-    query = "select id, example(name) as name, age from fake"
-  }
-}
-```
-
-那么结果表 `fake1` 中的数据将会更新为
-
-| id | name          | age |
-|----|---------------|-----|
-| 1  | UDF: Joy Ding | 20  |
-| 2  | UDF: May Ding | 21  |
-| 3  | UDF: Kin Dom  | 24  |
-| 4  | UDF: Joy Dom  | 22  |
-
-## 更新日志
-
-### 新版本
-
-- 添加SQL转换连接器的UDF
-
diff --git a/docs/zh/transform-v2/sql.md b/docs/zh/transform-v2/sql.md
deleted file mode 100644
index ccbbc7f14cb..00000000000
--- a/docs/zh/transform-v2/sql.md
+++ /dev/null
@@ -1,100 +0,0 @@
-# SQL
-
-> SQL 转换插件
-
-## 描述
-
-使用 SQL 来转换给定的输入行。
-
-SQL 转换使用内存中的 SQL 引擎,我们可以通过 SQL 函数和 SQL 引擎的能力来实现转换任务。
-
-## 属性
-
-| 名称              | 类型   | 是否必须 | 默认值 |
-|-------------------|--------|------|-----|
-| source_table_name | string | yes  | -   |
-| result_table_name | string | yes  | -   |
-| query             | string | yes  | -   |
-
-### source_table_name [string]
-
-源表名称,查询 SQL 表名称必须与此字段匹配。
-
-### query [string]
-
-查询 SQL,它是一个简单的 SQL,支持基本的函数和条件过滤操作。但是,复杂的 SQL 尚不支持,包括:多源表/行连接和聚合操作等。
-
-## 示例
-
-源端数据读取的表格如下:
-
-| id | name     | age |
-|----|----------|-----|
-| 1  | Joy Ding | 20  |
-| 2  | May Ding | 21  |
-| 3  | Kin Dom  | 24  |
-| 4  | Joy Dom  | 22  |
-
-我们使用 SQL 查询来转换源数据,类似这样:
-
-```
-transform {
-  Sql {
-    source_table_name = "fake"
-    result_table_name = "fake1"
-    query = "select id, concat(name, '_') as name, age+1 as age from fake where id>0"
-  }
-}
-```
-
-那么结果表 `fake1` 中的数据将会更新为:
-
-| id | name      | age |
-|----|-----------|-----|
-| 1  | Joy Ding_ | 21  |
-| 2  | May Ding_ | 22  |
-| 3  | Kin Dom_  | 25  |
-| 4  | Joy Dom_  | 23  |
-
-## 作业配置示例
-
-```
-env {
-  job.mode = "BATCH"
-}
-
-source {
-  FakeSource {
-    result_table_name = "fake"
-    row.num = 100
-    schema = {
-      fields {
-        id = "int"
-        name = "string"
-        age = "int"
-      }
-    }
-  }
-}
-
-transform 
{ - Sql { - source_table_name = "fake" - result_table_name = "fake1" - query = "select id, concat(name, '_') as name, age+1 as age from fake where id>0" - } -} - -sink { - Console { - source_table_name = "fake1" - } -} -``` - -## 更新日志 - -### 新版本 - -- 添加SQL转换连接器 - diff --git a/plugin-mapping.properties b/plugin-mapping.properties index 3ea8bfc7f7c..9e376955055 100644 --- a/plugin-mapping.properties +++ b/plugin-mapping.properties @@ -117,8 +117,6 @@ seatunnel.source.AmazonSqs = connector-amazonsqs seatunnel.sink.AmazonSqs = connector-amazonsqs seatunnel.source.Paimon = connector-paimon seatunnel.sink.Paimon = connector-paimon -seatunnel.source.Easysearch = connector-easysearch -seatunnel.sink.Easysearch = connector-easysearch seatunnel.source.Postgres-CDC = connector-cdc-postgres seatunnel.source.Oracle-CDC = connector-cdc-oracle seatunnel.sink.Pulsar = connector-pulsar diff --git a/release-note.md b/release-note.md index b399c161e39..3a7c084c499 100644 --- a/release-note.md +++ b/release-note.md @@ -17,7 +17,6 @@ - [Elasticsearch] Support https protocol & compatible with opensearch - [Hbase] Add hbase sink connector #4049 - [Clickhouse] Fix clickhouse old version compatibility #5326 -- [Easysearch] Support INFINI Easysearch #5933 ### Formats - [Canal]Support read canal format message #3950 - [Debezium]Support debezium canal format message #3981 diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/configuration/ReadonlyConfig.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/configuration/ReadonlyConfig.java index 81d64500dc2..16a6ad2a057 100644 --- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/configuration/ReadonlyConfig.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/configuration/ReadonlyConfig.java @@ -69,10 +69,8 @@ public T get(Option option) { /** * Transform to Config todo: This method should be removed after we remove Config * - * @deprecated Please use ReadonlyConfig directly * @return Config */ - @Deprecated public Config toConfig() { return ConfigFactory.parseMap(confData); } diff --git a/seatunnel-connectors-v2/connector-cdc/connector-cdc-mysql/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/source/MySqlIncrementalSourceFactory.java b/seatunnel-connectors-v2/connector-cdc/connector-cdc-mysql/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/source/MySqlIncrementalSourceFactory.java index 8147dfe737f..defe0a6ab98 100644 --- a/seatunnel-connectors-v2/connector-cdc/connector-cdc-mysql/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/source/MySqlIncrementalSourceFactory.java +++ b/seatunnel-connectors-v2/connector-cdc/connector-cdc-mysql/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/source/MySqlIncrementalSourceFactory.java @@ -57,8 +57,8 @@ public OptionRule optionRule() { .required( JdbcSourceOptions.USERNAME, JdbcSourceOptions.PASSWORD, + CatalogOptions.TABLE_NAMES, JdbcCatalogOptions.BASE_URL) - .exclusive(CatalogOptions.TABLE_NAMES, CatalogOptions.TABLE_PATTERN) .optional( JdbcSourceOptions.DATABASE_NAMES, JdbcSourceOptions.SERVER_ID, diff --git a/seatunnel-connectors-v2/connector-cdc/connector-cdc-oracle/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/oracle/source/OracleIncrementalSourceFactory.java b/seatunnel-connectors-v2/connector-cdc/connector-cdc-oracle/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/oracle/source/OracleIncrementalSourceFactory.java index 2a0dc6b2907..c80f0dc7cea 100644 --- 
a/seatunnel-connectors-v2/connector-cdc/connector-cdc-oracle/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/oracle/source/OracleIncrementalSourceFactory.java +++ b/seatunnel-connectors-v2/connector-cdc/connector-cdc-oracle/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/oracle/source/OracleIncrementalSourceFactory.java @@ -54,8 +54,10 @@ public String factoryIdentifier() { @Override public OptionRule optionRule() { return JdbcSourceOptions.getBaseRule() - .required(JdbcSourceOptions.USERNAME, JdbcSourceOptions.PASSWORD) - .exclusive(CatalogOptions.TABLE_NAMES, CatalogOptions.TABLE_PATTERN) + .required( + JdbcSourceOptions.USERNAME, + JdbcSourceOptions.PASSWORD, + CatalogOptions.TABLE_NAMES) .bundled(JdbcSourceOptions.HOSTNAME, JdbcSourceOptions.PORT) .optional( JdbcCatalogOptions.BASE_URL, diff --git a/seatunnel-connectors-v2/connector-cdc/connector-cdc-postgres/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/postgres/source/PostgresIncrementalSourceFactory.java b/seatunnel-connectors-v2/connector-cdc/connector-cdc-postgres/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/postgres/source/PostgresIncrementalSourceFactory.java index e75c3505ef1..7d9ddbb5b22 100644 --- a/seatunnel-connectors-v2/connector-cdc/connector-cdc-postgres/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/postgres/source/PostgresIncrementalSourceFactory.java +++ b/seatunnel-connectors-v2/connector-cdc/connector-cdc-postgres/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/postgres/source/PostgresIncrementalSourceFactory.java @@ -56,8 +56,8 @@ public OptionRule optionRule() { .required( JdbcSourceOptions.USERNAME, JdbcSourceOptions.PASSWORD, + CatalogOptions.TABLE_NAMES, JdbcCatalogOptions.BASE_URL) - .exclusive(CatalogOptions.TABLE_NAMES, CatalogOptions.TABLE_PATTERN) .optional( JdbcSourceOptions.DATABASE_NAMES, JdbcSourceOptions.SERVER_TIME_ZONE, diff --git a/seatunnel-connectors-v2/connector-cdc/connector-cdc-sqlserver/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/sqlserver/source/source/SqlServerIncrementalSourceFactory.java b/seatunnel-connectors-v2/connector-cdc/connector-cdc-sqlserver/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/sqlserver/source/source/SqlServerIncrementalSourceFactory.java index 09a12c6e4ed..95031e9b9ff 100644 --- a/seatunnel-connectors-v2/connector-cdc/connector-cdc-sqlserver/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/sqlserver/source/source/SqlServerIncrementalSourceFactory.java +++ b/seatunnel-connectors-v2/connector-cdc/connector-cdc-sqlserver/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/sqlserver/source/source/SqlServerIncrementalSourceFactory.java @@ -58,8 +58,8 @@ public OptionRule optionRule() { .required( JdbcSourceOptions.USERNAME, JdbcSourceOptions.PASSWORD, + CatalogOptions.TABLE_NAMES, JdbcCatalogOptions.BASE_URL) - .exclusive(CatalogOptions.TABLE_NAMES, CatalogOptions.TABLE_PATTERN) .optional( JdbcSourceOptions.DATABASE_NAMES, JdbcSourceOptions.SERVER_TIME_ZONE, diff --git a/seatunnel-connectors-v2/connector-doris/src/main/java/org/apache/seatunnel/connectors/doris/serialize/SeaTunnelRowConverter.java b/seatunnel-connectors-v2/connector-doris/src/main/java/org/apache/seatunnel/connectors/doris/serialize/SeaTunnelRowConverter.java index 88a24ff4fbb..4a4bf40cf31 100644 --- a/seatunnel-connectors-v2/connector-doris/src/main/java/org/apache/seatunnel/connectors/doris/serialize/SeaTunnelRowConverter.java +++ 
b/seatunnel-connectors-v2/connector-doris/src/main/java/org/apache/seatunnel/connectors/doris/serialize/SeaTunnelRowConverter.java @@ -35,8 +35,7 @@ public class SeaTunnelRowConverter { @Builder.Default private DateUtils.Formatter dateFormatter = DateUtils.Formatter.YYYY_MM_DD; @Builder.Default - private DateTimeUtils.Formatter dateTimeFormatter = - DateTimeUtils.Formatter.YYYY_MM_DD_HH_MM_SS_SSSSSS; + private DateTimeUtils.Formatter dateTimeFormatter = DateTimeUtils.Formatter.YYYY_MM_DD_HH_MM_SS; @Builder.Default private TimeUtils.Formatter timeFormatter = TimeUtils.Formatter.HH_MM_SS; diff --git a/seatunnel-connectors-v2/connector-doris/src/main/java/org/apache/seatunnel/connectors/doris/source/DorisSourceFactory.java b/seatunnel-connectors-v2/connector-doris/src/main/java/org/apache/seatunnel/connectors/doris/source/DorisSourceFactory.java index c0e34ddf6ea..043392cc6ca 100644 --- a/seatunnel-connectors-v2/connector-doris/src/main/java/org/apache/seatunnel/connectors/doris/source/DorisSourceFactory.java +++ b/seatunnel-connectors-v2/connector-doris/src/main/java/org/apache/seatunnel/connectors/doris/source/DorisSourceFactory.java @@ -23,8 +23,6 @@ import org.apache.seatunnel.api.source.SourceSplit; import org.apache.seatunnel.api.table.catalog.CatalogTable; import org.apache.seatunnel.api.table.catalog.Column; -import org.apache.seatunnel.api.table.catalog.ConstraintKey; -import org.apache.seatunnel.api.table.catalog.PrimaryKey; import org.apache.seatunnel.api.table.catalog.TableIdentifier; import org.apache.seatunnel.api.table.catalog.TablePath; import org.apache.seatunnel.api.table.catalog.TableSchema; @@ -121,34 +119,6 @@ private static CatalogTable reconstructCatalogTable( Column column = tableColumnsMap.get(field); builder.column(column); } - if (table.getTableSchema().getPrimaryKey() != null) { - List columns = - table.getTableSchema().getPrimaryKey().getColumnNames().stream() - .filter(matchingFieldNames::contains) - .collect(Collectors.toList()); - if (!columns.isEmpty()) { - builder.primaryKey( - new PrimaryKey( - table.getTableSchema().getPrimaryKey().getPrimaryKey(), columns)); - } - } - - if (table.getTableSchema().getConstraintKeys() != null) { - List keys = - table.getTableSchema().getConstraintKeys().stream() - .filter( - k -> - k.getColumnNames().stream() - .map( - ConstraintKey.ConstraintKeyColumn - ::getColumnName) - .allMatch(matchingFieldNames::contains)) - .collect(Collectors.toList()); - if (!keys.isEmpty()) { - builder.constraintKey(keys); - } - } - table = CatalogTable.of( TableIdentifier.of( diff --git a/seatunnel-connectors-v2/connector-doris/src/test/java/org/apache/seatunnel/connectors/doris/serialize/SeaTunnelRowConverterTest.java b/seatunnel-connectors-v2/connector-doris/src/test/java/org/apache/seatunnel/connectors/doris/serialize/SeaTunnelRowConverterTest.java deleted file mode 100644 index 5755beb3f74..00000000000 --- a/seatunnel-connectors-v2/connector-doris/src/test/java/org/apache/seatunnel/connectors/doris/serialize/SeaTunnelRowConverterTest.java +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.seatunnel.connectors.doris.serialize; - -import org.apache.seatunnel.api.table.type.LocalTimeType; - -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.Test; - -import java.time.LocalDateTime; - -public class SeaTunnelRowConverterTest { - - private static final SeaTunnelRowConverter seaTunnelRowConverter = new SeaTunnelRowConverter(); - - @Test - void testDateTimeWithNano() { - Assertions.assertEquals( - "2021-01-01 00:00:00.123456", - seaTunnelRowConverter.convert( - LocalTimeType.LOCAL_DATE_TIME_TYPE, - LocalDateTime.of(2021, 1, 1, 0, 0, 0, 123456789))); - Assertions.assertEquals( - "2021-01-01 00:00:00.000000", - seaTunnelRowConverter.convert( - LocalTimeType.LOCAL_DATE_TIME_TYPE, - LocalDateTime.of(2021, 1, 1, 0, 0, 0, 0))); - Assertions.assertEquals( - "2021-01-01 00:00:00.000001", - seaTunnelRowConverter.convert( - LocalTimeType.LOCAL_DATE_TIME_TYPE, - LocalDateTime.of(2021, 1, 1, 0, 0, 0, 1000))); - Assertions.assertEquals( - "2021-01-01 00:00:00.000123", - seaTunnelRowConverter.convert( - LocalTimeType.LOCAL_DATE_TIME_TYPE, - LocalDateTime.of(2021, 1, 1, 0, 0, 0, 123456))); - } -} diff --git a/seatunnel-connectors-v2/connector-easysearch/pom.xml b/seatunnel-connectors-v2/connector-easysearch/pom.xml deleted file mode 100644 index a121fe21868..00000000000 --- a/seatunnel-connectors-v2/connector-easysearch/pom.xml +++ /dev/null @@ -1,80 +0,0 @@ - - - - 4.0.0 - - org.apache.seatunnel - seatunnel-connectors-v2 - ${revision} - - - connector-easysearch - SeaTunnel : Connectors V2 : Easysearch - - - 1.0.1 - - - - - com.infinilabs - easysearch-client - ${easysearch-client.version} - - - org.apache.httpcomponents - httpclient - 4.5.14 - - - org.apache.httpcomponents - httpasyncclient - 4.1.4 - - - io.airlift - security - 206 - - - com.google.guava - guava - - - - - org.apache.seatunnel - connector-common - ${project.version} - compile - - - org.apache.seatunnel - seatunnel-format-json - ${project.version} - - - com.google.guava - guava - ${guava.version} - - - diff --git a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/catalog/EasysearchCatalog.java b/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/catalog/EasysearchCatalog.java deleted file mode 100644 index 42f13077627..00000000000 --- a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/catalog/EasysearchCatalog.java +++ /dev/null @@ -1,225 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.seatunnel.connectors.seatunnel.easysearch.catalog; - -import org.apache.seatunnel.shade.com.typesafe.config.Config; - -import org.apache.seatunnel.api.configuration.util.ConfigUtil; -import org.apache.seatunnel.api.table.catalog.Catalog; -import org.apache.seatunnel.api.table.catalog.CatalogTable; -import org.apache.seatunnel.api.table.catalog.PhysicalColumn; -import org.apache.seatunnel.api.table.catalog.TableIdentifier; -import org.apache.seatunnel.api.table.catalog.TablePath; -import org.apache.seatunnel.api.table.catalog.TableSchema; -import org.apache.seatunnel.api.table.catalog.exception.CatalogException; -import org.apache.seatunnel.api.table.catalog.exception.DatabaseAlreadyExistException; -import org.apache.seatunnel.api.table.catalog.exception.DatabaseNotExistException; -import org.apache.seatunnel.api.table.catalog.exception.TableAlreadyExistException; -import org.apache.seatunnel.api.table.catalog.exception.TableNotExistException; -import org.apache.seatunnel.connectors.seatunnel.easysearch.client.EasysearchClient; -import org.apache.seatunnel.connectors.seatunnel.easysearch.dto.EasysearchClusterInfo; -import org.apache.seatunnel.connectors.seatunnel.easysearch.dto.source.IndexDocsCount; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.google.common.collect.Lists; - -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import static com.google.common.base.Preconditions.checkNotNull; - -/** - * Easysearch catalog implementation. - * - *

In Easysearch, we use the index as the database and table. - */ -public class EasysearchCatalog implements Catalog { - - private static final Logger LOGGER = LoggerFactory.getLogger(EasysearchCatalog.class); - - private final String catalogName; - private final String defaultDatabase; - private final Config pluginConfig; - - private EasysearchClient ezsClient; - - // todo: do we need default database? - public EasysearchCatalog(String catalogName, String defaultDatabase, Config easySearchConfig) { - this.catalogName = checkNotNull(catalogName, "catalogName cannot be null"); - this.defaultDatabase = defaultDatabase; - this.pluginConfig = checkNotNull(easySearchConfig, "easySearchConfig cannot be null"); - } - - @Override - public void open() throws CatalogException { - try { - ezsClient = EasysearchClient.createInstance(pluginConfig); - EasysearchClusterInfo easysearchClusterInfo = ezsClient.getClusterInfo(); - if (LOGGER.isDebugEnabled()) { - LOGGER.debug( - "Success open ezs catalog: {}, cluster info: {}", - catalogName, - easysearchClusterInfo); - } - } catch (Exception e) { - throw new CatalogException(String.format("Failed to open catalog %s", catalogName), e); - } - } - - @Override - public void close() throws CatalogException { - ezsClient.close(); - } - - @Override - public String name() { - return catalogName; - } - - @Override - public String getDefaultDatabase() throws CatalogException { - return defaultDatabase; - } - - @Override - public boolean databaseExists(String databaseName) throws CatalogException { - // check if the index exist - try { - List indexDocsCount = ezsClient.getIndexDocsCount(databaseName); - return true; - } catch (Exception e) { - throw new CatalogException( - String.format( - "Failed to check if catalog %s database %s exists", - catalogName, databaseName), - e); - } - } - - @Override - public List listDatabases() throws CatalogException { - return ezsClient.listIndex(); - } - - @Override - public List listTables(String databaseName) - throws CatalogException, DatabaseNotExistException { - if (!databaseExists(databaseName)) { - throw new DatabaseNotExistException(catalogName, databaseName); - } - return Lists.newArrayList(databaseName); - } - - @Override - public boolean tableExists(TablePath tablePath) throws CatalogException { - checkNotNull(tablePath); - // todo: Check if the database name is the same with table name - return databaseExists(tablePath.getTableName()); - } - - @Override - public CatalogTable getTable(TablePath tablePath) - throws CatalogException, TableNotExistException { - // Get the index mapping? 
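-        // The field -> type map below comes from the index mapping; every field is
-        // converted to a nullable PhysicalColumn through EasysearchDataTypeConvertor,
-        // and types the convertor does not recognize fall back to STRING.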
- checkNotNull(tablePath, "tablePath cannot be null"); - EasysearchDataTypeConvertor easySearchDataTypeConvertor = new EasysearchDataTypeConvertor(); - TableSchema.Builder builder = TableSchema.builder(); - Map fieldTypeMapping = - ezsClient.getFieldTypeMapping(tablePath.getTableName(), Collections.emptyList()); - fieldTypeMapping.forEach( - (fieldName, fieldType) -> { - // todo: we need to add a new type TEXT or add length in STRING type - PhysicalColumn physicalColumn = - PhysicalColumn.of( - fieldName, - easySearchDataTypeConvertor.toSeaTunnelType( - fieldName, fieldType), - (Long) null, - true, - null, - null); - builder.column(physicalColumn); - }); - - return CatalogTable.of( - TableIdentifier.of( - catalogName, tablePath.getDatabaseName(), tablePath.getTableName()), - builder.build(), - buildTableOptions(tablePath), - Collections.emptyList(), - ""); - } - - @Override - public void createTable(TablePath tablePath, CatalogTable table, boolean ignoreIfExists) - throws TableAlreadyExistException, DatabaseNotExistException, CatalogException { - // Create the index - checkNotNull(tablePath, "tablePath cannot be null"); - if (tableExists(tablePath)) { - if (ignoreIfExists) { - return; - } else { - throw new TableAlreadyExistException(catalogName, tablePath, null); - } - } - ezsClient.createIndex(tablePath.getTableName()); - } - - @Override - public void dropTable(TablePath tablePath, boolean ignoreIfNotExists) - throws TableNotExistException, CatalogException { - checkNotNull(tablePath); - if (!tableExists(tablePath) && !ignoreIfNotExists) { - throw new TableNotExistException(catalogName, tablePath); - } - try { - ezsClient.dropIndex(tablePath.getTableName()); - } catch (Exception ex) { - throw new CatalogException( - String.format( - "Failed to drop table %s in catalog %s", - tablePath.getTableName(), catalogName), - ex); - } - } - - @Override - public void createDatabase(TablePath tablePath, boolean ignoreIfExists) - throws DatabaseAlreadyExistException, CatalogException { - createTable(tablePath, null, ignoreIfExists); - } - - @Override - public void dropDatabase(TablePath tablePath, boolean ignoreIfNotExists) - throws DatabaseNotExistException, CatalogException { - dropTable(tablePath, ignoreIfNotExists); - } - - private Map buildTableOptions(TablePath tablePath) { - Map options = new HashMap<>(); - options.put("connector", "easysearch"); - // todo: Right now, we don't use the config in the plugin config, do we need to add - // bootstrap servers here? - options.put("config", ConfigUtil.convertToJsonString(tablePath)); - return options; - } -} diff --git a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/catalog/EasysearchCatalogFactory.java b/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/catalog/EasysearchCatalogFactory.java deleted file mode 100644 index 17ccab123d1..00000000000 --- a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/catalog/EasysearchCatalogFactory.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.seatunnel.connectors.seatunnel.easysearch.catalog; - -import org.apache.seatunnel.api.configuration.ReadonlyConfig; -import org.apache.seatunnel.api.configuration.util.OptionRule; -import org.apache.seatunnel.api.table.catalog.Catalog; -import org.apache.seatunnel.api.table.factory.CatalogFactory; - -public class EasysearchCatalogFactory implements CatalogFactory { - - @Override - public Catalog createCatalog(String catalogName, ReadonlyConfig options) { - // todo: - return null; - } - - @Override - public String factoryIdentifier() { - // todo: - return "Easysearch"; - } - - @Override - public OptionRule optionRule() { - // todo: - return null; - } -} diff --git a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/catalog/EasysearchDataTypeConvertor.java b/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/catalog/EasysearchDataTypeConvertor.java deleted file mode 100644 index 400fbd1c4d1..00000000000 --- a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/catalog/EasysearchDataTypeConvertor.java +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.seatunnel.connectors.seatunnel.easysearch.catalog; - -import org.apache.seatunnel.api.table.catalog.DataTypeConvertor; -import org.apache.seatunnel.api.table.type.BasicType; -import org.apache.seatunnel.api.table.type.LocalTimeType; -import org.apache.seatunnel.api.table.type.SeaTunnelDataType; -import org.apache.seatunnel.api.table.type.SqlType; - -import com.google.auto.service.AutoService; - -import java.util.Map; - -import static com.google.common.base.Preconditions.checkNotNull; - -@AutoService(DataTypeConvertor.class) -public class EasysearchDataTypeConvertor implements DataTypeConvertor { - - public static final String STRING = "string"; - public static final String KEYWORD = "keyword"; - public static final String TEXT = "text"; - public static final String BOOLEAN = "boolean"; - public static final String BYTE = "byte"; - public static final String SHORT = "short"; - public static final String INTEGER = "integer"; - public static final String LONG = "long"; - public static final String FLOAT = "float"; - public static final String HALF_FLOAT = "half_float"; - public static final String DOUBLE = "double"; - public static final String DATE = "date"; - - @Override - public SeaTunnelDataType toSeaTunnelType(String field, String connectorDataType) { - return toSeaTunnelType(field, connectorDataType, null); - } - - @Override - public SeaTunnelDataType toSeaTunnelType( - String field, String connectorDataType, Map dataTypeProperties) { - checkNotNull(connectorDataType, "connectorDataType can not be null"); - switch (connectorDataType) { - case STRING: - return BasicType.STRING_TYPE; - case KEYWORD: - return BasicType.STRING_TYPE; - case TEXT: - return BasicType.STRING_TYPE; - case BOOLEAN: - return BasicType.BOOLEAN_TYPE; - case BYTE: - return BasicType.BYTE_TYPE; - case SHORT: - return BasicType.SHORT_TYPE; - case INTEGER: - return BasicType.INT_TYPE; - case LONG: - return BasicType.LONG_TYPE; - case FLOAT: - return BasicType.FLOAT_TYPE; - case HALF_FLOAT: - return BasicType.FLOAT_TYPE; - case DOUBLE: - return BasicType.DOUBLE_TYPE; - case DATE: - return LocalTimeType.LOCAL_DATE_TIME_TYPE; - default: - return BasicType.STRING_TYPE; - } - } - - @Override - public String toConnectorType( - String field, - SeaTunnelDataType seaTunnelDataType, - Map dataTypeProperties) { - checkNotNull(seaTunnelDataType, "seaTunnelDataType can not be null"); - SqlType sqlType = seaTunnelDataType.getSqlType(); - switch (sqlType) { - case STRING: - return STRING; - case BOOLEAN: - return BOOLEAN; - case BYTES: - return BYTE; - case TINYINT: - return SHORT; - case INT: - return INTEGER; - case BIGINT: - return LONG; - case FLOAT: - return FLOAT; - case DOUBLE: - return DOUBLE; - case TIMESTAMP: - return DATE; - default: - return STRING; - } - } - - @Override - public String getIdentity() { - return "Easysearch"; - } -} diff --git a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/client/EasysearchClient.java b/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/client/EasysearchClient.java deleted file mode 100644 index e3cc7f661f2..00000000000 --- a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/client/EasysearchClient.java +++ /dev/null @@ -1,598 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.seatunnel.connectors.seatunnel.easysearch.client; - -import org.apache.seatunnel.shade.com.fasterxml.jackson.databind.JsonNode; -import org.apache.seatunnel.shade.com.fasterxml.jackson.databind.ObjectMapper; -import org.apache.seatunnel.shade.com.fasterxml.jackson.databind.node.ObjectNode; -import org.apache.seatunnel.shade.com.fasterxml.jackson.databind.node.TextNode; -import org.apache.seatunnel.shade.com.typesafe.config.Config; - -import org.apache.seatunnel.common.utils.JsonUtils; -import org.apache.seatunnel.connectors.seatunnel.easysearch.config.EzsClusterConnectionConfig; -import org.apache.seatunnel.connectors.seatunnel.easysearch.dto.BulkResponse; -import org.apache.seatunnel.connectors.seatunnel.easysearch.dto.EasysearchClusterInfo; -import org.apache.seatunnel.connectors.seatunnel.easysearch.dto.source.IndexDocsCount; -import org.apache.seatunnel.connectors.seatunnel.easysearch.dto.source.ScrollResult; -import org.apache.seatunnel.connectors.seatunnel.easysearch.exception.EasysearchConnectorErrorCode; -import org.apache.seatunnel.connectors.seatunnel.easysearch.exception.EasysearchConnectorException; -import org.apache.seatunnel.connectors.seatunnel.easysearch.util.SSLUtils; - -import org.apache.commons.collections4.CollectionUtils; -import org.apache.http.HttpHost; -import org.apache.http.HttpStatus; -import org.apache.http.auth.AuthScope; -import org.apache.http.auth.UsernamePasswordCredentials; -import org.apache.http.client.CredentialsProvider; -import org.apache.http.conn.ssl.NoopHostnameVerifier; -import org.apache.http.conn.ssl.TrustAllStrategy; -import org.apache.http.impl.client.BasicCredentialsProvider; -import org.apache.http.ssl.SSLContexts; -import org.apache.http.util.Asserts; -import org.apache.http.util.EntityUtils; - -import org.easysearch.client.Request; -import org.easysearch.client.Response; -import org.easysearch.client.RestClient; -import org.easysearch.client.RestClientBuilder; - -import lombok.extern.slf4j.Slf4j; - -import javax.net.ssl.SSLContext; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.function.Function; -import java.util.stream.Collectors; - -@Slf4j -public class EasysearchClient { - - private static final int CONNECTION_REQUEST_TIMEOUT = 10 * 1000; - - private static final int SOCKET_TIMEOUT = 5 * 60 * 1000; - - private final RestClient restClient; - - private EasysearchClient(RestClient restClient) { - this.restClient = restClient; - } - - public static EasysearchClient createInstance(Config pluginConfig) { - List hosts = pluginConfig.getStringList(EzsClusterConnectionConfig.HOSTS.key()); - Optional username = Optional.empty(); - Optional password = Optional.empty(); - if 
(pluginConfig.hasPath(EzsClusterConnectionConfig.USERNAME.key())) { - username = - Optional.of(pluginConfig.getString(EzsClusterConnectionConfig.USERNAME.key())); - if (pluginConfig.hasPath(EzsClusterConnectionConfig.PASSWORD.key())) { - password = - Optional.of( - pluginConfig.getString(EzsClusterConnectionConfig.PASSWORD.key())); - } - } - Optional keystorePath = Optional.empty(); - Optional keystorePassword = Optional.empty(); - Optional truststorePath = Optional.empty(); - Optional truststorePassword = Optional.empty(); - boolean tlsVerifyCertificate = - EzsClusterConnectionConfig.TLS_VERIFY_CERTIFICATE.defaultValue(); - if (pluginConfig.hasPath(EzsClusterConnectionConfig.TLS_VERIFY_CERTIFICATE.key())) { - tlsVerifyCertificate = - pluginConfig.getBoolean( - EzsClusterConnectionConfig.TLS_VERIFY_CERTIFICATE.key()); - } - if (tlsVerifyCertificate) { - if (pluginConfig.hasPath(EzsClusterConnectionConfig.TLS_KEY_STORE_PATH.key())) { - keystorePath = - Optional.of( - pluginConfig.getString( - EzsClusterConnectionConfig.TLS_KEY_STORE_PATH.key())); - } - if (pluginConfig.hasPath(EzsClusterConnectionConfig.TLS_KEY_STORE_PASSWORD.key())) { - keystorePassword = - Optional.of( - pluginConfig.getString( - EzsClusterConnectionConfig.TLS_KEY_STORE_PASSWORD.key())); - } - if (pluginConfig.hasPath(EzsClusterConnectionConfig.TLS_TRUST_STORE_PATH.key())) { - truststorePath = - Optional.of( - pluginConfig.getString( - EzsClusterConnectionConfig.TLS_TRUST_STORE_PATH.key())); - } - if (pluginConfig.hasPath(EzsClusterConnectionConfig.TLS_TRUST_STORE_PASSWORD.key())) { - truststorePassword = - Optional.of( - pluginConfig.getString( - EzsClusterConnectionConfig.TLS_TRUST_STORE_PASSWORD.key())); - } - } - boolean tlsVerifyHostnames = EzsClusterConnectionConfig.TLS_VERIFY_HOSTNAME.defaultValue(); - if (pluginConfig.hasPath(EzsClusterConnectionConfig.TLS_VERIFY_HOSTNAME.key())) { - tlsVerifyHostnames = - pluginConfig.getBoolean(EzsClusterConnectionConfig.TLS_VERIFY_HOSTNAME.key()); - } - return createInstance( - hosts, - username, - password, - tlsVerifyCertificate, - tlsVerifyHostnames, - keystorePath, - keystorePassword, - truststorePath, - truststorePassword); - } - - public static EasysearchClient createInstance( - List hosts, - Optional username, - Optional password, - boolean tlsVerifyCertificate, - boolean tlsVerifyHostnames, - Optional keystorePath, - Optional keystorePassword, - Optional truststorePath, - Optional truststorePassword) { - RestClientBuilder restClientBuilder = - getRestClientBuilder( - hosts, - username, - password, - tlsVerifyCertificate, - tlsVerifyHostnames, - keystorePath, - keystorePassword, - truststorePath, - truststorePassword); - return new EasysearchClient(restClientBuilder.build()); - } - - private static RestClientBuilder getRestClientBuilder( - List hosts, - Optional username, - Optional password, - boolean tlsVerifyCertificate, - boolean tlsVerifyHostnames, - Optional keystorePath, - Optional keystorePassword, - Optional truststorePath, - Optional truststorePassword) { - HttpHost[] httpHosts = new HttpHost[hosts.size()]; - for (int i = 0; i < hosts.size(); i++) { - httpHosts[i] = HttpHost.create(hosts.get(i)); - } - - RestClientBuilder restClientBuilder = - RestClient.builder(httpHosts) - .setRequestConfigCallback( - requestConfigBuilder -> - requestConfigBuilder - .setConnectionRequestTimeout( - CONNECTION_REQUEST_TIMEOUT) - .setSocketTimeout(SOCKET_TIMEOUT)); - - restClientBuilder.setHttpClientConfigCallback( - httpClientBuilder -> { - if (username.isPresent()) { - 
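-                        // Basic auth: a single username/password credential is
-                        // registered for every host and realm (AuthScope.ANY).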
CredentialsProvider credentialsProvider = new BasicCredentialsProvider(); - credentialsProvider.setCredentials( - AuthScope.ANY, - new UsernamePasswordCredentials(username.get(), password.get())); - httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider); - } - - try { - if (tlsVerifyCertificate) { - Optional sslContext = - SSLUtils.buildSSLContext( - keystorePath, - keystorePassword, - truststorePath, - truststorePassword); - sslContext.ifPresent(e -> httpClientBuilder.setSSLContext(e)); - } else { - SSLContext sslContext = - SSLContexts.custom() - .loadTrustMaterial(new TrustAllStrategy()) - .build(); - httpClientBuilder.setSSLContext(sslContext); - } - if (!tlsVerifyHostnames) { - httpClientBuilder.setSSLHostnameVerifier(NoopHostnameVerifier.INSTANCE); - } - } catch (Exception e) { - throw new RuntimeException(e); - } - return httpClientBuilder; - }); - return restClientBuilder; - } - - private static Map getFieldTypeMappingFromProperties( - JsonNode properties, List source) { - Map allEasysearchFieldTypeInfoMap = new HashMap<>(); - properties - .fields() - .forEachRemaining( - entry -> { - String fieldName = entry.getKey(); - JsonNode fieldProperty = entry.getValue(); - if (fieldProperty.has("type")) { - allEasysearchFieldTypeInfoMap.put( - fieldName, fieldProperty.get("type").asText()); - } - }); - if (CollectionUtils.isEmpty(source)) { - return allEasysearchFieldTypeInfoMap; - } - - return source.stream() - .collect( - Collectors.toMap( - Function.identity(), - fieldName -> { - String fieldType = allEasysearchFieldTypeInfoMap.get(fieldName); - if (fieldType == null) { - log.warn( - "fail to get easysearch field {} mapping type,so give a default type text", - fieldName); - return "text"; - } - return fieldType; - })); - } - - public BulkResponse bulk(String requestBody) { - Request request = new Request("POST", "/_bulk"); - request.setJsonEntity(requestBody); - try { - Response response = restClient.performRequest(request); - if (response == null) { - throw new EasysearchConnectorException( - EasysearchConnectorErrorCode.BULK_RESPONSE_ERROR, - "bulk ezs Response is null"); - } - if (response.getStatusLine().getStatusCode() == HttpStatus.SC_OK) { - ObjectMapper objectMapper = new ObjectMapper(); - String entity = EntityUtils.toString(response.getEntity()); - JsonNode json = objectMapper.readTree(entity); - int took = json.get("took").asInt(); - boolean errors = json.get("errors").asBoolean(); - return new BulkResponse(errors, took, entity); - } else { - throw new EasysearchConnectorException( - EasysearchConnectorErrorCode.BULK_RESPONSE_ERROR, - String.format( - "bulk ezs response status code=%d,request boy=%s", - response.getStatusLine().getStatusCode(), requestBody)); - } - } catch (IOException e) { - throw new EasysearchConnectorException( - EasysearchConnectorErrorCode.BULK_RESPONSE_ERROR, - String.format("bulk ezs error,request boy=%s", requestBody), - e); - } - } - - public EasysearchClusterInfo getClusterInfo() { - Request request = new Request("GET", "/"); - try { - Response response = restClient.performRequest(request); - String result = EntityUtils.toString(response.getEntity()); - ObjectMapper objectMapper = new ObjectMapper(); - JsonNode jsonNode = objectMapper.readTree(result); - JsonNode versionNode = jsonNode.get("version"); - return EasysearchClusterInfo.builder() - .clusterVersion(versionNode.get("number").asText()) - .distribution( - Optional.ofNullable(versionNode.get("distribution")) - .map(e -> e.asText()) - .orElse(null)) - .build(); - } catch 
(IOException e) { - throw new EasysearchConnectorException( - EasysearchConnectorErrorCode.GET_EZS_VERSION_FAILED, - "fail to get easysearch version.", - e); - } - } - - public void close() { - try { - restClient.close(); - } catch (IOException e) { - log.warn("close easysearch connection error", e); - } - } - - /** - * first time to request search documents by scroll call /${index}/_search?scroll=${scroll} - * - * @param index index name - * @param source select fields - * @param scrollTime such as:1m - * @param scrollSize fetch documents count in one request - */ - public ScrollResult searchByScroll( - String index, - List source, - Map query, - String scrollTime, - int scrollSize) { - Map param = new HashMap<>(); - param.put("query", query); - param.put("_source", source); - param.put("sort", new String[] {"_doc"}); - param.put("size", scrollSize); - String endpoint = "/" + index + "/_search?scroll=" + scrollTime; - ScrollResult scrollResult = - getDocsFromScrollRequest(endpoint, JsonUtils.toJsonString(param)); - return scrollResult; - } - - /** - * scroll to get result call _search/scroll - * - * @param scrollId the scroll id of the last request - * @param scrollTime such as:1m - */ - public ScrollResult searchWithScrollId(String scrollId, String scrollTime) { - Map param = new HashMap<>(); - param.put("scroll_id", scrollId); - param.put("scroll", scrollTime); - ScrollResult scrollResult = - getDocsFromScrollRequest("/_search/scroll", JsonUtils.toJsonString(param)); - return scrollResult; - } - - private ScrollResult getDocsFromScrollRequest(String endpoint, String requestBody) { - Request request = new Request("POST", endpoint); - request.setJsonEntity(requestBody); - try { - Response response = restClient.performRequest(request); - if (response == null) { - throw new EasysearchConnectorException( - EasysearchConnectorErrorCode.SCROLL_REQUEST_ERROR, - "POST " + endpoint + " response null"); - } - if (response.getStatusLine().getStatusCode() == HttpStatus.SC_OK) { - String entity = EntityUtils.toString(response.getEntity()); - ObjectNode responseJson = JsonUtils.parseObject(entity); - - JsonNode shards = responseJson.get("_shards"); - int totalShards = shards.get("total").intValue(); - int successful = shards.get("successful").intValue(); - Asserts.check( - totalShards == successful, - String.format( - "POST %s,total shards(%d)!= successful shards(%d)", - endpoint, totalShards, successful)); - - ScrollResult scrollResult = getDocsFromScrollResponse(responseJson); - return scrollResult; - } else { - throw new EasysearchConnectorException( - EasysearchConnectorErrorCode.SCROLL_REQUEST_ERROR, - String.format( - "POST %s response status code=%d,request boy=%s", - endpoint, response.getStatusLine().getStatusCode(), requestBody)); - } - } catch (IOException e) { - throw new EasysearchConnectorException( - EasysearchConnectorErrorCode.SCROLL_REQUEST_ERROR, - String.format("POST %s error,request boy=%s", endpoint, requestBody), - e); - } - } - - private ScrollResult getDocsFromScrollResponse(ObjectNode responseJson) { - ScrollResult scrollResult = new ScrollResult(); - String scrollId = responseJson.get("_scroll_id").asText(); - scrollResult.setScrollId(scrollId); - - JsonNode hitsNode = responseJson.get("hits").get("hits"); - List> docs = new ArrayList<>(hitsNode.size()); - scrollResult.setDocs(docs); - - Iterator iter = hitsNode.iterator(); - while (iter.hasNext()) { - Map doc = new HashMap<>(); - JsonNode hitNode = iter.next(); - doc.put("_index", hitNode.get("_index").textValue()); - 
doc.put("_id", hitNode.get("_id").textValue()); - JsonNode source = hitNode.get("_source"); - for (Iterator> iterator = source.fields(); - iterator.hasNext(); ) { - Map.Entry entry = iterator.next(); - String fieldName = entry.getKey(); - if (entry.getValue() instanceof TextNode) { - doc.put(fieldName, entry.getValue().textValue()); - } else { - doc.put(fieldName, entry.getValue()); - } - } - docs.add(doc); - } - return scrollResult; - } - - public List getIndexDocsCount(String index) { - String endpoint = String.format("/_cat/indices/%s?h=index,docsCount&format=json", index); - Request request = new Request("GET", endpoint); - try { - Response response = restClient.performRequest(request); - if (response == null) { - throw new EasysearchConnectorException( - EasysearchConnectorErrorCode.GET_INDEX_DOCS_COUNT_FAILED, - "GET " + endpoint + " response null"); - } - if (response.getStatusLine().getStatusCode() == HttpStatus.SC_OK) { - String entity = EntityUtils.toString(response.getEntity()); - List indexDocsCounts = - JsonUtils.toList(entity, IndexDocsCount.class); - return indexDocsCounts; - } else { - throw new EasysearchConnectorException( - EasysearchConnectorErrorCode.GET_INDEX_DOCS_COUNT_FAILED, - String.format( - "GET %s response status code=%d", - endpoint, response.getStatusLine().getStatusCode())); - } - } catch (IOException ex) { - throw new EasysearchConnectorException( - EasysearchConnectorErrorCode.GET_INDEX_DOCS_COUNT_FAILED, ex); - } - } - - public List listIndex() { - String endpoint = "/_cat/indices?format=json"; - Request request = new Request("GET", endpoint); - try { - Response response = restClient.performRequest(request); - if (response == null) { - throw new EasysearchConnectorException( - EasysearchConnectorErrorCode.LIST_INDEX_FAILED, - "GET " + endpoint + " response null"); - } - if (response.getStatusLine().getStatusCode() == HttpStatus.SC_OK) { - String entity = EntityUtils.toString(response.getEntity()); - return JsonUtils.toList(entity, Map.class).stream() - .map(map -> map.get("index").toString()) - .collect(Collectors.toList()); - } else { - throw new EasysearchConnectorException( - EasysearchConnectorErrorCode.LIST_INDEX_FAILED, - String.format( - "GET %s response status code=%d", - endpoint, response.getStatusLine().getStatusCode())); - } - } catch (IOException ex) { - throw new EasysearchConnectorException( - EasysearchConnectorErrorCode.LIST_INDEX_FAILED, ex); - } - } - - // todo: We don't support set the index mapping now. 
- public void createIndex(String indexName) { - String endpoint = String.format("/%s", indexName); - Request request = new Request("PUT", endpoint); - try { - Response response = restClient.performRequest(request); - if (response == null) { - throw new EasysearchConnectorException( - EasysearchConnectorErrorCode.CREATE_INDEX_FAILED, - "PUT " + endpoint + " response null"); - } - if (response.getStatusLine().getStatusCode() != HttpStatus.SC_OK) { - throw new EasysearchConnectorException( - EasysearchConnectorErrorCode.CREATE_INDEX_FAILED, - String.format( - "PUT %s response status code=%d", - endpoint, response.getStatusLine().getStatusCode())); - } - } catch (IOException ex) { - throw new EasysearchConnectorException( - EasysearchConnectorErrorCode.CREATE_INDEX_FAILED, ex); - } - } - - public void dropIndex(String tableName) { - String endpoint = String.format("/%s", tableName); - Request request = new Request("DELETE", endpoint); - try { - Response response = restClient.performRequest(request); - if (response == null) { - throw new EasysearchConnectorException( - EasysearchConnectorErrorCode.DROP_INDEX_FAILED, - "DELETE " + endpoint + " response null"); - } - // todo: if the index doesn't exist, the response status code is 200? - if (response.getStatusLine().getStatusCode() == HttpStatus.SC_OK) { - return; - } else { - throw new EasysearchConnectorException( - EasysearchConnectorErrorCode.DROP_INDEX_FAILED, - String.format( - "DELETE %s response status code=%d", - endpoint, response.getStatusLine().getStatusCode())); - } - } catch (IOException ex) { - throw new EasysearchConnectorException( - EasysearchConnectorErrorCode.DROP_INDEX_FAILED, ex); - } - } - - /** - * get ezs field name and type mapping realtion - * - * @param index index name - * @return {key-> field name,value->ezs type} - */ - public Map getFieldTypeMapping(String index, List source) { - String endpoint = String.format("/%s/_mappings", index); - Request request = new Request("GET", endpoint); - Map mapping = new HashMap<>(); - try { - Response response = restClient.performRequest(request); - if (response == null) { - throw new EasysearchConnectorException( - EasysearchConnectorErrorCode.GET_INDEX_DOCS_COUNT_FAILED, - "GET " + endpoint + " response null"); - } - if (response.getStatusLine().getStatusCode() != HttpStatus.SC_OK) { - throw new EasysearchConnectorException( - EasysearchConnectorErrorCode.GET_INDEX_DOCS_COUNT_FAILED, - String.format( - "GET %s response status code=%d", - endpoint, response.getStatusLine().getStatusCode())); - } - String entity = EntityUtils.toString(response.getEntity()); - log.info(String.format("GET %s respnse=%s", endpoint, entity)); - ObjectNode responseJson = JsonUtils.parseObject(entity); - for (Iterator it = responseJson.elements(); it.hasNext(); ) { - JsonNode indexProperty = it.next(); - JsonNode mappingsProperty = indexProperty.get("mappings"); - if (mappingsProperty.has("mappingsProperty")) { - JsonNode properties = mappingsProperty.get("properties"); - mapping = getFieldTypeMappingFromProperties(properties, source); - } else { - for (JsonNode typeNode : mappingsProperty) { - JsonNode properties; - if (typeNode.has("properties")) { - properties = typeNode.get("properties"); - } else { - properties = typeNode; - } - mapping.putAll(getFieldTypeMappingFromProperties(properties, source)); - } - } - } - } catch (IOException ex) { - throw new EasysearchConnectorException( - EasysearchConnectorErrorCode.GET_INDEX_DOCS_COUNT_FAILED, ex); - } - return mapping; - } -} diff --git 
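Reviewer note: the bulk method above takes a raw NDJSON payload for POST /_bulk, one metadata line per action followed by its document line, with a trailing newline. A minimal sketch of a caller, assuming an already-built EasysearchClient named client and an illustrative index st_index:

    String requestBody =
            "{ \"index\" : {\"_index\" : \"st_index\"} }\n"
                    + "{\"name\": \"seatunnel\", \"age\": 3}\n";
    BulkResponse response = client.bulk(requestBody);
    if (response.isErrors()) {
        // item-level failures are only visible inside the response body
        System.out.println("bulk had item errors: " + response.getResponse());
    }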
diff --git a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/config/EzsClusterConnectionConfig.java b/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/config/EzsClusterConnectionConfig.java
deleted file mode 100644
index 500da50d2fe..00000000000
--- a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/config/EzsClusterConnectionConfig.java
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.seatunnel.connectors.seatunnel.easysearch.config;
-
-import org.apache.seatunnel.api.configuration.Option;
-import org.apache.seatunnel.api.configuration.Options;
-
-import java.util.List;
-
-public class EzsClusterConnectionConfig {
-
-    public static final Option<List<String>> HOSTS = Options.key("hosts")
-            .listType()
-            .noDefaultValue()
-            .withDescription("Easysearch cluster http address, the format is host:port, allowing multiple hosts to be specified. Such as [\"host1:9200\", \"host2:9200\"]");
-
-    public static final Option<String> USERNAME = Options.key("username")
-            .stringType()
-            .noDefaultValue()
-            .withDescription("security username");
-
-    public static final Option<String> PASSWORD = Options.key("password")
-            .stringType()
-            .noDefaultValue()
-            .withDescription("security password");
-
-    public static final Option<Boolean> TLS_VERIFY_CERTIFICATE = Options.key("tls_verify_certificate")
-            .booleanType()
-            .defaultValue(true)
-            .withDescription("Enable certificates validation for HTTPS endpoints");
-
-    public static final Option<Boolean> TLS_VERIFY_HOSTNAME = Options.key("tls_verify_hostname")
-            .booleanType()
-            .defaultValue(true)
-            .withDescription("Enable hostname validation for HTTPS endpoints");
-
-    public static final Option<String> TLS_KEY_STORE_PATH = Options.key("tls_keystore_path")
-            .stringType()
-            .noDefaultValue()
-            .withDescription("The path to the PEM or JKS key store. This file must be readable by the operating system user running SeaTunnel.");
-
-    public static final Option<String> TLS_KEY_STORE_PASSWORD = Options.key("tls_keystore_password")
-            .stringType()
-            .noDefaultValue()
-            .withDescription("The key password for the specified key store");
-
-    public static final Option<String> TLS_TRUST_STORE_PATH = Options.key("tls_truststore_path")
-            .stringType()
-            .noDefaultValue()
-            .withDescription("The path to the PEM or JKS trust store. This file must be readable by the operating system user running SeaTunnel.");
-
-    public static final Option<String> TLS_TRUST_STORE_PASSWORD = Options.key("tls_truststore_password")
-            .stringType()
-            .noDefaultValue()
-            .withDescription("The key password for the specified trust store");
-}
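Reviewer note: these options map one-to-one onto EasysearchClient.createInstance above. A minimal sketch, assuming a TLS endpoint verified against a private JKS trust store (host and paths are illustrative):

    EasysearchClient client = EasysearchClient.createInstance(
            Collections.singletonList("https://ezs-node1:9200"),
            Optional.of("admin"),                         // username
            Optional.of("admin"),                         // password
            true,                                         // tls_verify_certificate
            true,                                         // tls_verify_hostname
            Optional.empty(),                             // tls_keystore_path
            Optional.empty(),                             // tls_keystore_password
            Optional.of("/etc/seatunnel/truststore.jks"), // tls_truststore_path
            Optional.of("changeit"));                     // tls_truststore_password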
diff --git a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/config/SinkConfig.java b/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/config/SinkConfig.java
deleted file mode 100644
index 2b07b3c342d..00000000000
--- a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/config/SinkConfig.java
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.seatunnel.connectors.seatunnel.easysearch.config;
-
-import org.apache.seatunnel.api.configuration.Option;
-import org.apache.seatunnel.api.configuration.Options;
-
-import java.util.List;
-
-public class SinkConfig {
-
-    public static final Option<String> INDEX = Options.key("index")
-            .stringType()
-            .noDefaultValue()
-            .withDescription("Easysearch index name. The index supports variables of field names, such as seatunnel_${age}, and the field must appear in the SeaTunnel row. If not, we will treat it as a normal index");
-
-    public static final Option<List<String>> PRIMARY_KEYS = Options.key("primary_keys")
-            .listType(String.class)
-            .noDefaultValue()
-            .withDescription("Primary key fields used to generate the document `_id`");
-
-    public static final Option<String> KEY_DELIMITER = Options.key("key_delimiter")
-            .stringType()
-            .defaultValue("_")
-            .withDescription("Delimiter for composite keys (\"_\" by default), e.g., \"$\" would result in document `_id` \"KEY1$KEY2$KEY3\".");
-
-    public static final Option<Integer> MAX_BATCH_SIZE = Options.key("max_batch_size")
-            .intType()
-            .defaultValue(10)
-            .withDescription("batch bulk doc max size");
-
-    public static final Option<Integer> MAX_RETRY_COUNT = Options.key("max_retry_count")
-            .intType()
-            .defaultValue(3)
-            .withDescription("one bulk request max try count");
-}
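Reviewer note: primary_keys and key_delimiter together drive the document _id, as the KeyExtractor further down in this diff shows. A quick sketch (rowType and row are assumed to exist; values illustrative):

    // primary_keys = ["f1", "f2"], key_delimiter = "$"
    Function<SeaTunnelRow, String> extractor =
            KeyExtractor.createKeyExtractor(rowType, new String[] {"f1", "f2"}, "$");
    String id = extractor.apply(row); // e.g. "KEY1$KEY2"; with no primary_keys the _id is auto-generated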
diff --git a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/config/SourceConfig.java b/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/config/SourceConfig.java
deleted file mode 100644
index f80c4aa9293..00000000000
--- a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/config/SourceConfig.java
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.seatunnel.connectors.seatunnel.easysearch.config;
-
-import org.apache.seatunnel.api.configuration.Option;
-import org.apache.seatunnel.api.configuration.Options;
-
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-public class SourceConfig {
-
-    public static final Option<String> INDEX = Options.key("index")
-            .stringType()
-            .noDefaultValue()
-            .withDescription("Easysearch index name, support * fuzzy matching");
-
-    public static final Option<List<String>> SOURCE = Options.key("source")
-            .listType()
-            .noDefaultValue()
-            .withDescription("The fields of the index. You can get the document id by specifying the field _id. If you sink _id to another index, you need to specify an alias for _id due to the Easysearch limit");
-
-    public static final Option<String> SCROLL_TIME = Options.key("scroll_time")
-            .stringType()
-            .defaultValue("1m")
-            .withDescription("Amount of time Easysearch will keep the search context alive for scroll requests");
-
-    public static final Option<Integer> SCROLL_SIZE = Options.key("scroll_size")
-            .intType()
-            .defaultValue(100)
-            .withDescription("Maximum number of hits to be returned with each Easysearch scroll request");
-
-    public static final Option<Map> QUERY = Options.key("query")
-            .objectType(Map.class)
-            .defaultValue(Collections.singletonMap("match_all", new HashMap()))
-            .withDescription("Easysearch query language. You can control the range of data read");
-}
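Reviewer note: scroll_time and scroll_size feed the scroll API of EasysearchClient above. Roughly, the read loop looks like this sketch (client assumed, error handling omitted, index and fields illustrative):

    Map<String, Object> query = Collections.singletonMap("match_all", new HashMap<>());
    ScrollResult page = client.searchByScroll("st_index", Arrays.asList("_id", "name"), query, "1m", 100);
    while (page.getDocs() != null && !page.getDocs().isEmpty()) {
        for (Map<String, Object> doc : page.getDocs()) {
            // consume doc...
        }
        page = client.searchWithScrollId(page.getScrollId(), "1m"); // keep the context alive another 1m
    }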
diff --git a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/constant/EzsTypeMappingSeaTunnelType.java b/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/constant/EzsTypeMappingSeaTunnelType.java
deleted file mode 100644
index 3b8f5d32f19..00000000000
--- a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/constant/EzsTypeMappingSeaTunnelType.java
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.seatunnel.connectors.seatunnel.easysearch.constant;
-
-import org.apache.seatunnel.api.table.type.BasicType;
-import org.apache.seatunnel.api.table.type.LocalTimeType;
-import org.apache.seatunnel.api.table.type.SeaTunnelDataType;
-import org.apache.seatunnel.connectors.seatunnel.easysearch.exception.EasysearchConnectorErrorCode;
-import org.apache.seatunnel.connectors.seatunnel.easysearch.exception.EasysearchConnectorException;
-
-import java.util.HashMap;
-import java.util.Map;
-
-public class EzsTypeMappingSeaTunnelType {
-
-    private static final Map<String, SeaTunnelDataType> MAPPING =
-            new HashMap<String, SeaTunnelDataType>() {
-                {
-                    put("string", BasicType.STRING_TYPE);
-                    put("keyword", BasicType.STRING_TYPE);
-                    put("text", BasicType.STRING_TYPE);
-                    put("binary", BasicType.STRING_TYPE);
-                    put("boolean", BasicType.BOOLEAN_TYPE);
-                    put("byte", BasicType.BYTE_TYPE);
-                    put("short", BasicType.SHORT_TYPE);
-                    put("integer", BasicType.INT_TYPE);
-                    put("long", BasicType.LONG_TYPE);
-                    put("float", BasicType.FLOAT_TYPE);
-                    put("half_float", BasicType.FLOAT_TYPE);
-                    put("double", BasicType.DOUBLE_TYPE);
-                    put("date", LocalTimeType.LOCAL_DATE_TIME_TYPE);
-                }
-            };
-
-    /**
-     * If no mapped SeaTunnelDataType is found, a runtime exception is thrown.
-     *
-     * @param esType easysearch field type
-     * @return the mapped SeaTunnel data type
-     */
-    public static SeaTunnelDataType getSeaTunnelDataType(String esType) {
-        SeaTunnelDataType seaTunnelDataType = MAPPING.get(esType);
-        if (seaTunnelDataType == null) {
-            throw new EasysearchConnectorException(
-                    EasysearchConnectorErrorCode.EZS_FIELD_TYPE_NOT_SUPPORT,
-                    String.format("easysearch type is %s", esType));
-        }
-        return seaTunnelDataType;
-    }
-}
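Reviewer note: the mapping is lossy by design; keyword and text both flatten to STRING, and date always becomes LOCAL_DATE_TIME. For example:

    SeaTunnelDataType t1 = EzsTypeMappingSeaTunnelType.getSeaTunnelDataType("keyword"); // STRING_TYPE
    SeaTunnelDataType t2 = EzsTypeMappingSeaTunnelType.getSeaTunnelDataType("date");    // LOCAL_DATE_TIME_TYPE
    // unmapped types such as "ip" throw EZS_FIELD_TYPE_NOT_SUPPORT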
diff --git a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/dto/BulkResponse.java b/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/dto/BulkResponse.java
deleted file mode 100644
index d239187415b..00000000000
--- a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/dto/BulkResponse.java
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.seatunnel.connectors.seatunnel.easysearch.dto;
-
-/** The response of an EZS bulk http request. */
-public class BulkResponse {
-
-    private boolean errors;
-    private int took;
-    private String response;
-
-    public BulkResponse() {}
-
-    public BulkResponse(boolean errors, int took, String response) {
-        this.errors = errors;
-        this.took = took;
-        this.response = response;
-    }
-
-    public boolean isErrors() {
-        return errors;
-    }
-
-    public void setErrors(boolean errors) {
-        this.errors = errors;
-    }
-
-    public int getTook() {
-        return took;
-    }
-
-    public void setTook(int took) {
-        this.took = took;
-    }
-
-    public String getResponse() {
-        return response;
-    }
-
-    public void setResponse(String response) {
-        this.response = response;
-    }
-}
diff --git a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/dto/EasysearchClusterInfo.java b/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/dto/EasysearchClusterInfo.java
deleted file mode 100644
index 72453a0c5ac..00000000000
--- a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/dto/EasysearchClusterInfo.java
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.seatunnel.connectors.seatunnel.easysearch.dto;
-
-import lombok.Builder;
-import lombok.Getter;
-import lombok.ToString;
-
-@Getter
-@Builder
-@ToString
-public class EasysearchClusterInfo {
-    private String distribution;
-    private String clusterVersion;
-}
diff --git a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/dto/IndexInfo.java b/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/dto/IndexInfo.java
deleted file mode 100644
index 391bf5fe688..00000000000
--- a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/dto/IndexInfo.java
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.seatunnel.connectors.seatunnel.easysearch.dto;
-
-import org.apache.seatunnel.shade.com.typesafe.config.Config;
-
-import org.apache.seatunnel.connectors.seatunnel.easysearch.config.SinkConfig;
-
-import lombok.Data;
-
-/** index config by seatunnel */
-@Data
-public class IndexInfo {
-
-    private String index;
-    private String[] primaryKeys;
-    private String keyDelimiter;
-
-    public IndexInfo(Config pluginConfig) {
-        index = pluginConfig.getString(SinkConfig.INDEX.key());
-        if (pluginConfig.hasPath(SinkConfig.PRIMARY_KEYS.key())) {
-            primaryKeys = pluginConfig.getStringList(SinkConfig.PRIMARY_KEYS.key()).toArray(new String[0]);
-        }
-        keyDelimiter = SinkConfig.KEY_DELIMITER.defaultValue();
-        if (pluginConfig.hasPath(SinkConfig.KEY_DELIMITER.key())) {
-            keyDelimiter = pluginConfig.getString(SinkConfig.KEY_DELIMITER.key());
-        }
-    }
-}
diff --git a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/dto/source/IndexDocsCount.java b/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/dto/source/IndexDocsCount.java
deleted file mode 100644
index c1e9ada08fb..00000000000
--- a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/dto/source/IndexDocsCount.java
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.seatunnel.connectors.seatunnel.easysearch.dto.source;
-
-public class IndexDocsCount {
-
-    private String index;
-    /** index docs count */
-    private Long docsCount;
-
-    public String getIndex() {
-        return index;
-    }
-
-    public void setIndex(String index) {
-        this.index = index;
-    }
-
-    public Long getDocsCount() {
-        return docsCount;
-    }
-
-    public void setDocsCount(Long docsCount) {
-        this.docsCount = docsCount;
-    }
-}
diff --git a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/dto/source/ScrollResult.java b/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/dto/source/ScrollResult.java
deleted file mode 100644
index 3e4e798fddc..00000000000
--- a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/dto/source/ScrollResult.java
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.seatunnel.connectors.seatunnel.easysearch.dto.source;
-
-import lombok.Data;
-
-import java.util.List;
-import java.util.Map;
-
-@Data
-public class ScrollResult {
-
-    private String scrollId;
-    private List<Map<String, Object>> docs;
-}
diff --git a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/dto/source/SourceIndexInfo.java b/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/dto/source/SourceIndexInfo.java
deleted file mode 100644
index 2a7addfcea5..00000000000
--- a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/dto/source/SourceIndexInfo.java
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.seatunnel.connectors.seatunnel.easysearch.dto.source;
-
-import lombok.AllArgsConstructor;
-import lombok.Data;
-
-import java.io.Serializable;
-import java.util.List;
-import java.util.Map;
-
-@Data
-@AllArgsConstructor
-public class SourceIndexInfo implements Serializable {
-    private String index;
-    private List<String> source;
-    private Map<String, Object> query;
-    private String scrollTime;
-    private int scrollSize;
-}
diff --git a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/exception/EasysearchConnectorErrorCode.java b/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/exception/EasysearchConnectorErrorCode.java
deleted file mode 100644
index 6ba45627904..00000000000
--- a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/exception/EasysearchConnectorErrorCode.java
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.seatunnel.connectors.seatunnel.easysearch.exception;
-
-import org.apache.seatunnel.common.exception.SeaTunnelErrorCode;
-
-public enum EasysearchConnectorErrorCode implements SeaTunnelErrorCode {
-    UNSUPPORTED_OPERATION("EASYSEARCH-COMMON-01", "Unsupported operation"),
-    JSON_OPERATION_FAILED("EASYSEARCH-COMMON-02", "Json convert/parse operation failed"),
-    SQL_OPERATION_FAILED("EASYSEARCH-COMMON-04", "Sql operation failed, such as (execute, addBatch, close) etc."),
-    UNSUPPORTED_DATA_TYPE("EASYSEARCH-COMMON-03", "Unsupported data type"),
-    BULK_RESPONSE_ERROR("EASYSEARCH-01", "Bulk ezs response error"),
-    GET_EZS_VERSION_FAILED("EASYSEARCH-02", "Get easysearch version failed"),
-    SCROLL_REQUEST_ERROR("EASYSEARCH-03", "Failed to scroll request"),
-    GET_INDEX_DOCS_COUNT_FAILED("EASYSEARCH-04", "Get easysearch document index count failed"),
-    LIST_INDEX_FAILED("EASYSEARCH-05", "List easysearch index failed"),
-    DROP_INDEX_FAILED("EASYSEARCH-06", "Drop easysearch index failed"),
-    CREATE_INDEX_FAILED("EASYSEARCH-07", "Create easysearch index failed"),
-    EZS_FIELD_TYPE_NOT_SUPPORT("EASYSEARCH-08", "Unsupported easysearch field type");
-
-    private final String code;
-    private final String description;
-
-    EasysearchConnectorErrorCode(String code, String description) {
-        this.code = code;
-        this.description = description;
-    }
-
-    @Override
-    public String getCode() {
-        return code;
-    }
-
-    @Override
-    public String getDescription() {
-        return description;
-    }
-}
diff --git a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/exception/EasysearchConnectorException.java b/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/exception/EasysearchConnectorException.java
deleted file mode 100644
index ce83398f9c0..00000000000
--- a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/exception/EasysearchConnectorException.java
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.seatunnel.connectors.seatunnel.easysearch.exception;
-
-import org.apache.seatunnel.common.exception.SeaTunnelErrorCode;
-import org.apache.seatunnel.common.exception.SeaTunnelRuntimeException;
-
-public class EasysearchConnectorException extends SeaTunnelRuntimeException {
-    public EasysearchConnectorException(SeaTunnelErrorCode seaTunnelErrorCode, String errorMessage) {
-        super(seaTunnelErrorCode, errorMessage);
-    }
-
-    public EasysearchConnectorException(SeaTunnelErrorCode seaTunnelErrorCode, String errorMessage, Throwable cause) {
-        super(seaTunnelErrorCode, errorMessage, cause);
-    }
-
-    public EasysearchConnectorException(SeaTunnelErrorCode seaTunnelErrorCode, Throwable cause) {
-        super(seaTunnelErrorCode, cause);
-    }
-}
diff --git a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/serialize/EasysearchRowSerializer.java b/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/serialize/EasysearchRowSerializer.java
deleted file mode 100644
index e57ad1fd512..00000000000
--- a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/serialize/EasysearchRowSerializer.java
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.seatunnel.connectors.seatunnel.easysearch.serialize;
-
-import org.apache.seatunnel.shade.com.fasterxml.jackson.core.JsonProcessingException;
-import org.apache.seatunnel.shade.com.fasterxml.jackson.databind.ObjectMapper;
-
-import org.apache.seatunnel.api.table.type.SeaTunnelRow;
-import org.apache.seatunnel.api.table.type.SeaTunnelRowType;
-import org.apache.seatunnel.connectors.seatunnel.easysearch.dto.IndexInfo;
-import org.apache.seatunnel.connectors.seatunnel.easysearch.exception.EasysearchConnectorException;
-import org.apache.seatunnel.connectors.seatunnel.easysearch.serialize.index.IndexSerializer;
-import org.apache.seatunnel.connectors.seatunnel.easysearch.serialize.index.IndexSerializerFactory;
-
-import lombok.NonNull;
-
-import java.time.temporal.Temporal;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.function.Function;
-
-import static org.apache.seatunnel.connectors.seatunnel.easysearch.exception.EasysearchConnectorErrorCode.JSON_OPERATION_FAILED;
-import static org.apache.seatunnel.connectors.seatunnel.easysearch.exception.EasysearchConnectorErrorCode.UNSUPPORTED_OPERATION;
-
-public class EasysearchRowSerializer implements SeaTunnelRowSerializer {
-    private final SeaTunnelRowType seaTunnelRowType;
-    private final ObjectMapper objectMapper = new ObjectMapper();
-
-    private final IndexSerializer indexSerializer;
-
-    private final Function<SeaTunnelRow, String> keyExtractor;
-
-    public EasysearchRowSerializer(IndexInfo indexInfo, SeaTunnelRowType seaTunnelRowType) {
-        this.indexSerializer =
-                IndexSerializerFactory.getIndexSerializer(indexInfo.getIndex(), seaTunnelRowType);
-        this.seaTunnelRowType = seaTunnelRowType;
-        this.keyExtractor = KeyExtractor.createKeyExtractor(
-                seaTunnelRowType, indexInfo.getPrimaryKeys(), indexInfo.getKeyDelimiter());
-    }
-
-    @Override
-    public String serializeRow(SeaTunnelRow row) {
-        switch (row.getRowKind()) {
-            case INSERT:
-            case UPDATE_AFTER:
-                return serializeUpsert(row);
-            case UPDATE_BEFORE:
-            case DELETE:
-                return serializeDelete(row);
-            default:
-                throw new EasysearchConnectorException(
-                        UNSUPPORTED_OPERATION, "Unsupported write row kind: " + row.getRowKind());
-        }
-    }
-
-    private String serializeUpsert(SeaTunnelRow row) {
-        String key = keyExtractor.apply(row);
-        Map<String, Object> document = toDocumentMap(row);
-
-        try {
-            if (key != null) {
-                Map<String, Object> upsertMetadata = createMetadata(row, key);
-                /*
-                 * format example: { "update" : {"_index" : "${your_index}", "_id" :
-                 * "${your_document_id}"} }\n { "doc" : ${your_document_json}, "doc_as_upsert" :
-                 * true }
-                 */
-                return new StringBuilder()
-                        .append("{ \"update\" :")
-                        .append(objectMapper.writeValueAsString(upsertMetadata))
-                        .append("}")
-                        .append("\n")
-                        .append("{ \"doc\" :")
-                        .append(objectMapper.writeValueAsString(document))
-                        .append(", \"doc_as_upsert\" : true }")
-                        .toString();
-            } else {
-                Map<String, Object> indexMetadata = createMetadata(row);
-                /*
-                 * format example: { "index" : {"_index" : "${your_index}", "_id" :
-                 * "${your_document_id}"} }\n ${your_document_json}
-                 */
-                return new StringBuilder()
-                        .append("{ \"index\" :")
-                        .append(objectMapper.writeValueAsString(indexMetadata))
-                        .append("}")
-                        .append("\n")
-                        .append(objectMapper.writeValueAsString(document))
-                        .toString();
-            }
-        } catch (JsonProcessingException e) {
-            throw new EasysearchConnectorException(
-                    JSON_OPERATION_FAILED, "Object json serialization exception.", e);
-        }
-    }
-
-    private String serializeDelete(SeaTunnelRow row) {
-        String key = keyExtractor.apply(row);
-        Map<String, Object> deleteMetadata = createMetadata(row, key);
-        try {
-            /*
-             * format example: { "delete" : {"_index" : "${your_index}", "_id" :
-             * "${your_document_id}"} }
-             */
-            return new StringBuilder()
-                    .append("{ \"delete\" :")
-                    .append(objectMapper.writeValueAsString(deleteMetadata))
-                    .append("}")
-                    .toString();
-        } catch (JsonProcessingException e) {
-            throw new EasysearchConnectorException(
-                    JSON_OPERATION_FAILED, "Object json serialization exception.", e);
-        }
-    }
-
-    private Map<String, Object> toDocumentMap(SeaTunnelRow row) {
-        String[] fieldNames = seaTunnelRowType.getFieldNames();
-        Map<String, Object> doc = new HashMap<>(fieldNames.length);
-        Object[] fields = row.getFields();
-        for (int i = 0; i < fieldNames.length; i++) {
-            Object value = fields[i];
-            if (value instanceof Temporal) {
-                // Jackson does not support the jdk8 time API here; fall back to toString()
-                doc.put(fieldNames[i], value.toString());
-            } else {
-                doc.put(fieldNames[i], value);
-            }
-        }
-        return doc;
-    }
-
-    private Map<String, Object> createMetadata(@NonNull SeaTunnelRow row, @NonNull String key) {
-        Map<String, Object> actionMetadata = createMetadata(row);
-        actionMetadata.put("_id", key);
-        return actionMetadata;
-    }
-
-    private Map<String, Object> createMetadata(@NonNull SeaTunnelRow row) {
-        Map<String, Object> actionMetadata = new HashMap<>(2);
-        actionMetadata.put("_index", indexSerializer.serialize(row));
-        return actionMetadata;
-    }
-}
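Reviewer note: for a row keyed to id "1001" in index "st_index" (illustrative values), serializeRow emits the NDJSON fragments described by the format comments above:

    String fragment = serializer.serializeRow(row);
    // INSERT / UPDATE_AFTER produce the upsert form:
    //   { "update" :{"_index":"st_index","_id":"1001"}}
    //   { "doc" :{"name":"foo","age":26}, "doc_as_upsert" : true }
    // UPDATE_BEFORE / DELETE produce:
    //   { "delete" :{"_index":"st_index","_id":"1001"}}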
diff --git a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/serialize/KeyExtractor.java b/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/serialize/KeyExtractor.java
deleted file mode 100644
index c732d4288bb..00000000000
--- a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/serialize/KeyExtractor.java
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.seatunnel.connectors.seatunnel.easysearch.serialize;
-
-import org.apache.seatunnel.api.table.type.SeaTunnelDataType;
-import org.apache.seatunnel.api.table.type.SeaTunnelRow;
-import org.apache.seatunnel.api.table.type.SeaTunnelRowType;
-import org.apache.seatunnel.connectors.seatunnel.easysearch.exception.EasysearchConnectorException;
-
-import lombok.AllArgsConstructor;
-
-import java.io.Serializable;
-import java.time.LocalDate;
-import java.time.LocalDateTime;
-import java.time.LocalTime;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.function.Function;
-
-import static org.apache.seatunnel.connectors.seatunnel.easysearch.exception.EasysearchConnectorErrorCode.UNSUPPORTED_OPERATION;
-
-@AllArgsConstructor
-public class KeyExtractor implements Function<SeaTunnelRow, String>, Serializable {
-    private final FieldFormatter[] fieldFormatters;
-    private final String keyDelimiter;
-
-    public static Function<SeaTunnelRow, String> createKeyExtractor(
-            SeaTunnelRowType rowType, String[] primaryKeys, String keyDelimiter) {
-        if (primaryKeys == null) {
-            return row -> null;
-        }
-
-        List<FieldFormatter> fieldFormatters = new ArrayList<>(primaryKeys.length);
-        for (String fieldName : primaryKeys) {
-            int fieldIndex = rowType.indexOf(fieldName);
-            SeaTunnelDataType<?> fieldType = rowType.getFieldType(fieldIndex);
-            FieldFormatter fieldFormatter = createFieldFormatter(fieldIndex, fieldType);
-            fieldFormatters.add(fieldFormatter);
-        }
-        return new KeyExtractor(fieldFormatters.toArray(new FieldFormatter[0]), keyDelimiter);
-    }
-
-    private static FieldFormatter createFieldFormatter(int fieldIndex, SeaTunnelDataType<?> fieldType) {
-        return row -> {
-            switch (fieldType.getSqlType()) {
-                case ROW:
-                case ARRAY:
-                case MAP:
-                    throw new EasysearchConnectorException(
-                            UNSUPPORTED_OPERATION, "Unsupported type: " + fieldType);
-                case DATE:
-                    LocalDate localDate = (LocalDate) row.getField(fieldIndex);
-                    return localDate.toString();
-                case TIME:
-                    LocalTime localTime = (LocalTime) row.getField(fieldIndex);
-                    return localTime.toString();
-                case TIMESTAMP:
-                    LocalDateTime localDateTime = (LocalDateTime) row.getField(fieldIndex);
-                    return localDateTime.toString();
-                default:
-                    return row.getField(fieldIndex).toString();
-            }
-        };
-    }
-
-    @Override
-    public String apply(SeaTunnelRow row) {
-        StringBuilder builder = new StringBuilder();
-        for (int i = 0; i < fieldFormatters.length; i++) {
-            if (i > 0) {
-                builder.append(keyDelimiter);
-            }
-            String value = fieldFormatters[i].format(row);
-            builder.append(value);
-        }
-        return builder.toString();
-    }
-
-    private interface FieldFormatter extends Serializable {
-        String format(SeaTunnelRow row);
-    }
-}
diff --git a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/serialize/SeaTunnelRowSerializer.java b/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/serialize/SeaTunnelRowSerializer.java
deleted file mode 100644
index f8d91bb34b4..00000000000
--- a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/serialize/SeaTunnelRowSerializer.java
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.seatunnel.connectors.seatunnel.easysearch.serialize;
-
-import org.apache.seatunnel.api.table.type.SeaTunnelRow;
-
-public interface SeaTunnelRowSerializer {
-
-    String serializeRow(SeaTunnelRow row);
-}
diff --git a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/serialize/index/IndexSerializer.java b/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/serialize/index/IndexSerializer.java
deleted file mode 100644
index c709fce293b..00000000000
--- a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/serialize/index/IndexSerializer.java
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.seatunnel.connectors.seatunnel.easysearch.serialize.index;
-
-import org.apache.seatunnel.api.table.type.SeaTunnelRow;
-
-/** index is a variable */
-public interface IndexSerializer {
-
-    String serialize(SeaTunnelRow row);
-}
diff --git a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/serialize/index/IndexSerializerFactory.java b/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/serialize/index/IndexSerializerFactory.java
deleted file mode 100644
index ba3eeb0cf4c..00000000000
--- a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/serialize/index/IndexSerializerFactory.java
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.seatunnel.connectors.seatunnel.easysearch.serialize.index;
-
-import org.apache.seatunnel.api.table.type.SeaTunnelRowType;
-import org.apache.seatunnel.connectors.seatunnel.easysearch.serialize.index.impl.FixedValueIndexSerializer;
-import org.apache.seatunnel.connectors.seatunnel.easysearch.serialize.index.impl.VariableIndexSerializer;
-import org.apache.seatunnel.connectors.seatunnel.easysearch.util.RegexUtils;
-
-import java.util.List;
-
-public class IndexSerializerFactory {
-
-    public static IndexSerializer getIndexSerializer(String index, SeaTunnelRowType seaTunnelRowType) {
-        List<String> fieldNames = RegexUtils.extractDatas(index, "\\$\\{(.*?)\\}");
-        if (fieldNames != null && fieldNames.size() > 0) {
-            return new VariableIndexSerializer(seaTunnelRowType, index, fieldNames);
-        } else {
-            return new FixedValueIndexSerializer(index);
-        }
-    }
-}
diff --git a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/serialize/index/impl/FixedValueIndexSerializer.java b/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/serialize/index/impl/FixedValueIndexSerializer.java
deleted file mode 100644
index 6788b359e91..00000000000
--- a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/serialize/index/impl/FixedValueIndexSerializer.java
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.seatunnel.connectors.seatunnel.easysearch.serialize.index.impl;
-
-import org.apache.seatunnel.api.table.type.SeaTunnelRow;
-import org.apache.seatunnel.connectors.seatunnel.easysearch.serialize.index.IndexSerializer;
-
-/** index is a fixed value, not a variable */
-public class FixedValueIndexSerializer implements IndexSerializer {
-
-    private final String index;
-
-    public FixedValueIndexSerializer(String index) {
-        this.index = index;
-    }
-
-    @Override
-    public String serialize(SeaTunnelRow row) {
-        return index;
-    }
-}
diff --git a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/serialize/index/impl/VariableIndexSerializer.java b/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/serialize/index/impl/VariableIndexSerializer.java
deleted file mode 100644
index 00eb9e1b995..00000000000
--- a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/serialize/index/impl/VariableIndexSerializer.java
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.seatunnel.connectors.seatunnel.easysearch.serialize.index.impl;
-
-import org.apache.seatunnel.api.table.type.SeaTunnelRow;
-import org.apache.seatunnel.api.table.type.SeaTunnelRowType;
-import org.apache.seatunnel.connectors.seatunnel.easysearch.serialize.index.IndexSerializer;
-
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-/** index includes variables */
-public class VariableIndexSerializer implements IndexSerializer {
-
-    private final String index;
-    private final Map<String, Integer> fieldIndexMap;
-
-    private final String nullDefault = "null";
-
-    public VariableIndexSerializer(SeaTunnelRowType seaTunnelRowType, String index, List<String> fieldNames) {
-        this.index = index;
-        String[] rowFieldNames = seaTunnelRowType.getFieldNames();
-        fieldIndexMap = new HashMap<>(rowFieldNames.length);
-        for (int i = 0; i < rowFieldNames.length; i++) {
-            if (fieldNames.contains(rowFieldNames[i])) {
-                fieldIndexMap.put(rowFieldNames[i], i);
-            }
-        }
-    }
-
-    @Override
-    public String serialize(SeaTunnelRow row) {
-        String indexName = this.index;
-        for (Map.Entry<String, Integer> fieldIndexEntry : fieldIndexMap.entrySet()) {
-            String fieldName = fieldIndexEntry.getKey();
-            int fieldIndex = fieldIndexEntry.getValue();
-            String value = getValue(fieldIndex, row);
-            indexName = indexName.replace(String.format("${%s}", fieldName), value);
-        }
-        return indexName.toLowerCase();
-    }
-
-    private String getValue(int fieldIndex, SeaTunnelRow row) {
-        Object valueObj = row.getField(fieldIndex);
-        if (valueObj == null) {
-            return nullDefault;
-        } else {
-            return valueObj.toString();
-        }
-    }
-}
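Reviewer note: the resolved index name is always lower-cased, and a null field renders as the literal "null". A quick sketch (rowType and row assumed; values illustrative):

    IndexSerializer serializer = IndexSerializerFactory.getIndexSerializer("seatunnel_${Name}", rowType);
    String indexName = serializer.serialize(row); // Name = "Tyrant" -> "seatunnel_tyrant"; Name = null -> "seatunnel_null"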
- */ - -package org.apache.seatunnel.connectors.seatunnel.easysearch.serialize.source; - -import org.apache.seatunnel.shade.com.fasterxml.jackson.core.JsonProcessingException; -import org.apache.seatunnel.shade.com.fasterxml.jackson.core.type.TypeReference; -import org.apache.seatunnel.shade.com.fasterxml.jackson.databind.ObjectMapper; -import org.apache.seatunnel.shade.com.fasterxml.jackson.databind.node.NullNode; -import org.apache.seatunnel.shade.com.fasterxml.jackson.databind.node.ObjectNode; -import org.apache.seatunnel.shade.com.fasterxml.jackson.databind.node.TextNode; - -import org.apache.seatunnel.api.table.type.ArrayType; -import org.apache.seatunnel.api.table.type.BasicType; -import org.apache.seatunnel.api.table.type.DecimalType; -import org.apache.seatunnel.api.table.type.LocalTimeType; -import org.apache.seatunnel.api.table.type.MapType; -import org.apache.seatunnel.api.table.type.PrimitiveByteArrayType; -import org.apache.seatunnel.api.table.type.SeaTunnelDataType; -import org.apache.seatunnel.api.table.type.SeaTunnelRow; -import org.apache.seatunnel.api.table.type.SeaTunnelRowType; -import org.apache.seatunnel.common.utils.JsonUtils; -import org.apache.seatunnel.connectors.seatunnel.easysearch.exception.EasysearchConnectorException; - -import java.lang.reflect.Array; -import java.math.BigDecimal; -import java.time.Instant; -import java.time.LocalDateTime; -import java.time.ZoneId; -import java.time.format.DateTimeFormatter; -import java.util.Base64; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import static org.apache.seatunnel.api.table.type.BasicType.BOOLEAN_TYPE; -import static org.apache.seatunnel.api.table.type.BasicType.BYTE_TYPE; -import static org.apache.seatunnel.api.table.type.BasicType.DOUBLE_TYPE; -import static org.apache.seatunnel.api.table.type.BasicType.FLOAT_TYPE; -import static org.apache.seatunnel.api.table.type.BasicType.INT_TYPE; -import static org.apache.seatunnel.api.table.type.BasicType.LONG_TYPE; -import static org.apache.seatunnel.api.table.type.BasicType.SHORT_TYPE; -import static org.apache.seatunnel.api.table.type.BasicType.STRING_TYPE; -import static org.apache.seatunnel.api.table.type.BasicType.VOID_TYPE; -import static org.apache.seatunnel.connectors.seatunnel.easysearch.exception.EasysearchConnectorErrorCode.UNSUPPORTED_DATA_TYPE; -import static org.apache.seatunnel.connectors.seatunnel.easysearch.exception.EasysearchConnectorErrorCode.UNSUPPORTED_OPERATION; - -public class DefaultSeaTunnelRowDeserializer implements SeaTunnelRowDeserializer { - - private final SeaTunnelRowType rowTypeInfo; - - private final ObjectMapper mapper = new ObjectMapper(); - - private final Map dateTimeFormatterMap = - new HashMap() { - { - put("yyyy-MM-dd HH".length(), DateTimeFormatter.ofPattern("yyyy-MM-dd HH")); - put( - "yyyy-MM-dd HH:mm".length(), - DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm")); - put( - "yyyyMMdd HH:mm:ss".length(), - DateTimeFormatter.ofPattern("yyyyMMdd HH:mm:ss")); - put( - "yyyy-MM-dd HH:mm:ss".length(), - DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss")); - put( - "yyyy-MM-dd HH:mm:ss.S".length(), - DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.S")); - put( - "yyyy-MM-dd HH:mm:ss.SS".length(), - DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.SS")); - put( - "yyyy-MM-dd HH:mm:ss.SSS".length(), - DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.SSS")); - put( - "yyyy-MM-dd HH:mm:ss.SSSS".length(), - DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.SSSS")); - put( - "yyyy-MM-dd 
HH:mm:ss.SSSSSS".length(),
-                            DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.SSSSSS"));
-                }
-            };
-
-    public DefaultSeaTunnelRowDeserializer(SeaTunnelRowType rowTypeInfo) {
-        this.rowTypeInfo = rowTypeInfo;
-    }
-
-    @Override
-    public SeaTunnelRow deserialize(EasysearchRecord rowRecord) {
-        return convert(rowRecord);
-    }
-
-    SeaTunnelRow convert(EasysearchRecord rowRecord) {
-        Object[] seaTunnelFields = new Object[rowTypeInfo.getTotalFields()];
-        String fieldName = null;
-        Object value = null;
-        SeaTunnelDataType<?> seaTunnelDataType = null;
-        try {
-            for (int i = 0; i < rowTypeInfo.getTotalFields(); i++) {
-                fieldName = rowTypeInfo.getFieldName(i);
-                value = recursiveGet(rowRecord.getDoc(), fieldName);
-                if (value != null) {
-                    seaTunnelDataType = rowTypeInfo.getFieldType(i);
-                    if (value instanceof NullNode) {
-                        seaTunnelFields[i] = null;
-                    } else if (value instanceof TextNode) {
-                        seaTunnelFields[i] =
-                                convertValue(seaTunnelDataType, ((TextNode) value).textValue());
-                    } else {
-                        seaTunnelFields[i] = convertValue(seaTunnelDataType, value.toString());
-                    }
-                }
-            }
-        } catch (Exception ex) {
-            throw new EasysearchConnectorException(
-                    UNSUPPORTED_OPERATION,
-                    String.format(
-                            "error fieldName=%s,fieldValue=%s,seaTunnelDataType=%s,rowRecord=%s",
-                            fieldName, value, seaTunnelDataType, JsonUtils.toJsonString(rowRecord)),
-                    ex);
-        }
-        return new SeaTunnelRow(seaTunnelFields);
-    }
-
-    Object convertValue(SeaTunnelDataType<?> fieldType, String fieldValue)
-            throws JsonProcessingException {
-        if (BOOLEAN_TYPE.equals(fieldType)) {
-            return Boolean.parseBoolean(fieldValue);
-        } else if (BYTE_TYPE.equals(fieldType)) {
-            return Byte.valueOf(fieldValue);
-        } else if (SHORT_TYPE.equals(fieldType)) {
-            return Short.parseShort(fieldValue);
-        } else if (INT_TYPE.equals(fieldType)) {
-            return Integer.parseInt(fieldValue);
-        } else if (LONG_TYPE.equals(fieldType)) {
-            return Long.parseLong(fieldValue);
-        } else if (FLOAT_TYPE.equals(fieldType)) {
-            return Float.parseFloat(fieldValue);
-        } else if (DOUBLE_TYPE.equals(fieldType)) {
-            return Double.parseDouble(fieldValue);
-        } else if (STRING_TYPE.equals(fieldType)) {
-            return fieldValue;
-        } else if (LocalTimeType.LOCAL_DATE_TYPE.equals(fieldType)) {
-            LocalDateTime localDateTime = parseDate(fieldValue);
-            return localDateTime.toLocalDate();
-        } else if (LocalTimeType.LOCAL_TIME_TYPE.equals(fieldType)) {
-            LocalDateTime localDateTime = parseDate(fieldValue);
-            return localDateTime.toLocalTime();
-        } else if (LocalTimeType.LOCAL_DATE_TIME_TYPE.equals(fieldType)) {
-            return parseDate(fieldValue);
-        } else if (fieldType instanceof DecimalType) {
-            return new BigDecimal(fieldValue);
-        } else if (fieldType instanceof ArrayType) {
-            ArrayType<?, ?> arrayType = (ArrayType<?, ?>) fieldType;
-            BasicType<?> elementType = arrayType.getElementType();
-            List<String> stringList = JsonUtils.toList(fieldValue, String.class);
-            Object arr = Array.newInstance(elementType.getTypeClass(), stringList.size());
-            for (int i = 0; i < stringList.size(); i++) {
-                Object convertValue = convertValue(elementType, stringList.get(i));
-                Array.set(arr, i, convertValue);
-            }
-            return arr;
-        } else if (fieldType instanceof MapType) {
-            MapType<?, ?> mapType = (MapType<?, ?>) fieldType;
-            SeaTunnelDataType<?> keyType = mapType.getKeyType();
-
-            SeaTunnelDataType<?> valueType = mapType.getValueType();
-            Map<String, String> stringMap =
-                    mapper.readValue(fieldValue, new TypeReference<Map<String, String>>() {});
-            Map<Object, Object> convertMap = new HashMap<>();
-            for (Map.Entry<String, String> entry : stringMap.entrySet()) {
-                Object convertKey = convertValue(keyType, entry.getKey());
-                Object convertValue =
convertValue(valueType, entry.getValue()); - convertMap.put(convertKey, convertValue); - } - return convertMap; - } else if (fieldType instanceof PrimitiveByteArrayType) { - return Base64.getDecoder().decode(fieldValue); - } else if (VOID_TYPE.equals(fieldType) || fieldType == null) { - return null; - } else { - throw new EasysearchConnectorException( - UNSUPPORTED_DATA_TYPE, "Unexpected value: " + fieldType); - } - } - - private LocalDateTime parseDate(String fieldValue) { - // handle strings of timestamp type - try { - long ts = Long.parseLong(fieldValue); - return LocalDateTime.ofInstant(Instant.ofEpochMilli(ts), ZoneId.systemDefault()); - } catch (NumberFormatException e) { - // no op - } - String formatDate = fieldValue.replace("T", " "); - if (fieldValue.length() == "yyyyMMdd".length() - || fieldValue.length() == "yyyy-MM-dd".length()) { - formatDate = fieldValue + " 00:00:00"; - } - DateTimeFormatter dateTimeFormatter = dateTimeFormatterMap.get(formatDate.length()); - if (dateTimeFormatter == null) { - throw new EasysearchConnectorException( - UNSUPPORTED_OPERATION, "unsupported date format"); - } - return LocalDateTime.parse(formatDate, dateTimeFormatter); - } - - Object recursiveGet(Map collect, String keyWithRecursive) { - Object value = null; - boolean isFirst = true; - for (String key : keyWithRecursive.split("\\.")) { - if (isFirst) { - value = collect.get(key); - isFirst = false; - } else if (value instanceof ObjectNode) { - value = ((ObjectNode) value).get(key); - } - } - return value; - } -} diff --git a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/serialize/source/EasysearchRecord.java b/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/serialize/source/EasysearchRecord.java deleted file mode 100644 index d62a5024a29..00000000000 --- a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/serialize/source/EasysearchRecord.java +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
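The `parseDate` helper above tries the value as epoch milliseconds first, then normalizes the string ("T" becomes a space, date-only values get a midnight time) and picks a formatter keyed by the normalized string's length. A self-contained sketch of that heuristic (only one formatter shown; not the connector's exact table):

```java
import java.time.Instant;
import java.time.LocalDateTime;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.util.HashMap;
import java.util.Map;

public class DateParseDemo {
    private static final Map<Integer, DateTimeFormatter> BY_LENGTH = new HashMap<>();
    static {
        BY_LENGTH.put("yyyy-MM-dd HH:mm:ss".length(),
                DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"));
    }

    // Epoch millis first; otherwise normalize and dispatch on string length.
    static LocalDateTime parse(String value) {
        try {
            long ts = Long.parseLong(value);
            return LocalDateTime.ofInstant(Instant.ofEpochMilli(ts), ZoneId.systemDefault());
        } catch (NumberFormatException ignored) {
            // not a timestamp, fall through to pattern-based parsing
        }
        String normalized = value.replace("T", " ");
        if (value.length() == "yyyy-MM-dd".length()) {
            normalized = value + " 00:00:00";
        }
        DateTimeFormatter f = BY_LENGTH.get(normalized.length());
        if (f == null) {
            throw new IllegalArgumentException("unsupported date format: " + value);
        }
        return LocalDateTime.parse(normalized, f);
    }

    public static void main(String[] args) {
        System.out.println(parse("2023-10-01T12:30:45"));
        System.out.println(parse("1696156245000"));
    }
}
```

Keying the formatter by length rather than trying each pattern keeps the lookup O(1), at the cost of rejecting formats whose length collides with none of the registered patterns.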
- */ - -package org.apache.seatunnel.connectors.seatunnel.easysearch.serialize.source; - -import lombok.AllArgsConstructor; -import lombok.Getter; -import lombok.ToString; - -import java.util.List; -import java.util.Map; - -@Getter -@ToString -@AllArgsConstructor -public class EasysearchRecord { - private Map doc; - private List source; -} diff --git a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/serialize/source/SeaTunnelRowDeserializer.java b/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/serialize/source/SeaTunnelRowDeserializer.java deleted file mode 100644 index 1dbf178ff0c..00000000000 --- a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/serialize/source/SeaTunnelRowDeserializer.java +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.seatunnel.connectors.seatunnel.easysearch.serialize.source; - -import org.apache.seatunnel.api.table.type.SeaTunnelRow; - -public interface SeaTunnelRowDeserializer { - - SeaTunnelRow deserialize(EasysearchRecord rowRecord); -} diff --git a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/sink/EasysearchSink.java b/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/sink/EasysearchSink.java deleted file mode 100644 index d96f7eb6b56..00000000000 --- a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/sink/EasysearchSink.java +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
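`EasysearchRecord` carries the raw document as a nested map, and the deserializer's `recursiveGet` walks dot-separated field names into that structure. A plain-Java sketch of the same traversal over nested maps (the connector's version walks Jackson `ObjectNode`s instead):

```java
import java.util.HashMap;
import java.util.Map;

public class DottedPathDemo {
    // Walk "a.b.c" through nested maps; returns null when any hop is missing.
    @SuppressWarnings("unchecked")
    static Object recursiveGet(Map<String, Object> doc, String dottedKey) {
        Object value = doc;
        for (String key : dottedKey.split("\\.")) {
            if (!(value instanceof Map)) {
                return null;
            }
            value = ((Map<String, Object>) value).get(key);
        }
        return value;
    }

    public static void main(String[] args) {
        Map<String, Object> address = new HashMap<>();
        address.put("city", "Berlin");
        Map<String, Object> doc = new HashMap<>();
        doc.put("address", address);
        System.out.println(recursiveGet(doc, "address.city")); // Berlin
    }
}
```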
- */ - -package org.apache.seatunnel.connectors.seatunnel.easysearch.sink; - -import org.apache.seatunnel.shade.com.typesafe.config.Config; - -import org.apache.seatunnel.api.common.PrepareFailException; -import org.apache.seatunnel.api.sink.SeaTunnelSink; -import org.apache.seatunnel.api.sink.SinkWriter; -import org.apache.seatunnel.api.table.type.SeaTunnelRow; -import org.apache.seatunnel.api.table.type.SeaTunnelRowType; -import org.apache.seatunnel.connectors.seatunnel.easysearch.state.EasysearchAggregatedCommitInfo; -import org.apache.seatunnel.connectors.seatunnel.easysearch.state.EasysearchCommitInfo; -import org.apache.seatunnel.connectors.seatunnel.easysearch.state.EasysearchSinkState; - -import com.google.auto.service.AutoService; - -import static org.apache.seatunnel.connectors.seatunnel.easysearch.config.SinkConfig.MAX_BATCH_SIZE; -import static org.apache.seatunnel.connectors.seatunnel.easysearch.config.SinkConfig.MAX_RETRY_COUNT; - -@AutoService(SeaTunnelSink.class) -public class EasysearchSink - implements SeaTunnelSink< - SeaTunnelRow, - EasysearchSinkState, - EasysearchCommitInfo, - EasysearchAggregatedCommitInfo> { - - private Config pluginConfig; - private SeaTunnelRowType seaTunnelRowType; - - private int maxBatchSize = MAX_BATCH_SIZE.defaultValue(); - - private int maxRetryCount = MAX_RETRY_COUNT.defaultValue(); - - @Override - public String getPluginName() { - return "Easysearch"; - } - - @Override - public void prepare(Config pluginConfig) throws PrepareFailException { - this.pluginConfig = pluginConfig; - if (pluginConfig.hasPath(MAX_BATCH_SIZE.key())) { - maxBatchSize = pluginConfig.getInt(MAX_BATCH_SIZE.key()); - } - if (pluginConfig.hasPath(MAX_RETRY_COUNT.key())) { - maxRetryCount = pluginConfig.getInt(MAX_RETRY_COUNT.key()); - } - } - - @Override - public void setTypeInfo(SeaTunnelRowType seaTunnelRowType) { - this.seaTunnelRowType = seaTunnelRowType; - } - - @Override - public SinkWriter createWriter( - SinkWriter.Context context) { - return new EasysearchSinkWriter( - context, seaTunnelRowType, pluginConfig, maxBatchSize, maxRetryCount); - } -} diff --git a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/sink/EasysearchSinkFactory.java b/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/sink/EasysearchSinkFactory.java deleted file mode 100644 index 4322bce5143..00000000000 --- a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/sink/EasysearchSinkFactory.java +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
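The sink's `prepare()` follows a common defaulting pattern: start from an option's default value and override it only when the key is present in the user config. A minimal sketch of that logic (hypothetical key name and default; plain Java rather than the shaded Config API):

```java
import java.util.Map;

public class ConfigDefaultDemo {
    static final int MAX_BATCH_SIZE_DEFAULT = 10; // hypothetical default

    // Use the default unless the user config carries an explicit override.
    static int resolveBatchSize(Map<String, Object> userConfig) {
        Object v = userConfig.get("max_batch_size");
        return v instanceof Number ? ((Number) v).intValue() : MAX_BATCH_SIZE_DEFAULT;
    }

    public static void main(String[] args) {
        System.out.println(resolveBatchSize(Map.of()));                      // 10
        System.out.println(resolveBatchSize(Map.of("max_batch_size", 500))); // 500
    }
}
```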
- */ - -package org.apache.seatunnel.connectors.seatunnel.easysearch.sink; - -import org.apache.seatunnel.api.configuration.util.OptionRule; -import org.apache.seatunnel.api.table.factory.Factory; -import org.apache.seatunnel.api.table.factory.TableSinkFactory; - -import com.google.auto.service.AutoService; - -import static org.apache.seatunnel.connectors.seatunnel.easysearch.config.EzsClusterConnectionConfig.HOSTS; -import static org.apache.seatunnel.connectors.seatunnel.easysearch.config.EzsClusterConnectionConfig.PASSWORD; -import static org.apache.seatunnel.connectors.seatunnel.easysearch.config.EzsClusterConnectionConfig.TLS_KEY_STORE_PASSWORD; -import static org.apache.seatunnel.connectors.seatunnel.easysearch.config.EzsClusterConnectionConfig.TLS_KEY_STORE_PATH; -import static org.apache.seatunnel.connectors.seatunnel.easysearch.config.EzsClusterConnectionConfig.TLS_TRUST_STORE_PASSWORD; -import static org.apache.seatunnel.connectors.seatunnel.easysearch.config.EzsClusterConnectionConfig.TLS_TRUST_STORE_PATH; -import static org.apache.seatunnel.connectors.seatunnel.easysearch.config.EzsClusterConnectionConfig.TLS_VERIFY_CERTIFICATE; -import static org.apache.seatunnel.connectors.seatunnel.easysearch.config.EzsClusterConnectionConfig.TLS_VERIFY_HOSTNAME; -import static org.apache.seatunnel.connectors.seatunnel.easysearch.config.EzsClusterConnectionConfig.USERNAME; -import static org.apache.seatunnel.connectors.seatunnel.easysearch.config.SinkConfig.KEY_DELIMITER; -import static org.apache.seatunnel.connectors.seatunnel.easysearch.config.SinkConfig.MAX_BATCH_SIZE; -import static org.apache.seatunnel.connectors.seatunnel.easysearch.config.SinkConfig.MAX_RETRY_COUNT; -import static org.apache.seatunnel.connectors.seatunnel.easysearch.config.SinkConfig.PRIMARY_KEYS; -import static org.apache.seatunnel.connectors.seatunnel.easysearch.config.SourceConfig.INDEX; - -@AutoService(Factory.class) -public class EasysearchSinkFactory implements TableSinkFactory { - @Override - public String factoryIdentifier() { - return "Easysearch"; - } - - @Override - public OptionRule optionRule() { - return OptionRule.builder() - .required(HOSTS, INDEX) - .optional( - PRIMARY_KEYS, - KEY_DELIMITER, - USERNAME, - PASSWORD, - MAX_RETRY_COUNT, - MAX_BATCH_SIZE, - TLS_VERIFY_CERTIFICATE, - TLS_VERIFY_HOSTNAME, - TLS_KEY_STORE_PATH, - TLS_KEY_STORE_PASSWORD, - TLS_TRUST_STORE_PATH, - TLS_TRUST_STORE_PASSWORD) - .build(); - } -} diff --git a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/sink/EasysearchSinkWriter.java b/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/sink/EasysearchSinkWriter.java deleted file mode 100644 index 18bc5dfd57f..00000000000 --- a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/sink/EasysearchSinkWriter.java +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.seatunnel.connectors.seatunnel.easysearch.sink; - -import org.apache.seatunnel.shade.com.typesafe.config.Config; - -import org.apache.seatunnel.api.sink.SinkWriter; -import org.apache.seatunnel.api.table.type.RowKind; -import org.apache.seatunnel.api.table.type.SeaTunnelRow; -import org.apache.seatunnel.api.table.type.SeaTunnelRowType; -import org.apache.seatunnel.common.utils.RetryUtils; -import org.apache.seatunnel.common.utils.RetryUtils.RetryMaterial; -import org.apache.seatunnel.connectors.seatunnel.easysearch.client.EasysearchClient; -import org.apache.seatunnel.connectors.seatunnel.easysearch.dto.BulkResponse; -import org.apache.seatunnel.connectors.seatunnel.easysearch.dto.IndexInfo; -import org.apache.seatunnel.connectors.seatunnel.easysearch.exception.EasysearchConnectorErrorCode; -import org.apache.seatunnel.connectors.seatunnel.easysearch.exception.EasysearchConnectorException; -import org.apache.seatunnel.connectors.seatunnel.easysearch.serialize.EasysearchRowSerializer; -import org.apache.seatunnel.connectors.seatunnel.easysearch.serialize.SeaTunnelRowSerializer; -import org.apache.seatunnel.connectors.seatunnel.easysearch.state.EasysearchCommitInfo; -import org.apache.seatunnel.connectors.seatunnel.easysearch.state.EasysearchSinkState; - -import lombok.extern.slf4j.Slf4j; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Optional; - -import static org.apache.seatunnel.connectors.seatunnel.easysearch.exception.EasysearchConnectorErrorCode.SQL_OPERATION_FAILED; - -/** EasysearchSinkWriter is a sink writer that will write {@link SeaTunnelRow} to Easysearch. 
*/ -@Slf4j -public class EasysearchSinkWriter - implements SinkWriter { - - private static final long DEFAULT_SLEEP_TIME_MS = 200L; - private final SinkWriter.Context context; - private final int maxBatchSize; - private final SeaTunnelRowSerializer seaTunnelRowSerializer; - private final List requestEzsList; - private EasysearchClient ezsClient; - private RetryMaterial retryMaterial; - - public EasysearchSinkWriter( - SinkWriter.Context context, - SeaTunnelRowType seaTunnelRowType, - Config pluginConfig, - int maxBatchSize, - int maxRetryCount) { - this.context = context; - this.maxBatchSize = maxBatchSize; - - IndexInfo indexInfo = new IndexInfo(pluginConfig); - ezsClient = EasysearchClient.createInstance(pluginConfig); - this.seaTunnelRowSerializer = new EasysearchRowSerializer(indexInfo, seaTunnelRowType); - - this.requestEzsList = new ArrayList<>(maxBatchSize); - this.retryMaterial = - new RetryMaterial(maxRetryCount, true, exception -> true, DEFAULT_SLEEP_TIME_MS); - } - - @Override - public void write(SeaTunnelRow element) { - if (RowKind.UPDATE_BEFORE.equals(element.getRowKind())) { - return; - } - - String indexRequestRow = seaTunnelRowSerializer.serializeRow(element); - requestEzsList.add(indexRequestRow); - if (requestEzsList.size() >= maxBatchSize) { - bulkEzsWithRetry(this.ezsClient, this.requestEzsList); - } - } - - @Override - public Optional prepareCommit() { - bulkEzsWithRetry(this.ezsClient, this.requestEzsList); - return Optional.empty(); - } - - @Override - public void abortPrepare() {} - - public synchronized void bulkEzsWithRetry( - EasysearchClient ezsClient, List requestEzsList) { - try { - RetryUtils.retryWithException( - () -> { - if (requestEzsList.size() > 0) { - String requestBody = String.join("\n", requestEzsList) + "\n"; - BulkResponse bulkResponse = ezsClient.bulk(requestBody); - if (bulkResponse.isErrors()) { - throw new EasysearchConnectorException( - EasysearchConnectorErrorCode.BULK_RESPONSE_ERROR, - "bulk ezs error: " + bulkResponse.getResponse()); - } - return bulkResponse; - } - return null; - }, - retryMaterial); - requestEzsList.clear(); - } catch (Exception e) { - throw new EasysearchConnectorException( - SQL_OPERATION_FAILED, "Easysearch execute batch statement error", e); - } - } - - @Override - public void close() throws IOException { - bulkEzsWithRetry(this.ezsClient, this.requestEzsList); - ezsClient.close(); - } -} diff --git a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/source/EasysearchSource.java b/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/source/EasysearchSource.java deleted file mode 100644 index 0650186961b..00000000000 --- a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/source/EasysearchSource.java +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
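The writer above buffers serialized rows and flushes them as one newline-delimited bulk request once `maxBatchSize` is reached, retrying failed flushes a bounded number of times and clearing the buffer only after success. A simplified sketch of that buffer-and-retry shape (the `Flusher` callback is a hypothetical stand-in for the Easysearch client):

```java
import java.util.ArrayList;
import java.util.List;

public class BulkRetryDemo {
    interface Flusher { void flush(String bulkBody) throws Exception; }

    private final List<String> buffer = new ArrayList<>();
    private final int maxBatchSize;
    private final int maxRetryCount;
    private final Flusher flusher;

    BulkRetryDemo(int maxBatchSize, int maxRetryCount, Flusher flusher) {
        this.maxBatchSize = maxBatchSize;
        this.maxRetryCount = maxRetryCount;
        this.flusher = flusher;
    }

    void write(String serializedRow) throws Exception {
        buffer.add(serializedRow);
        if (buffer.size() >= maxBatchSize) {
            flushWithRetry();
        }
    }

    // Join buffered rows into one bulk body; retry with a short sleep between
    // attempts, and clear the buffer only once the flush succeeds.
    void flushWithRetry() throws Exception {
        if (buffer.isEmpty()) {
            return;
        }
        String body = String.join("\n", buffer) + "\n";
        Exception last = null;
        for (int attempt = 0; attempt <= maxRetryCount; attempt++) {
            try {
                flusher.flush(body);
                buffer.clear();
                return;
            } catch (Exception e) {
                last = e;
                Thread.sleep(200L);
            }
        }
        throw last;
    }
}
```

Note that rows with `RowKind.UPDATE_BEFORE` are dropped before buffering in the real writer, so an update is applied as a single upsert rather than a delete-then-insert pair.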
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.seatunnel.connectors.seatunnel.easysearch.source; - -import org.apache.seatunnel.shade.com.typesafe.config.Config; - -import org.apache.seatunnel.api.common.PrepareFailException; -import org.apache.seatunnel.api.source.Boundedness; -import org.apache.seatunnel.api.source.SeaTunnelSource; -import org.apache.seatunnel.api.source.SourceReader; -import org.apache.seatunnel.api.source.SourceSplitEnumerator; -import org.apache.seatunnel.api.source.SupportColumnProjection; -import org.apache.seatunnel.api.source.SupportParallelism; -import org.apache.seatunnel.api.table.catalog.CatalogTableUtil; -import org.apache.seatunnel.api.table.catalog.schema.TableSchemaOptions; -import org.apache.seatunnel.api.table.type.SeaTunnelDataType; -import org.apache.seatunnel.api.table.type.SeaTunnelRow; -import org.apache.seatunnel.api.table.type.SeaTunnelRowType; -import org.apache.seatunnel.connectors.seatunnel.easysearch.catalog.EasysearchDataTypeConvertor; -import org.apache.seatunnel.connectors.seatunnel.easysearch.client.EasysearchClient; -import org.apache.seatunnel.connectors.seatunnel.easysearch.config.SourceConfig; - -import org.apache.commons.collections4.CollectionUtils; - -import com.google.auto.service.AutoService; -import com.google.common.collect.Lists; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Map; - -@AutoService(SeaTunnelSource.class) -public class EasysearchSource - implements SeaTunnelSource, - SupportParallelism, - SupportColumnProjection { - - private Config pluginConfig; - - private SeaTunnelRowType rowTypeInfo; - - private List source; - - @Override - public String getPluginName() { - return "Easysearch"; - } - - @Override - public void prepare(Config pluginConfig) throws PrepareFailException { - this.pluginConfig = pluginConfig; - if (pluginConfig.hasPath(TableSchemaOptions.SCHEMA.key())) { - // todo: We need to remove the schema in EZS. 
- rowTypeInfo = CatalogTableUtil.buildWithConfig(pluginConfig).getSeaTunnelRowType(); - source = Arrays.asList(rowTypeInfo.getFieldNames()); - } else { - if (pluginConfig.hasPath(SourceConfig.SOURCE.key())) { - source = pluginConfig.getStringList(SourceConfig.SOURCE.key()); - } else { - source = Lists.newArrayList(); - } - EasysearchClient ezsClient = EasysearchClient.createInstance(this.pluginConfig); - Map ezsFieldType = - ezsClient.getFieldTypeMapping( - pluginConfig.getString(SourceConfig.INDEX.key()), source); - ezsClient.close(); - EasysearchDataTypeConvertor easySearchDataTypeConvertor = - new EasysearchDataTypeConvertor(); - if (CollectionUtils.isEmpty(source)) { - List keys = new ArrayList<>(ezsFieldType.keySet()); - SeaTunnelDataType[] fieldTypes = new SeaTunnelDataType[keys.size()]; - for (int i = 0; i < keys.size(); i++) { - String esType = ezsFieldType.get(keys.get(i)); - SeaTunnelDataType seaTunnelDataType = - easySearchDataTypeConvertor.toSeaTunnelType(keys.get(i), esType); - fieldTypes[i] = seaTunnelDataType; - } - rowTypeInfo = new SeaTunnelRowType(keys.toArray(new String[0]), fieldTypes); - } else { - SeaTunnelDataType[] fieldTypes = new SeaTunnelDataType[source.size()]; - for (int i = 0; i < source.size(); i++) { - String esType = ezsFieldType.get(source.get(i)); - SeaTunnelDataType seaTunnelDataType = - easySearchDataTypeConvertor.toSeaTunnelType(source.get(i), esType); - fieldTypes[i] = seaTunnelDataType; - } - rowTypeInfo = new SeaTunnelRowType(source.toArray(new String[0]), fieldTypes); - } - } - } - - @Override - public Boundedness getBoundedness() { - return Boundedness.BOUNDED; - } - - @Override - public SeaTunnelDataType getProducedType() { - return this.rowTypeInfo; - } - - @Override - public SourceReader createReader( - SourceReader.Context readerContext) { - return new EasysearchSourceReader(readerContext, pluginConfig, rowTypeInfo); - } - - @Override - public SourceSplitEnumerator createEnumerator( - SourceSplitEnumerator.Context enumeratorContext) { - return new EasysearchSourceSplitEnumerator(enumeratorContext, pluginConfig, source); - } - - @Override - public SourceSplitEnumerator restoreEnumerator( - SourceSplitEnumerator.Context enumeratorContext, - EasysearchSourceState sourceState) { - return new EasysearchSourceSplitEnumerator( - enumeratorContext, sourceState, pluginConfig, source); - } -} diff --git a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/source/EasysearchSourceFactory.java b/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/source/EasysearchSourceFactory.java deleted file mode 100644 index 3bfa0324cde..00000000000 --- a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/source/EasysearchSourceFactory.java +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.seatunnel.connectors.seatunnel.easysearch.source; - -import org.apache.seatunnel.api.configuration.util.OptionRule; -import org.apache.seatunnel.api.source.SeaTunnelSource; -import org.apache.seatunnel.api.table.catalog.schema.TableSchemaOptions; -import org.apache.seatunnel.api.table.factory.Factory; -import org.apache.seatunnel.api.table.factory.TableSourceFactory; - -import com.google.auto.service.AutoService; - -import static org.apache.seatunnel.connectors.seatunnel.easysearch.config.EzsClusterConnectionConfig.HOSTS; -import static org.apache.seatunnel.connectors.seatunnel.easysearch.config.EzsClusterConnectionConfig.PASSWORD; -import static org.apache.seatunnel.connectors.seatunnel.easysearch.config.EzsClusterConnectionConfig.TLS_KEY_STORE_PASSWORD; -import static org.apache.seatunnel.connectors.seatunnel.easysearch.config.EzsClusterConnectionConfig.TLS_KEY_STORE_PATH; -import static org.apache.seatunnel.connectors.seatunnel.easysearch.config.EzsClusterConnectionConfig.TLS_TRUST_STORE_PASSWORD; -import static org.apache.seatunnel.connectors.seatunnel.easysearch.config.EzsClusterConnectionConfig.TLS_TRUST_STORE_PATH; -import static org.apache.seatunnel.connectors.seatunnel.easysearch.config.EzsClusterConnectionConfig.TLS_VERIFY_CERTIFICATE; -import static org.apache.seatunnel.connectors.seatunnel.easysearch.config.EzsClusterConnectionConfig.TLS_VERIFY_HOSTNAME; -import static org.apache.seatunnel.connectors.seatunnel.easysearch.config.EzsClusterConnectionConfig.USERNAME; -import static org.apache.seatunnel.connectors.seatunnel.easysearch.config.SourceConfig.INDEX; -import static org.apache.seatunnel.connectors.seatunnel.easysearch.config.SourceConfig.QUERY; -import static org.apache.seatunnel.connectors.seatunnel.easysearch.config.SourceConfig.SCROLL_SIZE; -import static org.apache.seatunnel.connectors.seatunnel.easysearch.config.SourceConfig.SCROLL_TIME; -import static org.apache.seatunnel.connectors.seatunnel.easysearch.config.SourceConfig.SOURCE; - -@AutoService(Factory.class) -public class EasysearchSourceFactory implements TableSourceFactory { - @Override - public String factoryIdentifier() { - return "Easysearch"; - } - - @Override - public OptionRule optionRule() { - return OptionRule.builder() - .required(HOSTS, INDEX) - .optional( - USERNAME, - PASSWORD, - SCROLL_TIME, - SCROLL_SIZE, - QUERY, - TLS_VERIFY_CERTIFICATE, - TLS_VERIFY_HOSTNAME, - TLS_KEY_STORE_PATH, - TLS_KEY_STORE_PASSWORD, - TLS_TRUST_STORE_PATH, - TLS_TRUST_STORE_PASSWORD) - .exclusive(SOURCE, TableSchemaOptions.SCHEMA) - .build(); - } - - @Override - public Class getSourceClass() { - return EasysearchSource.class; - } -} diff --git a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/source/EasysearchSourceReader.java b/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/source/EasysearchSourceReader.java deleted file mode 100644 index 17c396772a3..00000000000 --- 
a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/source/EasysearchSourceReader.java +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.seatunnel.connectors.seatunnel.easysearch.source; - -import org.apache.seatunnel.shade.com.typesafe.config.Config; - -import org.apache.seatunnel.api.source.Collector; -import org.apache.seatunnel.api.source.SourceReader; -import org.apache.seatunnel.api.table.type.SeaTunnelRow; -import org.apache.seatunnel.api.table.type.SeaTunnelRowType; -import org.apache.seatunnel.connectors.seatunnel.easysearch.client.EasysearchClient; -import org.apache.seatunnel.connectors.seatunnel.easysearch.dto.source.ScrollResult; -import org.apache.seatunnel.connectors.seatunnel.easysearch.dto.source.SourceIndexInfo; -import org.apache.seatunnel.connectors.seatunnel.easysearch.serialize.source.DefaultSeaTunnelRowDeserializer; -import org.apache.seatunnel.connectors.seatunnel.easysearch.serialize.source.EasysearchRecord; -import org.apache.seatunnel.connectors.seatunnel.easysearch.serialize.source.SeaTunnelRowDeserializer; - -import lombok.extern.slf4j.Slf4j; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Deque; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; - -@Slf4j -public class EasysearchSourceReader implements SourceReader { - - private final SeaTunnelRowDeserializer deserializer; - private final long pollNextWaitTime = 1000L; - private final Config pluginConfig; - SourceReader.Context context; - Deque splits = new LinkedList<>(); - boolean noMoreSplit; - private EasysearchClient ezsClient; - - public EasysearchSourceReader( - SourceReader.Context context, Config pluginConfig, SeaTunnelRowType rowTypeInfo) { - this.context = context; - this.pluginConfig = pluginConfig; - this.deserializer = new DefaultSeaTunnelRowDeserializer(rowTypeInfo); - } - - @Override - public void open() { - ezsClient = EasysearchClient.createInstance(this.pluginConfig); - } - - @Override - public void close() throws IOException { - ezsClient.close(); - } - - @Override - public void pollNext(Collector output) throws Exception { - synchronized (output.getCheckpointLock()) { - EasysearchSourceSplit split = splits.poll(); - if (split != null) { - SourceIndexInfo sourceIndexInfo = split.getSourceIndexInfo(); - ScrollResult scrollResult = - ezsClient.searchByScroll( - sourceIndexInfo.getIndex(), - sourceIndexInfo.getSource(), - sourceIndexInfo.getQuery(), - sourceIndexInfo.getScrollTime(), - sourceIndexInfo.getScrollSize()); - outputFromScrollResult(scrollResult, sourceIndexInfo.getSource(), output); - while (scrollResult.getDocs() != null && scrollResult.getDocs().size() > 0) { - 
scrollResult = - ezsClient.searchWithScrollId( - scrollResult.getScrollId(), sourceIndexInfo.getScrollTime()); - outputFromScrollResult(scrollResult, sourceIndexInfo.getSource(), output); - } - } else if (noMoreSplit) { - // signal to the source that we have reached the end of the data. - log.info("Closed the bounded Easysearch source"); - context.signalNoMoreElement(); - } else { - Thread.sleep(pollNextWaitTime); - } - } - } - - private void outputFromScrollResult( - ScrollResult scrollResult, List source, Collector output) { - for (Map doc : scrollResult.getDocs()) { - SeaTunnelRow seaTunnelRow = deserializer.deserialize(new EasysearchRecord(doc, source)); - output.collect(seaTunnelRow); - } - } - - @Override - public List snapshotState(long checkpointId) throws Exception { - return new ArrayList<>(splits); - } - - @Override - public void addSplits(List splits) { - this.splits.addAll(splits); - } - - @Override - public void handleNoMoreSplits() { - noMoreSplit = true; - } - - @Override - public void notifyCheckpointComplete(long checkpointId) throws Exception {} -} diff --git a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/source/EasysearchSourceSplit.java b/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/source/EasysearchSourceSplit.java deleted file mode 100644 index 12dfbf7bbb9..00000000000 --- a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/source/EasysearchSourceSplit.java +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.seatunnel.connectors.seatunnel.easysearch.source; - -import org.apache.seatunnel.api.source.SourceSplit; -import org.apache.seatunnel.connectors.seatunnel.easysearch.dto.source.SourceIndexInfo; - -import lombok.AllArgsConstructor; -import lombok.Getter; -import lombok.ToString; - -@ToString -@AllArgsConstructor -public class EasysearchSourceSplit implements SourceSplit { - - private static final long serialVersionUID = -1L; - - private String splitId; - - @Getter private SourceIndexInfo sourceIndexInfo; - - @Override - public String splitId() { - return splitId; - } -} diff --git a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/source/EasysearchSourceSplitEnumerator.java b/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/source/EasysearchSourceSplitEnumerator.java deleted file mode 100644 index ea789737bbc..00000000000 --- a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/source/EasysearchSourceSplitEnumerator.java +++ /dev/null @@ -1,214 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.seatunnel.connectors.seatunnel.easysearch.source; - -import org.apache.seatunnel.shade.com.typesafe.config.Config; - -import org.apache.seatunnel.api.source.SourceSplitEnumerator; -import org.apache.seatunnel.connectors.seatunnel.easysearch.client.EasysearchClient; -import org.apache.seatunnel.connectors.seatunnel.easysearch.config.SourceConfig; -import org.apache.seatunnel.connectors.seatunnel.easysearch.dto.source.IndexDocsCount; -import org.apache.seatunnel.connectors.seatunnel.easysearch.dto.source.SourceIndexInfo; -import org.apache.seatunnel.connectors.seatunnel.easysearch.exception.EasysearchConnectorException; - -import lombok.extern.slf4j.Slf4j; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.stream.Collectors; - -import static org.apache.seatunnel.connectors.seatunnel.easysearch.exception.EasysearchConnectorErrorCode.UNSUPPORTED_OPERATION; - -@Slf4j -public class EasysearchSourceSplitEnumerator - implements SourceSplitEnumerator { - - private final Object stateLock = new Object(); - private final SourceSplitEnumerator.Context context; - private final Config pluginConfig; - private final Map> pendingSplit; - private final List source; - private EasysearchClient ezsClient; - private volatile boolean shouldEnumerate; - - public EasysearchSourceSplitEnumerator( - SourceSplitEnumerator.Context context, - Config pluginConfig, - List source) { - this(context, null, pluginConfig, source); - } - - public EasysearchSourceSplitEnumerator( - SourceSplitEnumerator.Context context, - EasysearchSourceState sourceState, - Config pluginConfig, - List source) { - this.context = context; - this.pluginConfig = pluginConfig; - this.pendingSplit = new HashMap<>(); - this.shouldEnumerate = sourceState == null; - if (sourceState != null) { - this.shouldEnumerate = sourceState.isShouldEnumerate(); - this.pendingSplit.putAll(sourceState.getPendingSplit()); - } - this.source = source; - } - - private static int getSplitOwner(String tp, int numReaders) { - return (tp.hashCode() & Integer.MAX_VALUE) % numReaders; - } - - @Override - public void open() { - ezsClient = EasysearchClient.createInstance(pluginConfig); - } - - @Override - public void run() { - Set readers = context.registeredReaders(); - if (shouldEnumerate) { - List newSplits = getEasysearchSplit(); - - synchronized (stateLock) { - addPendingSplit(newSplits); - shouldEnumerate = false; - } - - assignSplit(readers); - } - - log.debug( - "No more splits to assign." 
+ " Sending NoMoreSplitsEvent to reader {}.", readers); - readers.forEach(context::signalNoMoreSplits); - } - - private void addPendingSplit(Collection splits) { - int readerCount = context.currentParallelism(); - for (EasysearchSourceSplit split : splits) { - int ownerReader = getSplitOwner(split.splitId(), readerCount); - log.info("Assigning {} to {} reader.", split, ownerReader); - pendingSplit.computeIfAbsent(ownerReader, r -> new ArrayList<>()).add(split); - } - } - - private void assignSplit(Collection readers) { - log.debug("Assign pendingSplits to readers {}", readers); - - for (int reader : readers) { - List assignmentForReader = pendingSplit.remove(reader); - if (assignmentForReader != null && !assignmentForReader.isEmpty()) { - log.info("Assign splits {} to reader {}", assignmentForReader, reader); - try { - context.assignSplit(reader, assignmentForReader); - } catch (Exception e) { - log.error( - "Failed to assign splits {} to reader {}", - assignmentForReader, - reader, - e); - pendingSplit.put(reader, assignmentForReader); - } - } - } - } - - private List getEasysearchSplit() { - List splits = new ArrayList<>(); - String scrollTime = SourceConfig.SCROLL_TIME.defaultValue(); - if (pluginConfig.hasPath(SourceConfig.SCROLL_TIME.key())) { - scrollTime = pluginConfig.getString(SourceConfig.SCROLL_TIME.key()); - } - int scrollSize = SourceConfig.SCROLL_SIZE.defaultValue(); - if (pluginConfig.hasPath(SourceConfig.SCROLL_SIZE.key())) { - scrollSize = pluginConfig.getInt(SourceConfig.SCROLL_SIZE.key()); - } - Map query = SourceConfig.QUERY.defaultValue(); - if (pluginConfig.hasPath(SourceConfig.QUERY.key())) { - query = (Map) pluginConfig.getAnyRef(SourceConfig.QUERY.key()); - } - - List indexDocsCounts = - ezsClient.getIndexDocsCount(pluginConfig.getString(SourceConfig.INDEX.key())); - indexDocsCounts = - indexDocsCounts.stream() - .filter(x -> x.getDocsCount() != null && x.getDocsCount() > 0) - .sorted(Comparator.comparingLong(IndexDocsCount::getDocsCount)) - .collect(Collectors.toList()); - for (IndexDocsCount indexDocsCount : indexDocsCounts) { - splits.add( - new EasysearchSourceSplit( - String.valueOf(indexDocsCount.getIndex().hashCode()), - new SourceIndexInfo( - indexDocsCount.getIndex(), - source, - query, - scrollTime, - scrollSize))); - } - return splits; - } - - @Override - public void close() throws IOException { - ezsClient.close(); - } - - @Override - public void addSplitsBack(List splits, int subtaskId) { - if (!splits.isEmpty()) { - addPendingSplit(splits); - assignSplit(Collections.singletonList(subtaskId)); - } - } - - @Override - public int currentUnassignedSplitSize() { - return pendingSplit.size(); - } - - @Override - public void handleSplitRequest(int subtaskId) { - throw new EasysearchConnectorException( - UNSUPPORTED_OPERATION, "Unsupported handleSplitRequest: " + subtaskId); - } - - @Override - public void registerReader(int subtaskId) { - log.debug("Register reader {} to IoTDBSourceSplitEnumerator.", subtaskId); - if (!pendingSplit.isEmpty()) { - assignSplit(Collections.singletonList(subtaskId)); - } - } - - @Override - public EasysearchSourceState snapshotState(long checkpointId) throws Exception { - synchronized (stateLock) { - return new EasysearchSourceState(shouldEnumerate, pendingSplit); - } - } - - @Override - public void notifyCheckpointComplete(long checkpointId) throws Exception {} -} diff --git a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/source/EasysearchSourceState.java 
b/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/source/EasysearchSourceState.java deleted file mode 100644 index 78fa9c0edc5..00000000000 --- a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/source/EasysearchSourceState.java +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.seatunnel.connectors.seatunnel.easysearch.source; - -import lombok.AllArgsConstructor; -import lombok.Getter; - -import java.io.Serializable; -import java.util.List; -import java.util.Map; - -@AllArgsConstructor -@Getter -public class EasysearchSourceState implements Serializable { - private boolean shouldEnumerate; - private Map> pendingSplit; -} diff --git a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/state/EasysearchAggregatedCommitInfo.java b/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/state/EasysearchAggregatedCommitInfo.java deleted file mode 100644 index 16175201278..00000000000 --- a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/state/EasysearchAggregatedCommitInfo.java +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
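Split-to-reader assignment in the enumerator is a stable hash partition: `(splitId.hashCode() & Integer.MAX_VALUE) % numReaders`, where the mask clears the sign bit so the modulo never sees a negative value. A quick demonstration:

```java
public class SplitOwnerDemo {
    // Mask the sign bit, then bucket by reader count; the same split id
    // always lands on the same reader for a fixed parallelism.
    static int getSplitOwner(String splitId, int numReaders) {
        return (splitId.hashCode() & Integer.MAX_VALUE) % numReaders;
    }

    public static void main(String[] args) {
        for (String id : new String[] {"index-a", "index-b", "index-c"}) {
            System.out.println(id + " -> reader " + getSplitOwner(id, 2));
        }
    }
}
```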
- */ - -package org.apache.seatunnel.connectors.seatunnel.easysearch.state; - -import java.io.Serializable; - -/** Todo: we need to add a default */ -public class EasysearchAggregatedCommitInfo implements Serializable {} diff --git a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/state/EasysearchCommitInfo.java b/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/state/EasysearchCommitInfo.java deleted file mode 100644 index 9d524c0869c..00000000000 --- a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/state/EasysearchCommitInfo.java +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.seatunnel.connectors.seatunnel.easysearch.state; - -import lombok.AllArgsConstructor; -import lombok.Data; - -import java.io.Serializable; -import java.util.Properties; - -@Data -@AllArgsConstructor -public class EasysearchCommitInfo implements Serializable { - - private final String transactionId; - private final Properties kafkaProperties; - private final long producerId; - private final short epoch; -} diff --git a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/state/EasysearchSinkState.java b/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/state/EasysearchSinkState.java deleted file mode 100644 index 9863b848355..00000000000 --- a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/state/EasysearchSinkState.java +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.seatunnel.connectors.seatunnel.easysearch.state; - -import java.io.Serializable; - -public class EasysearchSinkState implements Serializable {} diff --git a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/util/SSLUtils.java b/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/util/SSLUtils.java deleted file mode 100644 index 95b5b35b989..00000000000 --- a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/util/SSLUtils.java +++ /dev/null @@ -1,165 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.seatunnel.connectors.seatunnel.easysearch.util; - -import io.airlift.security.pem.PemReader; - -import javax.net.ssl.KeyManager; -import javax.net.ssl.KeyManagerFactory; -import javax.net.ssl.SSLContext; -import javax.net.ssl.TrustManager; -import javax.net.ssl.TrustManagerFactory; -import javax.net.ssl.X509TrustManager; -import javax.security.auth.x500.X500Principal; - -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.security.GeneralSecurityException; -import java.security.KeyStore; -import java.security.cert.Certificate; -import java.security.cert.CertificateExpiredException; -import java.security.cert.CertificateNotYetValidException; -import java.security.cert.X509Certificate; -import java.util.Arrays; -import java.util.List; -import java.util.Optional; - -import static java.util.Collections.list; - -@SuppressWarnings("MagicNumber") -public final class SSLUtils { - - public static Optional buildSSLContext( - Optional keyStorePath, - Optional keyStorePassword, - Optional trustStorePath, - Optional trustStorePassword) - throws GeneralSecurityException, IOException { - if (!keyStorePath.isPresent() && !trustStorePath.isPresent()) { - return Optional.empty(); - } - return Optional.of( - createSSLContext( - keyStorePath, keyStorePassword, trustStorePath, trustStorePassword)); - } - - private static SSLContext createSSLContext( - Optional keyStorePath, - Optional keyStorePassword, - Optional trustStorePath, - Optional trustStorePassword) - throws GeneralSecurityException, IOException { - // load KeyStore if configured and get KeyManagers - KeyStore keyStore = null; - KeyManager[] keyManagers = null; - if (keyStorePath.isPresent()) { - File keyStoreFile = new File(keyStorePath.get()); - char[] keyManagerPassword; - try { - // attempt to read the key store as a PEM file - keyStore = PemReader.loadKeyStore(keyStoreFile, keyStoreFile, keyStorePassword); - // for PEM encoded keys, the password is used to decrypt the specific key (and does - // 
not protect the keystore itself) - keyManagerPassword = new char[0]; - } catch (IOException | GeneralSecurityException ignored) { - keyManagerPassword = keyStorePassword.map(String::toCharArray).orElse(null); - - keyStore = KeyStore.getInstance(KeyStore.getDefaultType()); - try (InputStream in = new FileInputStream(keyStoreFile)) { - keyStore.load(in, keyManagerPassword); - } - } - validateCertificates(keyStore); - KeyManagerFactory keyManagerFactory = - KeyManagerFactory.getInstance(KeyManagerFactory.getDefaultAlgorithm()); - keyManagerFactory.init(keyStore, keyManagerPassword); - keyManagers = keyManagerFactory.getKeyManagers(); - } - - // load TrustStore if configured, otherwise use KeyStore - KeyStore trustStore = keyStore; - if (trustStorePath.isPresent()) { - File trustStoreFile = new File(trustStorePath.get()); - trustStore = loadTrustStore(trustStoreFile, trustStorePassword); - } - - // create TrustManagerFactory - TrustManagerFactory trustManagerFactory = - TrustManagerFactory.getInstance(TrustManagerFactory.getDefaultAlgorithm()); - trustManagerFactory.init(trustStore); - - // get X509TrustManager - TrustManager[] trustManagers = trustManagerFactory.getTrustManagers(); - if (trustManagers.length != 1 || !(trustManagers[0] instanceof X509TrustManager)) { - throw new RuntimeException( - "Unexpected default trust managers:" + Arrays.toString(trustManagers)); - } - // create SSLContext - SSLContext result = SSLContext.getInstance("SSL"); - result.init(keyManagers, trustManagers, null); - return result; - } - - private static KeyStore loadTrustStore(File trustStorePath, Optional trustStorePassword) - throws IOException, GeneralSecurityException { - KeyStore trustStore = KeyStore.getInstance(KeyStore.getDefaultType()); - try { - // attempt to read the trust store as a PEM file - List certificateChain = PemReader.readCertificateChain(trustStorePath); - if (!certificateChain.isEmpty()) { - trustStore.load(null, null); - for (X509Certificate certificate : certificateChain) { - X500Principal principal = certificate.getSubjectX500Principal(); - trustStore.setCertificateEntry(principal.getName(), certificate); - } - return trustStore; - } - } catch (IOException | GeneralSecurityException ignored) { - // ignored - } - - try (InputStream in = new FileInputStream(trustStorePath)) { - trustStore.load(in, trustStorePassword.map(String::toCharArray).orElse(null)); - } - return trustStore; - } - - private static void validateCertificates(KeyStore keyStore) throws GeneralSecurityException { - for (String alias : list(keyStore.aliases())) { - if (!keyStore.isKeyEntry(alias)) { - continue; - } - Certificate certificate = keyStore.getCertificate(alias); - if (!(certificate instanceof X509Certificate)) { - continue; - } - - try { - ((X509Certificate) certificate).checkValidity(); - } catch (CertificateExpiredException e) { - throw new CertificateExpiredException( - "KeyStore certificate is expired: " + e.getMessage()); - } catch (CertificateNotYetValidException e) { - throw new CertificateNotYetValidException( - "KeyStore certificate is not yet valid: " + e.getMessage()); - } - } - } -} diff --git a/seatunnel-connectors-v2/connector-easysearch/src/test/java/org/apache/seatunnel/connectors/seatunnel/easysearch/EasysearchFactoryTest.java b/seatunnel-connectors-v2/connector-easysearch/src/test/java/org/apache/seatunnel/connectors/seatunnel/easysearch/EasysearchFactoryTest.java deleted file mode 100644 index 6da684a5d08..00000000000 --- 
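The SSLUtils class removed above follows a standard JDK recipe: load a KeyStore, derive key and trust managers from it, and initialize an SSLContext. Below is a minimal sketch of the truststore-only path using JDK APIs alone; the removed class additionally accepted PEM input via airlift's PemReader, fell back between the two formats, and validated certificate dates, all of which this sketch omits. Class and method names here are illustrative, not part of the connector.

```java
import javax.net.ssl.SSLContext;
import javax.net.ssl.TrustManagerFactory;

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.security.GeneralSecurityException;
import java.security.KeyStore;

final class TrustStoreSslContextSketch {

    // Builds an SSLContext that trusts the certificates in a JKS/PKCS12 truststore,
    // mirroring the tail of the removed createSSLContext():
    // KeyStore -> TrustManagerFactory -> SSLContext.
    static SSLContext fromTrustStore(String trustStorePath, char[] password)
            throws GeneralSecurityException, IOException {
        KeyStore trustStore = KeyStore.getInstance(KeyStore.getDefaultType());
        try (InputStream in = new FileInputStream(trustStorePath)) {
            trustStore.load(in, password);
        }

        TrustManagerFactory tmf =
                TrustManagerFactory.getInstance(TrustManagerFactory.getDefaultAlgorithm());
        tmf.init(trustStore);

        // No client key material here, so the KeyManager array is null -- the same
        // shape the removed code produced when only a truststore was configured.
        // (It requested the "SSL" protocol; "TLS" is the usual choice today.)
        SSLContext context = SSLContext.getInstance("TLS");
        context.init(null, tmf.getTrustManagers(), null);
        return context;
    }
}
```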
a/seatunnel-connectors-v2/connector-easysearch/src/test/java/org/apache/seatunnel/connectors/seatunnel/easysearch/EasysearchFactoryTest.java +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.seatunnel.connectors.seatunnel.easysearch; - -import org.apache.seatunnel.connectors.seatunnel.easysearch.sink.EasysearchSinkFactory; -import org.apache.seatunnel.connectors.seatunnel.easysearch.source.EasysearchSourceFactory; - -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.Test; - -public class EasysearchFactoryTest { - - @Test - void optionRule() { - Assertions.assertNotNull((new EasysearchSourceFactory()).optionRule()); - Assertions.assertNotNull((new EasysearchSinkFactory()).optionRule()); - } -} diff --git a/seatunnel-connectors-v2/connector-easysearch/src/test/java/org/apache/seatunnel/connectors/seatunnel/easysearch/EasysearchSourceTest.java b/seatunnel-connectors-v2/connector-easysearch/src/test/java/org/apache/seatunnel/connectors/seatunnel/easysearch/EasysearchSourceTest.java deleted file mode 100644 index 69ba405ad59..00000000000 --- a/seatunnel-connectors-v2/connector-easysearch/src/test/java/org/apache/seatunnel/connectors/seatunnel/easysearch/EasysearchSourceTest.java +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.seatunnel.connectors.seatunnel.easysearch; - -import org.apache.seatunnel.api.common.PrepareFailException; -import org.apache.seatunnel.api.table.type.BasicType; -import org.apache.seatunnel.api.table.type.SeaTunnelDataType; -import org.apache.seatunnel.api.table.type.SeaTunnelRowType; -import org.apache.seatunnel.connectors.seatunnel.easysearch.catalog.EasysearchDataTypeConvertor; - -import org.apache.commons.collections4.CollectionUtils; - -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.Test; - -import com.google.common.collect.Lists; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -public class EasysearchSourceTest { - - @Test - public void testPrepareWithEmptySource() throws PrepareFailException { - List source = Lists.newArrayList(); - - Map esFieldType = new HashMap<>(); - esFieldType.put("field1", "String"); - - SeaTunnelRowType rowTypeInfo = null; - EasysearchDataTypeConvertor EasySearchDataTypeConvertor = new EasysearchDataTypeConvertor(); - if (CollectionUtils.isEmpty(source)) { - List keys = new ArrayList<>(esFieldType.keySet()); - SeaTunnelDataType[] fieldTypes = new SeaTunnelDataType[keys.size()]; - for (int i = 0; i < keys.size(); i++) { - String esType = esFieldType.get(keys.get(i)); - SeaTunnelDataType seaTunnelDataType = - EasySearchDataTypeConvertor.toSeaTunnelType(keys.get(i), esType); - fieldTypes[i] = seaTunnelDataType; - } - rowTypeInfo = new SeaTunnelRowType(keys.toArray(new String[0]), fieldTypes); - } - - Assertions.assertNotNull(rowTypeInfo); - Assertions.assertEquals(rowTypeInfo.getFieldType(0), BasicType.STRING_TYPE); - } -} diff --git a/seatunnel-connectors-v2/connector-fake/src/main/java/org/apache/seatunnel/connectors/seatunnel/fake/utils/FakeDataRandomUtils.java b/seatunnel-connectors-v2/connector-fake/src/main/java/org/apache/seatunnel/connectors/seatunnel/fake/utils/FakeDataRandomUtils.java index 9a02eb50be2..2c4449d21f9 100644 --- a/seatunnel-connectors-v2/connector-fake/src/main/java/org/apache/seatunnel/connectors/seatunnel/fake/utils/FakeDataRandomUtils.java +++ b/seatunnel-connectors-v2/connector-fake/src/main/java/org/apache/seatunnel/connectors/seatunnel/fake/utils/FakeDataRandomUtils.java @@ -37,7 +37,7 @@ public FakeDataRandomUtils(FakeConfig fakeConfig) { } private static T randomFromList(List list) { - int index = RandomUtils.nextInt(0, list.size()); + int index = RandomUtils.nextInt(0, list.size() - 1); return list.get(index); } diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base-hadoop/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/hdfs/source/BaseHdfsFileSource.java b/seatunnel-connectors-v2/connector-file/connector-file-base-hadoop/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/hdfs/source/BaseHdfsFileSource.java index 75fbd04e68f..cd42af0b931 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-base-hadoop/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/hdfs/source/BaseHdfsFileSource.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-base-hadoop/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/hdfs/source/BaseHdfsFileSource.java @@ -102,7 +102,6 @@ public void prepare(Config pluginConfig) throws PrepareFailException { case TEXT: case JSON: case EXCEL: - case XML: SeaTunnelRowType userDefinedSchema = CatalogTableUtil.buildWithConfig(pluginConfig).getSeaTunnelRowType(); readStrategy.setSeaTunnelRowTypeInfo(userDefinedSchema); diff 
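A note on the FakeDataRandomUtils hunk above: in Apache Commons Lang 3, RandomUtils.nextInt(startInclusive, endExclusive) excludes the upper bound, so the two variants sample different index ranges. A small demonstration (the list contents are illustrative):

```java
import org.apache.commons.lang3.RandomUtils;

import java.util.Arrays;
import java.util.List;

final class NextIntBoundsDemo {
    public static void main(String[] args) {
        List<String> list = Arrays.asList("a", "b", "c");

        // Upper bound is exclusive: returns 0, 1, or 2 -- every valid index.
        int full = RandomUtils.nextInt(0, list.size());

        // Returns only 0 or 1, so the last element ("c") is never drawn.
        int clipped = RandomUtils.nextInt(0, list.size() - 1);

        System.out.println(list.get(full) + " / " + list.get(clipped));
    }
}
```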
--git a/seatunnel-connectors-v2/connector-file/connector-file-base/pom.xml b/seatunnel-connectors-v2/connector-file/connector-file-base/pom.xml index f091e7023d9..486b75939af 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-base/pom.xml +++ b/seatunnel-connectors-v2/connector-file/connector-file-base/pom.xml @@ -37,8 +37,6 @@ 4.1.2 4.1.2 3.1.4 - 2.1.4 - 2.0.0 @@ -146,18 +144,6 @@ ${hadoop-minikdc.version} test - - - org.dom4j - dom4j - ${dom4j.version} - - - - jaxen - jaxen - ${jaxen.version} - diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/config/BaseFileSourceConfig.java b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/config/BaseFileSourceConfig.java index c08a7a11def..520d40f9be2 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/config/BaseFileSourceConfig.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/config/BaseFileSourceConfig.java @@ -93,7 +93,6 @@ private CatalogTable parseCatalogTable(ReadonlyConfig readonlyConfig) { case TEXT: case JSON: case EXCEL: - case XML: readStrategy.setSeaTunnelRowTypeInfo(catalogTable.getSeaTunnelRowType()); return newCatalogTable(catalogTable, readStrategy.getActualSeaTunnelRowTypeInfo()); case ORC: diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/config/BaseSinkConfig.java b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/config/BaseSinkConfig.java index c7d4576f288..cff9fc87a98 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/config/BaseSinkConfig.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/config/BaseSinkConfig.java @@ -246,27 +246,6 @@ public class BaseSinkConfig { .noDefaultValue() .withDescription("To be written sheet name,only valid for excel files"); - public static final Option XML_ROOT_TAG = - Options.key("xml_root_tag") - .stringType() - .defaultValue("RECORDS") - .withDescription( - "Specifies the tag name of the root element within the XML file, only valid for xml files, default value is 'RECORDS'"); - - public static final Option XML_ROW_TAG = - Options.key("xml_row_tag") - .stringType() - .defaultValue("RECORD") - .withDescription( - "Specifies the tag name of the data rows within the XML file, only valid for xml files, default value is 'RECORD'"); - - public static final Option XML_USE_ATTR_FORMAT = - Options.key("xml_use_attr_format") - .booleanType() - .noDefaultValue() - .withDescription( - "Specifies whether to process data using the tag attribute format, only valid for XML files."); - public static final Option ENABLE_HEADER_WRITE = Options.key("enable_header_write") .booleanType() diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/config/BaseSourceConfigOptions.java b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/config/BaseSourceConfigOptions.java index 4e4c0bbef5f..11f9488ab4e 100644 --- 
a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/config/BaseSourceConfigOptions.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/config/BaseSourceConfigOptions.java @@ -130,20 +130,6 @@ public class BaseSourceConfigOptions { .noDefaultValue() .withDescription("To be read sheet name,only valid for excel files"); - public static final Option XML_ROW_TAG = - Options.key("xml_row_tag") - .stringType() - .noDefaultValue() - .withDescription( - "Specifies the tag name of the data rows within the XML file, only valid for XML files."); - - public static final Option XML_USE_ATTR_FORMAT = - Options.key("xml_use_attr_format") - .booleanType() - .noDefaultValue() - .withDescription( - "Specifies whether to process data using the tag attribute format, only valid for XML files."); - public static final Option FILE_FILTER_PATTERN = Options.key("file_filter_pattern") .stringType() diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/config/FileFormat.java b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/config/FileFormat.java index 52465fa48a4..1de8a5e02b4 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/config/FileFormat.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/config/FileFormat.java @@ -24,14 +24,12 @@ import org.apache.seatunnel.connectors.seatunnel.file.sink.writer.ParquetWriteStrategy; import org.apache.seatunnel.connectors.seatunnel.file.sink.writer.TextWriteStrategy; import org.apache.seatunnel.connectors.seatunnel.file.sink.writer.WriteStrategy; -import org.apache.seatunnel.connectors.seatunnel.file.sink.writer.XmlWriteStrategy; import org.apache.seatunnel.connectors.seatunnel.file.source.reader.ExcelReadStrategy; import org.apache.seatunnel.connectors.seatunnel.file.source.reader.JsonReadStrategy; import org.apache.seatunnel.connectors.seatunnel.file.source.reader.OrcReadStrategy; import org.apache.seatunnel.connectors.seatunnel.file.source.reader.ParquetReadStrategy; import org.apache.seatunnel.connectors.seatunnel.file.source.reader.ReadStrategy; import org.apache.seatunnel.connectors.seatunnel.file.source.reader.TextReadStrategy; -import org.apache.seatunnel.connectors.seatunnel.file.source.reader.XmlReadStrategy; import java.io.Serializable; @@ -102,17 +100,6 @@ public WriteStrategy getWriteStrategy(FileSinkConfig fileSinkConfig) { public ReadStrategy getReadStrategy() { return new ExcelReadStrategy(); } - }, - XML("xml") { - @Override - public WriteStrategy getWriteStrategy(FileSinkConfig fileSinkConfig) { - return new XmlWriteStrategy(fileSinkConfig); - } - - @Override - public ReadStrategy getReadStrategy() { - return new XmlReadStrategy(); - } }; private final String suffix; diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/config/FileSinkConfig.java b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/config/FileSinkConfig.java index 7fe10224b71..387b4c12710 100644 --- 
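The FileFormat hunk above shows the dispatch style the XML constant participated in: each enum constant overrides factory methods that return its own read and write strategies, so adding or removing a format is a single enum branch. A self-contained sketch of the pattern, with a placeholder interface standing in for the connector's real ReadStrategy:

```java
// Placeholder for the connector's ReadStrategy interface.
interface ReadStrategy {}

// Enum-as-factory dispatch, as in FileFormat: each constant supplies its strategy.
enum FormatSketch {
    JSON("json") {
        @Override
        ReadStrategy readStrategy() {
            return new ReadStrategy() {}; // stands in for new JsonReadStrategy()
        }
    },
    XML("xml") {
        @Override
        ReadStrategy readStrategy() {
            // Before this change, FileFormat.XML returned new XmlReadStrategy() here.
            return new ReadStrategy() {};
        }
    };

    private final String suffix;

    FormatSketch(String suffix) {
        this.suffix = suffix;
    }

    String suffix() {
        return suffix;
    }

    abstract ReadStrategy readStrategy();
}
```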
a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/config/FileSinkConfig.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/config/FileSinkConfig.java @@ -23,7 +23,6 @@ import org.apache.seatunnel.common.exception.CommonErrorCodeDeprecated; import org.apache.seatunnel.connectors.seatunnel.file.config.BaseFileSinkConfig; import org.apache.seatunnel.connectors.seatunnel.file.config.BaseSinkConfig; -import org.apache.seatunnel.connectors.seatunnel.file.config.FileFormat; import org.apache.seatunnel.connectors.seatunnel.file.config.PartitionConfig; import org.apache.seatunnel.connectors.seatunnel.file.exception.FileConnectorException; @@ -72,12 +71,6 @@ public class FileSinkConfig extends BaseFileSinkConfig implements PartitionConfi private String sheetName; - private String xmlRootTag = BaseSinkConfig.XML_ROOT_TAG.defaultValue(); - - private String xmlRowTag = BaseSinkConfig.XML_ROW_TAG.defaultValue(); - - private Boolean xmlUseAttrFormat; - public FileSinkConfig(@NonNull Config config, @NonNull SeaTunnelRowType seaTunnelRowTypeInfo) { super(config); checkArgument( @@ -191,25 +184,5 @@ public FileSinkConfig(@NonNull Config config, @NonNull SeaTunnelRowType seaTunne if (config.hasPath(BaseSinkConfig.SHEET_NAME.key())) { this.sheetName = config.getString(BaseSinkConfig.SHEET_NAME.key()); } - - if (FileFormat.XML - .name() - .equalsIgnoreCase(config.getString(BaseSinkConfig.FILE_FORMAT_TYPE.key()))) { - if (!config.hasPath(BaseSinkConfig.XML_USE_ATTR_FORMAT.key())) { - throw new FileConnectorException( - CommonErrorCodeDeprecated.ILLEGAL_ARGUMENT, - "User must define xml_use_attr_format when file_format_type is xml"); - } - - this.xmlUseAttrFormat = config.getBoolean(BaseSinkConfig.XML_USE_ATTR_FORMAT.key()); - - if (config.hasPath(BaseSinkConfig.XML_ROOT_TAG.key())) { - this.xmlRootTag = config.getString(BaseSinkConfig.XML_ROOT_TAG.key()); - } - - if (config.hasPath(BaseSinkConfig.XML_ROW_TAG.key())) { - this.xmlRowTag = config.getString(BaseSinkConfig.XML_ROW_TAG.key()); - } - } } } diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/util/XmlWriter.java b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/util/XmlWriter.java deleted file mode 100644 index 2617817f7d7..00000000000 --- a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/util/XmlWriter.java +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.seatunnel.connectors.seatunnel.file.sink.util; - -import org.apache.seatunnel.api.table.type.SeaTunnelDataType; -import org.apache.seatunnel.api.table.type.SeaTunnelRow; -import org.apache.seatunnel.api.table.type.SeaTunnelRowType; -import org.apache.seatunnel.common.exception.CommonErrorCodeDeprecated; -import org.apache.seatunnel.common.utils.JsonUtils; -import org.apache.seatunnel.connectors.seatunnel.file.exception.FileConnectorException; -import org.apache.seatunnel.connectors.seatunnel.file.sink.config.FileSinkConfig; - -import org.dom4j.Document; -import org.dom4j.DocumentHelper; -import org.dom4j.Element; -import org.dom4j.io.OutputFormat; -import org.dom4j.io.XMLWriter; - -import java.io.IOException; -import java.io.OutputStream; -import java.nio.charset.StandardCharsets; -import java.util.AbstractMap; -import java.util.List; - -/** The XmlWriter class provides functionality to write data in XML format. */ -public class XmlWriter { - - private final FileSinkConfig fileSinkConfig; - private final List sinkColumnsIndexInRow; - private final SeaTunnelRowType seaTunnelRowType; - private final Document document; - private final Element rootElement; - private final String fieldDelimiter; - private OutputFormat format; - - public XmlWriter( - FileSinkConfig fileSinkConfig, - List sinkColumnsIndexInRow, - SeaTunnelRowType seaTunnelRowType) { - this.fileSinkConfig = fileSinkConfig; - this.sinkColumnsIndexInRow = sinkColumnsIndexInRow; - this.seaTunnelRowType = seaTunnelRowType; - - this.fieldDelimiter = fileSinkConfig.getFieldDelimiter(); - - setXmlOutputFormat(); - document = DocumentHelper.createDocument(); - rootElement = document.addElement(fileSinkConfig.getXmlRootTag()); - } - - public void writeData(SeaTunnelRow seaTunnelRow) { - Element rowElement = rootElement.addElement(fileSinkConfig.getXmlRowTag()); - boolean useAttributeFormat = fileSinkConfig.getXmlUseAttrFormat(); - - sinkColumnsIndexInRow.stream() - .map( - index -> - new AbstractMap.SimpleEntry<>( - seaTunnelRowType.getFieldName(index), - convertToXmlString( - seaTunnelRow.getField(index), - seaTunnelRowType.getFieldType(index)))) - .forEach( - entry -> { - if (useAttributeFormat) { - rowElement.addAttribute(entry.getKey(), entry.getValue()); - } else { - rowElement.addElement(entry.getKey()).addText(entry.getValue()); - } - }); - } - - private String convertToXmlString(Object fieldValue, SeaTunnelDataType fieldType) { - if (fieldValue == null) { - return ""; - } - - switch (fieldType.getSqlType()) { - case STRING: - case DATE: - case TIME: - case TIMESTAMP: - case TINYINT: - case SMALLINT: - case INT: - case BIGINT: - case DOUBLE: - case FLOAT: - case DECIMAL: - case BOOLEAN: - return fieldValue.toString(); - case NULL: - return ""; - case ROW: - Object[] fields = ((SeaTunnelRow) fieldValue).getFields(); - String[] strings = new String[fields.length]; - for (int i = 0; i < fields.length; i++) { - strings[i] = - convertToXmlString( - fields[i], ((SeaTunnelRowType) fieldType).getFieldType(i)); - } - return String.join(fieldDelimiter, strings); - case MAP: - case ARRAY: - return JsonUtils.toJsonString(fieldValue); - case BYTES: - return new String((byte[]) fieldValue, StandardCharsets.UTF_8); - default: - throw new FileConnectorException( - CommonErrorCodeDeprecated.UNSUPPORTED_DATA_TYPE, - "SeaTunnel format not support this data type " + fieldType.getSqlType()); - } - } - - public void flushAndCloseXmlWriter(OutputStream output) throws IOException { - XMLWriter xmlWriter = new XMLWriter(output, 
format); - xmlWriter.write(document); - xmlWriter.close(); - } - - private void setXmlOutputFormat() { - this.format = OutputFormat.createPrettyPrint(); - this.format.setNewlines(true); - this.format.setNewLineAfterDeclaration(true); - this.format.setSuppressDeclaration(false); - this.format.setExpandEmptyElements(false); - this.format.setEncoding(StandardCharsets.UTF_8.name()); - this.format.setIndent("\t"); - } -} diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/writer/XmlWriteStrategy.java b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/writer/XmlWriteStrategy.java deleted file mode 100644 index 74fa220031d..00000000000 --- a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/writer/XmlWriteStrategy.java +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.seatunnel.connectors.seatunnel.file.sink.writer; - -import org.apache.seatunnel.api.table.type.SeaTunnelRow; -import org.apache.seatunnel.common.exception.CommonError; -import org.apache.seatunnel.connectors.seatunnel.file.exception.FileConnectorException; -import org.apache.seatunnel.connectors.seatunnel.file.sink.config.FileSinkConfig; -import org.apache.seatunnel.connectors.seatunnel.file.sink.util.XmlWriter; - -import org.apache.hadoop.fs.FSDataOutputStream; - -import java.io.IOException; -import java.util.LinkedHashMap; - -/** - * An implementation of the AbstractWriteStrategy class that writes data in XML format. - * - *

This strategy stores multiple XmlWriter instances for different files being written and - * ensures that each file is written to only once. It writes the data by passing the data row to the - * corresponding XmlWriter instance. - */ -public class XmlWriteStrategy extends AbstractWriteStrategy { - - private final LinkedHashMap beingWrittenWriter; - - public XmlWriteStrategy(FileSinkConfig fileSinkConfig) { - super(fileSinkConfig); - this.beingWrittenWriter = new LinkedHashMap<>(); - } - - @Override - public void write(SeaTunnelRow seaTunnelRow) throws FileConnectorException { - super.write(seaTunnelRow); - String filePath = getOrCreateFilePathBeingWritten(seaTunnelRow); - XmlWriter xmlDocWriter = getOrCreateXmlWriter(filePath); - xmlDocWriter.writeData(seaTunnelRow); - } - - @Override - public void finishAndCloseFile() { - this.beingWrittenWriter.forEach( - (k, v) -> { - try { - hadoopFileSystemProxy.createFile(k); - FSDataOutputStream fileOutputStream = - hadoopFileSystemProxy.getOutputStream(k); - v.flushAndCloseXmlWriter(fileOutputStream); - fileOutputStream.close(); - } catch (IOException e) { - throw CommonError.fileOperationFailed("XmlFile", "write", k, e); - } - needMoveFiles.put(k, getTargetLocation(k)); - }); - this.beingWrittenWriter.clear(); - } - - private XmlWriter getOrCreateXmlWriter(String filePath) { - return beingWrittenWriter.computeIfAbsent( - filePath, - k -> new XmlWriter(fileSinkConfig, sinkColumnsIndexInRow, seaTunnelRowType)); - } -} diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/XmlReadStrategy.java b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/XmlReadStrategy.java deleted file mode 100644 index 0752bf52a85..00000000000 --- a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/XmlReadStrategy.java +++ /dev/null @@ -1,305 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
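Taken together, the removed XmlWriter and XmlWriteStrategy reduce to a small dom4j recipe: build a Document, add one element per row under a root tag, emit each field either as an attribute or as a child element (the choice the xml_use_attr_format option controlled), and serialize with a pretty-printing XMLWriter. A condensed sketch with string-valued fields; the real writer first converted every SeaTunnel type to text via convertToXmlString:

```java
import org.dom4j.Document;
import org.dom4j.DocumentHelper;
import org.dom4j.Element;
import org.dom4j.io.OutputFormat;
import org.dom4j.io.XMLWriter;

import java.io.IOException;
import java.io.OutputStream;
import java.util.Map;

final class XmlRowWriteSketch {

    // rootTag/rowTag correspond to the removed xml_root_tag and xml_row_tag options.
    static void write(
            OutputStream out,
            String rootTag,
            String rowTag,
            boolean useAttrFormat,
            Iterable<Map<String, String>> rows)
            throws IOException {
        Document document = DocumentHelper.createDocument();
        Element root = document.addElement(rootTag);

        for (Map<String, String> row : rows) {
            Element rowElement = root.addElement(rowTag);
            for (Map.Entry<String, String> field : row.entrySet()) {
                if (useAttrFormat) {
                    // <RECORD name="Ivan" .../>
                    rowElement.addAttribute(field.getKey(), field.getValue());
                } else {
                    // <RECORD><name>Ivan</name>...</RECORD>
                    rowElement.addElement(field.getKey()).addText(field.getValue());
                }
            }
        }

        // The removed setXmlOutputFormat() tuned the same OutputFormat object
        // (UTF-8 encoding, tab indent, newlines).
        XMLWriter writer = new XMLWriter(out, OutputFormat.createPrettyPrint());
        writer.write(document);
        writer.close();
    }
}
```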
- */ - -package org.apache.seatunnel.connectors.seatunnel.file.source.reader; - -import org.apache.seatunnel.shade.com.fasterxml.jackson.databind.ObjectMapper; - -import org.apache.seatunnel.api.common.SeaTunnelAPIErrorCode; -import org.apache.seatunnel.api.configuration.Option; -import org.apache.seatunnel.api.source.Collector; -import org.apache.seatunnel.api.table.type.SeaTunnelDataType; -import org.apache.seatunnel.api.table.type.SeaTunnelRow; -import org.apache.seatunnel.api.table.type.SeaTunnelRowType; -import org.apache.seatunnel.api.table.type.SqlType; -import org.apache.seatunnel.common.exception.CommonErrorCodeDeprecated; -import org.apache.seatunnel.common.utils.DateTimeUtils; -import org.apache.seatunnel.common.utils.DateUtils; -import org.apache.seatunnel.common.utils.TimeUtils; -import org.apache.seatunnel.connectors.seatunnel.file.config.BaseSourceConfigOptions; -import org.apache.seatunnel.connectors.seatunnel.file.config.HadoopConf; -import org.apache.seatunnel.connectors.seatunnel.file.exception.FileConnectorErrorCode; -import org.apache.seatunnel.connectors.seatunnel.file.exception.FileConnectorException; - -import org.apache.commons.collections4.CollectionUtils; -import org.apache.commons.lang3.ArrayUtils; -import org.apache.commons.lang3.StringUtils; - -import org.dom4j.Document; -import org.dom4j.DocumentException; -import org.dom4j.Element; -import org.dom4j.Node; -import org.dom4j.io.SAXReader; - -import lombok.SneakyThrows; -import lombok.extern.slf4j.Slf4j; - -import java.io.IOException; -import java.math.BigDecimal; -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Map; -import java.util.function.Function; -import java.util.stream.Collectors; -import java.util.stream.IntStream; - -/** The XmlReadStrategy class is used to read data from XML files in SeaTunnel. */ -@Slf4j -public class XmlReadStrategy extends AbstractReadStrategy { - - private String tableRowName; - private Boolean useAttrFormat; - private String delimiter; - - private int fieldCount; - - private DateUtils.Formatter dateFormat; - private DateTimeUtils.Formatter datetimeFormat; - private TimeUtils.Formatter timeFormat; - - private final ObjectMapper objectMapper = new ObjectMapper(); - - @Override - public void init(HadoopConf conf) { - super.init(conf); - preCheckAndInitializeConfiguration(); - } - - @Override - public void read(String path, String tableId, Collector output) - throws IOException, FileConnectorException { - Map partitionsMap = parsePartitionsByPath(path); - SAXReader saxReader = new SAXReader(); - Document document; - try { - document = saxReader.read(hadoopFileSystemProxy.getInputStream(path)); - } catch (DocumentException e) { - throw new FileConnectorException( - FileConnectorErrorCode.FILE_READ_FAILED, "Failed to read xml file: " + path, e); - } - Element rootElement = document.getRootElement(); - - fieldCount = - isMergePartition - ? seaTunnelRowTypeWithPartition.getTotalFields() - : seaTunnelRowType.getTotalFields(); - - rootElement - .selectNodes(getXPathExpression(tableRowName)) - .forEach( - node -> { - SeaTunnelRow seaTunnelRow = new SeaTunnelRow(fieldCount); - - List fields = - new ArrayList<>( - (useAttrFormat - ? 
((Element) node).attributes() - : node.selectNodes("./*"))) - .stream() - .filter( - field -> - ArrayUtils.contains( - seaTunnelRowType - .getFieldNames(), - field.getName())) - .collect(Collectors.toList()); - - if (CollectionUtils.isEmpty(fields)) return; - - fields.forEach( - field -> { - int fieldIndex = - ArrayUtils.indexOf( - seaTunnelRowType.getFieldNames(), - field.getName()); - seaTunnelRow.setField( - fieldIndex, - convert( - field.getText(), - seaTunnelRowType - .getFieldTypes()[fieldIndex])); - }); - - if (isMergePartition) { - int partitionIndex = seaTunnelRowType.getTotalFields(); - for (String value : partitionsMap.values()) { - seaTunnelRow.setField(partitionIndex++, value); - } - } - - seaTunnelRow.setTableId(tableId); - output.collect(seaTunnelRow); - }); - } - - @Override - public SeaTunnelRowType getSeaTunnelRowTypeInfo(String path) throws FileConnectorException { - throw new FileConnectorException( - CommonErrorCodeDeprecated.UNSUPPORTED_OPERATION, - "User must defined schema for xml file type"); - } - - @Override - public void setSeaTunnelRowTypeInfo(SeaTunnelRowType seaTunnelRowType) { - if (ArrayUtils.isEmpty(seaTunnelRowType.getFieldNames()) - || ArrayUtils.isEmpty(seaTunnelRowType.getFieldTypes())) { - throw new FileConnectorException( - CommonErrorCodeDeprecated.ILLEGAL_ARGUMENT, - "Schema information is undefined or misconfigured, please check your configuration file."); - } - - if (readColumns.isEmpty()) { - this.seaTunnelRowType = seaTunnelRowType; - this.seaTunnelRowTypeWithPartition = - mergePartitionTypes(fileNames.get(0), seaTunnelRowType); - } else { - if (readColumns.retainAll(Arrays.asList(seaTunnelRowType.getFieldNames()))) { - log.warn( - "The read columns configuration will be filtered by the schema configuration, this may cause the actual results to be inconsistent with expectations. 
This is due to read columns not being a subset of the schema, " - + "maybe you should check the schema and read_columns!"); - } - int[] indexes = new int[readColumns.size()]; - String[] fields = new String[readColumns.size()]; - SeaTunnelDataType[] types = new SeaTunnelDataType[readColumns.size()]; - for (int i = 0; i < readColumns.size(); i++) { - indexes[i] = seaTunnelRowType.indexOf(readColumns.get(i)); - fields[i] = seaTunnelRowType.getFieldName(indexes[i]); - types[i] = seaTunnelRowType.getFieldType(indexes[i]); - } - this.seaTunnelRowType = new SeaTunnelRowType(fields, types); - this.seaTunnelRowTypeWithPartition = - mergePartitionTypes(fileNames.get(0), this.seaTunnelRowType); - } - } - - @SneakyThrows - private Object convert(String fieldValue, SeaTunnelDataType fieldType) { - if (StringUtils.isBlank(fieldValue)) { - return ""; - } - SqlType sqlType = fieldType.getSqlType(); - switch (sqlType) { - case STRING: - return fieldValue; - case DATE: - return DateUtils.parse(fieldValue, dateFormat); - case TIME: - return TimeUtils.parse(fieldValue, timeFormat); - case TIMESTAMP: - return DateTimeUtils.parse(fieldValue, datetimeFormat); - case TINYINT: - return (byte) Double.parseDouble(fieldValue); - case SMALLINT: - return (short) Double.parseDouble(fieldValue); - case INT: - return (int) Double.parseDouble(fieldValue); - case BIGINT: - return (long) Double.parseDouble(fieldValue); - case DOUBLE: - return Double.parseDouble(fieldValue); - case FLOAT: - return (float) Double.parseDouble(fieldValue); - case DECIMAL: - return BigDecimal.valueOf(Double.parseDouble(fieldValue)); - case BOOLEAN: - return Boolean.parseBoolean(fieldValue); - case BYTES: - return fieldValue.getBytes(StandardCharsets.UTF_8); - case NULL: - return ""; - case ROW: - String[] context = fieldValue.split(delimiter); - SeaTunnelRowType ft = (SeaTunnelRowType) fieldType; - SeaTunnelRow row = new SeaTunnelRow(context.length); - IntStream.range(0, context.length) - .forEach(i -> row.setField(i, convert(context[i], ft.getFieldTypes()[i]))); - return row; - case MAP: - case ARRAY: - return objectMapper.readValue(fieldValue, fieldType.getTypeClass()); - default: - throw new FileConnectorException( - CommonErrorCodeDeprecated.UNSUPPORTED_DATA_TYPE, - String.format("Unsupported data type: %s", sqlType)); - } - } - - private String getXPathExpression(String tableRowIdentification) { - return String.format("//%s", tableRowIdentification); - } - - /** Performs pre-checks and initialization of the configuration for reading XML files. 
*/ - private void preCheckAndInitializeConfiguration() { - this.tableRowName = getPrimitiveConfigValue(BaseSourceConfigOptions.XML_ROW_TAG); - this.useAttrFormat = getPrimitiveConfigValue(BaseSourceConfigOptions.XML_USE_ATTR_FORMAT); - - // Check mandatory configurations - if (StringUtils.isEmpty(tableRowName) || useAttrFormat == null) { - throw new FileConnectorException( - SeaTunnelAPIErrorCode.CONFIG_VALIDATION_FAILED, - String.format( - "Mandatory configurations '%s' and '%s' must be specified when reading XML files.", - BaseSourceConfigOptions.XML_ROW_TAG.key(), - BaseSourceConfigOptions.XML_USE_ATTR_FORMAT.key())); - } - - this.delimiter = getPrimitiveConfigValue(BaseSourceConfigOptions.FIELD_DELIMITER); - - this.dateFormat = - getComplexDateConfigValue( - BaseSourceConfigOptions.DATE_FORMAT, DateUtils.Formatter::parse); - this.timeFormat = - getComplexDateConfigValue( - BaseSourceConfigOptions.TIME_FORMAT, TimeUtils.Formatter::parse); - this.datetimeFormat = - getComplexDateConfigValue( - BaseSourceConfigOptions.DATETIME_FORMAT, DateTimeUtils.Formatter::parse); - } - - /** - * Retrieves the value of a primitive configuration option. - * - * @param option the configuration option to retrieve the value for - * @param the type of the configuration option - * @return the value of the configuration option, or the default value if the option is not set - */ - @SuppressWarnings("unchecked") - private T getPrimitiveConfigValue(Option option) { - if (!pluginConfig.hasPath(option.key())) { - return (T) option.defaultValue(); - } - return (T) pluginConfig.getAnyRef(option.key()); - } - - /** - * Retrieves the complex date configuration value for the given option. - * - * @param option The configuration option to retrieve. - * @param parser The function used to parse the configuration value. - * @param The type of the configuration value. - * @return The parsed configuration value or the default value if not found. - */ - @SuppressWarnings("unchecked") - private T getComplexDateConfigValue(Option option, Function parser) { - if (!pluginConfig.hasPath(option.key())) { - return (T) option.defaultValue(); - } - return parser.apply(pluginConfig.getString(option.key())); - } -} diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base/src/test/java/org/apache/seatunnel/connectors/seatunnel/file/writer/XmlReadStrategyTest.java b/seatunnel-connectors-v2/connector-file/connector-file-base/src/test/java/org/apache/seatunnel/connectors/seatunnel/file/writer/XmlReadStrategyTest.java deleted file mode 100644 index 0679bade2d6..00000000000 --- a/seatunnel-connectors-v2/connector-file/connector-file-base/src/test/java/org/apache/seatunnel/connectors/seatunnel/file/writer/XmlReadStrategyTest.java +++ /dev/null @@ -1,159 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
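On the reading side, the removed XmlReadStrategy loads the whole file through dom4j's SAXReader, selects row elements by XPath, and converts each field's text to the schema type. A reduced sketch of the traversal plus the parse-then-narrow numeric handling from convert(); note that XPath evaluation in dom4j requires jaxen on the classpath, the same dependency pairing dropped from connector-file-base's pom above:

```java
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;

import java.io.InputStream;
import java.util.List;

final class XmlRowReadSketch {

    // Selects every row element anywhere in the document, as the removed
    // getXPathExpression() did with String.format("//%s", rowTag).
    static void read(InputStream in, String rowTag) throws DocumentException {
        Document document = new SAXReader().read(in);
        List<Node> rows = document.getRootElement().selectNodes("//" + rowTag);
        for (Node row : rows) {
            for (Node field : row.selectNodes("./*")) {
                System.out.println(field.getName() + " = " + field.getText());
            }
        }
    }

    // convert() parsed numerics via Double.parseDouble and then narrowed,
    // so a value like "4.0" was accepted for an INT column:
    static int toInt(String fieldValue) {
        return (int) Double.parseDouble(fieldValue);
    }
}
```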
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.seatunnel.connectors.seatunnel.file.writer; - -import org.apache.seatunnel.shade.com.typesafe.config.Config; -import org.apache.seatunnel.shade.com.typesafe.config.ConfigFactory; - -import org.apache.seatunnel.api.source.Collector; -import org.apache.seatunnel.api.table.catalog.CatalogTableUtil; -import org.apache.seatunnel.api.table.type.SeaTunnelRow; -import org.apache.seatunnel.api.table.type.SeaTunnelRowType; -import org.apache.seatunnel.common.utils.DateTimeUtils; -import org.apache.seatunnel.common.utils.DateUtils; -import org.apache.seatunnel.common.utils.TimeUtils; -import org.apache.seatunnel.connectors.seatunnel.file.config.HadoopConf; -import org.apache.seatunnel.connectors.seatunnel.file.source.reader.XmlReadStrategy; - -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.Test; - -import lombok.Getter; - -import java.io.File; -import java.io.IOException; -import java.math.BigDecimal; -import java.net.URISyntaxException; -import java.net.URL; -import java.nio.file.Paths; -import java.time.LocalDate; -import java.time.LocalDateTime; -import java.time.LocalTime; -import java.util.ArrayList; -import java.util.LinkedHashMap; -import java.util.List; - -import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DEFAULT_NAME_DEFAULT; - -public class XmlReadStrategyTest { - - @Test - public void testXmlRead() throws IOException, URISyntaxException { - URL xmlFile = XmlReadStrategyTest.class.getResource("/xml/name=xmlTest/test_read.xml"); - URL conf = XmlReadStrategyTest.class.getResource("/xml/test_read_xml.conf"); - Assertions.assertNotNull(xmlFile); - Assertions.assertNotNull(conf); - String xmlFilePath = Paths.get(xmlFile.toURI()).toString(); - String confPath = Paths.get(conf.toURI()).toString(); - Config pluginConfig = ConfigFactory.parseFile(new File(confPath)); - XmlReadStrategy xmlReadStrategy = new XmlReadStrategy(); - LocalConf localConf = new LocalConf(FS_DEFAULT_NAME_DEFAULT); - xmlReadStrategy.setPluginConfig(pluginConfig); - xmlReadStrategy.init(localConf); - List fileNamesByPath = xmlReadStrategy.getFileNamesByPath(xmlFilePath); - SeaTunnelRowType userDefinedSchema = - CatalogTableUtil.buildWithConfig(pluginConfig).getSeaTunnelRowType(); - xmlReadStrategy.setSeaTunnelRowTypeInfo(userDefinedSchema); - TestCollector testCollector = new TestCollector(); - xmlReadStrategy.read(fileNamesByPath.get(0), "", testCollector); - for (SeaTunnelRow seaTunnelRow : testCollector.getRows()) { - Assertions.assertEquals(seaTunnelRow.getArity(), 15); - Assertions.assertEquals(seaTunnelRow.getField(0).getClass(), Byte.class); - Assertions.assertEquals(seaTunnelRow.getField(1).getClass(), Short.class); - Assertions.assertEquals(seaTunnelRow.getField(2).getClass(), Integer.class); - Assertions.assertEquals(seaTunnelRow.getField(3).getClass(), Long.class); - Assertions.assertEquals(seaTunnelRow.getField(4).getClass(), String.class); - Assertions.assertEquals(seaTunnelRow.getField(5).getClass(), Double.class); - Assertions.assertEquals(seaTunnelRow.getField(6).getClass(), Float.class); - Assertions.assertEquals(seaTunnelRow.getField(7).getClass(), BigDecimal.class); - Assertions.assertEquals(seaTunnelRow.getField(8).getClass(), Boolean.class); - Assertions.assertEquals(seaTunnelRow.getField(9).getClass(), LinkedHashMap.class); - Assertions.assertEquals(seaTunnelRow.getField(10).getClass(), String[].class); - 
Assertions.assertEquals(seaTunnelRow.getField(11).getClass(), LocalDate.class); - Assertions.assertEquals(seaTunnelRow.getField(12).getClass(), LocalDateTime.class); - Assertions.assertEquals(seaTunnelRow.getField(13).getClass(), LocalTime.class); - Assertions.assertEquals(seaTunnelRow.getField(14).getClass(), String.class); - - Assertions.assertEquals(seaTunnelRow.getField(0), (byte) 1); - Assertions.assertEquals(seaTunnelRow.getField(1), (short) 22); - Assertions.assertEquals(seaTunnelRow.getField(2), 333); - Assertions.assertEquals(seaTunnelRow.getField(3), 4444L); - Assertions.assertEquals(seaTunnelRow.getField(4), "DusayI"); - Assertions.assertEquals(seaTunnelRow.getField(5), 5.555); - Assertions.assertEquals(seaTunnelRow.getField(6), (float) 6.666); - Assertions.assertEquals(seaTunnelRow.getField(7), new BigDecimal("7.78")); - Assertions.assertEquals(seaTunnelRow.getField(8), Boolean.FALSE); - Assertions.assertEquals( - seaTunnelRow.getField(9), - new LinkedHashMap() { - { - put("name", "Ivan"); - put("age", "26"); - } - }); - Assertions.assertArrayEquals( - (String[]) seaTunnelRow.getField(10), new String[] {"Ivan", "Dusayi"}); - Assertions.assertEquals( - seaTunnelRow.getField(11), - DateUtils.parse("2024-01-31", DateUtils.Formatter.YYYY_MM_DD)); - Assertions.assertEquals( - seaTunnelRow.getField(12), - DateTimeUtils.parse( - "2024-01-31 16:00:48", DateTimeUtils.Formatter.YYYY_MM_DD_HH_MM_SS)); - Assertions.assertEquals( - seaTunnelRow.getField(13), - TimeUtils.parse("16:00:48", TimeUtils.Formatter.HH_MM_SS)); - Assertions.assertEquals(seaTunnelRow.getField(14), "xmlTest"); - } - } - - @Getter - public static class TestCollector implements Collector { - private final List rows = new ArrayList<>(); - - @Override - public void collect(SeaTunnelRow record) { - System.out.println(record); - rows.add(record); - } - - @Override - public Object getCheckpointLock() { - return null; - } - } - - public static class LocalConf extends HadoopConf { - private static final String HDFS_IMPL = "org.apache.hadoop.fs.LocalFileSystem"; - private static final String SCHEMA = "file"; - - public LocalConf(String hdfsNameKey) { - super(hdfsNameKey); - } - - @Override - public String getFsHdfsImpl() { - return HDFS_IMPL; - } - - @Override - public String getSchema() { - return SCHEMA; - } - } -} diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base/src/test/resources/xml/name=xmlTest/test_read.xml b/seatunnel-connectors-v2/connector-file/connector-file-base/src/test/resources/xml/name=xmlTest/test_read.xml deleted file mode 100644 index 0ffec43a150..00000000000 --- a/seatunnel-connectors-v2/connector-file/connector-file-base/src/test/resources/xml/name=xmlTest/test_read.xml +++ /dev/null @@ -1,24 +0,0 @@ -[24 lines of XML fixture markup lost during extraction] diff --git a/seatunnel-connectors-v2/connector-file/connector-file-cos/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/cos/sink/CosFileSinkFactory.java b/seatunnel-connectors-v2/connector-file/connector-file-cos/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/cos/sink/CosFileSinkFactory.java index fb45c947172..451221248d3 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-cos/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/cos/sink/CosFileSinkFactory.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-cos/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/cos/sink/CosFileSinkFactory.java @@ -67,10 +67,6 @@ public OptionRule optionRule() { BaseSinkConfig.FILE_FORMAT_TYPE, FileFormat.PARQUET, 
BaseSinkConfig.PARQUET_COMPRESS) - .conditional( - BaseSinkConfig.FILE_FORMAT_TYPE, - FileFormat.XML, - BaseSinkConfig.XML_USE_ATTR_FORMAT) .optional(BaseSinkConfig.CUSTOM_FILENAME) .conditional( BaseSinkConfig.CUSTOM_FILENAME, diff --git a/seatunnel-connectors-v2/connector-file/connector-file-cos/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/cos/source/CosFileSource.java b/seatunnel-connectors-v2/connector-file/connector-file-cos/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/cos/source/CosFileSource.java index 476a3878fe1..b2a5d931a18 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-cos/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/cos/source/CosFileSource.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-cos/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/cos/source/CosFileSource.java @@ -94,7 +94,6 @@ public void prepare(Config pluginConfig) throws PrepareFailException { case TEXT: case JSON: case EXCEL: - case XML: SeaTunnelRowType userDefinedSchema = CatalogTableUtil.buildWithConfig(pluginConfig).getSeaTunnelRowType(); readStrategy.setSeaTunnelRowTypeInfo(userDefinedSchema); diff --git a/seatunnel-connectors-v2/connector-file/connector-file-cos/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/cos/source/CosFileSourceFactory.java b/seatunnel-connectors-v2/connector-file/connector-file-cos/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/cos/source/CosFileSourceFactory.java index 6ab9ef17f48..b92c67c4c60 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-cos/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/cos/source/CosFileSourceFactory.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-cos/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/cos/source/CosFileSourceFactory.java @@ -51,19 +51,10 @@ public OptionRule optionRule() { BaseSourceConfigOptions.FILE_FORMAT_TYPE, FileFormat.TEXT, BaseSourceConfigOptions.FIELD_DELIMITER) - .conditional( - BaseSourceConfigOptions.FILE_FORMAT_TYPE, - FileFormat.XML, - BaseSourceConfigOptions.XML_ROW_TAG, - BaseSourceConfigOptions.XML_USE_ATTR_FORMAT) .conditional( BaseSourceConfigOptions.FILE_FORMAT_TYPE, Arrays.asList( - FileFormat.TEXT, - FileFormat.JSON, - FileFormat.EXCEL, - FileFormat.CSV, - FileFormat.XML), + FileFormat.TEXT, FileFormat.JSON, FileFormat.EXCEL, FileFormat.CSV), TableSchemaOptions.SCHEMA) .optional(BaseSourceConfigOptions.PARSE_PARTITION_FROM_PATH) .optional(BaseSourceConfigOptions.DATE_FORMAT) diff --git a/seatunnel-connectors-v2/connector-file/connector-file-ftp/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/ftp/sink/FtpFileSinkFactory.java b/seatunnel-connectors-v2/connector-file/connector-file-ftp/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/ftp/sink/FtpFileSinkFactory.java index f3a12d117aa..a3fbf886fbb 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-ftp/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/ftp/sink/FtpFileSinkFactory.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-ftp/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/ftp/sink/FtpFileSinkFactory.java @@ -67,10 +67,6 @@ public OptionRule optionRule() { BaseSinkConfig.FILE_FORMAT_TYPE, FileFormat.PARQUET, BaseSinkConfig.PARQUET_COMPRESS) - .conditional( - BaseSinkConfig.FILE_FORMAT_TYPE, - FileFormat.XML, - BaseSinkConfig.XML_USE_ATTR_FORMAT) .optional(BaseSinkConfig.CUSTOM_FILENAME) .conditional( 
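The option-rule hunks here, and the analogous ones for the FTP, HDFS, OSS, local, and S3 factories that follow, all drop the same two pieces: a conditional() that made xml_row_tag and xml_use_attr_format mandatory only when file_format_type was xml, and XML's membership in the schema-requiring format list. A sketch of the rule shape as it stood before this change (builder chain abbreviated; the real factories also declare path and other options):

```java
import org.apache.seatunnel.api.configuration.util.OptionRule;
import org.apache.seatunnel.connectors.seatunnel.file.config.BaseSourceConfigOptions;
import org.apache.seatunnel.connectors.seatunnel.file.config.FileFormat;

final class XmlOptionRuleSketch {
    // conditional(option, value, required...) means: when FILE_FORMAT_TYPE
    // equals XML, the trailing options must be present in the user's config.
    static OptionRule preChangeRule() {
        return OptionRule.builder()
                .optional(BaseSourceConfigOptions.FILE_FORMAT_TYPE)
                .conditional(
                        BaseSourceConfigOptions.FILE_FORMAT_TYPE,
                        FileFormat.XML,
                        BaseSourceConfigOptions.XML_ROW_TAG,
                        BaseSourceConfigOptions.XML_USE_ATTR_FORMAT)
                .build();
    }
}
```

Note that the imports reference the XML_ROW_TAG and XML_USE_ATTR_FORMAT constants this very change deletes, so the sketch compiles only against the pre-change tree.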
BaseSinkConfig.CUSTOM_FILENAME, diff --git a/seatunnel-connectors-v2/connector-file/connector-file-ftp/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/ftp/source/FtpFileSource.java b/seatunnel-connectors-v2/connector-file/connector-file-ftp/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/ftp/source/FtpFileSource.java index 19ea0c0ba09..b8ccb69d836 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-ftp/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/ftp/source/FtpFileSource.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-ftp/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/ftp/source/FtpFileSource.java @@ -99,7 +99,6 @@ public void prepare(Config pluginConfig) throws PrepareFailException { case TEXT: case JSON: case EXCEL: - case XML: SeaTunnelRowType userDefinedSchema = CatalogTableUtil.buildWithConfig(pluginConfig).getSeaTunnelRowType(); readStrategy.setSeaTunnelRowTypeInfo(userDefinedSchema); diff --git a/seatunnel-connectors-v2/connector-file/connector-file-ftp/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/ftp/source/FtpFileSourceFactory.java b/seatunnel-connectors-v2/connector-file/connector-file-ftp/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/ftp/source/FtpFileSourceFactory.java index 249deac26da..529c93a3f79 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-ftp/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/ftp/source/FtpFileSourceFactory.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-ftp/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/ftp/source/FtpFileSourceFactory.java @@ -51,19 +51,10 @@ public OptionRule optionRule() { BaseSourceConfigOptions.FILE_FORMAT_TYPE, FileFormat.TEXT, BaseSourceConfigOptions.FIELD_DELIMITER) - .conditional( - BaseSourceConfigOptions.FILE_FORMAT_TYPE, - FileFormat.XML, - BaseSourceConfigOptions.XML_ROW_TAG, - BaseSourceConfigOptions.XML_USE_ATTR_FORMAT) .conditional( BaseSourceConfigOptions.FILE_FORMAT_TYPE, Arrays.asList( - FileFormat.TEXT, - FileFormat.JSON, - FileFormat.EXCEL, - FileFormat.CSV, - FileFormat.XML), + FileFormat.TEXT, FileFormat.JSON, FileFormat.EXCEL, FileFormat.CSV), TableSchemaOptions.SCHEMA) .optional(BaseSourceConfigOptions.PARSE_PARTITION_FROM_PATH) .optional(BaseSourceConfigOptions.DATE_FORMAT) diff --git a/seatunnel-connectors-v2/connector-file/connector-file-hadoop/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/hdfs/sink/HdfsFileSinkFactory.java b/seatunnel-connectors-v2/connector-file/connector-file-hadoop/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/hdfs/sink/HdfsFileSinkFactory.java index fa2f872b0e8..f5243edbda8 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-hadoop/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/hdfs/sink/HdfsFileSinkFactory.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-hadoop/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/hdfs/sink/HdfsFileSinkFactory.java @@ -64,10 +64,6 @@ public OptionRule optionRule() { BaseSinkConfig.FILE_FORMAT_TYPE, FileFormat.PARQUET, BaseSinkConfig.PARQUET_COMPRESS) - .conditional( - BaseSinkConfig.FILE_FORMAT_TYPE, - FileFormat.XML, - BaseSinkConfig.XML_USE_ATTR_FORMAT) .optional(BaseSinkConfig.CUSTOM_FILENAME) .conditional( BaseSinkConfig.CUSTOM_FILENAME, diff --git 
a/seatunnel-connectors-v2/connector-file/connector-file-hadoop/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/hdfs/source/HdfsFileSourceFactory.java b/seatunnel-connectors-v2/connector-file/connector-file-hadoop/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/hdfs/source/HdfsFileSourceFactory.java index e02f7ad42c2..82db2773ee3 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-hadoop/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/hdfs/source/HdfsFileSourceFactory.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-hadoop/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/hdfs/source/HdfsFileSourceFactory.java @@ -48,19 +48,10 @@ public OptionRule optionRule() { BaseSourceConfigOptions.FILE_FORMAT_TYPE, FileFormat.TEXT, BaseSourceConfigOptions.FIELD_DELIMITER) - .conditional( - BaseSourceConfigOptions.FILE_FORMAT_TYPE, - FileFormat.XML, - BaseSourceConfigOptions.XML_ROW_TAG, - BaseSourceConfigOptions.XML_USE_ATTR_FORMAT) .conditional( BaseSourceConfigOptions.FILE_FORMAT_TYPE, Arrays.asList( - FileFormat.TEXT, - FileFormat.JSON, - FileFormat.EXCEL, - FileFormat.CSV, - FileFormat.XML), + FileFormat.TEXT, FileFormat.JSON, FileFormat.EXCEL, FileFormat.CSV), TableSchemaOptions.SCHEMA) .optional(BaseSourceConfigOptions.PARSE_PARTITION_FROM_PATH) .optional(BaseSourceConfigOptions.DATE_FORMAT) diff --git a/seatunnel-connectors-v2/connector-file/connector-file-jindo-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/sink/OssFileSinkFactory.java b/seatunnel-connectors-v2/connector-file/connector-file-jindo-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/sink/OssFileSinkFactory.java index 60a426ccb90..4adf578b697 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-jindo-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/sink/OssFileSinkFactory.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-jindo-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/sink/OssFileSinkFactory.java @@ -67,10 +67,6 @@ public OptionRule optionRule() { BaseSinkConfig.FILE_FORMAT_TYPE, FileFormat.PARQUET, BaseSinkConfig.PARQUET_COMPRESS) - .conditional( - BaseSinkConfig.FILE_FORMAT_TYPE, - FileFormat.XML, - BaseSinkConfig.XML_USE_ATTR_FORMAT) .optional(BaseSinkConfig.CUSTOM_FILENAME) .conditional( BaseSinkConfig.CUSTOM_FILENAME, diff --git a/seatunnel-connectors-v2/connector-file/connector-file-jindo-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/source/OssFileSource.java b/seatunnel-connectors-v2/connector-file/connector-file-jindo-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/source/OssFileSource.java index 7d73f16e7b6..857e8d89ff1 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-jindo-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/source/OssFileSource.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-jindo-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/source/OssFileSource.java @@ -95,7 +95,6 @@ public void prepare(Config pluginConfig) throws PrepareFailException { case TEXT: case JSON: case EXCEL: - case XML: SeaTunnelRowType userDefinedSchema = CatalogTableUtil.buildWithConfig(pluginConfig).getSeaTunnelRowType(); readStrategy.setSeaTunnelRowTypeInfo(userDefinedSchema); diff --git 
a/seatunnel-connectors-v2/connector-file/connector-file-jindo-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/source/OssFileSourceFactory.java b/seatunnel-connectors-v2/connector-file/connector-file-jindo-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/source/OssFileSourceFactory.java index 5a31832b33f..df640e44bd8 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-jindo-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/source/OssFileSourceFactory.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-jindo-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/source/OssFileSourceFactory.java @@ -51,19 +51,10 @@ public OptionRule optionRule() { BaseSourceConfigOptions.FILE_FORMAT_TYPE, FileFormat.TEXT, BaseSourceConfigOptions.FIELD_DELIMITER) - .conditional( - BaseSourceConfigOptions.FILE_FORMAT_TYPE, - FileFormat.XML, - BaseSourceConfigOptions.XML_ROW_TAG, - BaseSourceConfigOptions.XML_USE_ATTR_FORMAT) .conditional( BaseSourceConfigOptions.FILE_FORMAT_TYPE, Arrays.asList( - FileFormat.TEXT, - FileFormat.JSON, - FileFormat.EXCEL, - FileFormat.CSV, - FileFormat.XML), + FileFormat.TEXT, FileFormat.JSON, FileFormat.EXCEL, FileFormat.CSV), TableSchemaOptions.SCHEMA) .optional(BaseSourceConfigOptions.PARSE_PARTITION_FROM_PATH) .optional(BaseSourceConfigOptions.DATE_FORMAT) diff --git a/seatunnel-connectors-v2/connector-file/connector-file-local/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/local/sink/LocalFileSinkFactory.java b/seatunnel-connectors-v2/connector-file/connector-file-local/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/local/sink/LocalFileSinkFactory.java index 770e8866b54..f65a93f9095 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-local/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/local/sink/LocalFileSinkFactory.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-local/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/local/sink/LocalFileSinkFactory.java @@ -70,10 +70,6 @@ public OptionRule optionRule() { BaseSinkConfig.FILE_FORMAT_TYPE, FileFormat.PARQUET, BaseSinkConfig.PARQUET_COMPRESS) - .conditional( - BaseSinkConfig.FILE_FORMAT_TYPE, - FileFormat.XML, - BaseSinkConfig.XML_USE_ATTR_FORMAT) .optional(BaseSinkConfig.CUSTOM_FILENAME) .conditional( BaseSinkConfig.CUSTOM_FILENAME, diff --git a/seatunnel-connectors-v2/connector-file/connector-file-local/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/local/source/LocalFileSourceFactory.java b/seatunnel-connectors-v2/connector-file/connector-file-local/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/local/source/LocalFileSourceFactory.java index 450561a6081..44d5dab2015 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-local/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/local/source/LocalFileSourceFactory.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-local/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/local/source/LocalFileSourceFactory.java @@ -57,19 +57,10 @@ public OptionRule optionRule() { BaseSourceConfigOptions.FILE_FORMAT_TYPE, FileFormat.TEXT, BaseSourceConfigOptions.FIELD_DELIMITER) - .conditional( - BaseSourceConfigOptions.FILE_FORMAT_TYPE, - FileFormat.XML, - BaseSourceConfigOptions.XML_ROW_TAG, - BaseSourceConfigOptions.XML_USE_ATTR_FORMAT) .conditional( BaseSourceConfigOptions.FILE_FORMAT_TYPE, Arrays.asList( - FileFormat.TEXT, - 
FileFormat.JSON, - FileFormat.EXCEL, - FileFormat.CSV, - FileFormat.XML), + FileFormat.TEXT, FileFormat.JSON, FileFormat.EXCEL, FileFormat.CSV), TableSchemaOptions.SCHEMA) .optional(BaseSourceConfigOptions.PARSE_PARTITION_FROM_PATH) .optional(BaseSourceConfigOptions.DATE_FORMAT) diff --git a/seatunnel-connectors-v2/connector-file/connector-file-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/sink/OssFileSinkFactory.java b/seatunnel-connectors-v2/connector-file/connector-file-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/sink/OssFileSinkFactory.java index 5931a46977b..49b5ff8bfa4 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/sink/OssFileSinkFactory.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/sink/OssFileSinkFactory.java @@ -81,10 +81,6 @@ public OptionRule optionRule() { BaseSinkConfig.FILE_FORMAT_TYPE, FileFormat.PARQUET, BaseSinkConfig.PARQUET_COMPRESS) - .conditional( - BaseSinkConfig.FILE_FORMAT_TYPE, - FileFormat.XML, - BaseSinkConfig.XML_USE_ATTR_FORMAT) .optional(BaseSinkConfig.CUSTOM_FILENAME) .conditional( BaseSinkConfig.CUSTOM_FILENAME, diff --git a/seatunnel-connectors-v2/connector-file/connector-file-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/source/OssFileSourceFactory.java b/seatunnel-connectors-v2/connector-file/connector-file-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/source/OssFileSourceFactory.java index 6fb4a55c5d2..b332d99d472 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/source/OssFileSourceFactory.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/source/OssFileSourceFactory.java @@ -64,19 +64,10 @@ public OptionRule optionRule() { BaseSourceConfigOptions.FILE_FORMAT_TYPE, FileFormat.TEXT, BaseSourceConfigOptions.FIELD_DELIMITER) - .conditional( - BaseSourceConfigOptions.FILE_FORMAT_TYPE, - FileFormat.XML, - BaseSourceConfigOptions.XML_ROW_TAG, - BaseSourceConfigOptions.XML_USE_ATTR_FORMAT) .conditional( BaseSourceConfigOptions.FILE_FORMAT_TYPE, Arrays.asList( - FileFormat.TEXT, - FileFormat.JSON, - FileFormat.EXCEL, - FileFormat.CSV, - FileFormat.XML), + FileFormat.TEXT, FileFormat.JSON, FileFormat.EXCEL, FileFormat.CSV), TableSchemaOptions.SCHEMA) .optional(BaseSourceConfigOptions.PARSE_PARTITION_FROM_PATH) .optional(BaseSourceConfigOptions.DATE_FORMAT) diff --git a/seatunnel-connectors-v2/connector-file/connector-file-s3/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/s3/sink/S3FileSinkFactory.java b/seatunnel-connectors-v2/connector-file/connector-file-s3/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/s3/sink/S3FileSinkFactory.java index 3a4e3df2fb8..d366449356c 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-s3/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/s3/sink/S3FileSinkFactory.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-s3/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/s3/sink/S3FileSinkFactory.java @@ -86,10 +86,6 @@ public OptionRule optionRule() { BaseSinkConfig.FILE_FORMAT_TYPE, FileFormat.PARQUET, BaseSinkConfig.PARQUET_COMPRESS) - .conditional( - BaseSinkConfig.FILE_FORMAT_TYPE, - FileFormat.XML, - 
BaseSinkConfig.XML_USE_ATTR_FORMAT) .optional(BaseSinkConfig.CUSTOM_FILENAME) .conditional( BaseSinkConfig.CUSTOM_FILENAME, diff --git a/seatunnel-connectors-v2/connector-file/connector-file-s3/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/s3/source/S3FileSource.java b/seatunnel-connectors-v2/connector-file/connector-file-s3/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/s3/source/S3FileSource.java index f36e935818c..aa1e0cb952d 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-s3/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/s3/source/S3FileSource.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-s3/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/s3/source/S3FileSource.java @@ -91,7 +91,6 @@ public void prepare(Config pluginConfig) throws PrepareFailException { case TEXT: case JSON: case EXCEL: - case XML: SeaTunnelRowType userDefinedSchema = CatalogTableUtil.buildWithConfig(pluginConfig).getSeaTunnelRowType(); readStrategy.setSeaTunnelRowTypeInfo(userDefinedSchema); diff --git a/seatunnel-connectors-v2/connector-file/connector-file-s3/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/s3/source/S3FileSourceFactory.java b/seatunnel-connectors-v2/connector-file/connector-file-s3/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/s3/source/S3FileSourceFactory.java index ebd752fbf09..552bff51fc9 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-s3/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/s3/source/S3FileSourceFactory.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-s3/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/s3/source/S3FileSourceFactory.java @@ -56,19 +56,10 @@ public OptionRule optionRule() { BaseSourceConfigOptions.FILE_FORMAT_TYPE, FileFormat.TEXT, BaseSourceConfigOptions.FIELD_DELIMITER) - .conditional( - BaseSourceConfigOptions.FILE_FORMAT_TYPE, - FileFormat.XML, - BaseSourceConfigOptions.XML_ROW_TAG, - BaseSourceConfigOptions.XML_USE_ATTR_FORMAT) .conditional( BaseSourceConfigOptions.FILE_FORMAT_TYPE, Arrays.asList( - FileFormat.TEXT, - FileFormat.JSON, - FileFormat.EXCEL, - FileFormat.CSV, - FileFormat.XML), + FileFormat.TEXT, FileFormat.JSON, FileFormat.EXCEL, FileFormat.CSV), TableSchemaOptions.SCHEMA) .optional(BaseSourceConfigOptions.PARSE_PARTITION_FROM_PATH) .optional(BaseSourceConfigOptions.DATE_FORMAT) diff --git a/seatunnel-connectors-v2/connector-file/connector-file-sftp/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sftp/sink/SftpFileSinkFactory.java b/seatunnel-connectors-v2/connector-file/connector-file-sftp/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sftp/sink/SftpFileSinkFactory.java index 2dc2c29bd99..3fe6e3abea5 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-sftp/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sftp/sink/SftpFileSinkFactory.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-sftp/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sftp/sink/SftpFileSinkFactory.java @@ -67,10 +67,6 @@ public OptionRule optionRule() { BaseSinkConfig.FILE_FORMAT_TYPE, FileFormat.PARQUET, BaseSinkConfig.PARQUET_COMPRESS) - .conditional( - BaseSinkConfig.FILE_FORMAT_TYPE, - FileFormat.XML, - BaseSinkConfig.XML_USE_ATTR_FORMAT) .optional(BaseSinkConfig.CUSTOM_FILENAME) .conditional( BaseSinkConfig.CUSTOM_FILENAME, diff --git 
a/seatunnel-connectors-v2/connector-file/connector-file-sftp/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sftp/source/SftpFileSource.java b/seatunnel-connectors-v2/connector-file/connector-file-sftp/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sftp/source/SftpFileSource.java index 0ccee6c281e..5efaea3137b 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-sftp/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sftp/source/SftpFileSource.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-sftp/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sftp/source/SftpFileSource.java @@ -75,7 +75,7 @@ public void prepare(Config pluginConfig) throws PrepareFailException { if (fileFormat == FileFormat.ORC || fileFormat == FileFormat.PARQUET) { throw new FileConnectorException( CommonErrorCodeDeprecated.ILLEGAL_ARGUMENT, - "Sftp file source connector only support read [text, csv, json, xml] files"); + "Sftp file source connector only support read [text, csv, json] files"); } String path = pluginConfig.getString(SftpConfigOptions.FILE_PATH.key()); hadoopConf = SftpConf.buildWithConfig(pluginConfig); @@ -99,7 +99,6 @@ public void prepare(Config pluginConfig) throws PrepareFailException { case TEXT: case JSON: case EXCEL: - case XML: SeaTunnelRowType userDefinedSchema = CatalogTableUtil.buildWithConfig(pluginConfig).getSeaTunnelRowType(); readStrategy.setSeaTunnelRowTypeInfo(userDefinedSchema); diff --git a/seatunnel-connectors-v2/connector-file/connector-file-sftp/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sftp/source/SftpFileSourceFactory.java b/seatunnel-connectors-v2/connector-file/connector-file-sftp/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sftp/source/SftpFileSourceFactory.java index f66db3996b7..939ecc985f0 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-sftp/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sftp/source/SftpFileSourceFactory.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-sftp/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sftp/source/SftpFileSourceFactory.java @@ -51,19 +51,10 @@ public OptionRule optionRule() { BaseSourceConfigOptions.FILE_FORMAT_TYPE, FileFormat.TEXT, BaseSourceConfigOptions.FIELD_DELIMITER) - .conditional( - BaseSourceConfigOptions.FILE_FORMAT_TYPE, - FileFormat.XML, - BaseSourceConfigOptions.XML_ROW_TAG, - BaseSourceConfigOptions.XML_USE_ATTR_FORMAT) .conditional( BaseSourceConfigOptions.FILE_FORMAT_TYPE, Arrays.asList( - FileFormat.TEXT, - FileFormat.JSON, - FileFormat.EXCEL, - FileFormat.CSV, - FileFormat.XML), + FileFormat.TEXT, FileFormat.JSON, FileFormat.EXCEL, FileFormat.CSV), TableSchemaOptions.SCHEMA) .optional(BaseSourceConfigOptions.PARSE_PARTITION_FROM_PATH) .optional(BaseSourceConfigOptions.DATE_FORMAT) diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oracle/OracleTypeConverter.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oracle/OracleTypeConverter.java index 6497bf6ab55..54cdeacfd24 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oracle/OracleTypeConverter.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oracle/OracleTypeConverter.java @@ -330,12 +330,12 
@@ public BasicTypeDefine reconvert(Column column) { case STRING: if (column.getColumnLength() == null || column.getColumnLength() <= 0) { builder.columnType( - String.format("%s(%s)", ORACLE_VARCHAR2, MAX_VARCHAR_LENGTH)); - builder.dataType(ORACLE_VARCHAR2); + String.format("%s(%s)", ORACLE_NVARCHAR2, MAX_VARCHAR_LENGTH)); + builder.dataType(ORACLE_NVARCHAR2); } else if (column.getColumnLength() <= MAX_VARCHAR_LENGTH) { builder.columnType( - String.format("%s(%s)", ORACLE_VARCHAR2, column.getColumnLength())); - builder.dataType(ORACLE_VARCHAR2); + String.format("%s(%s)", ORACLE_NVARCHAR2, column.getColumnLength())); + builder.dataType(ORACLE_NVARCHAR2); } else { builder.columnType(ORACLE_CLOB); builder.dataType(ORACLE_CLOB); diff --git a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oracle/OracleTypeConverterTest.java b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oracle/OracleTypeConverterTest.java index c8ce48e3098..88ba493e07e 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oracle/OracleTypeConverterTest.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oracle/OracleTypeConverterTest.java @@ -660,8 +660,8 @@ public void testReconvertString() { BasicTypeDefine typeDefine = OracleTypeConverter.INSTANCE.reconvert(column); Assertions.assertEquals(column.getName(), typeDefine.getName()); - Assertions.assertEquals("VARCHAR2(4000)", typeDefine.getColumnType()); - Assertions.assertEquals(OracleTypeConverter.ORACLE_VARCHAR2, typeDefine.getDataType()); + Assertions.assertEquals("NVARCHAR2(4000)", typeDefine.getColumnType()); + Assertions.assertEquals(OracleTypeConverter.ORACLE_NVARCHAR2, typeDefine.getDataType()); column = PhysicalColumn.builder() @@ -674,9 +674,9 @@ public void testReconvertString() { Assertions.assertEquals(column.getName(), typeDefine.getName()); Assertions.assertEquals( String.format( - "%s(%s)", OracleTypeConverter.ORACLE_VARCHAR2, column.getColumnLength()), + "%s(%s)", OracleTypeConverter.ORACLE_NVARCHAR2, column.getColumnLength()), typeDefine.getColumnType()); - Assertions.assertEquals(OracleTypeConverter.ORACLE_VARCHAR2, typeDefine.getDataType()); + Assertions.assertEquals(OracleTypeConverter.ORACLE_NVARCHAR2, typeDefine.getDataType()); column = PhysicalColumn.builder() @@ -689,9 +689,9 @@ public void testReconvertString() { Assertions.assertEquals(column.getName(), typeDefine.getName()); Assertions.assertEquals( String.format( - "%s(%s)", OracleTypeConverter.ORACLE_VARCHAR2, column.getColumnLength()), + "%s(%s)", OracleTypeConverter.ORACLE_NVARCHAR2, column.getColumnLength()), typeDefine.getColumnType()); - Assertions.assertEquals(OracleTypeConverter.ORACLE_VARCHAR2, typeDefine.getDataType()); + Assertions.assertEquals(OracleTypeConverter.ORACLE_NVARCHAR2, typeDefine.getDataType()); column = PhysicalColumn.builder() diff --git a/seatunnel-connectors-v2/pom.xml b/seatunnel-connectors-v2/pom.xml index f69b5920879..5cc849de015 100644 --- a/seatunnel-connectors-v2/pom.xml +++ b/seatunnel-connectors-v2/pom.xml @@ -74,7 +74,6 @@ connector-rocketmq connector-amazonsqs connector-paimon - connector-easysearch
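The Oracle dialect change above reduces to one length-based rule for SeaTunnel STRING columns: an unknown or non-positive length maps to NVARCHAR2(4000), a length up to 4000 keeps NVARCHAR2 at that size, and anything longer falls back to CLOB. A minimal sketch of that rule, with hypothetical class and method names (only the thresholds mirror the patch):

    // Sketch of the new STRING -> Oracle type rule; names are illustrative,
    // not the connector's actual API.
    public final class OracleStringTypeRule {
        private static final long MAX_VARCHAR_LENGTH = 4000;

        static String oracleTypeFor(Long columnLength) {
            if (columnLength == null || columnLength <= 0) {
                // Unknown length: use the widest NVARCHAR2 the converter allows.
                return "NVARCHAR2(" + MAX_VARCHAR_LENGTH + ")";
            } else if (columnLength <= MAX_VARCHAR_LENGTH) {
                return "NVARCHAR2(" + columnLength + ")";
            } else {
                // Longer than NVARCHAR2 permits: spill over to CLOB.
                return "CLOB";
            }
        }
    }

Switching the data type from VARCHAR2 to NVARCHAR2 trades some storage for national-character-set safety; the updated tests below pin down exactly this behavior.

diff --git a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/util/RegexUtils.java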
b/seatunnel-core/seatunnel-core-starter/src/main/java/org/apache/seatunnel/core/starter/utils/SystemUtil.java similarity index 58% rename from seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/util/RegexUtils.java rename to seatunnel-core/seatunnel-core-starter/src/main/java/org/apache/seatunnel/core/starter/utils/SystemUtil.java index b953a6fb8d8..7115173c99e 100644 --- a/seatunnel-connectors-v2/connector-easysearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/easysearch/util/RegexUtils.java +++ b/seatunnel-core/seatunnel-core-starter/src/main/java/org/apache/seatunnel/core/starter/utils/SystemUtil.java @@ -15,23 +15,27 @@ * limitations under the License. */ -package org.apache.seatunnel.connectors.seatunnel.easysearch.util; +package org.apache.seatunnel.core.starter.utils; -import java.util.ArrayList; -import java.util.List; -import java.util.regex.Matcher; -import java.util.regex.Pattern; +import org.apache.commons.lang3.SystemUtils; -public class RegexUtils { +public class SystemUtil { - public static List extractDatas(String content, String regex) { - List datas = new ArrayList<>(); - Pattern pattern = Pattern.compile(regex, Pattern.DOTALL); - Matcher matcher = pattern.matcher(content); - while (matcher.find()) { - String result = matcher.group(1); - datas.add(result); + public static String GetOsType() { + String os_type = ""; + + if (SystemUtils.IS_OS_WINDOWS) { + os_type = "Windows"; + } else if (SystemUtils.IS_OS_MAC) { + os_type = "Mac"; + } else if (SystemUtils.IS_OS_LINUX) { + os_type = "Linux"; + } else if (SystemUtils.IS_OS_SOLARIS) { + os_type = "Solaris"; + } else { + os_type = "Unknown"; } - return datas; + + return os_type; } } diff --git a/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-13-starter/src/main/bin/start-seatunnel-flink-13-connector-v2.cmd b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-13-starter/src/main/bin/start-seatunnel-flink-13-connector-v2.cmd index c1cbc1d9556..981d40a07ab 100644 --- a/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-13-starter/src/main/bin/start-seatunnel-flink-13-connector-v2.cmd +++ b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-13-starter/src/main/bin/start-seatunnel-flink-13-connector-v2.cmd @@ -27,7 +27,11 @@ cd /d "%PRG_DIR%" || ( exit /b 1 ) -set "APP_DIR=%~dp0" +set "currentDir=%~dp0" +set "currentDir=%currentDir:~0,-1%" +for %%i in ("%currentDir%") do set "APP_DIR=%%~dpi" +set "APP_DIR=%APP_DIR:~0,-1%" +rem set "APP_DIR=%~dp0" set "CONF_DIR=%APP_DIR%\config" set "APP_JAR=%APP_DIR%\starter\seatunnel-flink-13-starter.jar" set "APP_MAIN=org.apache.seatunnel.core.starter.flink.FlinkStarter" @@ -51,20 +55,21 @@ if exist "%CONF_DIR%\log4j2.properties" ( ) set "CLASS_PATH=%APP_DIR%\starter\logging\*;%APP_JAR%" +set "full_java_cmd=java %JAVA_OPTS% -cp %CLASS_PATH% %APP_MAIN% %args%" -for /f "delims=" %%i in ('java %JAVA_OPTS% -cp %CLASS_PATH% %APP_MAIN% %args%') do ( - set "CMD=%%i" - setlocal disabledelayedexpansion +for /f "delims=" %%i in ('echo !full_java_cmd!') do ( + rem set "CMD=%%i" + rem setlocal disabledelayedexpansion if !errorlevel! equ 234 ( - echo !CMD! + echo %full_java_cmd% endlocal exit /b 0 ) else if !errorlevel! equ 0 ( - echo Execute SeaTunnel Flink Job: !CMD! + echo Execute SeaTunnel Flink Job: %full_java_cmd% endlocal - call !CMD! + call %full_java_cmd% ) else ( - echo !CMD! + echo %full_java_cmd% endlocal exit /b !errorlevel! 
) diff --git a/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-13-starter/src/main/java/org/apache/seatunnel/core/starter/flink/FlinkStarter.java b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-13-starter/src/main/java/org/apache/seatunnel/core/starter/flink/FlinkStarter.java index 5dc1d32cef5..edb90597c45 100644 --- a/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-13-starter/src/main/java/org/apache/seatunnel/core/starter/flink/FlinkStarter.java +++ b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-13-starter/src/main/java/org/apache/seatunnel/core/starter/flink/FlinkStarter.java @@ -22,6 +22,7 @@ import org.apache.seatunnel.core.starter.enums.EngineType; import org.apache.seatunnel.core.starter.flink.args.FlinkCommandArgs; import org.apache.seatunnel.core.starter.utils.CommandLineUtils; +import org.apache.seatunnel.core.starter.utils.SystemUtil; import java.util.ArrayList; import java.util.List; @@ -52,8 +53,44 @@ public static void main(String[] args) { @Override public List buildCommands() { List command = new ArrayList<>(); + String local_os_type = ""; + + SystemUtil my_system_util = new SystemUtil(); + local_os_type = my_system_util.GetOsType(); + + // set start command + String cmd_flink = ""; + + // Note that "flink.cmd" or "flink.bat" can be retrieved from lower versions of Flink (e.g. + // 1.0.9). + // We do not check whether this file exists on the box; the user needs to make sure it + // exists. + switch (local_os_type.toLowerCase()) { + case "windows": + cmd_flink = "%FLINK_HOME%/bin/flink.cmd"; + break; + case "linux": + cmd_flink = "${FLINK_HOME}/bin/flink"; + break; + case "solaris": + cmd_flink = "${FLINK_HOME}/bin/flink"; + break; + case "mac": + cmd_flink = "${FLINK_HOME}/bin/flink"; + break; + case "unknown": + cmd_flink = "error"; + break; + } + // set start command - command.add("${FLINK_HOME}/bin/flink"); + if (!(cmd_flink.equals("error"))) { + command.add(cmd_flink); + } else { + System.out.println("Error: Can not determine OS type, abort run !"); + System.exit(-1); + } + // set deploy mode, run or run-application command.add(flinkCommandArgs.getDeployMode().getDeployMode()); // set submitted target master @@ -91,6 +128,8 @@ public List buildCommands() { .filter(Objects::nonNull) .map(String::trim) .forEach(variable -> command.add("-D" + variable)); + // debug + // System.out.println("Whole command string:" + command.toString()); return command; } }
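The buildCommands() hunk above selects the Flink launcher per operating system through the new SystemUtil. Because only Windows differs from the POSIX-style platforms, the same dispatch can be sketched more compactly with commons-lang3 directly; this is a hedged alternative, not the patch's code:

    import org.apache.commons.lang3.SystemUtils;

    // Sketch only: the same decision as the patch's switch over
    // SystemUtil.GetOsType(), collapsed to boolean checks. commons-lang3 is
    // already a dependency of the new SystemUtil class.
    final class FlinkLauncherPath {
        static String resolve() {
            if (SystemUtils.IS_OS_WINDOWS) {
                // flink.cmd can be retrieved from older Flink releases; its
                // presence on the box is not verified here either.
                return "%FLINK_HOME%/bin/flink.cmd";
            }
            if (SystemUtils.IS_OS_LINUX || SystemUtils.IS_OS_MAC || SystemUtils.IS_OS_SOLARIS) {
                return "${FLINK_HOME}/bin/flink";
            }
            throw new IllegalStateException("Can not determine OS type, abort run!");
        }
    }

The patch's explicit switch keeps each platform string visible, which makes adding another OS a one-line change; the collapsed form above trades that for brevity.

diff --git a/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-15-starter/src/main/bin/start-seatunnel-flink-15-connector-v2.cmd b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-15-starter/src/main/bin/start-seatunnel-flink-15-connector-v2.cmd index ed4c1f6979e..93e7bf8589b 100644 --- a/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-15-starter/src/main/bin/start-seatunnel-flink-15-connector-v2.cmd +++ b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-15-starter/src/main/bin/start-seatunnel-flink-15-connector-v2.cmd @@ -27,7 +27,11 @@ cd /d "%PRG_DIR%" || ( exit /b 1 ) -set "APP_DIR=%~dp0" +set "currentDir=%~dp0" +set "currentDir=%currentDir:~0,-1%" +for %%i in ("%currentDir%") do set "APP_DIR=%%~dpi" +set "APP_DIR=%APP_DIR:~0,-1%" +rem set "APP_DIR=%~dp0" set "CONF_DIR=%APP_DIR%\config" set "APP_JAR=%APP_DIR%\starter\seatunnel-flink-15-starter.jar" set "APP_MAIN=org.apache.seatunnel.core.starter.flink.FlinkStarter" @@ -51,20 +55,21 @@ if exist "%CONF_DIR%\log4j2.properties" ( ) set "CLASS_PATH=%APP_DIR%\starter\logging\*;%APP_JAR%" +set "full_java_cmd=java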
%JAVA_OPTS% -cp %CLASS_PATH% %APP_MAIN% %args%" -for /f "delims=" %%i in ('java %JAVA_OPTS% -cp %CLASS_PATH% %APP_MAIN% %args%') do ( - set "CMD=%%i" - setlocal disabledelayedexpansion +for /f "delims=" %%i in ('echo !full_java_cmd!') do ( + rem set "CMD=%%i" + rem setlocal disabledelayedexpansion if !errorlevel! equ 234 ( - echo !CMD! + echo %full_java_cmd% endlocal exit /b 0 ) else if !errorlevel! equ 0 ( - echo Execute SeaTunnel Flink Job: !CMD! + echo Execute SeaTunnel Flink Job: %full_java_cmd% endlocal - call !CMD! + call %full_java_cmd% ) else ( - echo !CMD! + echo %full_java_cmd% endlocal exit /b !errorlevel! ) diff --git a/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/FlinkStarter.java b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/FlinkStarter.java index 7373cb58ed5..d598068877f 100644 --- a/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/FlinkStarter.java +++ b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/FlinkStarter.java @@ -22,6 +22,7 @@ import org.apache.seatunnel.core.starter.enums.EngineType; import org.apache.seatunnel.core.starter.flink.args.FlinkCommandArgs; import org.apache.seatunnel.core.starter.utils.CommandLineUtils; +import org.apache.seatunnel.core.starter.utils.SystemUtil; import java.util.ArrayList; import java.util.List; @@ -44,6 +45,7 @@ public class FlinkStarter implements Starter { this.appJar = Common.appStarterDir().resolve(APP_JAR_NAME).toString(); } + @SuppressWarnings("checkstyle:RegexpSingleline") public static void main(String[] args) { FlinkStarter flinkStarter = new FlinkStarter(args); System.out.println(String.join(" ", flinkStarter.buildCommands())); @@ -52,8 +54,44 @@ public static void main(String[] args) { @Override public List buildCommands() { List command = new ArrayList<>(); + String local_os_type = ""; + + SystemUtil my_system_util = new SystemUtil(); + local_os_type = my_system_util.GetOsType(); + // debug + // System.out.println("OS type:"+local_os_type); + + String cmd_flink = ""; + + // Note that "flink.cmd" or "flink.bat" can be retrieved from lower versions of Flink (e.g. + // 1.0.9). We do not check whether this file exists on the box; the user needs to make sure + // it exists.
+ switch (local_os_type.toLowerCase()) { + case "windows": + cmd_flink = "%FLINK_HOME%/bin/flink.cmd"; + break; + case "linux": + cmd_flink = "${FLINK_HOME}/bin/flink"; + break; + case "solaris": + cmd_flink = "${FLINK_HOME}/bin/flink"; + break; + case "mac": + cmd_flink = "${FLINK_HOME}/bin/flink"; + break; + case "unknown": + cmd_flink = "error"; + break; + } + // set start command - command.add("${FLINK_HOME}/bin/flink"); + if (!(cmd_flink.equals("error"))) { + command.add(cmd_flink); + } else { + System.out.println("Error: Can not determine OS type, abort run !"); + System.exit(-1); + } + // set deploy mode, run or run-application command.add(flinkCommandArgs.getDeployMode().getDeployMode()); // set submitted target master @@ -91,6 +129,8 @@ public List buildCommands() { .filter(Objects::nonNull) .map(String::trim) .forEach(variable -> command.add("-D" + variable)); + // debug + // System.out.println("Whole command string:" + command.toString()); return command; } } diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base/src/test/resources/xml/test_read_xml.conf b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/test/Resources/flink13-job-command.properties similarity index 57% rename from seatunnel-connectors-v2/connector-file/connector-file-base/src/test/resources/xml/test_read_xml.conf rename to seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/test/Resources/flink13-job-command.properties index f81534b8b78..566b8d7f0c6 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-base/src/test/resources/xml/test_read_xml.conf +++ b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/test/Resources/flink13-job-command.properties @@ -6,34 +6,20 @@ # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+# + +# This property file defines the properties used when submitting the Flink 1.13 job test via the cmd script +FLINK13_HOME=E:/Apache/flink/flink-1.13 +FLINK13_SUBMIT_JOB_CMD=flink.cmd +SEATUNNEL_HOME=E:/Apache/seatunnel/apache-seatunnel-2.3.4 +SEATUNNEL_SUBMIT_FLINK13_JOB_CMD=start-seatunnel-flink-13-connector-v2.cmd +SEATUNNEL_SUBMIT_FLINK13_JOB_JAR_LIST=E:/Apache/seatunnel/apache-seatunnel-2.3.4/starter/seatunnel-flink-13-starter.jar;E:/Apache/seatunnel/apache-seatunnel-2.3.4/starter/logging/* -{ - xml_row_tag = "RECORD" - xml_use_attr_format = true - schema = { - fields { - c_bytes = "tinyint" - c_short = "smallint" - c_int = "int" - c_bigint = "bigint" - c_string = "string" - c_double = "double" - c_float = "float" - c_decimal = "decimal(10, 2)" - c_boolean = "boolean" - c_map = "map" - c_array = "array" - c_date = "date" - c_datetime = "timestamp" - c_time = "time" - } - } -} \ No newline at end of file diff --git a/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/test/Resources/flink15-job-command.properties b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/test/Resources/flink15-job-command.properties new file mode 100644 index 00000000000..748e6df1ab4 --- /dev/null +++ b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/test/Resources/flink15-job-command.properties @@ -0,0 +1,25 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# This property file defines the properties used when submitting the Flink 1.15 job test via the cmd script +FLINK15_HOME=E:/Apache/flink/flink-1.18.1-build-from-src +FLINK15_SUBMIT_JOB_CMD=flink.cmd +SEATUNNEL_HOME=E:/Apache/seatunnel/apache-seatunnel-2.3.4 +SEATUNNEL_SUBMIT_FLINK15_JOB_CMD=start-seatunnel-flink-15-connector-v2.cmd +SEATUNNEL_SUBMIT_FLINK15_JOB_JAR_LIST=E:/Apache/seatunnel/apache-seatunnel-2.3.4/starter/seatunnel-flink-15-starter.jar;E:/Apache/seatunnel/apache-seatunnel-2.3.4/starter/logging/* + +
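These two properties files drive the new starter tests below: each test loads its file from the classpath, resolves SEATUNNEL_HOME, and assembles the .cmd invocation before running it. A condensed sketch of that flow (the helper class is hypothetical; the property keys and the v2.streaming.conf.template path come from the tests themselves):

    import java.io.IOException;
    import java.io.InputStream;
    import java.util.Properties;

    // Hypothetical helper showing the flow the tests implement inline.
    final class JobCommandProperties {
        static String submitCommand(String resource, String cmdKey) throws IOException {
            Properties prop = new Properties();
            try (InputStream in =
                    JobCommandProperties.class.getClassLoader().getResourceAsStream(resource)) {
                if (in == null) {
                    throw new IOException(resource + " not found on the test classpath");
                }
                prop.load(in);
            }
            String home = prop.getProperty("SEATUNNEL_HOME");
            // e.g. <SEATUNNEL_HOME>/bin/start-seatunnel-flink-13-connector-v2.cmd --config ...
            return home + "/bin/" + prop.getProperty(cmdKey)
                    + " --config " + home + "/config/v2.streaming.conf.template";
        }
    }

Usage would look like submitCommand("flink13-job-command.properties", "SEATUNNEL_SUBMIT_FLINK13_JOB_CMD"); the tests below spell out the same steps with explicit StringBuilder appends.

diff --git a/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/test/java/org/apache/seatunnel/core/starter/flink/TestFlinkJobCommandReturnV13.java b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/test/java/org/apache/seatunnel/core/starter/flink/TestFlinkJobCommandReturnV13.java new file mode 100644 index 00000000000..848f1a50315 --- /dev/null +++ b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/test/java/org/apache/seatunnel/core/starter/flink/TestFlinkJobCommandReturnV13.java @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership.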
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.core.starter.flink; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.io.BufferedReader; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.util.Properties; + +public class TestFlinkJobCommandReturnV13 { + @Test + public void testFlinkJobCommandReturnV13() throws Exception { + String flink_job_command = null; + String flink_job_command_return = null; + String flink_job_command_return_error = null; + String flink_home_dir = null; + String flink_submit_job_cmd = null; + String seatunnel_home_dir = null; + String seatunnel_submit_flink_job_jar_list = null; + String seatunnel_submit_flink_job_cmd = null; + String seatunnel_submit_flink_job_cmd_paras = null; + String seatunnel_submit_flink_job_cmd_full_path = null; + + StringBuilder sb_cmd_final = new StringBuilder(); + String seatunnel_submit_flink_job_cmd_final = null; + + final String separator = "/"; + Properties prop = new Properties(); + + try { + InputStream in = + this.getClass() + .getClassLoader() + .getResourceAsStream("flink13-job-command.properties"); + if (in == null) { + throw new FileNotFoundException( + "Resource file not found. Make sure the file exists in src/test/resources."); + } + prop.load(in); + + flink_home_dir = prop.getProperty("FLINK13_HOME"); + flink_submit_job_cmd = prop.getProperty("FLINK13_SUBMIT_JOB_CMD"); + seatunnel_home_dir = prop.getProperty("SEATUNNEL_HOME"); + seatunnel_submit_flink_job_jar_list = + prop.getProperty("SEATUNNEL_SUBMIT_FLINK13_JOB_JAR_LIST"); + seatunnel_submit_flink_job_cmd = prop.getProperty("SEATUNNEL_SUBMIT_FLINK13_JOB_CMD"); + + sb_cmd_final.append(seatunnel_home_dir); + sb_cmd_final.append(separator); + sb_cmd_final.append("bin"); + sb_cmd_final.append(separator); + sb_cmd_final.append(seatunnel_submit_flink_job_cmd); + sb_cmd_final.append(" --config "); + sb_cmd_final.append(seatunnel_home_dir); + sb_cmd_final.append(separator); + sb_cmd_final.append("/config/v2.streaming.conf.template"); + + seatunnel_submit_flink_job_cmd_final = sb_cmd_final.toString(); + // debug + // System.out.println("Final command:" + seatunnel_submit_flink_job_cmd_final); + + Process process = Runtime.getRuntime().exec(seatunnel_submit_flink_job_cmd_final); + process.waitFor(); // wait for the command to finish + + // Read the stdout of command + flink_job_command_return = readStream(process.getInputStream()); + + // Read the stderr of the command + flink_job_command_return_error = readStream(process.getErrorStream()); + Assertions.assertNull(flink_job_command_return_error); + + /* if (flink_job_command_return_error == null) { + System.out.println("Seatunnel flink job submitted successfully."); + } else { + System.out.println("Seatunnel flink job submission failed, error message is:"); + System.out.println(flink_job_command_return_error); + } + */ + + process.destroy(); + // debug + System.out.println( + "Auto closed [start-seatunnel-flink-13-connector-v2.cmd] successfully."); + } catch (IOException e) { + e.printStackTrace(); + } + } + + private static String readStream(InputStream inputStream) throws IOException { + try (BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream))) { + String line; + StringBuilder sb = new StringBuilder(); + String str_final = ""; + + while ((line = reader.readLine()) != null) { + System.out.println(line); + sb.append(line); + } + str_final = sb.toString(); + if (!(str_final.isEmpty())) { + return str_final; + } else { + return null; + } + } + } +}
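One caveat in the test above: process.waitFor() runs before readStream() drains stdout and stderr, so a command that prints more than the OS pipe buffer holds can block forever. A minimal sketch of a safer ordering with plain JDK APIs (Java 9+ for readAllBytes; not part of the patch):

    import java.io.IOException;
    import java.nio.charset.StandardCharsets;

    // Sketch: drain the process output *before* waitFor(), and merge the
    // streams, so the child can never block on a full pipe buffer.
    final class SafeExec {
        static String run(String... command) throws IOException, InterruptedException {
            Process p = new ProcessBuilder(command)
                    .redirectErrorStream(true) // fold stderr into stdout
                    .start();
            byte[] out = p.getInputStream().readAllBytes(); // reads until the child closes its end
            int exit = p.waitFor();
            if (exit != 0) {
                throw new IOException("command exited with code " + exit);
            }
            return new String(out, StandardCharsets.UTF_8);
        }
    }

diff --git a/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/test/java/org/apache/seatunnel/core/starter/flink/TestFlinkJobCommandReturnV15.java b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/test/java/org/apache/seatunnel/core/starter/flink/TestFlinkJobCommandReturnV15.java new file mode 100644 index 00000000000..6a812992405 --- /dev/null +++ b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/test/java/org/apache/seatunnel/core/starter/flink/TestFlinkJobCommandReturnV15.java @@ -0,0 +1,129 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License.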
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.core.starter.flink; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.io.BufferedReader; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.util.Properties; + +public class TestFlinkJobCommandReturnV15 { + @Test + public void testFlinkJobCommandReturnV15() throws Exception { + String flink_job_command = null; + String flink_job_command_return = null; + String flink_job_command_return_error = null; + String flink_home_dir = null; + String flink_submit_job_cmd = null; + String seatunnel_home_dir = null; + String seatunnel_submit_flink_job_jar_list = null; + String seatunnel_submit_flink_job_cmd = null; + String seatunnel_submit_flink_job_cmd_paras = null; + String seatunnel_submit_flink_job_cmd_full_path = null; + + StringBuilder sb_cmd_final = new StringBuilder(); + String seatunnel_submit_flink_job_cmd_final = null; + + final String separator = "/"; + Properties prop = new Properties(); + + try { + InputStream in = + this.getClass() + .getClassLoader() + .getResourceAsStream("flink15-job-command.properties"); + if (in == null) { + throw new FileNotFoundException( + "Resource file not found. Make sure the file exists in src/test/resources."); + } + prop.load(in); + + flink_home_dir = prop.getProperty("FLINK15_HOME"); + flink_submit_job_cmd = prop.getProperty("FLINK15_SUBMIT_JOB_CMD"); + seatunnel_home_dir = prop.getProperty("SEATUNNEL_HOME"); + seatunnel_submit_flink_job_jar_list = + prop.getProperty("SEATUNNEL_SUBMIT_FLINK15_JOB_JAR_LIST"); + seatunnel_submit_flink_job_cmd = prop.getProperty("SEATUNNEL_SUBMIT_FLINK15_JOB_CMD"); + + sb_cmd_final.append(seatunnel_home_dir); + sb_cmd_final.append(separator); + sb_cmd_final.append("bin"); + sb_cmd_final.append(separator); + sb_cmd_final.append(seatunnel_submit_flink_job_cmd); + sb_cmd_final.append(" --config "); + sb_cmd_final.append(seatunnel_home_dir); + sb_cmd_final.append(separator); + sb_cmd_final.append("/config/v2.streaming.conf.template"); + + seatunnel_submit_flink_job_cmd_final = sb_cmd_final.toString(); + // debug + // System.out.println("Final command:" + seatunnel_submit_flink_job_cmd_final); + + Process process = Runtime.getRuntime().exec(seatunnel_submit_flink_job_cmd_final); + process.waitFor(); // wait for the command to finish + + // Read the stdout of command + flink_job_command_return = readStream(process.getInputStream()); + + // Read the stderr of the command + flink_job_command_return_error = readStream(process.getErrorStream()); + Assertions.assertNull(flink_job_command_return_error); + + /* + if (flink_job_command_return_error == null) { + System.out.println("Seatunnel flink job submitted successfully."); + } else { + System.out.println("Seatunnel flink job submission failed, error message is:"); + System.out.println(flink_job_command_return_error); + } + */ + + process.destroy(); + // debug + System.out.println( + "Auto closed [start-seatunnel-flink-15-connector-v2.cmd] successfully."); + + } catch (IOException e) {
e.printStackTrace(); + } + } + + private static String readStream(InputStream inputStream) throws IOException { + try (BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream))) { + String line; + StringBuilder sb = new StringBuilder(); + String str_final = ""; + + while ((line = reader.readLine()) != null) { + System.out.println(line); + sb.append(line); + } + str_final = sb.toString(); + if (!(str_final.isEmpty())) { + return str_final; + } else { + return null; + } + } + } +} diff --git a/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-2-starter/src/main/bin/start-seatunnel-spark-2-connector-v2.cmd b/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-2-starter/src/main/bin/start-seatunnel-spark-2-connector-v2.cmd index b2671671383..2e0e15b88a3 100644 --- a/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-2-starter/src/main/bin/start-seatunnel-spark-2-connector-v2.cmd +++ b/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-2-starter/src/main/bin/start-seatunnel-spark-2-connector-v2.cmd @@ -27,7 +27,11 @@ cd /d "%PRG_DIR%" || ( exit /b 1 ) -set "APP_DIR=%~dp0" +set "currentDir=%~dp0" +set "currentDir=%currentDir:~0,-1%" +for %%i in ("%currentDir%") do set "APP_DIR=%%~dpi" +set "APP_DIR=%APP_DIR:~0,-1%" +rem set "APP_DIR=%~dp0" set "CONF_DIR=%APP_DIR%\config" set "APP_JAR=%APP_DIR%\starter\seatunnel-spark-2-starter.jar" set "APP_MAIN=org.apache.seatunnel.core.starter.spark.SparkStarter" @@ -51,20 +55,21 @@ if exist "%CONF_DIR%\log4j2.properties" ( ) set "CLASS_PATH=%APP_DIR%\starter\logging\*;%APP_JAR%" +set "full_java_cmd=java %JAVA_OPTS% -cp %CLASS_PATH% %APP_MAIN% %args%" -for /f "delims=" %%i in ('java %JAVA_OPTS% -cp %CLASS_PATH% %APP_MAIN% %args%') do ( - set "CMD=%%i" - setlocal disabledelayedexpansion +for /f "delims=" %%i in ('echo !full_java_cmd!') do ( + rem set "CMD=%%i" + rem setlocal disabledelayedexpansion if !errorlevel! equ 234 ( - echo !CMD! + echo %full_java_cmd% endlocal exit /b 0 ) else if !errorlevel! equ 0 ( - echo Execute SeaTunnel Spark Job: !CMD! + echo Execute SeaTunnel Spark Job: %full_java_cmd% endlocal - call !CMD! + call %full_java_cmd% ) else ( - echo !CMD! + echo %full_java_cmd% endlocal exit /b !errorlevel! 
) diff --git a/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-2-starter/src/main/java/org/apache/seatunnel/core/starter/spark/SparkStarter.java b/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-2-starter/src/main/java/org/apache/seatunnel/core/starter/spark/SparkStarter.java index 1b8918976b4..8d551cce054 100644 --- a/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-2-starter/src/main/java/org/apache/seatunnel/core/starter/spark/SparkStarter.java +++ b/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-2-starter/src/main/java/org/apache/seatunnel/core/starter/spark/SparkStarter.java @@ -31,6 +31,7 @@ import org.apache.seatunnel.core.starter.utils.CommandLineUtils; import org.apache.seatunnel.core.starter.utils.CompressionUtils; import org.apache.seatunnel.core.starter.utils.ConfigBuilder; +import org.apache.seatunnel.core.starter.utils.SystemUtil; import org.apache.seatunnel.plugin.discovery.PluginIdentifier; import org.apache.seatunnel.plugin.discovery.seatunnel.SeaTunnelSinkPluginDiscovery; import org.apache.seatunnel.plugin.discovery.seatunnel.SeaTunnelSourcePluginDiscovery; @@ -80,6 +81,7 @@ private SparkStarter(String[] args, SparkCommandArgs commandArgs) { this.commandArgs = commandArgs; } + @SuppressWarnings("checkstyle:RegexpSingleline") public static void main(String[] args) throws IOException { SparkStarter starter = getInstance(args); List command = starter.buildCommands(); @@ -195,7 +197,40 @@ private List getConnectorJarDependencies() { /** build final spark-submit commands */ protected List buildFinal() { List commands = new ArrayList<>(); - commands.add("${SPARK_HOME}/bin/spark-submit"); + String local_os_type = ""; + + SystemUtil my_system_util = new SystemUtil(); + local_os_type = my_system_util.GetOsType(); + // debug + // System.out.println("OS type:"+local_os_type); + + String cmd_spark = ""; + + switch (local_os_type.toLowerCase()) { + case "windows": + cmd_spark = "%SPARK_HOME%/bin/spark-submit.cmd"; + break; + case "linux": + cmd_spark = "${SPARK_HOME}/bin/spark-submit"; + break; + case "solaris": + cmd_spark = "${SPARK_HOME}/bin/spark-submit"; + break; + case "mac": + cmd_spark = "${SPARK_HOME}/bin/spark-submit"; + break; + case "unknown": + cmd_spark = "error"; + break; + } + + if (!(cmd_spark.equals("error"))) { + commands.add(cmd_spark); + } else { + System.out.println("Error: Can not determine OS type, abort run !"); + System.exit(-1); + } + appendOption(commands, "--class", SeaTunnelSpark.class.getName()); appendOption(commands, "--name", this.commandArgs.getJobName()); appendOption(commands, "--master", this.commandArgs.getMaster()); @@ -217,6 +252,9 @@ protected List buildFinal() { if (this.commandArgs.isCheckConfig()) { commands.add("--check"); } + + // debug + // System.out.println("Whole spark job command string:" + commands.toString()); return commands; } diff --git a/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-3-starter/src/main/bin/start-seatunnel-spark-3-connector-v2.cmd b/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-3-starter/src/main/bin/start-seatunnel-spark-3-connector-v2.cmd index 433fe23c6d1..f5a47d6e7d6 100644 --- a/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-3-starter/src/main/bin/start-seatunnel-spark-3-connector-v2.cmd +++ b/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-3-starter/src/main/bin/start-seatunnel-spark-3-connector-v2.cmd @@ -27,7 +27,11 @@ cd /d "%PRG_DIR%" || ( exit /b 1 ) -set "APP_DIR=%~dp0" +set "currentDir=%~dp0" +set "currentDir=%currentDir:~0,-1%" +for %%i in 
("%currentDir%") do set "APP_DIR=%%~dpi" +set "APP_DIR=%APP_DIR:~0,-1%" +rem set "APP_DIR=%~dp0" set "CONF_DIR=%APP_DIR%\config" set "APP_JAR=%APP_DIR%\starter\seatunnel-spark-3-starter.jar" set "APP_MAIN=org.apache.seatunnel.core.starter.spark.SparkStarter" @@ -51,21 +55,23 @@ if exist "%CONF_DIR%\log4j2.properties" ( ) set "CLASS_PATH=%APP_DIR%\starter\logging\*;%APP_JAR%" +set "full_java_cmd=java %JAVA_OPTS% -cp %CLASS_PATH% %APP_MAIN% %args%" -for /f "delims=" %%i in ('java %JAVA_OPTS% -cp %CLASS_PATH% %APP_MAIN% %args%') do ( - set "CMD=%%i" - setlocal disabledelayedexpansion +for /f "delims=" %%i in ('echo !full_java_cmd!') do ( + rem set "CMD=%%i" + rem setlocal disabledelayedexpansion if !errorlevel! equ 234 ( - echo !CMD! + echo %full_java_cmd% endlocal exit /b 0 ) else if !errorlevel! equ 0 ( - echo Execute SeaTunnel Spark Job: !CMD! + echo Execute SeaTunnel Spark Job: %full_java_cmd% endlocal - call !CMD! + call %full_java_cmd% ) else ( - echo !CMD! + echo %full_java_cmd% endlocal exit /b !errorlevel! ) ) + diff --git a/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-starter-common/src/main/java/org/apache/seatunnel/core/starter/spark/SparkStarter.java b/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-starter-common/src/main/java/org/apache/seatunnel/core/starter/spark/SparkStarter.java index c33544873a7..8b080e15dd6 100644 --- a/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-starter-common/src/main/java/org/apache/seatunnel/core/starter/spark/SparkStarter.java +++ b/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-starter-common/src/main/java/org/apache/seatunnel/core/starter/spark/SparkStarter.java @@ -31,6 +31,7 @@ import org.apache.seatunnel.core.starter.utils.CommandLineUtils; import org.apache.seatunnel.core.starter.utils.CompressionUtils; import org.apache.seatunnel.core.starter.utils.ConfigBuilder; +import org.apache.seatunnel.core.starter.utils.SystemUtil; import org.apache.seatunnel.plugin.discovery.PluginIdentifier; import org.apache.seatunnel.plugin.discovery.seatunnel.SeaTunnelSinkPluginDiscovery; import org.apache.seatunnel.plugin.discovery.seatunnel.SeaTunnelSourcePluginDiscovery; @@ -80,6 +81,7 @@ private SparkStarter(String[] args, SparkCommandArgs commandArgs) { this.commandArgs = commandArgs; } + @SuppressWarnings("checkstyle:RegexpSingleline") public static void main(String[] args) throws IOException { SparkStarter starter = getInstance(args); List command = starter.buildCommands(); @@ -169,7 +171,7 @@ static Map getSparkConf(String configFile) throws FileNotFoundEx Map.Entry::getKey, e -> e.getValue().unwrapped().toString())); } - /** return connector's jars, which located in 'connectors/*'. */ + /** return connector's jars, which located in 'connectors/spark/*'. 
*/ private List getConnectorJarDependencies() { Path pluginRootDir = Common.connectorDir(); if (!Files.exists(pluginRootDir) || !Files.isDirectory(pluginRootDir)) { @@ -195,7 +197,40 @@ private List getConnectorJarDependencies() { /** build final spark-submit commands */ protected List buildFinal() { List commands = new ArrayList<>(); - commands.add("${SPARK_HOME}/bin/spark-submit"); + String local_os_type = ""; + + SystemUtil my_system_util = new SystemUtil(); + local_os_type = my_system_util.GetOsType(); + // debug + // System.out.println("OS type:"+local_os_type); + + String cmd_spark = ""; + + switch (local_os_type.toLowerCase()) { + case "windows": + cmd_spark = "%SPARK_HOME%/bin/spark-submit.cmd"; + break; + case "linux": + cmd_spark = "${SPARK_HOME}/bin/spark-submit"; + break; + case "solaris": + cmd_spark = "${SPARK_HOME}/bin/spark-submit"; + break; + case "mac": + cmd_spark = "${SPARK_HOME}/bin/spark-submit"; + break; + case "unknown": + cmd_spark = "error"; + break; + } + + if (!(cmd_spark.equals("error"))) { + commands.add(cmd_spark); + } else { + System.out.println("Error: Can not determine OS type, abort run !"); + System.exit(-1); + } + appendOption(commands, "--class", SeaTunnelSpark.class.getName()); appendOption(commands, "--name", this.commandArgs.getJobName()); appendOption(commands, "--master", this.commandArgs.getMaster()); @@ -217,6 +252,9 @@ protected List buildFinal() { if (this.commandArgs.isCheckConfig()) { commands.add("--check"); } + + // debug + // System.out.println("Whole spark job command string:" + commands.toString()); return commands; } @@ -259,6 +297,7 @@ protected void appendAppJar(List commands) { Common.appStarterDir().resolve(EngineType.SPARK3.getStarterJarName()).toString()); } + @SuppressWarnings("checkstyle:Indentation") private List getPluginIdentifiers(Config config, PluginType... pluginTypes) { return Arrays.stream(pluginTypes) .flatMap( diff --git a/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-starter-common/src/test/java/org/apache/seatunnel/core/starter/spark/TestSparkJobCommandReturnV2.java b/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-starter-common/src/test/java/org/apache/seatunnel/core/starter/spark/TestSparkJobCommandReturnV2.java new file mode 100644 index 00000000000..ef06978c2b9 --- /dev/null +++ b/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-starter-common/src/test/java/org/apache/seatunnel/core/starter/spark/TestSparkJobCommandReturnV2.java @@ -0,0 +1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.core.starter.spark; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.io.BufferedReader; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.util.Properties; + +public class TestSparkJobCommandReturnV2 { + @Test + public void testSparkJobCommandReturnV2() throws Exception { + String spark2_job_command = null; + String spark2_job_command_return = null; + String spark2_job_command_return_error = null; + String spark2_home_dir = null; + String spark2_submit_job_cmd = null; + String seatunnel_home_dir = null; + String seatunnel_submit_spark2_job_jar_list = null; + String seatunnel_submit_spark2_job_cmd = null; + String seatunnel_submit_spark2_job_cmd_paras = null; + String seatunnel_submit_spark2_job_cmd_full_path = null; + + StringBuilder sb_cmd_final = new StringBuilder(); + String seatunnel_submit_spark2_job_cmd_final = null; + + final String separator = "/"; + Properties prop = new Properties(); + + try { + InputStream in = + this.getClass() + .getClassLoader() + .getResourceAsStream("spark-2-job-command.properties"); + if (in == null) { + throw new FileNotFoundException( + "Resource file not found. Make sure the file exists in src/test/resources."); + } + prop.load(in); + + spark2_home_dir = prop.getProperty("SPARK2_HOME"); + spark2_submit_job_cmd = prop.getProperty("SPARK2_SUBMIT_JOB_CMD"); + seatunnel_home_dir = prop.getProperty("SEATUNNEL_HOME"); + seatunnel_submit_spark2_job_jar_list = + prop.getProperty("SEATUNNEL_SUBMIT_SPARK2_JOB_JAR_LIST"); + seatunnel_submit_spark2_job_cmd = prop.getProperty("SEATUNNEL_SUBMIT_SPARK2_JOB_CMD"); + + sb_cmd_final.append(seatunnel_home_dir); + sb_cmd_final.append(separator); + sb_cmd_final.append("bin"); + sb_cmd_final.append(separator); + sb_cmd_final.append(seatunnel_submit_spark2_job_cmd); + sb_cmd_final.append(" --config "); + sb_cmd_final.append(seatunnel_home_dir); + sb_cmd_final.append(separator); + sb_cmd_final.append("/config/v2.batch.config.template"); + + seatunnel_submit_spark2_job_cmd_final = sb_cmd_final.toString(); + // debug + // System.out.println("Final command:" + seatunnel_submit_spark2_job_cmd_final); + + Process process = Runtime.getRuntime().exec(seatunnel_submit_spark2_job_cmd_final); + process.waitFor(); // wait for the command to finish + + // Read the stdout of command + spark2_job_command_return = readStream(process.getInputStream()); + + // Read the stderr of the command + spark2_job_command_return_error = readStream(process.getErrorStream()); + // debug + System.out.println("Job command returns:" + spark2_job_command_return); + + Assertions.assertNotNull(spark2_job_command_return); + Assertions.assertNull(spark2_job_command_return_error); + + /* + if (spark2_job_command_return_error == null) { + System.out.println("Seatunnel Spark2 job submitted successfully."); + } else { + System.out.println("Seatunnel Spark2 job submission failed, error message is:"); + System.out.println(spark2_job_command_return_error); + }*/ + + process.destroy(); + // debug + System.out.println( + "Auto closed [start-seatunnel-spark-2-connector-v2.cmd] successfully."); + } catch (IOException e) { + e.printStackTrace(); + } + }
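A note on the assertions above: spark-submit and the .cmd wrappers often log to stderr even on success, so asserting that stderr is empty can fail spuriously; the process exit code is a steadier success signal. A hedged sketch, assuming a JUnit 5 test context (helper name is hypothetical, not the patch's code):

    // Sketch: treat the exit code, not an empty stderr, as the success signal;
    // exitValue() is only valid after waitFor() has returned.
    static void assertJobCommandSucceeds(String command) throws Exception {
        Process process = Runtime.getRuntime().exec(command);
        int exitCode = process.waitFor();
        org.junit.jupiter.api.Assertions.assertEquals(
                0, exitCode, "job command failed with exit code " + exitCode);
    }

+ + private static String readStream(InputStream inputStream) throws IOException { + try (BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream))) { + String line; + StringBuilder sb = new StringBuilder(); + String str_final = ""; +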
+            while ((line = reader.readLine()) != null) {
+                System.out.println(line);
+                sb.append(line);
+            }
+            str_final = sb.toString();
+            if (!(str_final.isEmpty())) {
+                return str_final;
+            } else {
+                return null;
+            }
+        }
+    }
+}
diff --git a/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-starter-common/src/test/java/org/apache/seatunnel/core/starter/spark/TestSparkJobCommandReturnV3.java b/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-starter-common/src/test/java/org/apache/seatunnel/core/starter/spark/TestSparkJobCommandReturnV3.java
new file mode 100644
index 00000000000..7a942880b48
--- /dev/null
+++ b/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-starter-common/src/test/java/org/apache/seatunnel/core/starter/spark/TestSparkJobCommandReturnV3.java
@@ -0,0 +1,131 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.core.starter.spark;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import java.io.BufferedReader;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.Properties;
+
+public class TestSparkJobCommandReturnV3 {
+    @Test
+    public void testSparkJobCommandReturnV3() throws Exception {
+        String spark3_job_command = null;
+        String spark3_job_command_return = null;
+        String spark3_job_command_return_error = null;
+        String spark3_home_dir = null;
+        String spark3_submit_job_cmd = null;
+        String seatunnel_home_dir = null;
+        String seatunnel_submit_spark3_job_jar_list = null;
+        String seatunnel_submit_spark3_job_cmd = null;
+        String seatunnel_submit_spark3_job_cmd_paras = null;
+        String seatunnel_submit_spark3_job_cmd_full_path = null;
+
+        StringBuilder sb_cmd_final = new StringBuilder();
+        String seatunnel_submit_spark3_job_cmd_final = null;
+
+        final String separator = "/";
+        Properties prop = new Properties();
+
+        try {
+            InputStream in =
+                    this.getClass()
+                            .getClassLoader()
+                            .getResourceAsStream("spark-3-job-command.properties");
+            if (in == null) {
+                throw new FileNotFoundException(
+                        "Resource file not found.
Make sure the file exists in src/test/resources.");
+            }
+            prop.load(in);
+
+            spark3_home_dir = prop.getProperty("SPARK3_HOME");
+            spark3_submit_job_cmd = prop.getProperty("SPARK3_SUBMIT_JOB_CMD");
+            seatunnel_home_dir = prop.getProperty("SEATUNNEL_HOME");
+            seatunnel_submit_spark3_job_jar_list =
+                    prop.getProperty("SEATUNNEL_SUBMIT_SPARK3_JOB_JAR_LIST");
+            seatunnel_submit_spark3_job_cmd = prop.getProperty("SEATUNNEL_SUBMIT_SPARK3_JOB_CMD");
+
+            sb_cmd_final.append(seatunnel_home_dir);
+            sb_cmd_final.append(separator);
+            sb_cmd_final.append("bin");
+            sb_cmd_final.append(separator);
+            sb_cmd_final.append(seatunnel_submit_spark3_job_cmd);
+            sb_cmd_final.append(" --config ");
+            sb_cmd_final.append(seatunnel_home_dir);
+            sb_cmd_final.append(separator);
+            sb_cmd_final.append("config/v2.batch.config.template"); // no leading "/": separator was already appended
+
+            seatunnel_submit_spark3_job_cmd_final = sb_cmd_final.toString();
+            // debug
+            // System.out.println("Final command:" + seatunnel_submit_spark3_job_cmd_final);
+
+            Process process = Runtime.getRuntime().exec(seatunnel_submit_spark3_job_cmd_final);
+
+            // Read the stdout of the command (drained before waitFor() to avoid a pipe-buffer deadlock)
+            spark3_job_command_return = readStream(process.getInputStream());
+
+            // Read the stderr of the command
+            spark3_job_command_return_error = readStream(process.getErrorStream());
+            process.waitFor(); // wait for the command to finish
+            // debug
+            System.out.println("Job command returns: " + spark3_job_command_return);
+
+            Assertions.assertNotNull(spark3_job_command_return);
+            Assertions.assertNull(spark3_job_command_return_error);
+            /*
+            if (spark3_job_command_return_error == null) {
+                System.out.println("SeaTunnel Spark3 job submitted successfully.");
+            } else {
+                System.out.println("SeaTunnel Spark3 job submission failed, error message is:");
+                System.out.println(spark3_job_command_return_error);
+            }
+            */
+
+            process.destroy();
+            // debug
+            System.out.println(
+                    "Auto-closed [start-seatunnel-spark-3-connector-v2.cmd] successfully.");
+        } catch (IOException e) {
+            e.printStackTrace();
+        }
+    }
+
+    private static String readStream(InputStream inputStream) throws IOException {
+        try (BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream))) {
+            String line;
+            StringBuilder sb = new StringBuilder();
+            String str_final = "";
+
+            while ((line = reader.readLine()) != null) {
+                System.out.println(line);
+                sb.append(line);
+            }
+            str_final = sb.toString();
+            if (!(str_final.isEmpty())) {
+                return str_final;
+            } else {
+                return null;
+            }
+        }
+    }
+}
diff --git a/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-starter-common/src/test/resources/spark-2-job-command.properties b/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-starter-common/src/test/resources/spark-2-job-command.properties
new file mode 100644
index 00000000000..be6d491f81c
--- /dev/null
+++ b/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-starter-common/src/test/resources/spark-2-job-command.properties
@@ -0,0 +1,25 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.
You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# This property file defines the properties used by the Spark 2 job-submission test, which is run via the Spark cmd script
+SPARK2_HOME=E:/Apache/spark-3.4.1-bin-hadoop3
+SPARK2_SUBMIT_JOB_CMD=spark-submit.cmd
+SEATUNNEL_HOME=E:/Apache/seatunnel/apache-seatunnel-2.3.4
+SEATUNNEL_SUBMIT_SPARK2_JOB_CMD=start-seatunnel-spark-2-connector-v2.cmd
+SEATUNNEL_SUBMIT_SPARK2_JOB_JAR_LIST=E:/Apache/seatunnel/apache-seatunnel-2.3.4/starter/seatunnel-spark-2-starter.jar;E:/Apache/seatunnel/apache-seatunnel-2.3.4/starter/logging/*
+
+
diff --git a/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-starter-common/src/test/resources/spark-3-job-command.properties b/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-starter-common/src/test/resources/spark-3-job-command.properties
new file mode 100644
index 00000000000..82511bbef53
--- /dev/null
+++ b/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-starter-common/src/test/resources/spark-3-job-command.properties
@@ -0,0 +1,25 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# This property file defines the properties used by the Spark 3 job-submission test, which is run via the Spark cmd script
+SPARK3_HOME=E:/Apache/spark-3.4.1-bin-hadoop3
+SPARK3_SUBMIT_JOB_CMD=spark-submit.cmd
+SEATUNNEL_HOME=E:/Apache/seatunnel/apache-seatunnel-2.3.4
+SEATUNNEL_SUBMIT_SPARK3_JOB_CMD=start-seatunnel-spark-3-connector-v2.cmd
+SEATUNNEL_SUBMIT_SPARK3_JOB_JAR_LIST=E:/Apache/seatunnel/apache-seatunnel-2.3.4/starter/seatunnel-spark-3-starter.jar;E:/Apache/seatunnel/apache-seatunnel-2.3.4/starter/logging/*
+
+
diff --git a/seatunnel-dist/pom.xml b/seatunnel-dist/pom.xml
index 52155d42245..830a8001aca 100644
--- a/seatunnel-dist/pom.xml
+++ b/seatunnel-dist/pom.xml
@@ -543,13 +543,6 @@
             <scope>provided</scope>
         </dependency>
 
-        <dependency>
-            <groupId>org.apache.seatunnel</groupId>
-            <artifactId>connector-easysearch</artifactId>
-            <version>${project.version}</version>
-            <scope>provided</scope>
-        </dependency>
-
         <dependency>
             <groupId>com.aliyun.phoenix</groupId>
diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/src/test/java/org/apache/seatunnel/e2e/connector/doris/DorisCatalogIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/src/test/java/org/apache/seatunnel/e2e/connector/doris/DorisCatalogIT.java
index 290da4381ab..275c714686c 100644
--- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/src/test/java/org/apache/seatunnel/e2e/connector/doris/DorisCatalogIT.java
+++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/src/test/java/org/apache/seatunnel/e2e/connector/doris/DorisCatalogIT.java
@@ -19,7 +19,6 @@
 import org.apache.seatunnel.api.configuration.ReadonlyConfig;
 import org.apache.seatunnel.api.sink.SupportSaveMode;
-import org.apache.seatunnel.api.source.SeaTunnelSource;
 import org.apache.seatunnel.api.table.catalog.CatalogTable;
 import org.apache.seatunnel.api.table.catalog.Column;
 import org.apache.seatunnel.api.table.catalog.PhysicalColumn;
@@ -28,14 +27,12 @@
 import org.apache.seatunnel.api.table.catalog.TablePath;
 import org.apache.seatunnel.api.table.catalog.TableSchema;
 import org.apache.seatunnel.api.table.factory.TableSinkFactoryContext;
-import org.apache.seatunnel.api.table.factory.TableSourceFactoryContext;
 import org.apache.seatunnel.api.table.type.BasicType;
 import org.apache.seatunnel.api.table.type.DecimalType;
 import org.apache.seatunnel.connectors.doris.catalog.DorisCatalog;
 import org.apache.seatunnel.connectors.doris.catalog.DorisCatalogFactory;
 import org.apache.seatunnel.connectors.doris.config.DorisOptions;
 import org.apache.seatunnel.connectors.doris.sink.DorisSinkFactory;
-import org.apache.seatunnel.connectors.doris.source.DorisSourceFactory;
 
 import org.junit.jupiter.api.AfterAll;
 import org.junit.jupiter.api.Assertions;
@@ -257,43 +254,6 @@ private CatalogTable assertCreateTable(
         return createdTable;
     }
 
-    @Test
-    public void testDorisSourceSelectFieldsNotLossKeysInformation() {
-        catalog.createTable(tablePath, catalogTable, true);
-        DorisSourceFactory dorisSourceFactory = new DorisSourceFactory();
-        SeaTunnelSource dorisSource =
-                dorisSourceFactory
-                        .createSource(
-                                new TableSourceFactoryContext(
-                                        ReadonlyConfig.fromMap(
-                                                new HashMap() {
-                                                    {
-                                                        put(DorisOptions.DATABASE.key(), DATABASE);
-                                                        put(DorisOptions.TABLE.key(), SINK_TABLE);
-                                                        put(DorisOptions.USERNAME.key(), USERNAME);
-                                                        put(DorisOptions.PASSWORD.key(), PASSWORD);
-                                                        put(
-                                                                DorisOptions.DORIS_READ_FIELD.key(),
-                                                                "k1,k2");
-                                                        put(
-                                                                DorisOptions.FENODES.key(),
-                                                                container.getHost()
-                                                                        + ":"
-                                                                        + HTTP_PORT);
-                                                        put(
-                                                                DorisOptions.QUERY_PORT.key(),
-                                                                QUERY_PORT);
-                                                    }
-                                                }),
-                                        Thread.currentThread().getContextClassLoader()))
-                        .createSource();
-        CatalogTable table = (CatalogTable)
dorisSource.getProducedCatalogTables().get(0); - Assertions.assertIterableEquals( - Arrays.asList("k1", "k2"), table.getTableSchema().getPrimaryKey().getColumnNames()); - catalog.dropTable(tablePath, false); - Assertions.assertFalse(catalog.tableExists(tablePath)); - } - @AfterAll public void close() { if (catalog != null) { diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/src/test/java/org/apache/seatunnel/e2e/connector/doris/DorisIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/src/test/java/org/apache/seatunnel/e2e/connector/doris/DorisIT.java index 97b9b1a3523..fb414aacd9a 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/src/test/java/org/apache/seatunnel/e2e/connector/doris/DorisIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/src/test/java/org/apache/seatunnel/e2e/connector/doris/DorisIT.java @@ -126,7 +126,7 @@ private void checkSinkData() { String sinkSql = String.format("select * from %s.%s order by F_ID", sinkDB, TABLE); List columnList = Arrays.stream(COLUMN_STRING.split(",")) - .map(String::trim) + .map(x -> x.trim()) .collect(Collectors.toList()); Statement sourceStatement = conn.createStatement( @@ -284,8 +284,8 @@ private List genDorisTestData(Long nums) { GenerateTestData.genString(1), GenerateTestData.genString(11), GenerateTestData.genString(12), - GenerateTestData.genDatetimeString(true), GenerateTestData.genDatetimeString(false), + GenerateTestData.genDatetimeString(true), GenerateTestData.genDateString() })); } diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-easysearch-e2e/pom.xml b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-easysearch-e2e/pom.xml deleted file mode 100644 index 84272733b81..00000000000 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-easysearch-e2e/pom.xml +++ /dev/null @@ -1,43 +0,0 @@ - - - - 4.0.0 - - org.apache.seatunnel - seatunnel-connector-v2-e2e - ${revision} - - - connector-easysearch-e2e - SeaTunnel : E2E : Connector V2 : Easysearch - - - - - org.apache.seatunnel - connector-fake - ${project.version} - test - - - org.apache.seatunnel - connector-easysearch - ${project.version} - test - - - diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-easysearch-e2e/src/test/java/org/apache/seatunnel/e2e/connector/easysearch/EasysearchIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-easysearch-e2e/src/test/java/org/apache/seatunnel/e2e/connector/easysearch/EasysearchIT.java deleted file mode 100644 index 5a53d2288be..00000000000 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-easysearch-e2e/src/test/java/org/apache/seatunnel/e2e/connector/easysearch/EasysearchIT.java +++ /dev/null @@ -1,280 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.seatunnel.e2e.connector.easysearch; - -import org.apache.seatunnel.shade.com.fasterxml.jackson.core.JsonProcessingException; -import org.apache.seatunnel.shade.com.fasterxml.jackson.databind.JsonNode; -import org.apache.seatunnel.shade.com.fasterxml.jackson.databind.ObjectMapper; - -import org.apache.seatunnel.common.utils.JsonUtils; -import org.apache.seatunnel.connectors.seatunnel.easysearch.client.EasysearchClient; -import org.apache.seatunnel.connectors.seatunnel.easysearch.dto.source.ScrollResult; -import org.apache.seatunnel.e2e.common.TestResource; -import org.apache.seatunnel.e2e.common.TestSuiteBase; -import org.apache.seatunnel.e2e.common.container.EngineType; -import org.apache.seatunnel.e2e.common.container.TestContainer; -import org.apache.seatunnel.e2e.common.junit.DisabledOnContainer; - -import org.awaitility.Awaitility; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.TestTemplate; -import org.testcontainers.containers.Container; -import org.testcontainers.containers.GenericContainer; -import org.testcontainers.containers.output.Slf4jLogConsumer; -import org.testcontainers.lifecycle.Startables; -import org.testcontainers.utility.DockerLoggerFactory; - -import com.google.common.collect.Lists; -import lombok.extern.slf4j.Slf4j; - -import java.io.IOException; -import java.math.BigDecimal; -import java.time.Duration; -import java.time.LocalDate; -import java.time.LocalDateTime; -import java.time.ZoneOffset; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Objects; -import java.util.Optional; -import java.util.concurrent.TimeUnit; -import java.util.stream.Collectors; -import java.util.stream.Stream; - -@Slf4j -public class EasysearchIT extends TestSuiteBase implements TestResource { - - private static final String EZS_DOCKER_IMAGE = "infinilabs/easysearch-amd64:seatunnel"; - - private static final String HOST = "e2e_easysearch"; - - private static final int PORT = 9200; - private List testDataset; - - private GenericContainer easysearchServer; - - private EasysearchClient easysearchClient; - - @BeforeEach - @Override - public void startUp() throws Exception { - easysearchServer = - new GenericContainer<>(EZS_DOCKER_IMAGE) - .withNetwork(NETWORK) - .withNetworkAliases(HOST) - .withPrivilegedMode(true) - .withEnv("cluster.routing.allocation.disk.threshold_enabled", "false") - .withStartupAttempts(5) - .withStartupTimeout(Duration.ofMinutes(5)) - .withLogConsumer( - new Slf4jLogConsumer( - DockerLoggerFactory.getLogger(EZS_DOCKER_IMAGE))); - easysearchServer.setPortBindings(Lists.newArrayList(String.format("%s:%s", PORT, PORT))); - Startables.deepStart(Stream.of(easysearchServer)).join(); - log.info("Easysearch container started"); - // prepare test dataset - testDataset = generateTestDataSet(); - // wait for easysearch fully start - Awaitility.given() - .ignoreExceptions() - .atLeast(5L, TimeUnit.SECONDS) - .pollInterval(1L, TimeUnit.SECONDS) - .atMost(120L, TimeUnit.SECONDS) - .untilAsserted(this::initConnection); - } - - private void initConnection() { - String host = easysearchServer.getContainerIpAddress(); - String endpoint = String.format("https://%s:%d", host, PORT); - easysearchClient = - EasysearchClient.createInstance( - Lists.newArrayList(endpoint), - Optional.of("admin"), - Optional.of("admin"), - false, - 
false, - Optional.empty(), - Optional.empty(), - Optional.empty(), - Optional.empty()); - createIndexDocs(); - } - - /** create a index,and bulk some documents */ - private void createIndexDocs() { - StringBuilder requestBody = new StringBuilder(); - String indexHeader = "{\"index\":{\"_index\":\"st_index\"}}\n"; - for (int i = 0; i < testDataset.size(); i++) { - String row = testDataset.get(i); - requestBody.append(indexHeader); - requestBody.append(row); - requestBody.append("\n"); - } - easysearchClient.bulk(requestBody.toString()); - } - - @DisabledOnContainer( - value = {}, - type = {EngineType.SPARK, EngineType.FLINK}, - disabledReason = "Test only one engine for first change") - @TestTemplate - public void testEasysearch(TestContainer container) throws IOException, InterruptedException { - Container.ExecResult execResult = - container.executeJob("/easysearch/easysearch_source_and_sink.conf"); - Assertions.assertEquals(0, execResult.getExitCode()); - List sinkData = readSinkData(); - // for DSL is: {"range":{"c_int":{"gte":10,"lte":20}}} - Assertions.assertIterableEquals(mapTestDatasetForDSL(), sinkData); - } - - private List generateTestDataSet() throws JsonProcessingException { - String[] fields = - new String[] { - "c_map", - "c_array", - "c_string", - "c_boolean", - "c_tinyint", - "c_smallint", - "c_int", - "c_bigint", - "c_float", - "c_double", - "c_decimal", - "c_bytes", - "c_date", - "c_timestamp" - }; - List documents = new ArrayList<>(); - ObjectMapper objectMapper = new ObjectMapper(); - for (int i = 0; i < 100; i++) { - Map doc = new HashMap<>(); - Object[] crow_values = - new Object[] { - Collections.singletonMap("crow_key", Short.parseShort(String.valueOf(i))), - new Byte[] {Byte.parseByte("1"), Byte.parseByte("2"), Byte.parseByte("3")}, - "crow_string" - }; - Object[] values = - new Object[] { - Collections.singletonMap("key", Short.parseShort(String.valueOf(i))), - new Byte[] {Byte.parseByte("1"), Byte.parseByte("2"), Byte.parseByte("3")}, - "string", - Boolean.FALSE, - Byte.parseByte("1"), - Short.parseShort("1"), - i, - Long.parseLong("1"), - Float.parseFloat("1.1"), - Double.parseDouble("1.1"), - BigDecimal.valueOf(11, 1), - "test".getBytes(), - LocalDate.now().toString(), - System.currentTimeMillis() - }; - for (int j = 0; j < fields.length; j++) { - doc.put(fields[j], values[j]); - } - documents.add(objectMapper.writeValueAsString(doc)); - } - return documents; - } - - private List readSinkData() throws InterruptedException { - // wait for index refresh - Thread.sleep(2000); - List source = - Lists.newArrayList( - "c_map", - "c_array", - "c_string", - "c_boolean", - "c_tinyint", - "c_smallint", - "c_int", - "c_bigint", - "c_float", - "c_double", - "c_decimal", - "c_bytes", - "c_date", - "c_timestamp"); - HashMap rangeParam = new HashMap<>(); - rangeParam.put("gte", 10); - rangeParam.put("lte", 20); - HashMap range = new HashMap<>(); - range.put("c_int", rangeParam); - Map query = new HashMap<>(); - query.put("range", range); - ScrollResult scrollResult = - easysearchClient.searchByScroll("st_index2", source, query, "1m", 1000); - scrollResult - .getDocs() - .forEach( - x -> { - x.remove("_index"); - x.remove("_type"); - x.remove("_id"); - // I don’t know if converting the test cases in this way complies with - // the CI specification - x.replace( - "c_timestamp", - LocalDateTime.parse(x.get("c_timestamp").toString()) - .toInstant(ZoneOffset.UTC) - .toEpochMilli()); - }); - List docs = - scrollResult.getDocs().stream() - .sorted( - Comparator.comparingInt( - o -> 
Integer.valueOf(o.get("c_int").toString()))) - .map(JsonUtils::toJsonString) - .collect(Collectors.toList()); - return docs; - } - - private List mapTestDatasetForDSL() { - return testDataset.stream() - .map(JsonUtils::parseObject) - .filter( - node -> { - if (node.hasNonNull("c_int")) { - int cInt = node.get("c_int").asInt(); - return cInt >= 10 && cInt <= 20; - } - return false; - }) - .map(JsonNode::toString) - .collect(Collectors.toList()); - } - - @AfterEach - @Override - public void tearDown() { - if (Objects.nonNull(easysearchClient)) { - easysearchClient.close(); - } - easysearchServer.close(); - } -} diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-easysearch-e2e/src/test/resources/easysearch/easysearch_source_and_sink.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-easysearch-e2e/src/test/resources/easysearch/easysearch_source_and_sink.conf deleted file mode 100644 index 3af53a9a6be..00000000000 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-easysearch-e2e/src/test/resources/easysearch/easysearch_source_and_sink.conf +++ /dev/null @@ -1,74 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -###### -###### This config file is a demonstration of streaming processing in seatunnel config -###### - -env { - # You can set flink configuration here - execution.parallelism = 1 - job.mode = "BATCH" - #execution.checkpoint.interval = 10000 - #execution.checkpoint.data-uri = "hdfs://localhost:9000/checkpoint" -} - -source { - Easysearch { - hosts = ["https://e2e_easysearch:9200"] - username = "admin" - password = "admin" - tls_verify_certificate = false - tls_verify_hostname = false - - index = "st_index" - query = {"range": {"c_int": {"gte": 10, "lte": 20}}} - schema = { - fields { - c_map = "map" - c_array = "array" - c_string = string - c_boolean = boolean - c_tinyint = tinyint - c_smallint = smallint - c_int = int - c_bigint = bigint - c_float = float - c_double = double - c_decimal = "decimal(2, 1)" - c_bytes = bytes - c_date = date - c_timestamp = timestamp - } - } - } -} - -transform { -} - -sink { - Easysearch { - hosts = ["https://e2e_easysearch:9200"] - username = "admin" - password = "admin" - tls_verify_certificate = false - tls_verify_hostname = false - - index = "st_index2" - } -} \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-sftp-e2e/src/test/java/org/apache/seatunnel/e2e/connector/file/fstp/SftpFileIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-sftp-e2e/src/test/java/org/apache/seatunnel/e2e/connector/file/fstp/SftpFileIT.java index 9645268882e..e5fbcb5f5ef 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-sftp-e2e/src/test/java/org/apache/seatunnel/e2e/connector/file/fstp/SftpFileIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-sftp-e2e/src/test/java/org/apache/seatunnel/e2e/connector/file/fstp/SftpFileIT.java @@ -93,11 +93,6 @@ public void startUp() throws Exception { "/home/seatunnel/tmp/seatunnel/read/excel_filter/name=tyrantlucifer/hobby=coding/e2e_filter.xlsx", sftpContainer); - ContainerUtil.copyFileIntoContainers( - "/xml/e2e.xml", - "/home/seatunnel/tmp/seatunnel/read/xml/name=tyrantlucifer/hobby=coding/e2e.xml", - sftpContainer); - sftpContainer.execInContainer("sh", "-c", "chown -R seatunnel /home/seatunnel/tmp/"); } @@ -125,10 +120,6 @@ public void testSftpFileReadAndWrite(TestContainer container) helper.execute("/json/fake_to_sftp_file_json.conf"); // test read sftp json file helper.execute("/json/sftp_file_json_to_assert.conf"); - // test write sftp xml file - helper.execute("/xml/fake_to_sftp_file_xml.conf"); - // test read sftp xml file - helper.execute("/xml/sftp_file_xml_to_assert.conf"); } @AfterAll diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-sftp-e2e/src/test/resources/xml/e2e.xml b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-sftp-e2e/src/test/resources/xml/e2e.xml deleted file mode 100644 index 0ffec43a150..00000000000 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-sftp-e2e/src/test/resources/xml/e2e.xml +++ /dev/null @@ -1,24 +0,0 @@ - - - - - - - diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-sftp-e2e/src/test/resources/xml/fake_to_sftp_file_xml.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-sftp-e2e/src/test/resources/xml/fake_to_sftp_file_xml.conf deleted file mode 100644 index 9b25ec0446b..00000000000 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-sftp-e2e/src/test/resources/xml/fake_to_sftp_file_xml.conf +++ /dev/null @@ -1,88 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license 
agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -env { - parallelism = 1 - job.mode = "BATCH" - - # You can set spark configuration here - spark.app.name = "SeaTunnel" - spark.executor.instances = 1 - spark.executor.cores = 1 - spark.executor.memory = "1g" - spark.master = local -} - -source { - FakeSource { - result_table_name = "sftp" - schema = { - fields { - c_map = "map" - c_array = "array" - c_string = string - c_boolean = boolean - c_tinyint = tinyint - c_smallint = smallint - c_int = int - c_bigint = bigint - c_float = float - c_double = double - c_bytes = bytes - c_date = date - c_decimal = "decimal(38, 18)" - c_timestamp = timestamp - c_row = { - c_map = "map" - c_array = "array" - c_string = string - c_boolean = boolean - c_tinyint = tinyint - c_smallint = smallint - c_int = int - c_bigint = bigint - c_float = float - c_double = double - c_bytes = bytes - c_date = date - c_decimal = "decimal(38, 18)" - c_timestamp = timestamp - } - } - } - } -} - -sink { - SftpFile { - host = "sftp" - port = 22 - user = seatunnel - password = pass - path = "tmp/seatunnel/xml" - source_table_name = "sftp" - partition_dir_expression = "${k0}=${v0}" - is_partition_field_write_in_file = true - file_name_expression = "${transactionId}_${now}" - file_format_type = "xml" - filename_time_format = "yyyy.MM.dd" - is_enable_transaction = true - xml_root_tag = "RECORDS" - xml_row_tag = "RECORD" - xml_use_attr_format = true - } -} \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-sftp-e2e/src/test/resources/xml/sftp_file_xml_to_assert.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-sftp-e2e/src/test/resources/xml/sftp_file_xml_to_assert.conf deleted file mode 100644 index f9c26128ce8..00000000000 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-sftp-e2e/src/test/resources/xml/sftp_file_xml_to_assert.conf +++ /dev/null @@ -1,121 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -env { - parallelism = 1 - job.mode = "BATCH" - - # You can set spark configuration here - spark.app.name = "SeaTunnel" - spark.executor.instances = 1 - spark.executor.cores = 1 - spark.executor.memory = "1g" - spark.master = local -} - -source { - SftpFile { - host = "sftp" - port = 22 - user = seatunnel - password = pass - path = "tmp/seatunnel/read/xml" - file_format_type = "xml" - result_table_name = "sftp" - xml_row_tag = "RECORD" - xml_use_attr_format = true - schema = { - fields { - c_bytes = "tinyint" - c_short = "smallint" - c_int = "int" - c_bigint = "bigint" - c_string = "string" - c_double = "double" - c_float = "float" - c_decimal = "decimal(10, 2)" - c_boolean = "boolean" - c_map = "map" - c_array = "array" - c_date = "date" - c_datetime = "timestamp" - c_time = "time" - } - } - } -} - -sink { - Assert { - result_table_name = "sftp" - rules { - row_rules = [ - { - rule_type = MAX_ROW - rule_value = 1 - } - ], - field_rules = [ - { - field_name = c_string - field_type = string - field_value = [ - { - rule_type = NOT_NULL - } - ] - }, - { - field_name = c_boolean - field_type = boolean - field_value = [ - { - rule_type = NOT_NULL - } - ] - }, - { - field_name = c_double - field_type = double - field_value = [ - { - rule_type = NOT_NULL - } - ] - }, - { - field_name = name - field_type = string - field_value = [ - { - rule_type = NOT_NULL - } - ] - }, - { - field_name = hobby - field_type = string - field_value = [ - { - rule_type = NOT_NULL - } - ] - } - ] - } - } -} \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-2/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcOceanBaseMysqlIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-2/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcOceanBaseMysqlIT.java index 7ec61751616..e97dc1b52bc 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-2/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcOceanBaseMysqlIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-2/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcOceanBaseMysqlIT.java @@ -27,7 +27,6 @@ import org.testcontainers.containers.GenericContainer; import org.testcontainers.containers.output.Slf4jLogConsumer; import org.testcontainers.containers.wait.strategy.Wait; -import org.testcontainers.images.PullPolicy; import org.testcontainers.utility.DockerLoggerFactory; import com.google.common.collect.Lists; @@ -284,7 +283,6 @@ GenericContainer initContainer() { .withNetwork(NETWORK) .withNetworkAliases(HOSTNAME) .withExposedPorts(PORT) - .withImagePullPolicy(PullPolicy.alwaysPull()) .waitingFor(Wait.forLogMessage(".*boot success!.*", 1)) .withStartupTimeout(Duration.ofMinutes(5)) .withLogConsumer(new Slf4jLogConsumer(DockerLoggerFactory.getLogger(IMAGE))); diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/pom.xml b/seatunnel-e2e/seatunnel-connector-v2-e2e/pom.xml index 459c49a4d02..5e636cb41ac 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/pom.xml +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/pom.xml @@ -64,7 +64,6 @@ connector-pulsar-e2e connector-paimon-e2e connector-kudu-e2e - connector-easysearch-e2e connector-cdc-postgres-e2e connector-cdc-oracle-e2e diff --git a/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/RestApiIT.java 
b/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/RestApiIT.java index 5427a8e1c2b..c6cb429c0aa 100644 --- a/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/RestApiIT.java +++ b/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/RestApiIT.java @@ -108,40 +108,6 @@ public void testGetRunningJobById() { }); } - @Test - public void testGetAnNotExistJobById() { - Arrays.asList(node2, node1) - .forEach( - instance -> { - given().get( - HOST - + instance.getCluster() - .getLocalMember() - .getAddress() - .getPort() - + RestConstant.RUNNING_JOB_URL - + "/" - + 123) - .then() - .statusCode(200) - .body("jobId", equalTo("123")); - }); - Arrays.asList(node2, node1) - .forEach( - instance -> { - given().get( - HOST - + instance.getCluster() - .getLocalMember() - .getAddress() - .getPort() - + RestConstant.RUNNING_JOB_URL - + "/") - .then() - .statusCode(500); - }); - } - @Test public void testGetRunningJobs() { Arrays.asList(node2, node1) diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/rest/RestHttpGetCommandProcessor.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/rest/RestHttpGetCommandProcessor.java index 81a1047c749..cbd2f724e3b 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/rest/RestHttpGetCommandProcessor.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/rest/RestHttpGetCommandProcessor.java @@ -275,7 +275,7 @@ private void handleJobInfoById(HttpGetCommand command, String uri) { if (!jobId.isEmpty() && jobInfo != null) { this.prepareResponse(command, convertToJson(jobInfo, Long.parseLong(jobId))); } else { - this.prepareResponse(command, new JsonObject().add(RestConstant.JOB_ID, jobId)); + this.prepareResponse(command, new JsonObject()); } } diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/flow/SourceFlowLifeCycle.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/flow/SourceFlowLifeCycle.java index 95e54980b48..f5964badee0 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/flow/SourceFlowLifeCycle.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/flow/SourceFlowLifeCycle.java @@ -168,8 +168,8 @@ public void collect() throws Exception { "previous schema changes in progress, schemaChangePhase: " + schemaChangePhase.get()); } - schemaChangePhase.set(SchemaChangePhase.createBeforePhase()); runningTask.triggerSchemaChangeBeforeCheckpoint().get(); + schemaChangePhase.set(SchemaChangePhase.createBeforePhase()); log.info("triggered schema-change-before checkpoint, stopping collect data"); } else if (collector.captureSchemaChangeAfterCheckpointSignal()) { if (schemaChangePhase.get() != null) { @@ -177,8 +177,8 @@ public void collect() throws Exception { "previous schema changes in progress, schemaChangePhase: " + schemaChangePhase.get()); } - schemaChangePhase.set(SchemaChangePhase.createAfterPhase()); runningTask.triggerSchemaChangeAfterCheckpoint().get(); + schemaChangePhase.set(SchemaChangePhase.createAfterPhase()); log.info("triggered schema-change-after checkpoint, stopping collect data"); } 
} else { @@ -284,32 +284,25 @@ public void triggerBarrier(Barrier barrier) throws Exception { currentTaskLocation); CheckpointType checkpointType = ((CheckpointBarrier) barrier).getCheckpointType(); - if (checkpointType.isSchemaChangeCheckpoint()) { - if (schemaChanging()) { - if (checkpointType.isSchemaChangeBeforeCheckpoint() - && schemaChangePhase.get().isBeforePhase()) { - schemaChangePhase.get().setCheckpointId(barrier.getId()); - } else if (checkpointType.isSchemaChangeAfterCheckpoint() - && schemaChangePhase.get().isAfterPhase()) { - schemaChangePhase.get().setCheckpointId(barrier.getId()); - } else { - throw new IllegalStateException( - String.format( - "schema-change checkpoint[%s,%s] and phase[%s] is not matched", - barrier.getId(), - checkpointType, - schemaChangePhase.get().getPhase())); - } - log.info( - "lock checkpoint[{}] waiting for complete..., phase: [{}]", - barrier.getId(), - schemaChangePhase.get().getPhase()); + if (schemaChanging() && checkpointType.isSchemaChangeCheckpoint()) { + if (checkpointType.isSchemaChangeBeforeCheckpoint() + && schemaChangePhase.get().isBeforePhase()) { + schemaChangePhase.get().setCheckpointId(barrier.getId()); + } else if (checkpointType.isSchemaChangeAfterCheckpoint() + && schemaChangePhase.get().isAfterPhase()) { + schemaChangePhase.get().setCheckpointId(barrier.getId()); } else { throw new IllegalStateException( String.format( - "schema-change checkpoint[%s] and phase[%s] is not matched", - barrier.getId(), checkpointType)); + "schema-change checkpoint[%s,%s] and phase[%s] is not matched", + barrier.getId(), + checkpointType, + schemaChangePhase.get().getPhase())); } + log.info( + "lock checkpoint[{}] waiting for complete..., phase: [{}]", + barrier.getId(), + schemaChangePhase.get().getPhase()); } } diff --git a/seatunnel-plugin-discovery/src/test/java/org/apache/seatunnel/plugin/discovery/AbstractPluginDiscoveryTest.java b/seatunnel-plugin-discovery/src/test/java/org/apache/seatunnel/plugin/discovery/AbstractPluginDiscoveryTest.java index 9a2e45c62f0..18b86a65a84 100644 --- a/seatunnel-plugin-discovery/src/test/java/org/apache/seatunnel/plugin/discovery/AbstractPluginDiscoveryTest.java +++ b/seatunnel-plugin-discovery/src/test/java/org/apache/seatunnel/plugin/discovery/AbstractPluginDiscoveryTest.java @@ -50,11 +50,11 @@ public void before() { public void testGetAllPlugins() { Map sourcePlugins = AbstractPluginDiscovery.getAllSupportedPlugins(PluginType.SOURCE); - Assertions.assertEquals(28, sourcePlugins.size()); + Assertions.assertEquals(27, sourcePlugins.size()); Map sinkPlugins = AbstractPluginDiscovery.getAllSupportedPlugins(PluginType.SINK); - Assertions.assertEquals(32, sinkPlugins.size()); + Assertions.assertEquals(31, sinkPlugins.size()); } @AfterEach diff --git a/seatunnel-plugin-discovery/src/test/resources/home/connectors/plugin-mapping.properties b/seatunnel-plugin-discovery/src/test/resources/home/connectors/plugin-mapping.properties index 9bbe4893ea1..f34f17a8aed 100644 --- a/seatunnel-plugin-discovery/src/test/resources/home/connectors/plugin-mapping.properties +++ b/seatunnel-plugin-discovery/src/test/resources/home/connectors/plugin-mapping.properties @@ -145,6 +145,4 @@ seatunnel.sink.StarRocks = connector-starrocks seatunnel.source.MyHours = connector-http-myhours seatunnel.sink.InfluxDB = connector-influxdb seatunnel.source.GoogleSheets = connector-google-sheets -seatunnel.source.Easysearch = connector-easysearch -seatunnel.sink.Easysearch = connector-easysearch seatunnel.sink.Pulsar = connector-pulsar
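The two starter tests above build the submit command by hand-concatenating path segments into a string and launching it with Runtime.getRuntime().exec(String), then draining stdout and stderr before waitFor(). A ProcessBuilder-based variant removes the separator bookkeeping and folds stderr into stdout so a single reader empties the pipe. This is a minimal sketch, not part of this patch; the SEATUNNEL_HOME value and the script name are placeholders mirroring the properties files above:

    import java.io.BufferedReader;
    import java.io.IOException;
    import java.io.InputStreamReader;
    import java.nio.file.Paths;

    public class SparkStarterLaunchSketch {
        public static void main(String[] args) throws IOException, InterruptedException {
            // Placeholder values; the tests above load these from spark-*-job-command.properties.
            String seatunnelHome = "E:/Apache/seatunnel/apache-seatunnel-2.3.4";
            String starterCmd = "start-seatunnel-spark-3-connector-v2.cmd";

            // Paths.get handles the separators, so no manual "/" appends are needed.
            ProcessBuilder pb =
                    new ProcessBuilder(
                            Paths.get(seatunnelHome, "bin", starterCmd).toString(),
                            "--config",
                            Paths.get(seatunnelHome, "config", "v2.batch.config.template").toString());
            pb.redirectErrorStream(true); // fold stderr into stdout: one reader drains the pipe

            Process process = pb.start();
            StringBuilder output = new StringBuilder();
            try (BufferedReader reader =
                    new BufferedReader(new InputStreamReader(process.getInputStream()))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    output.append(line).append(System.lineSeparator());
                }
            }
            int exitCode = process.waitFor(); // stream already drained, so this cannot block on I/O
            System.out.println("exit code: " + exitCode);
            System.out.print(output);
        }
    }

With redirectErrorStream(true), the pass/fail split that the tests express as assertNull on the stderr capture would instead come from the exit code, which spark-submit normally sets to a non-zero value on failure.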