
[Feature][Connectors-V2][File]support assign encoding for file source/sink #6489

Merged: 2 commits merged on Mar 14, 2024
6 changes: 6 additions & 0 deletions docs/en/connector-v2/sink/CosFile.md
@@ -61,6 +61,7 @@ By default, we use 2PC commit to ensure `exactly-once`
| xml_root_tag | string | no | RECORDS | Only used when file_format is xml. |
| xml_row_tag | string | no | RECORD | Only used when file_format is xml. |
| xml_use_attr_format | boolean | no | - | Only used when file_format is xml. |
| encoding | string | no | "UTF-8" | Only used when file_format_type is json, text, csv or xml. |

### path [string]

@@ -205,6 +206,11 @@ Specifies the tag name of the data rows within the XML file.

Specifies whether to process data using the tag attribute format.

### encoding [string]

Only used when file_format_type is json, text, csv or xml.
The encoding of the file to write. This parameter will be parsed by `Charset.forName(encoding)`.
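
A minimal sketch of a sink block using this option; the bucket, credential and other required CosFile options are omitted here, and all values shown are illustrative:

```hocon
CosFile {
    # required connection options (bucket, credentials, ...) omitted in this sketch
    path = "/seatunnel/encoding/text"
    file_format_type = "text"
    encoding = "gbk"
}
```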

## Example

For text file format with `have_partition` and `custom_filename` and `sink_columns`
6 changes: 6 additions & 0 deletions docs/en/connector-v2/sink/FtpFile.md
@@ -60,6 +60,7 @@ By default, we use 2PC commit to ensure `exactly-once`
| xml_root_tag | string | no | RECORDS | Only used when file_format is xml. |
| xml_row_tag | string | no | RECORD | Only used when file_format is xml. |
| xml_use_attr_format | boolean | no | - | Only used when file_format is xml. |
| encoding | string | no | "UTF-8" | Only used when file_format_type is json, text, csv or xml. |

### host [string]

@@ -210,6 +211,11 @@ Specifies the tag name of the data rows within the XML file.

Specifies whether to process data using the tag attribute format.

### encoding [string]

Only used when file_format_type is json, text, csv or xml.
The encoding of the file to write. This parameter will be parsed by `Charset.forName(encoding)`.
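
A minimal sketch, assuming the usual FtpFile connection options (host, port, login) are configured as well; values are illustrative:

```hocon
FtpFile {
    # host, port and login options omitted in this sketch
    path = "/data/sink/text"
    file_format_type = "text"
    encoding = "gbk"
}
```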

## Example

For text file format simple config
1 change: 1 addition & 0 deletions docs/en/connector-v2/sink/HdfsFile.md
@@ -67,6 +67,7 @@ Output data to hdfs file
| xml_root_tag | string | no | RECORDS | Only used when file_format is xml, specifies the tag name of the root element within the XML file. |
| xml_row_tag | string | no | RECORD | Only used when file_format is xml, specifies the tag name of the data rows within the XML file |
| xml_use_attr_format | boolean | no | - | Only used when file_format is xml, specifies Whether to process data using the tag attribute format. |
| encoding | string | no | "UTF-8" | Only used when file_format_type is json, text, csv or xml. |

### Tips

18 changes: 18 additions & 0 deletions docs/en/connector-v2/sink/LocalFile.md
@@ -56,6 +56,7 @@ By default, we use 2PC commit to ensure `exactly-once`
| xml_row_tag | string | no | RECORD | Only used when file_format is xml. |
| xml_use_attr_format | boolean | no | - | Only used when file_format is xml. |
| enable_header_write | boolean | no | false | Only used when file_format_type is text, csv.<br/> false: don't write header, true: write header. |
| encoding | string | no | "UTF-8" | Only used when file_format_type is json, text, csv or xml. |

### path [string]

@@ -188,6 +189,11 @@ Specifies whether to process data using the tag attribute format.

Only used when file_format_type is text or csv. false: don't write header; true: write header.

### encoding [string]

Only used when file_format_type is json, text, csv or xml.
The encoding of the file to write. This parameter will be parsed by `Charset.forName(encoding)`.

## Example

For orc file format simple config
@@ -201,6 +207,18 @@ LocalFile {

```

For json, text, csv or xml file format with `encoding`

```hocon

LocalFile {
path = "/tmp/hive/warehouse/test2"
file_format_type = "text"
encoding = "gbk"
}

```

For parquet file format with `sink_columns`

```bash
6 changes: 6 additions & 0 deletions docs/en/connector-v2/sink/OssFile.md
@@ -112,6 +112,7 @@ If writing to `csv` or `text` file types, all columns will be strings.
| xml_root_tag | string | no | RECORDS | Only used when file_format is xml. |
| xml_row_tag | string | no | RECORD | Only used when file_format is xml. |
| xml_use_attr_format | boolean | no | - | Only used when file_format is xml. |
| encoding | string | no | "UTF-8" | Only used when file_format_type is json, text, csv or xml. |

### path [string]

@@ -256,6 +257,11 @@ Specifies the tag name of the data rows within the XML file.

Specifies whether to process data using the tag attribute format.

### encoding [string]

Only used when file_format_type is json, text, csv or xml.
The encoding of the file to write. This parameter will be parsed by `Charset.forName(encoding)`.
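
A minimal sketch of this option in an OssFile sink block; bucket, endpoint and credential options are omitted, and the path is illustrative:

```hocon
OssFile {
    # bucket, endpoint and credential options omitted in this sketch
    path = "/seatunnel/encoding/json"
    file_format_type = "json"
    encoding = "gbk"
}
```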

## How to Create an Oss Data Synchronization Job

The following example demonstrates how to create a data synchronization job that reads data from Fake Source and writes it to the Oss:
6 changes: 6 additions & 0 deletions docs/en/connector-v2/sink/OssJindoFile.md
@@ -65,6 +65,7 @@ By default, we use 2PC commit to ensure `exactly-once`
| xml_root_tag | string | no | RECORDS | Only used when file_format is xml. |
| xml_row_tag | string | no | RECORD | Only used when file_format is xml. |
| xml_use_attr_format | boolean | no | - | Only used when file_format is xml. |
| encoding | string | no | "UTF-8" | Only used when file_format_type is json, text, csv or xml. |

### path [string]

@@ -209,6 +210,11 @@ Specifies the tag name of the data rows within the XML file.

Specifies whether to process data using the tag attribute format.

### encoding [string]

Only used when file_format_type is json, text, csv or xml.
The encoding of the file to write. This parameter will be parsed by `Charset.forName(encoding)`.
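
A minimal sketch for the OssJindoFile sink; the bucket and credential options this connector requires are omitted, and values are illustrative:

```hocon
OssJindoFile {
    # bucket and credential options omitted in this sketch
    path = "/seatunnel/encoding/text"
    file_format_type = "text"
    encoding = "gbk"
}
```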

## Example

For text file format with `have_partition` and `custom_filename` and `sink_columns`
6 changes: 6 additions & 0 deletions docs/en/connector-v2/sink/S3File.md
@@ -123,6 +123,7 @@ If writing to `csv` or `text` file types, all columns will be strings.
| hadoop_s3_properties | map | no | | If you need to add other options, you could add them here and refer to this [link](https://hadoop.apache.org/docs/stable/hadoop-aws/tools/hadoop-aws/index.html) |
| schema_save_mode | Enum | no | CREATE_SCHEMA_WHEN_NOT_EXIST | How to handle the target path before the synchronization task starts |
| data_save_mode | Enum | no | APPEND_DATA | How to handle existing data files in the target path before the synchronization task starts |
| encoding | string | no | "UTF-8" | Only used when file_format_type is json, text, csv or xml. |

### path [string]

@@ -278,6 +279,11 @@ Option introduction:
`APPEND_DATA`: use the path, and add new files to the path for writing data.
`ERROR_WHEN_DATA_EXISTS`: when there are already data files in the path, an error is reported.

### encoding [string]

Only used when file_format_type is json, text, csv or xml.
The encoding of the file to write. This parameter will be parsed by `Charset.forName(encoding)`.
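
A minimal sketch of this option in an S3File sink block; bucket and credential options are omitted, and values are illustrative:

```hocon
S3File {
    # bucket and credential options omitted in this sketch
    path = "/seatunnel/encoding/csv"
    file_format_type = "csv"
    encoding = "gbk"
}
```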

## Example

### Simple:
6 changes: 6 additions & 0 deletions docs/en/connector-v2/sink/SftpFile.md
@@ -59,6 +59,7 @@ By default, we use 2PC commit to ensure `exactly-once`
| xml_root_tag | string | no | RECORDS | Only used when file_format is xml. |
| xml_row_tag | string | no | RECORD | Only used when file_format is xml. |
| xml_use_attr_format | boolean | no | - | Only used when file_format is xml. |
| encoding | string | no | "UTF-8" | Only used when file_format_type is json, text, csv or xml. |

### host [string]

@@ -203,6 +204,11 @@ Specifies the tag name of the data rows within the XML file.

Specifies whether to process data using the tag attribute format.

### encoding [string]

Only used when file_format_type is json, text, csv or xml.
The encoding of the file to write. This parameter will be parsed by `Charset.forName(encoding)`.
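
A minimal sketch, assuming the usual SftpFile connection options (host, port, login) are configured as well; values are illustrative:

```hocon
SftpFile {
    # host, port and login options omitted in this sketch
    path = "/data/sink/xml"
    file_format_type = "xml"
    encoding = "gbk"
}
```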

## Example

For text file format with `have_partition` and `custom_filename` and `sink_columns`
6 changes: 6 additions & 0 deletions docs/en/connector-v2/source/CosFile.md
@@ -65,6 +65,7 @@ To use this connector you need to put hadoop-cos-{hadoop.version}-{version}.jar and
| xml_use_attr_format | boolean | no | - |
| file_filter_pattern | string | no | - |
| compress_codec | string | no | none |
| encoding | string | no | UTF-8 |
| common-options | | no | - |

### path [string]
@@ -277,6 +278,11 @@ The compress codec of files and the details supported are as shown below:
- orc/parquet:
automatically recognizes the compression type, no additional settings required.

### encoding [string]

Only used when file_format_type is json, text, csv or xml.
The encoding of the file to read. This parameter will be parsed by `Charset.forName(encoding)`.
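
A minimal sketch of this option in a CosFile source block; bucket, credentials and any schema settings the source may require are omitted, and values are illustrative:

```hocon
CosFile {
    # bucket, credentials and any required schema settings omitted in this sketch
    path = "/seatunnel/read/text"
    file_format_type = "text"
    encoding = "gbk"
}
```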

### common options

Source plugin common parameters, please refer to [Source Common Options](common-options.md) for details.
6 changes: 6 additions & 0 deletions docs/en/connector-v2/source/FtpFile.md
@@ -59,6 +59,7 @@ If you use SeaTunnel Engine, it automatically integrates the hadoop jar when you
| xml_use_attr_format | boolean | no | - |
| file_filter_pattern | string | no | - |
| compress_codec | string | no | none |
| encoding | string | no | UTF-8 |
| common-options | | no | - |

### host [string]
@@ -258,6 +259,11 @@ The compress codec of files and the details supported are as shown below:
- orc/parquet:
automatically recognizes the compression type, no additional settings required.

### encoding [string]

Only used when file_format_type is json, text, csv or xml.
The encoding of the file to read. This parameter will be parsed by `Charset.forName(encoding)`.
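
A minimal sketch of this option in an FtpFile source block; host, port, login and any schema settings the source may require are omitted, and values are illustrative:

```hocon
FtpFile {
    # host, port, login and any required schema settings omitted in this sketch
    path = "/data/read/text"
    file_format_type = "text"
    encoding = "gbk"
}
```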

### common options

Source plugin common parameters, please refer to [Source Common Options](common-options.md) for details.
6 changes: 6 additions & 0 deletions docs/en/connector-v2/source/HdfsFile.md
@@ -62,6 +62,7 @@ Read data from hdfs file system.
| xml_row_tag | string | no | - | Specifies the tag name of the data rows within the XML file, only used when file_format is xml. |
| xml_use_attr_format | boolean | no | - | Specifies whether to process data using the tag attribute format, only used when file_format is xml. |
| compress_codec | string | no | none | The compress codec of files |
| encoding | string | no | UTF-8 | Only used when file_format_type is json, text, csv or xml. |
| common-options | | no | - | Source plugin common parameters, please refer to [Source Common Options](common-options.md) for details. |

### delimiter/field_delimiter [string]
@@ -78,6 +79,11 @@ The compress codec of files and the details supported are as shown below:
- orc/parquet:
automatically recognizes the compression type, no additional settings required.

### encoding [string]

Only used when file_format_type is json, text, csv or xml.
The encoding of the file to read. This parameter will be parsed by `Charset.forName(encoding)`.
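
A minimal sketch of this option in an HdfsFile source block; the cluster address and other required options are omitted, and values are illustrative:

```hocon
HdfsFile {
    # cluster address (e.g. fs.defaultFS) and other required options omitted in this sketch
    path = "/seatunnel/read/text"
    file_format_type = "text"
    encoding = "gbk"
}
```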

### Tips

> If you use Spark/Flink, in order to use this connector you must ensure your Spark/Flink cluster has already integrated Hadoop. The tested Hadoop version is 2.x. If you use SeaTunnel Engine, it automatically integrates the Hadoop jar when you download and install SeaTunnel Engine. You can check the jar package under ${SEATUNNEL_HOME}/lib to confirm this.
18 changes: 18 additions & 0 deletions docs/en/connector-v2/source/LocalFile.md
@@ -59,6 +59,7 @@ If you use SeaTunnel Engine, it automatically integrates the hadoop jar when you
| xml_use_attr_format | boolean | no | - |
| file_filter_pattern | string | no | - |
| compress_codec | string | no | none |
| encoding | string | no | UTF-8 |
| common-options | | no | - |
| tables_configs | list | no | used to define a multiple table task |

@@ -256,6 +257,11 @@ The compress codec of files and the details supported are as shown below:
- orc/parquet:
automatically recognizes the compression type, no additional settings required.

### encoding [string]

Only used when file_format_type is json, text, csv or xml.
The encoding of the file to read. This parameter will be parsed by `Charset.forName(encoding)`.

### common options

Source plugin common parameters, please refer to [Source Common Options](common-options.md) for details
@@ -292,6 +298,18 @@ LocalFile {

```

For json, text, csv or xml file format with `encoding`

```hocon

LocalFile {
path = "/tmp/hive/warehouse/test2"
file_format_type = "text"
encoding = "gbk"
}

```

### Multiple Table

```hocon