 import static org.apache.seatunnel.connectors.seatunnel.file.config.Constant.FILE_FORMAT;
 import static org.apache.seatunnel.connectors.seatunnel.file.config.Constant.FILE_NAME_EXPRESSION;
 import static org.apache.seatunnel.connectors.seatunnel.file.config.Constant.IS_PARTITION_FIELD_WRITE_IN_FILE;
+import static org.apache.seatunnel.connectors.seatunnel.file.config.Constant.PARTITION_BY;
 import static org.apache.seatunnel.connectors.seatunnel.file.config.Constant.PATH;
 import static org.apache.seatunnel.connectors.seatunnel.file.config.Constant.ROW_DELIMITER;
 import static org.apache.seatunnel.connectors.seatunnel.file.config.Constant.SAVE_MODE;
+import static org.apache.seatunnel.connectors.seatunnel.file.config.Constant.SINK_COLUMNS;
 import static org.apache.seatunnel.connectors.seatunnel.hive.config.HiveConfig.ORC_OUTPUT_FORMAT_CLASSNAME;
 import static org.apache.seatunnel.connectors.seatunnel.hive.config.HiveConfig.PARQUET_OUTPUT_FORMAT_CLASSNAME;
 import static org.apache.seatunnel.connectors.seatunnel.hive.config.HiveConfig.TEXT_OUTPUT_FORMAT_CLASSNAME;
 
 import org.apache.seatunnel.api.common.PrepareFailException;
 import org.apache.seatunnel.api.sink.SeaTunnelSink;
-import org.apache.seatunnel.api.table.type.SeaTunnelRowType;
+import org.apache.seatunnel.api.sink.SinkAggregatedCommitter;
 import org.apache.seatunnel.common.config.CheckConfigUtil;
 import org.apache.seatunnel.common.config.CheckResult;
 import org.apache.seatunnel.common.constants.PluginType;
 import org.apache.seatunnel.connectors.seatunnel.file.config.FileFormat;
 import org.apache.seatunnel.connectors.seatunnel.file.config.HadoopConf;
 import org.apache.seatunnel.connectors.seatunnel.file.hdfs.sink.BaseHdfsFileSink;
+import org.apache.seatunnel.connectors.seatunnel.file.sink.commit.FileAggregatedCommitInfo;
+import org.apache.seatunnel.connectors.seatunnel.file.sink.commit.FileCommitInfo;
 import org.apache.seatunnel.connectors.seatunnel.file.sink.config.SaveMode;
+import org.apache.seatunnel.connectors.seatunnel.hive.commit.HiveSinkAggregatedCommitter;
 import org.apache.seatunnel.connectors.seatunnel.hive.config.HiveConfig;
-import org.apache.seatunnel.connectors.seatunnel.hive.utils.HiveMetaStoreProxy;
 
 import org.apache.seatunnel.shade.com.typesafe.config.Config;
 import org.apache.seatunnel.shade.com.typesafe.config.ConfigValueFactory;
@@ -50,10 +54,12 @@
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.Table;
 
+import java.io.IOException;
 import java.net.URI;
 import java.net.URISyntaxException;
 import java.util.List;
 import java.util.Map;
+import java.util.Optional;
 import java.util.stream.Collectors;
 
 @AutoService(SeaTunnelSink.class)
@@ -67,29 +73,6 @@ public String getPluginName() {
         return "Hive";
     }
 
-    @Override
-    public void setTypeInfo(SeaTunnelRowType seaTunnelRowType) {
-        super.setTypeInfo(seaTunnelRowType);
-        HiveMetaStoreProxy hiveMetaStoreProxy = HiveMetaStoreProxy.getInstance(pluginConfig);
-        // --------------------Check textFileSinkConfig with the hive table info-------------------
-        List<FieldSchema> fields = hiveMetaStoreProxy.getTableFields(dbName, tableName);
-        List<FieldSchema> partitionKeys = tableInformation.getPartitionKeys();
-
-        // Remove partitionKeys from table fields
-        List<FieldSchema> fieldNotContainPartitionKey = fields.stream().filter(filed -> !partitionKeys.contains(filed)).collect(Collectors.toList());
-
-        // check fields size must same as sinkColumnList size
-        if (fieldNotContainPartitionKey.size() != textFileSinkConfig.getSinkColumnList().size()) {
-            throw new RuntimeException("sink columns size must same as hive table field size");
-        }
-
-        // check hivePartitionFieldList size must same as partitionFieldList size
-        if (partitionKeys.size() != textFileSinkConfig.getPartitionFieldList().size()) {
-            throw new RuntimeException("partition by columns size must same as hive table partition columns size");
-        }
-        hiveMetaStoreProxy.close();
-    }
-
     @Override
     public void prepare(Config pluginConfig) throws PrepareFailException {
         CheckResult result = CheckConfigUtil.checkAllExists(pluginConfig, HiveConfig.METASTORE_URI, HiveConfig.TABLE_NAME);
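
Note on the removal above: setTypeInfo cross-checked the user-supplied sink columns and partition fields against the metastore schema every time a writer was set up. After this patch the column lists are derived from the metastore itself inside prepare() (next hunk), so the two size checks hold by construction and the extra metastore round trip disappears. A minimal sketch of the resulting invariant, written against the sinkFields list built below (illustrative only, not part of the patch):

    // SINK_COLUMNS is assembled from the table's own schema, so it can no
    // longer disagree with that schema in size.
    assert sinkFields.size()
            == tableInformation.getSd().getCols().size()
            + tableInformation.getPartitionKeys().size();
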
@@ -100,6 +83,13 @@ public void prepare(Config pluginConfig) throws PrepareFailException {
         dbName = tableInfo.getLeft()[0];
         tableName = tableInfo.getLeft()[1];
         tableInformation = tableInfo.getRight();
+        List<String> sinkFields = tableInformation.getSd().getCols().stream()
+                .map(FieldSchema::getName)
+                .collect(Collectors.toList());
+        List<String> partitionKeys = tableInformation.getPartitionKeys().stream()
+                .map(FieldSchema::getName)
+                .collect(Collectors.toList());
+        sinkFields.addAll(partitionKeys);
         String outputFormat = tableInformation.getSd().getOutputFormat();
         if (TEXT_OUTPUT_FORMAT_CLASSNAME.equals(outputFormat)) {
             Map<String, String> parameters = tableInformation.getSd().getSerdeInfo().getParameters();
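
The block above reads the column layout straight from the metastore's Table object: getSd().getCols() holds the data columns and getPartitionKeys() the partition columns, which Hive stores separately and places after the data columns. A hedged sketch with made-up column names showing what the two lists contain for a partitioned table (StorageDescriptor, Arrays, and Collections imports elided):

    // Illustration only; the column names are invented for the example.
    Table table = new Table();
    StorageDescriptor sd = new StorageDescriptor();
    sd.setCols(Arrays.asList(
            new FieldSchema("id", "bigint", null),
            new FieldSchema("name", "string", null)));
    table.setSd(sd);
    table.setPartitionKeys(Collections.singletonList(
            new FieldSchema("dt", "string", null)));

    List<String> sinkFields = table.getSd().getCols().stream()
            .map(FieldSchema::getName)
            .collect(Collectors.toList());          // [id, name]
    List<String> partitionKeys = table.getPartitionKeys().stream()
            .map(FieldSchema::getName)
            .collect(Collectors.toList());          // [dt]
    sinkFields.addAll(partitionKeys);               // [id, name, dt]
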
@@ -113,10 +103,12 @@ public void prepare(Config pluginConfig) throws PrepareFailException {
         } else {
             throw new RuntimeException("Only support [text parquet orc] file now");
         }
-        pluginConfig = pluginConfig.withValue(IS_PARTITION_FIELD_WRITE_IN_FILE, ConfigValueFactory.fromAnyRef(false))
-            .withValue(FILE_NAME_EXPRESSION, ConfigValueFactory.fromAnyRef("${transactionId}"))
-            .withValue(PATH, ConfigValueFactory.fromAnyRef(tableInformation.getSd().getLocation()));
-
+        pluginConfig = pluginConfig
+                .withValue(IS_PARTITION_FIELD_WRITE_IN_FILE, ConfigValueFactory.fromAnyRef(false))
+                .withValue(FILE_NAME_EXPRESSION, ConfigValueFactory.fromAnyRef("${transactionId}"))
+                .withValue(PATH, ConfigValueFactory.fromAnyRef(tableInformation.getSd().getLocation()))
+                .withValue(SINK_COLUMNS, ConfigValueFactory.fromAnyRef(sinkFields))
+                .withValue(PARTITION_BY, ConfigValueFactory.fromAnyRef(partitionKeys));
         if (!pluginConfig.hasPath(SAVE_MODE) || StringUtils.isBlank(pluginConfig.getString(SAVE_MODE))) {
             pluginConfig = pluginConfig.withValue(SAVE_MODE, ConfigValueFactory.fromAnyRef(SaveMode.APPEND.toString()));
         }
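
Net effect of the rewritten chain: the generic file sink now receives the full target layout from Hive, writes files named after the transaction id under the table's storage location, and keeps partition values in the directory structure rather than in the file contents (IS_PARTITION_FIELD_WRITE_IN_FILE is false). A sketch of how the injected values read back through the Typesafe Config API, assuming the same pluginConfig and Constant keys as above:

    // What the downstream file sink sees after the withValue(...) chain.
    List<String> sinkColumns = pluginConfig.getStringList(SINK_COLUMNS);  // data + partition columns
    List<String> partitionBy = pluginConfig.getStringList(PARTITION_BY);  // partition columns only
    String basePath = pluginConfig.getString(PATH);                       // the table's location on HDFS
    boolean inFile = pluginConfig.getBoolean(IS_PARTITION_FIELD_WRITE_IN_FILE); // false
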
@@ -131,4 +123,9 @@ public void prepare(Config pluginConfig) throws PrepareFailException {
         }
         this.pluginConfig = pluginConfig;
     }
+
+    @Override
+    public Optional<SinkAggregatedCommitter<FileCommitInfo, FileAggregatedCommitInfo>> createAggregatedCommitter() throws IOException {
+        return Optional.of(new HiveSinkAggregatedCommitter(pluginConfig, dbName, tableName));
+    }
 }
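
The new override is the counterpart of the deleted setTypeInfo logic: instead of each writer talking to the metastore, a single HiveSinkAggregatedCommitter finalizes the write once all writers have reported their files. A hedged sketch of the lifecycle as an engine might drive it, using SeaTunnel's SinkAggregatedCommitter methods (combine, commit, abort, close); HiveSink, driveCommit, and commitInfosFromWriters are illustrative names, not part of the patch:

    // Hypothetical driver; real engine wiring differs and is more involved.
    void driveCommit(HiveSink hiveSink, List<FileCommitInfo> commitInfosFromWriters) throws Exception {
        SinkAggregatedCommitter<FileCommitInfo, FileAggregatedCommitInfo> committer =
                hiveSink.createAggregatedCommitter().orElseThrow(IllegalStateException::new);
        try {
            // Collapse every writer's commit info into one aggregated record.
            FileAggregatedCommitInfo aggregated = committer.combine(commitInfosFromWriters);
            // Commit; entries that come back are the ones that failed.
            List<FileAggregatedCommitInfo> failed =
                    committer.commit(Collections.singletonList(aggregated));
            if (!failed.isEmpty()) {
                committer.abort(failed); // roll back what could not be committed
            }
        } finally {
            committer.close();
        }
    }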