-
Notifications
You must be signed in to change notification settings - Fork 2.5k
Description
1、schema.avsc:
{
"type":"record",
"name":"stock_ticks",
"fields":[{
"name": "uuid",
"type": "string"
}, {
"name": "ts",
"type": "long"
}, {
"name": "symbol",
"type": "string"
},{
"name": "year",
"type": "int"
},{
"name": "month",
"type": "int"
},{
"name": "high",
"type": "double"
},{
"name": "low",
"type": "double"
},{
"name": "key",
"type": "string"
},{
"name": "close",
"type": "double"
}, {
"name": "open",
"type": "double"
}, {
"name": "day",
"type":"string"
}
]}
2、hudi-conf.properties:
hoodie.datasource.write.recordkey.field=uuid
hoodie.datasource.write.partitionpath.field=ts
bootstrap.servers=hd-node-3-41.wakedata.com:6667
hoodie.deltastreamer.keygen.timebased.timestamp.type=EPOCHMILLISECONDS
hoodie.deltastreamer.keygen.timebased.output.dateformat=yyyy/MM/dd
hoodie.datasource.write.keygenerator.class=org.apache.hudi.keygen.TimestampBasedAvroKeyGenerator
hoodie.embed.timeline.server=false
hoodie.deltastreamer.schemaprovider.source.schema.file=hdfs://HDFSCluster/hudi/test/config/flink/schema.avsc
hoodie.deltastreamer.schemaprovider.target.schema.file=hdfs://HDFSCluster/hudi/test/config/flink/schema.avsc
3、HoodieFlinkStreamer Program arguments:
--kafka-bootstrap-servers hd-node-3-41.wakedata.com:6667 --kafka-group-id hudi --kafka-topic hudi_json_topic --target-table hudi_on_flink_local_9 --table-type COPY_ON_WRITE --target-base-path hdfs://HDFSCluster/hudi/test/data/hudi_on_flink_local_9 --props hdfs://HDFSCluster/hudi/test/config/flink/hudi-conf.properties --checkpoint-interval 60000 --flink-checkpoint-path hdfs://HDFSCluster/hudi/hudi_on_flink_local_9
4、Topic data:
{"uuid":"2", "ts":1608538277000, "symbol":"symbol1", "year":2020,"month":12,"high":0.1, "low":0.01, "key":"key1","close":0.1,"open":0.2, "day":"21"}
{"uuid":"2", "ts":1608538277000, "symbol":"symbol2", "year":2020,"month":12,"high":0.1, "low":0.01, "key":"key2","close":0.1,"open":0.2, "day":"21"}
{"uuid":"2", "ts":1608538277000, "symbol":"symbol3", "year":2020,"month":12,"high":0.1, "low":0.01, "key":"key3","close":0.1,"open":0.2, "day":"21"}
{"uuid":"2", "ts":1608538277000, "symbol":"symbol4", "year":2020,"month":12,"high":0.1, "low":0.01, "key":"key4","close":0.1,"open":0.2, "day":"21"}
5、Hive create table:
add jars hdfs://HDFSCluster/hudi/test/hudi-hadoop-mr-bundle-0.6.1-SNAPSHOT.jar;
CREATE EXTERNAL TABLE hudi_on_flink_local_9(
_hoodie_commit_time string,
_hoodie_commit_seqno string,
_hoodie_record_key string,
_hoodie_partition_path string,
_hoodie_file_name string,
uuid string,
ts bigint,
symbol string,
year int,
month int,
high double,
low double,
key string,
close double,
open double,
day string)
ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS INPUTFORMAT
'org.apache.hudi.hadoop.HoodieParquetInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION
'hdfs://HDFSCluster/hudi/test/data/hudi_on_flink_local_9';
When I query the Hive table, all upsert history (every record version) appears! But querying with Spark does not show the history — it returns only the latest records.
Please help me, thanks very much!