# Apache Hudi Query Tests

Author: Gary Stafford
Date: 2022-12-08

In [None]:
## Amazon EMR cluster configuration used for this notebook

# aws emr create-cluster \
#     --os-release-label 2.0.20221103.3 --termination-protected \
#     --applications Name=Hadoop Name=Spark Name=JupyterEnterpriseGateway Name=Livy Name=JupyterHub \
#     --ec2-attributes '{"KeyName":"emr-demo-111222333444-us-east-1","InstanceProfile":"EMR_EC2_DefaultRole","SubnetId":"subnet-067e7e7ee0fd99b2b","EmrManagedSlaveSecurityGroup":"sg-0cc2191e5af867588","EmrManagedMasterSecurityGroup":"sg-0be73448b3814bbc7"}' \
#     --release-label emr-6.8.0 --log-uri 's3n://aws-logs-111222333444-us-east-1/elasticmapreduce/' \
#     --instance-groups '[{"InstanceCount":1,"EbsConfiguration":{"EbsBlockDeviceConfigs":[{"VolumeSpecification":{"SizeInGB":32,"VolumeType":"gp2"},"VolumesPerInstance":2}]},"InstanceGroupType":"MASTER","InstanceType":"m5.xlarge","Name":"Master - 1"},{"InstanceCount":1,"EbsConfiguration":{"EbsBlockDeviceConfigs":[{"VolumeSpecification":{"SizeInGB":32,"VolumeType":"gp2"},"VolumesPerInstance":2}]},"InstanceGroupType":"CORE","InstanceType":"m5.xlarge","Name":"Core - 2"}]' \
#     --configurations '[{"Classification":"spark-hive-site","Properties":{"hive.metastore.client.factory.class":"com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory"}}]' \
#     --auto-scaling-role EMR_AutoScaling_DefaultRole --ebs-root-volume-size 100 --service-role EMR_DefaultRole --enable-debugging \
#     --name 'DemoCluster_6_8' --scale-down-behavior TERMINATE_AT_TASK_COMPLETION --region us-east-1

In [None]:
# supplied config

In [None]:
# %%configure -f

# { "conf": {
#         "spark.pyspark.virtualenv.enabled": "false",
#         "spark.jars": "hdfs:///apps/hudi/lib/hudi-spark-bundle.jar",
#         "spark.serializer": "org.apache.spark.serializer.KryoSerializer",
#         "spark.sql.catalog.spark_catalog": "org.apache.spark.sql.hudi.catalog.HoodieCatalog",
#         "spark.sql.extensions": "org.apache.spark.sql.hudi.HoodieSparkSessionExtension",
#         "spark.hadoop.hive.metastore.client.factory.class": "com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory",
#         "spark.hadoop.hive.metastore.schema.verification": "false",
#         "spark.hadoop.fs.s3.canned.acl": "AuthenticatedRead",
#         "spark.sql.catalogImplementation": "hive"
# }}

In [None]:
# default aws hudi emr config
# works fine
# https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-hudi-work-with-dataset.html

In [None]:
# %%configure -f

# {
#     "conf": {
#         "spark.jars":
#             "hdfs:///apps/hudi/lib/hudi-spark-bundle.jar,hdfs:///apps/hudi/lib/spark-avro.jar",
#         "spark.serializer":
#             "org.apache.spark.serializer.KryoSerializer",
#         "spark.sql.hive.convertMetastoreParquet":
#             "false"
#     }
# }

In [None]:
# combined configs - default aws hudi emr config and emr glue data catalog config
# https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-hudi-work-with-dataset.html
# https://docs.aws.amazon.com/emr/latest/EMR-Serverless-UserGuide/metastore-config.html#glue-metastore

# Note that "spark.hadoop.hive.metastore.client.factory.class" is extraneous since it was set in the EMR cluster config when provisioned (see first cell)

In [18]:
%%configure -f

{
    "conf": {
        "spark.jars":
            "hdfs:///apps/hudi/lib/hudi-spark-bundle.jar,hdfs:///apps/hudi/lib/spark-avro.jar",
        "spark.serializer":
            "org.apache.spark.serializer.KryoSerializer",
        "spark.sql.hive.convertMetastoreParquet":
            "false",
        "spark.hadoop.hive.metastore.client.factory.class":
            "com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory"
    }
}

Starting Spark application


ID,YARN Application ID,Kind,State,Spark UI,Driver log,User,Current session?
13,application_1670528992887_0014,pyspark,idle,Link,Link,,✔


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

SparkSession available as 'spark'.


ID,YARN Application ID,Kind,State,Spark UI,Driver log,User,Current session?
10,application_1670528992887_0011,pyspark,idle,Link,Link,,
13,application_1670528992887_0014,pyspark,idle,Link,Link,,✔


In [2]:
from pyspark.sql import SparkSession

spark = SparkSession \
    .builder \
    .enableHiveSupport() \
    .getOrCreate()

VBox()

Starting Spark application


ID,YARN Application ID,Kind,State,Spark UI,Driver log,User,Current session?
12,application_1670528992887_0013,pyspark,idle,Link,Link,,✔


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

SparkSession available as 'spark'.


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

In [3]:
# amazon s3 path to hudi table key (sub-directory)

base_path = 's3://open-data-lake-demo-us-east-1/hudi_demo'

VBox()

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

In [None]:
# https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-hudi-work-with-dataset.html#emr-hudi-dataframe

# Create a DataFrame
input_df = spark.createDataFrame([
    ("100", "2015-01-01", "2015-01-02T13:51:39.340396Z"),
    ("101", "2015-01-01", "2015-01-02T12:14:58.597216Z"),
    ("102", "2015-01-01", "2015-01-02T13:51:40.417052Z"),
    ("103", "2015-01-01", "2015-01-02T13:51:40.519832Z"),
    ("104", "2015-01-02", "2015-01-02T12:15:00.512679Z"),
    ("105", "2015-01-02", "2015-01-02T13:51:42.248818Z"),
], ["id", "creation_date", "last_update_time"])

# Specify common DataSourceWriteOptions in the single hudi options variable
# https://hudi.apache.org/docs/configurations
hudi_write_options = {
    'hoodie.datasource.write.operation':
        'upsert',
    'hoodie.table.name':
        'my_hudi_table',
    'hoodie.datasource.write.table.type':
        'COPY_ON_WRITE',
    'hoodie.datasource.write.recordkey.field':
        'id',
    'hoodie.datasource.write.partitionpath.field':
        'creation_date',
    'hoodie.datasource.write.precombine.field':
        'last_update_time',
    'hoodie.datasource.hive_sync.enable':
        'true',
    'hoodie.datasource.hive_sync.database':
        'default',
    'hoodie.datasource.hive_sync.table':
        'my_hudi_table',
    'hoodie.datasource.hive_sync.partition_fields':
        'creation_date',
    'hoodie.datasource.hive_sync.partition_extractor_class':
        'org.apache.hudi.hive.MultiPartKeysValueExtractor',
    'hoodie.datasource.hive_sync.mode':
        'hms'
}

# Write a DataFrame as a Hudi dataset
input_df.write \
    .format('org.apache.hudi') \
    .options(**hudi_write_options) \
    .mode('append') \
    .save(base_path)

In [13]:
# Create a DataFrame
input_df = spark.createDataFrame([
    ("110", "2016-01-02", "2016-01-02T13:51:39.340396Z"),
    ("111", "2016-01-02", "2016-01-02T12:14:58.597216Z"),
    ("112", "2016-01-02", "2016-01-02T13:51:40.417052Z"),
    ("113", "2016-01-02", "2016-01-02T13:51:40.519832Z"),
    ("114", "2016-01-02", "2016-01-02T12:15:00.512679Z"),
    ("115", "2016-01-02", "2016-01-02T13:51:42.248818Z"),
], ["id", "creation_date", "last_update_time"])

# Specify common DataSourceWriteOptions in the single hudi options variable
# https://hudi.apache.org/docs/configurations
hudi_write_options = {
    'hoodie.datasource.write.operation':
        'upsert',
    'hoodie.table.name':
        'my_hudi_table',
    'hoodie.datasource.write.table.type':
        'COPY_ON_WRITE',
    'hoodie.datasource.write.recordkey.field':
        'id',
    'hoodie.datasource.write.partitionpath.field':
        'creation_date',
    'hoodie.datasource.write.precombine.field':
        'last_update_time',
    'hoodie.datasource.hive_sync.enable':
        'true',
    'hoodie.datasource.hive_sync.database':
        'default',
    'hoodie.datasource.hive_sync.table':
        'my_hudi_table',
    'hoodie.datasource.hive_sync.partition_fields':
        'creation_date',
    'hoodie.datasource.hive_sync.partition_extractor_class':
        'org.apache.hudi.hive.MultiPartKeysValueExtractor',
    'hoodie.datasource.hive_sync.mode':
        'hms'
}

# Write a DataFrame as a Hudi dataset
input_df.write \
    .format('org.apache.hudi') \
    .options(**hudi_write_options) \
    .mode('append') \
    .save(base_path)

VBox()

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

In [14]:
# load data into a dataframe from s3

snapshot_query_df = spark.read \
    .format('org.apache.hudi') \
    .load(base_path + '/*/*')

snapshot_query_df.show(25)

VBox()

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

+-------------------+--------------------+------------------+----------------------+--------------------+---+-------------+--------------------+
|_hoodie_commit_time|_hoodie_commit_seqno|_hoodie_record_key|_hoodie_partition_path|   _hoodie_file_name| id|creation_date|    last_update_time|
+-------------------+--------------------+------------------+----------------------+--------------------+---+-------------+--------------------+
|  20221209033946565|20221209033946565...|               114|            2016-01-02|1c996d1d-3fd5-4ac...|114|   2016-01-02|2016-01-02T12:15:...|
|  20221209033946565|20221209033946565...|               115|            2016-01-02|1c996d1d-3fd5-4ac...|115|   2016-01-02|2016-01-02T13:51:...|
|  20221209033946565|20221209033946565...|               110|            2016-01-02|1c996d1d-3fd5-4ac...|110|   2016-01-02|2016-01-02T13:51:...|
|  20221209033946565|20221209033946565...|               111|            2016-01-02|1c996d1d-3fd5-4ac...|111|   2016-01-02|2016-01

In [5]:
# load data into a dataframe from glue data catalog

glue_query_df = spark.sql("select * from `default`.`my_hudi_table`;")

glue_query_df.show()

VBox()

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

+-------------------+--------------------+------------------+----------------------+--------------------+---+--------------------+-------------+
|_hoodie_commit_time|_hoodie_commit_seqno|_hoodie_record_key|_hoodie_partition_path|   _hoodie_file_name| id|    last_update_time|creation_date|
+-------------------+--------------------+------------------+----------------------+--------------------+---+--------------------+-------------+
|  20221209005913239|20221209005913239...|               114|            2016-01-02|1c996d1d-3fd5-4ac...|114|2016-01-02T12:15:...|   2016-01-02|
|  20221209005913239|20221209005913239...|               115|            2016-01-02|1c996d1d-3fd5-4ac...|115|2016-01-02T13:51:...|   2016-01-02|
|  20221209005913239|20221209005913239...|               110|            2016-01-02|1c996d1d-3fd5-4ac...|110|2016-01-02T13:51:...|   2016-01-02|
|  20221209005913239|20221209005913239...|               111|            2016-01-02|1c996d1d-3fd5-4ac...|111|2016-01-02T12:14:...|

In [6]:
# load data into a dataframe from glue data catalog

as_of_time = '20221209033946565'

incremental_df = spark.read \
    .format('org.apache.hudi') \
    .option('as.of.instant', as_of_time) \
    .table('default.my_hudi_table')

incremental_df.show()

VBox()

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

+-------------------+--------------------+------------------+----------------------+--------------------+---+--------------------+-------------+
|_hoodie_commit_time|_hoodie_commit_seqno|_hoodie_record_key|_hoodie_partition_path|   _hoodie_file_name| id|    last_update_time|creation_date|
+-------------------+--------------------+------------------+----------------------+--------------------+---+--------------------+-------------+
|  20221209005913239|20221209005913239...|               114|            2016-01-02|1c996d1d-3fd5-4ac...|114|2016-01-02T12:15:...|   2016-01-02|
|  20221209005913239|20221209005913239...|               115|            2016-01-02|1c996d1d-3fd5-4ac...|115|2016-01-02T13:51:...|   2016-01-02|
|  20221209005913239|20221209005913239...|               110|            2016-01-02|1c996d1d-3fd5-4ac...|110|2016-01-02T13:51:...|   2016-01-02|
|  20221209005913239|20221209005913239...|               111|            2016-01-02|1c996d1d-3fd5-4ac...|111|2016-01-02T12:14:...|

In [15]:
# load data into a dataframe from glue data catalog as of a date/time using SparkSQL
# throws error: java.lang.UnsupportedOperationException: Table default.my_hudi_table does not support time travel.

as_of_time = '20221209005913239'

incremental_df = spark.sql(
    'select * from default.my_hudi_table timestamp as of ' + as_of_time + ';')

incremental_df.show()

VBox()

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

An error was encountered:
An error occurred while calling o88.sql.
: java.lang.UnsupportedOperationException: Table default.my_hudi_table does not support time travel.
	at org.apache.spark.sql.errors.QueryCompilationErrors$.tableNotSupportTimeTravelError(QueryCompilationErrors.scala:2412)
	at org.apache.spark.sql.execution.datasources.v2.V2SessionCatalog.failTimeTravel(V2SessionCatalog.scala:83)
	at org.apache.spark.sql.execution.datasources.v2.V2SessionCatalog.loadTable(V2SessionCatalog.scala:70)
	at org.apache.spark.sql.connector.catalog.CatalogV2Util$.loadTable(CatalogV2Util.scala:308)
	at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$.$anonfun$lookupRelation$3(Analyzer.scala:1206)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$.$anonfun$lookupRelation$1(Analyzer.scala:1205)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$.org$apache$spark

In [17]:
%%sql

select * from default.my_hudi_table timestamp as of 20221209005913239;

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

An error was encountered:
An error occurred while calling o88.sql.
: java.lang.UnsupportedOperationException: Table default.my_hudi_table does not support time travel.
	at org.apache.spark.sql.errors.QueryCompilationErrors$.tableNotSupportTimeTravelError(QueryCompilationErrors.scala:2412)
	at org.apache.spark.sql.execution.datasources.v2.V2SessionCatalog.failTimeTravel(V2SessionCatalog.scala:83)
	at org.apache.spark.sql.execution.datasources.v2.V2SessionCatalog.loadTable(V2SessionCatalog.scala:70)
	at org.apache.spark.sql.connector.catalog.CatalogV2Util$.loadTable(CatalogV2Util.scala:308)
	at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$.$anonfun$lookupRelation$3(Analyzer.scala:1206)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$.$anonfun$lookupRelation$1(Analyzer.scala:1205)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$.org$apache$spark

In [8]:
# incremental query
# load data into a dataframe from glue data catalog starting as a date/time
# https://hudi.apache.org/docs/quick-start-guide/#incremental-query

instant_time = '20221208000000000'

incremental_read_options = {
    'hoodie.datasource.query.type': 'incremental',
    'hoodie.datasource.read.begin.instanttime': instant_time,
}

incremental_df = spark.read \
    .format('org.apache.hudi') \
    .options(**incremental_read_options) \
    .load(base_path)

incremental_df.show()

VBox()

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

+-------------------+--------------------+------------------+----------------------+--------------------+---+-------------+--------------------+
|_hoodie_commit_time|_hoodie_commit_seqno|_hoodie_record_key|_hoodie_partition_path|   _hoodie_file_name| id|creation_date|    last_update_time|
+-------------------+--------------------+------------------+----------------------+--------------------+---+-------------+--------------------+
|  20221209005913239|20221209005913239...|               114|            2016-01-02|1c996d1d-3fd5-4ac...|114|   2016-01-02|2016-01-02T12:15:...|
|  20221209005913239|20221209005913239...|               115|            2016-01-02|1c996d1d-3fd5-4ac...|115|   2016-01-02|2016-01-02T13:51:...|
|  20221209005913239|20221209005913239...|               110|            2016-01-02|1c996d1d-3fd5-4ac...|110|   2016-01-02|2016-01-02T13:51:...|
|  20221209005913239|20221209005913239...|               111|            2016-01-02|1c996d1d-3fd5-4ac...|111|   2016-01-02|2016-01

In [9]:
# add contents of dataframe to a temporary view and then query using SparkSQL

incremental_df.createOrReplaceTempView('hudi_incremental')

spark.sql(
    'select * from `default`.`my_hudi_table` where id > 110 sort by id').show()

VBox()

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

+-------------------+--------------------+------------------+----------------------+--------------------+---+--------------------+-------------+
|_hoodie_commit_time|_hoodie_commit_seqno|_hoodie_record_key|_hoodie_partition_path|   _hoodie_file_name| id|    last_update_time|creation_date|
+-------------------+--------------------+------------------+----------------------+--------------------+---+--------------------+-------------+
|  20221209005913239|20221209005913239...|               111|            2016-01-02|1c996d1d-3fd5-4ac...|111|2016-01-02T12:14:...|   2016-01-02|
|  20221209005913239|20221209005913239...|               112|            2016-01-02|1c996d1d-3fd5-4ac...|112|2016-01-02T13:51:...|   2016-01-02|
|  20221209005913239|20221209005913239...|               113|            2016-01-02|1c996d1d-3fd5-4ac...|113|2016-01-02T13:51:...|   2016-01-02|
|  20221209005913239|20221209005913239...|               114|            2016-01-02|1c996d1d-3fd5-4ac...|114|2016-01-02T12:15:...|

In [10]:
# time travel query
# https://hudi.apache.org/docs/0.11.1/quick-start-guide/#time-travel-query

spark.read \
    .format('org.apache.hudi') \
    .load(base_path) \
    .groupBy('_hoodie_commit_time').count().show()

instant0 = '2022-12-08 00:00:00.000'  # before any records were created - should see no records

instant1 = '2022-12-08 18:08:27.522'  # ids 100 - 105 created  - should see (6) new records

instant2 = '2022-12-08 18:13:30.830'  # ids 110 - 115 created - should see (12) new records

instant3 = '2022-12-08 18:52:24.525'  # ids 100 - 105 updated - should see (6) new records and (6) modified records

instant4 = '2022-12-08 19:15:27.968'  # ids 110 - 115 updated - should see (12) modified records

instant5 = '2022-12-09 00:59:13.239'  # ids 110 - 115 updated second time - should see (12) modified records

instant6 = '2022-12-09 03:39:46.565'  # ids 110 - 115 updated third time - should see (12) modified records

instant7 = '2022 12 09 00:00:00.000'  # after latest changes - should see (12) modified/latest records

incremental_df = spark.read \
    .format('org.apache.hudi') \
    .option('as.of.instant', instant4) \
    .load(base_path)

incremental_df.show(20)

VBox()

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

+-------------------+-----+
|_hoodie_commit_time|count|
+-------------------+-----+
|  20221209005913239|    6|
|  20221208185224525|    6|
+-------------------+-----+

+-------------------+--------------------+------------------+----------------------+--------------------+---+--------------------+-------------+
|_hoodie_commit_time|_hoodie_commit_seqno|_hoodie_record_key|_hoodie_partition_path|   _hoodie_file_name| id|    last_update_time|creation_date|
+-------------------+--------------------+------------------+----------------------+--------------------+---+--------------------+-------------+
|  20221208191527968|20221208191527968...|               114|            2016-01-02|1c996d1d-3fd5-4ac...|114|2016-01-02T12:15:...|   2016-01-02|
|  20221208191527968|20221208191527968...|               115|            2016-01-02|1c996d1d-3fd5-4ac...|115|2016-01-02T13:51:...|   2016-01-02|
|  20221208191527968|20221208191527968...|               110|            2016-01-02|1c996d1d-3fd5-4ac...|1

In [11]:
# incremental query
# load data into a dataframe from glue data catalog starting and ending with a date/time
# https://hudi.apache.org/docs/quick-start-guide/#incremental-query
# https://hudi.apache.org/docs/configurations/#hoodiedatasourcereadendinstanttime

begin_time = 20221208000000000
end_time = 20221209000000000

incremental_read_options = {
    'hoodie.datasource.query.type': 'incremental',
    'hoodie.datasource.read.begin.instanttime': begin_time,
    'hoodie.datasource.read.end.instanttime': end_time
}

incremental_df = spark.read \
    .format('org.apache.hudi') \
    .options(**incremental_read_options) \
    .load(base_path)

incremental_df.show()

VBox()

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

+-------------------+--------------------+------------------+----------------------+--------------------+---+-------------+--------------------+
|_hoodie_commit_time|_hoodie_commit_seqno|_hoodie_record_key|_hoodie_partition_path|   _hoodie_file_name| id|creation_date|    last_update_time|
+-------------------+--------------------+------------------+----------------------+--------------------+---+-------------+--------------------+
|  20221208191527968|20221208191527968...|               114|            2016-01-02|1c996d1d-3fd5-4ac...|114|   2016-01-02|2016-01-02T12:15:...|
|  20221208191527968|20221208191527968...|               115|            2016-01-02|1c996d1d-3fd5-4ac...|115|   2016-01-02|2016-01-02T13:51:...|
|  20221208191527968|20221208191527968...|               110|            2016-01-02|1c996d1d-3fd5-4ac...|110|   2016-01-02|2016-01-02T13:51:...|
|  20221208191527968|20221208191527968...|               111|            2016-01-02|1c996d1d-3fd5-4ac...|111|   2016-01-02|2016-01

In [12]:
# add contents of dataframe to a temporary view and then query using SparkSQL

incremental_df.createOrReplaceTempView('hudi_incremental')

spark.sql(
    'select * from `default`.`my_hudi_table` where id > 110 sort by id').show()

VBox()

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

+-------------------+--------------------+------------------+----------------------+--------------------+---+--------------------+-------------+
|_hoodie_commit_time|_hoodie_commit_seqno|_hoodie_record_key|_hoodie_partition_path|   _hoodie_file_name| id|    last_update_time|creation_date|
+-------------------+--------------------+------------------+----------------------+--------------------+---+--------------------+-------------+
|  20221209005913239|20221209005913239...|               111|            2016-01-02|1c996d1d-3fd5-4ac...|111|2016-01-02T12:14:...|   2016-01-02|
|  20221209005913239|20221209005913239...|               112|            2016-01-02|1c996d1d-3fd5-4ac...|112|2016-01-02T13:51:...|   2016-01-02|
|  20221209005913239|20221209005913239...|               113|            2016-01-02|1c996d1d-3fd5-4ac...|113|2016-01-02T13:51:...|   2016-01-02|
|  20221209005913239|20221209005913239...|               114|            2016-01-02|1c996d1d-3fd5-4ac...|114|2016-01-02T12:15:...|