Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[SPARK-23599][SQL] Add a UUID generator from Pseudo-Random Numbers
## What changes were proposed in this pull request? This patch adds a generator that creates UUIDs from pseudo-random numbers. It can be used later to implement a deterministic `UUID()` expression. ## How was this patch tested? Added unit tests. Author: Liang-Chi Hsieh <viirya@gmail.com> Closes #20817 from viirya/SPARK-23599.
- Loading branch information
1 parent
745c8c0
commit 4de638c
Showing
2 changed files
with
100 additions
and
0 deletions.
There are no files selected for viewing
43 changes: 43 additions & 0 deletions
43
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/RandomUUIDGenerator.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.spark.sql.catalyst.util | ||
|
||
import java.util.UUID | ||
|
||
import org.apache.commons.math3.random.MersenneTwister | ||
|
||
import org.apache.spark.unsafe.types.UTF8String | ||
|
||
/**
 * Produces version 4, variant 2 UUIDs out of a seeded pseudo-random number stream, so that two
 * generators created with the same seed hand out the same sequence of UUIDs.
 *
 * The bit layout follows RFC 4122: A Universally Unique IDentifier (UUID) URN Namespace,
 * section 4.4 "Algorithms for Creating a UUID from Truly Random or Pseudo-Random Numbers".
 *
 * NOTE(review): every call advances the internal generator state; presumably an instance is
 * not meant to be shared between threads -- confirm against callers.
 *
 * @param randomSeed seed handed to the underlying Mersenne Twister generator
 */
case class RandomUUIDGenerator(randomSeed: Long) {
  private val random = new MersenneTwister(randomSeed)

  /**
   * Consumes two longs from the generator and assembles them into the next UUID.
   *
   * @return a UUID whose version field is 4 and whose variant field is 2
   */
  def getNextUUID(): UUID = {
    // The draw order is part of the contract: the high half is drawn first, then the low half.
    val highDraw = random.nextLong()
    val lowDraw = random.nextLong()

    // Wipe the 4-bit version field (bits 12-15 of the high half), then write 0100 (version 4).
    val highBits = (highDraw & ~0xF000L) | 0x4000L
    // Force the two top bits of the low half to binary 10 (variant 2).
    val lowBits = (lowDraw & 0x3FFFFFFFFFFFFFFFL) | 0x8000000000000000L

    new UUID(highBits, lowBits)
  }

  /**
   * Same as [[getNextUUID]], but returns the UUID's string form wrapped in a `UTF8String`.
   *
   * @return the string form of the next UUID as a `UTF8String`
   */
  def getNextUUIDUTF8String(): UTF8String = {
    val rendered = getNextUUID().toString()
    UTF8String.fromString(rendered)
  }
}
57 changes: 57 additions & 0 deletions
57
...catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/RandomUUIDGeneratorSuite.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.spark.sql.catalyst.util | ||
|
||
import scala.util.Random | ||
|
||
import org.apache.spark.SparkFunSuite | ||
|
||
/**
 * Tests for [[RandomUUIDGenerator]]: the generated UUIDs must be version 4 / variant 2, the
 * sequence must depend only on the seed, and the `UTF8String` form must parse back to the
 * same UUID.
 *
 * Tests that pick a fresh random seed attach that seed to every assertion, so a failing run
 * can be replayed with the exact same input.
 */
class RandomUUIDGeneratorSuite extends SparkFunSuite {
  test("RandomUUIDGenerator should generate version 4, variant 2 UUIDs") {
    // The seed differs on every run; keep it so that it can be reported on failure.
    val seed = new Random().nextLong()
    val generator = RandomUUIDGenerator(seed)
    for (_ <- 0 to 100) {
      val uuid = generator.getNextUUID()
      val clue = s"(seed = $seed, uuid = $uuid)"
      assert(uuid.version() == 4, clue)
      assert(uuid.variant() == 2, clue)
    }
  }

  test("UUID from RandomUUIDGenerator should be deterministic") {
    // generator1 and generator2 share a seed; generator3 is seeded differently.
    val r1 = new Random(100)
    val generator1 = RandomUUIDGenerator(r1.nextLong())
    val r2 = new Random(100)
    val generator2 = RandomUUIDGenerator(r2.nextLong())
    val r3 = new Random(101)
    val generator3 = RandomUUIDGenerator(r3.nextLong())

    for (_ <- 0 to 100) {
      val uuid1 = generator1.getNextUUID()
      val uuid2 = generator2.getNextUUID()
      val uuid3 = generator3.getNextUUID()
      assert(uuid1 == uuid2)
      assert(uuid1 != uuid3)
    }
  }

  test("Get UTF8String UUID") {
    // The seed differs on every run; keep it so that it can be reported on failure.
    val seed = new Random().nextLong()
    val generator = RandomUUIDGenerator(seed)
    val utf8StringUUID = generator.getNextUUIDUTF8String()
    val uuid = java.util.UUID.fromString(utf8StringUUID.toString)
    val clue = s"(seed = $seed, uuid = $utf8StringUUID)"
    // Checked one by one so that a failure names the exact property that broke.
    assert(uuid.version() == 4, clue)
    assert(uuid.variant() == 2, clue)
    assert(utf8StringUUID.toString == uuid.toString, clue)
  }
}