New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-23599][SQL] Add a UUID generator from Pseudo-Random Numbers #20817
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.spark.sql.catalyst.util | ||
|
||
import java.util.UUID | ||
|
||
import org.apache.commons.math3.random.MersenneTwister | ||
|
||
import org.apache.spark.unsafe.types.UTF8String | ||
|
||
/** | ||
* This class is used to generate a UUID from Pseudo-Random Numbers. | ||
* | ||
* For the algorithm, see RFC 4122: A Universally Unique IDentifier (UUID) URN Namespace, | ||
* section 4.4 "Algorithms for Creating a UUID from Truly Random or Pseudo-Random Numbers". | ||
*/ | ||
case class RandomUUIDGenerator(randomSeed: Long) { | ||
// Pseudo-random source seeded with randomSeed; both 64-bit halves of every UUID are drawn from it. | ||
private val random = new MersenneTwister(randomSeed) | ||
|
||
// Builds the next UUID from two consecutive random.nextLong() draws: the first becomes the | ||
// most significant 64 bits, the second the least significant 64 bits. | ||
def getNextUUID(): UUID = { | ||
// Clear bits 12-15 of the high word, then set bit 14, so the 4-bit version field reads 0100 | ||
// (version 4). | ||
val mostSigBits = (random.nextLong() & 0xFFFFFFFFFFFF0FFFL) | 0x0000000000004000L | ||
// Set bit 63 and clear bit 62 of the low word, so its top two bits read 10 (variant 2). | ||
val leastSigBits = (random.nextLong() | 0x8000000000000000L) & 0xBFFFFFFFFFFFFFFFL | ||
|
||
new UUID(mostSigBits, leastSigBits) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we need to use a different RNG. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ok. Mersenne Twister is used in the update. |
||
} | ||
|
||
// Returns the next UUID as a UTF8String: calls getNextUUID() once and wraps the UUID's | ||
// toString() form via UTF8String.fromString. | ||
def getNextUUIDUTF8String(): UTF8String = UTF8String.fromString(getNextUUID().toString()) | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.spark.sql.catalyst.util | ||
|
||
import scala.util.Random | ||
|
||
import org.apache.spark.SparkFunSuite | ||
|
||
/**
 * Tests for `RandomUUIDGenerator`: the generated UUIDs must carry version 4 and variant 2,
 * the output must be a pure function of the seed, and the UTF8String form must round-trip
 * through `java.util.UUID`.
 *
 * Tests that pick a random seed attach that seed to every assertion failure (via `withClue`)
 * so a failing run can be reproduced by hard-coding the reported value.
 */
class RandomUUIDGeneratorSuite extends SparkFunSuite {
  test("RandomUUIDGenerator should generate version 4, variant 2 UUIDs") {
    // Capture the seed instead of discarding it, so a failure is reproducible.
    val seed = new Random().nextLong()
    val generator = RandomUUIDGenerator(seed)
    withClue(s"RandomUUIDGenerator seed = $seed: ") {
      for (_ <- 0 to 100) {
        val uuid = generator.getNextUUID()
        assert(uuid.version() == 4)
        assert(uuid.variant() == 2)
      }
    }
  }

  test("UUID from RandomUUIDGenerator should be deterministic") {
    // generator1 and generator2 are built from the same derived seed; generator3 from another.
    val r1 = new Random(100)
    val generator1 = RandomUUIDGenerator(r1.nextLong())
    val r2 = new Random(100)
    val generator2 = RandomUUIDGenerator(r2.nextLong())
    val r3 = new Random(101)
    val generator3 = RandomUUIDGenerator(r3.nextLong())

    for (_ <- 0 to 100) {
      val uuid1 = generator1.getNextUUID()
      val uuid2 = generator2.getNextUUID()
      val uuid3 = generator3.getNextUUID()
      // Same seed => identical sequence; different seed => different UUID at every position.
      assert(uuid1 == uuid2)
      assert(uuid1 != uuid3)
    }
  }

  test("Get UTF8String UUID") {
    val seed = new Random().nextLong()
    val generator = RandomUUIDGenerator(seed)
    val utf8StringUUID = generator.getNextUUIDUTF8String()
    val uuid = java.util.UUID.fromString(utf8StringUUID.toString)
    // Separate assertions so a failure names the exact property that was violated.
    withClue(s"RandomUUIDGenerator seed = $seed: ") {
      assert(uuid.version() == 4)
      assert(uuid.variant() == 2)
      assert(utf8StringUUID.toString == uuid.toString)
    }
  }
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Perhaps we should also create a version that creates a UTF8String directly.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sounds good. I've added it.