From ae81ac1dde955c29d861c3d867d4adacd2689e1a Mon Sep 17 00:00:00 2001 From: Vamshi Kolanu Date: Mon, 6 Mar 2023 17:09:04 -0800 Subject: [PATCH 1/2] HIVE-27133: Round off limit value greater than int_max to int_max --- .../hive/common/util/HiveStringUtils.java | 22 + .../hive/ql/parse/SemanticAnalyzer.java | 9 +- .../queries/clientpositive/limit_max_int.q | 6 + .../clientpositive/llap/limit_max_int.q.out | 856 ++++++++++++++++++ 4 files changed, 891 insertions(+), 2 deletions(-) create mode 100644 ql/src/test/queries/clientpositive/limit_max_int.q create mode 100644 ql/src/test/results/clientpositive/llap/limit_max_int.q.out diff --git a/common/src/java/org/apache/hive/common/util/HiveStringUtils.java b/common/src/java/org/apache/hive/common/util/HiveStringUtils.java index ee94e413c726..c172adeb50f4 100644 --- a/common/src/java/org/apache/hive/common/util/HiveStringUtils.java +++ b/common/src/java/org/apache/hive/common/util/HiveStringUtils.java @@ -20,6 +20,7 @@ import java.io.PrintWriter; import java.io.StringWriter; +import java.math.BigInteger; import java.net.InetAddress; import java.net.URI; import java.net.URISyntaxException; @@ -1174,4 +1175,25 @@ private static boolean isComment(String line) { return lineTrimmed.startsWith("#") || lineTrimmed.startsWith("--"); } + /** + * Returns integer value of a string. 
If the string value exceeds max int, returns Integer.MAX_VALUE + * else if the string value is less than min int, returns Integer.MAX_VALUE + * + * @param value value of the input string + * @return integer + */ + public static int convertStringToBoundedInt(String value) { + try { + BigInteger bigIntValue = new BigInteger(value); + if (bigIntValue.compareTo(BigInteger.valueOf(Integer.MAX_VALUE)) > 0) { + return Integer.MAX_VALUE; + } else if ((bigIntValue.compareTo(BigInteger.valueOf(Integer.MIN_VALUE)) < 0)) { + return Integer.MIN_VALUE; + } else { + return bigIntValue.intValue(); + } + } catch(NumberFormatException nfe){ + throw new IllegalArgumentException("Please specify integer option. Provided option is " + value, nfe); + } + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 067c35dac404..beae8baa925a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -309,6 +309,7 @@ import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.ReflectionUtils; + import com.google.common.base.Splitter; import com.google.common.base.Strings; import com.google.common.collect.ArrayListMultimap; @@ -319,6 +320,8 @@ import com.google.common.math.IntMath; import com.google.common.math.LongMath; +import org.apache.hive.common.util.HiveStringUtils; + /** * Implementation of the semantic analyzer. It generates the query plan. 
* There are other specific semantic analyzers for some hive operations such as @@ -1879,9 +1882,11 @@ boolean doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1, PlannerContext plannerCtx) queryProperties.setHasLimit(true); if (ast.getChildCount() == 2) { qbp.setDestLimit(ctx_1.dest, - Integer.valueOf(ast.getChild(0).getText()), Integer.valueOf(ast.getChild(1).getText())); + HiveStringUtils.convertStringToBoundedInt(ast.getChild(0).getText()), + HiveStringUtils.convertStringToBoundedInt(ast.getChild(1).getText())); } else { - qbp.setDestLimit(ctx_1.dest, Integer.valueOf(0), Integer.valueOf(ast.getChild(0).getText())); + qbp.setDestLimit(ctx_1.dest, 0, + HiveStringUtils.convertStringToBoundedInt(ast.getChild(0).getText())); } break; diff --git a/ql/src/test/queries/clientpositive/limit_max_int.q b/ql/src/test/queries/clientpositive/limit_max_int.q new file mode 100644 index 000000000000..4ae569ca633a --- /dev/null +++ b/ql/src/test/queries/clientpositive/limit_max_int.q @@ -0,0 +1,6 @@ +--! qt:dataset:src +select key from src limit 214748364700; +select key from src where key = '238' limit 214748364700; +select * from src where key = '238' limit 214748364700; +select src.key, count(src.value) from src group by src.key limit 214748364700; +select * from ( select key from src limit 3) sq1 limit 214748364700; \ No newline at end of file diff --git a/ql/src/test/results/clientpositive/llap/limit_max_int.q.out b/ql/src/test/results/clientpositive/llap/limit_max_int.q.out new file mode 100644 index 000000000000..a609b66e85f9 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/limit_max_int.q.out @@ -0,0 +1,856 @@ +PREHOOK: query: select key from src limit 214748364700 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key from src limit 214748364700 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +238 +86 +311 +27 +165 +409 +255 +278 +98 +484 +265 +193 +401 +150 +273 
+224 +369 +66 +128 +213 +146 +406 +429 +374 +152 +469 +145 +495 +37 +327 +281 +277 +209 +15 +82 +403 +166 +417 +430 +252 +292 +219 +287 +153 +193 +338 +446 +459 +394 +237 +482 +174 +413 +494 +207 +199 +466 +208 +174 +399 +396 +247 +417 +489 +162 +377 +397 +309 +365 +266 +439 +342 +367 +325 +167 +195 +475 +17 +113 +155 +203 +339 +0 +455 +128 +311 +316 +57 +302 +205 +149 +438 +345 +129 +170 +20 +489 +157 +378 +221 +92 +111 +47 +72 +4 +280 +35 +427 +277 +208 +356 +399 +169 +382 +498 +125 +386 +437 +469 +192 +286 +187 +176 +54 +459 +51 +138 +103 +239 +213 +216 +430 +278 +176 +289 +221 +65 +318 +332 +311 +275 +137 +241 +83 +333 +180 +284 +12 +230 +181 +67 +260 +404 +384 +489 +353 +373 +272 +138 +217 +84 +348 +466 +58 +8 +411 +230 +208 +348 +24 +463 +431 +179 +172 +42 +129 +158 +119 +496 +0 +322 +197 +468 +393 +454 +100 +298 +199 +191 +418 +96 +26 +165 +327 +230 +205 +120 +131 +51 +404 +43 +436 +156 +469 +468 +308 +95 +196 +288 +481 +457 +98 +282 +197 +187 +318 +318 +409 +470 +137 +369 +316 +169 +413 +85 +77 +0 +490 +87 +364 +179 +118 +134 +395 +282 +138 +238 +419 +15 +118 +72 +90 +307 +19 +435 +10 +277 +273 +306 +224 +309 +389 +327 +242 +369 +392 +272 +331 +401 +242 +452 +177 +226 +5 +497 +402 +396 +317 +395 +58 +35 +336 +95 +11 +168 +34 +229 +233 +143 +472 +322 +498 +160 +195 +42 +321 +430 +119 +489 +458 +78 +76 +41 +223 +492 +149 +449 +218 +228 +138 +453 +30 +209 +64 +468 +76 +74 +342 +69 +230 +33 +368 +103 +296 +113 +216 +367 +344 +167 +274 +219 +239 +485 +116 +223 +256 +263 +70 +487 +480 +401 +288 +191 +5 +244 +438 +128 +467 +432 +202 +316 +229 +469 +463 +280 +2 +35 +283 +331 +235 +80 +44 +193 +321 +335 +104 +466 +366 +175 +403 +483 +53 +105 +257 +406 +409 +190 +406 +401 +114 +258 +90 +203 +262 +348 +424 +12 +396 +201 +217 +164 +431 +454 +478 +298 +125 +431 +164 +424 +187 +382 +5 +70 +397 +480 +291 +24 +351 +255 +104 +70 +163 +438 +119 +414 +200 +491 +237 +439 +360 +248 +479 +305 +417 +199 +444 +120 +429 +169 +443 +323 +325 +277 +230 +478 +178 +468 +310 +317 +333 
+493 +460 +207 +249 +265 +480 +83 +136 +353 +172 +214 +462 +233 +406 +133 +175 +189 +454 +375 +401 +421 +407 +384 +256 +26 +134 +67 +384 +379 +18 +462 +492 +100 +298 +9 +341 +498 +146 +458 +362 +186 +285 +348 +167 +18 +273 +183 +281 +344 +97 +469 +315 +84 +28 +37 +448 +152 +348 +307 +194 +414 +477 +222 +126 +90 +169 +403 +400 +200 +97 +PREHOOK: query: select key from src where key = '238' limit 214748364700 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key from src where key = '238' limit 214748364700 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +238 +238 +PREHOOK: query: select * from src where key = '238' limit 214748364700 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * from src where key = '238' limit 214748364700 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +238 val_238 +238 val_238 +PREHOOK: query: select src.key, count(src.value) from src group by src.key limit 214748364700 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select src.key, count(src.value) from src group by src.key limit 214748364700 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 3 +10 1 +100 2 +103 2 +104 2 +105 1 +11 1 +111 1 +113 2 +114 1 +116 1 +118 2 +119 3 +12 2 +120 2 +125 2 +126 1 +128 3 +129 2 +131 1 +133 1 +134 2 +136 1 +137 2 +138 4 +143 1 +145 1 +146 2 +149 2 +15 2 +150 1 +152 2 +153 1 +155 1 +156 1 +157 1 +158 1 +160 1 +162 1 +163 1 +164 2 +165 2 +166 1 +167 3 +168 1 +169 4 +17 1 +170 1 +172 2 +174 2 +175 2 +176 2 +177 1 +178 1 +179 2 +18 2 +180 1 +181 1 +183 1 +186 1 +187 3 +189 1 +19 1 +190 1 +191 2 +192 1 +193 3 +194 1 +195 2 +196 1 +197 2 +199 3 +2 1 +20 1 +200 2 +201 1 +202 1 +203 2 +205 2 +207 2 +208 3 +209 2 +213 2 +214 1 +216 2 +217 2 +218 1 +219 2 
+221 2 +222 1 +223 2 +224 2 +226 1 +228 1 +229 2 +230 5 +233 2 +235 1 +237 2 +238 2 +239 2 +24 2 +241 1 +242 2 +244 1 +247 1 +248 1 +249 1 +252 1 +255 2 +256 2 +257 1 +258 1 +26 2 +260 1 +262 1 +263 1 +265 2 +266 1 +27 1 +272 2 +273 3 +274 1 +275 1 +277 4 +278 2 +28 1 +280 2 +281 2 +282 2 +283 1 +284 1 +285 1 +286 1 +287 1 +288 2 +289 1 +291 1 +292 1 +296 1 +298 3 +30 1 +302 1 +305 1 +306 1 +307 2 +308 1 +309 2 +310 1 +311 3 +315 1 +316 3 +317 2 +318 3 +321 2 +322 2 +323 1 +325 2 +327 3 +33 1 +331 2 +332 1 +333 2 +335 1 +336 1 +338 1 +339 1 +34 1 +341 1 +342 2 +344 2 +345 1 +348 5 +35 3 +351 1 +353 2 +356 1 +360 1 +362 1 +364 1 +365 1 +366 1 +367 2 +368 1 +369 3 +37 2 +373 1 +374 1 +375 1 +377 1 +378 1 +379 1 +382 2 +384 3 +386 1 +389 1 +392 1 +393 1 +394 1 +395 2 +396 3 +397 2 +399 2 +4 1 +400 1 +401 5 +402 1 +403 3 +404 2 +406 4 +407 1 +409 3 +41 1 +411 1 +413 2 +414 2 +417 3 +418 1 +419 1 +42 2 +421 1 +424 2 +427 1 +429 2 +43 1 +430 3 +431 3 +432 1 +435 1 +436 1 +437 1 +438 3 +439 2 +44 1 +443 1 +444 1 +446 1 +448 1 +449 1 +452 1 +453 1 +454 3 +455 1 +457 1 +458 2 +459 2 +460 1 +462 2 +463 2 +466 3 +467 1 +468 4 +469 5 +47 1 +470 1 +472 1 +475 1 +477 1 +478 2 +479 1 +480 3 +481 1 +482 1 +483 1 +484 1 +485 1 +487 1 +489 4 +490 1 +491 1 +492 2 +493 1 +494 1 +495 1 +496 1 +497 1 +498 3 +5 3 +51 2 +53 1 +54 1 +57 1 +58 2 +64 1 +65 1 +66 1 +67 2 +69 1 +70 3 +72 2 +74 1 +76 2 +77 1 +78 1 +8 1 +80 1 +82 1 +83 2 +84 2 +85 1 +86 1 +87 1 +9 1 +90 3 +92 1 +95 2 +96 1 +97 2 +98 2 +PREHOOK: query: select * from ( select key from src limit 3) sq1 limit 214748364700 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * from ( select key from src limit 3) sq1 limit 214748364700 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +238 +86 +311 From 3caa5015737abe109a6f75797f9c60e8e699f8fc Mon Sep 17 00:00:00 2001 From: Vamshi Kolanu Date: Fri, 10 Mar 2023 14:46:33 -0800 Subject: 
[PATCH 2/2] addressed comments - Removed extra line and fixed the typo in comments --- .../src/java/org/apache/hive/common/util/HiveStringUtils.java | 2 +- .../java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/common/src/java/org/apache/hive/common/util/HiveStringUtils.java b/common/src/java/org/apache/hive/common/util/HiveStringUtils.java index c172adeb50f4..a3a8758b5ce7 100644 --- a/common/src/java/org/apache/hive/common/util/HiveStringUtils.java +++ b/common/src/java/org/apache/hive/common/util/HiveStringUtils.java @@ -1177,7 +1177,7 @@ private static boolean isComment(String line) { /** * Returns integer value of a string. If the string value exceeds max int, returns Integer.MAX_VALUE - * else if the string value is less than min int, returns Integer.MAX_VALUE + * else if the string value is less than min int, returns Integer.MIN_VALUE * * @param value value of the input string * @return integer diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index beae8baa925a..0b06d69e9edd 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -309,7 +309,6 @@ import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.ReflectionUtils; - import com.google.common.base.Splitter; import com.google.common.base.Strings; import com.google.common.collect.ArrayListMultimap;