From 022a95e780ff175478e210bb3aa66f3bae4286af Mon Sep 17 00:00:00 2001 From: djaiswal Date: Wed, 10 Jan 2018 15:53:34 -0800 Subject: [PATCH 1/3] HIVE-18400 : load data should rename files consistent with insert statements (bucketed tables only) Part2 (Deepak Jaiswal, reviewed by Eugene Koifman) --- data/files/bucketed_files/000000_0 | 493 +++++++++++++++++ data/files/bucketed_files/000001_0 | 507 ++++++++++++++++++ data/scripts/q_test_init.sql | 4 +- .../queries/clientpositive/smb_mapjoin_1.q | 6 +- .../queries/clientpositive/smb_mapjoin_10.q | 8 +- .../queries/clientpositive/smb_mapjoin_2.q | 8 +- .../queries/clientpositive/smb_mapjoin_25.q | 6 +- .../queries/clientpositive/smb_mapjoin_3.q | 6 +- .../queries/clientpositive/smb_mapjoin_4.q | 6 +- .../queries/clientpositive/smb_mapjoin_5.q | 6 +- .../beeline/smb_mapjoin_1.q.out | 12 +- .../beeline/smb_mapjoin_10.q.out | 16 +- .../beeline/smb_mapjoin_2.q.out | 12 +- .../beeline/smb_mapjoin_3.q.out | 12 +- .../clientpositive/llap/smb_mapjoin_4.q.out | 12 +- .../clientpositive/llap/smb_mapjoin_5.q.out | 12 +- .../clientpositive/smb_mapjoin_1.q.out | 12 +- .../clientpositive/smb_mapjoin_10.q.out | 16 +- .../clientpositive/smb_mapjoin_2.q.out | 12 +- .../clientpositive/smb_mapjoin_25.q.out | 12 +- .../clientpositive/smb_mapjoin_3.q.out | 12 +- .../clientpositive/spark/smb_mapjoin_1.q.out | 12 +- .../clientpositive/spark/smb_mapjoin_10.q.out | 16 +- .../clientpositive/spark/smb_mapjoin_2.q.out | 12 +- .../clientpositive/spark/smb_mapjoin_25.q.out | 12 +- .../clientpositive/spark/smb_mapjoin_3.q.out | 12 +- .../clientpositive/spark/smb_mapjoin_4.q.out | 12 +- .../clientpositive/spark/smb_mapjoin_5.q.out | 12 +- 28 files changed, 1139 insertions(+), 139 deletions(-) create mode 100644 data/files/bucketed_files/000000_0 create mode 100644 data/files/bucketed_files/000001_0 diff --git a/data/files/bucketed_files/000000_0 b/data/files/bucketed_files/000000_0 new file mode 100644 index 000000000000..c11aec09d961 --- /dev/null 
+++ b/data/files/bucketed_files/000000_0 @@ -0,0 +1,493 @@ +474val_475 +62val_63 +468val_469 +272val_273 +448val_449 +246val_247 +440val_441 +278val_279 +296val_297 +428val_429 +126val_127 +106val_107 +356val_357 +490val_491 +402val_403 +128val_129 +10val_11 +226val_227 +110val_111 +0val_1 +240val_241 +286val_287 +408val_409 +476val_477 +482val_483 +48val_49 +424val_425 +226val_227 +494val_495 +488val_489 +94val_95 +50val_51 +402val_403 +128val_129 +468val_469 +314val_315 +224val_225 +344val_345 +4val_5 +206val_207 +114val_115 +56val_57 +114val_115 +254val_255 +390val_391 +304val_305 +264val_265 +196val_197 +238val_239 +20val_21 +492val_493 +82val_83 +58val_59 +86val_87 +438val_439 +360val_361 +222val_223 +42val_43 +338val_339 +68val_69 +16val_17 +492val_493 +376val_377 +120val_121 +306val_307 +426val_427 +132val_133 +446val_447 +386val_387 +388val_389 +184val_185 +284val_285 +246val_247 +262val_263 +122val_123 +438val_439 +390val_391 +352val_353 +226val_227 +328val_329 +382val_383 +342val_343 +480val_481 +102val_103 +480val_481 +318val_319 +392val_393 +476val_477 +258val_259 +174val_175 +252val_253 +114val_115 +264val_265 +48val_49 +336val_337 +340val_341 +390val_391 +484val_485 +6val_7 +260val_261 +2val_3 +170val_171 +164val_165 +118val_119 +310val_311 +104val_105 +80val_81 +326val_327 +450val_451 +140val_141 +212val_213 +308val_309 +30val_31 +358val_359 +416val_417 +42val_43 +386val_387 +454val_455 +364val_365 +20val_21 +52val_53 +40val_41 +8val_9 +168val_169 +384val_385 +324val_325 +310val_311 +206val_207 +404val_405 +206val_207 +226val_227 +262val_263 +260val_261 +328val_329 +322val_323 +122val_123 +404val_405 +384val_385 +76val_77 +116val_117 +42val_43 +104val_105 +406val_407 +32val_33 +132val_133 +192val_193 +58val_59 +70val_71 +356val_357 +352val_353 +52val_53 +330val_331 +138val_139 +160val_161 +454val_455 +76val_77 +174val_175 +412val_413 +16val_17 +204val_205 +126val_127 +274val_275 +374val_375 +494val_495 +216val_217 +470val_471 +196val_197 +302val_303 
+450val_451 +12val_13 +398val_399 +334val_335 +384val_385 +60val_61 +442val_443 +52val_53 +404val_405 +446val_447 +300val_301 +0val_1 +268val_269 +392val_393 +104val_105 +436val_437 +156val_157 +118val_119 +172val_173 +244val_245 +6val_7 +284val_285 +164val_165 +136val_137 +462val_463 +432val_433 +496val_497 +144val_145 +408val_409 +152val_153 +382val_383 +348val_349 +122val_123 +292val_293 +182val_183 +474val_475 +310val_311 +52val_53 +486val_487 +152val_153 +378val_379 +414val_415 +256val_257 +292val_293 +412val_413 +40val_41 +478val_479 +178val_179 +100val_101 +156val_157 +228val_229 +22val_23 +248val_249 +402val_403 +62val_63 +162val_163 +244val_245 +276val_277 +46val_47 +78val_79 +134val_135 +196val_197 +410val_411 +82val_83 +440val_441 +100val_101 +308val_309 +430val_431 +468val_469 +152val_153 +138val_139 +76val_77 +300val_301 +478val_479 +118val_119 +178val_179 +242val_243 +244val_245 +238val_238 +86val_86 +278val_278 +98val_98 +484val_484 +150val_150 +224val_224 +66val_66 +128val_128 +146val_146 +406val_406 +374val_374 +152val_152 +82val_82 +166val_166 +430val_430 +252val_252 +292val_292 +338val_338 +446val_446 +394val_394 +482val_482 +174val_174 +494val_494 +466val_466 +208val_208 +174val_174 +396val_396 +162val_162 +266val_266 +342val_342 +0val_0 +128val_128 +316val_316 +302val_302 +438val_438 +170val_170 +20val_20 +378val_378 +92val_92 +72val_72 +4val_4 +280val_280 +208val_208 +356val_356 +382val_382 +498val_498 +386val_386 +192val_192 +286val_286 +176val_176 +54val_54 +138val_138 +216val_216 +430val_430 +278val_278 +176val_176 +318val_318 +332val_332 +180val_180 +284val_284 +12val_12 +230val_230 +260val_260 +404val_404 +384val_384 +272val_272 +138val_138 +84val_84 +348val_348 +466val_466 +58val_58 +8val_8 +230val_230 +208val_208 +348val_348 +24val_24 +172val_172 +42val_42 +158val_158 +496val_496 +0val_0 +322val_322 +468val_468 +454val_454 +100val_100 +298val_298 +418val_418 +96val_96 +26val_26 +230val_230 +120val_120 +404val_404 +436val_436 +156val_156 
+468val_468 +308val_308 +196val_196 +288val_288 +98val_98 +282val_282 +318val_318 +318val_318 +470val_470 +316val_316 +0val_0 +490val_490 +364val_364 +118val_118 +134val_134 +282val_282 +138val_138 +238val_238 +118val_118 +72val_72 +90val_90 +10val_10 +306val_306 +224val_224 +242val_242 +392val_392 +272val_272 +242val_242 +452val_452 +226val_226 +402val_402 +396val_396 +58val_58 +336val_336 +168val_168 +34val_34 +472val_472 +322val_322 +498val_498 +160val_160 +42val_42 +430val_430 +458val_458 +78val_78 +76val_76 +492val_492 +218val_218 +228val_228 +138val_138 +30val_30 +64val_64 +468val_468 +76val_76 +74val_74 +342val_342 +230val_230 +368val_368 +296val_296 +216val_216 +344val_344 +274val_274 +116val_116 +256val_256 +70val_70 +480val_480 +288val_288 +244val_244 +438val_438 +128val_128 +432val_432 +202val_202 +316val_316 +280val_280 +2val_2 +80val_80 +44val_44 +104val_104 +466val_466 +366val_366 +406val_406 +190val_190 +406val_406 +114val_114 +258val_258 +90val_90 +262val_262 +348val_348 +424val_424 +12val_12 +396val_396 +164val_164 +454val_454 +478val_478 +298val_298 +164val_164 +424val_424 +382val_382 +70val_70 +480val_480 +24val_24 +104val_104 +70val_70 +438val_438 +414val_414 +200val_200 +360val_360 +248val_248 +444val_444 +120val_120 +230val_230 +478val_478 +178val_178 +468val_468 +310val_310 +460val_460 +480val_480 +136val_136 +172val_172 +214val_214 +462val_462 +406val_406 +454val_454 +384val_384 +256val_256 +26val_26 +134val_134 +384val_384 +18val_18 +462val_462 +492val_492 +100val_100 +298val_298 +498val_498 +146val_146 +458val_458 +362val_362 +186val_186 +348val_348 +18val_18 +344val_344 +84val_84 +28val_28 +448val_448 +152val_152 +348val_348 +194val_194 +414val_414 +222val_222 +126val_126 +90val_90 +400val_400 +200val_200 diff --git a/data/files/bucketed_files/000001_0 b/data/files/bucketed_files/000001_0 new file mode 100644 index 000000000000..49bfa8549836 --- /dev/null +++ b/data/files/bucketed_files/000001_0 @@ -0,0 +1,507 @@ +281val_282 +179val_180 
+291val_292 +271val_272 +217val_218 +135val_136 +167val_168 +423val_424 +413val_414 +245val_246 +455val_456 +425val_426 +241val_242 +177val_178 +231val_232 +287val_288 +31val_32 +373val_374 +447val_448 +443val_444 +175val_176 +147val_148 +249val_250 +21val_22 +273val_274 +441val_442 +371val_372 +153val_154 +217val_218 +33val_34 +35val_36 +421val_422 +243val_244 +133val_134 +333val_334 +15val_16 +391val_392 +343val_344 +275val_276 +485val_486 +293val_294 +241val_242 +85val_86 +477val_478 +455val_456 +99val_100 +335val_336 +367val_368 +59val_60 +485val_486 +393val_394 +349val_350 +11val_12 +161val_162 +123val_124 +409val_410 +265val_266 +497val_498 +63val_64 +277val_278 +135val_136 +3val_4 +101val_102 +331val_332 +209val_210 +281val_282 +239val_240 +389val_390 +235val_236 +165val_166 +11val_12 +129val_130 +257val_258 +71val_72 +289val_290 +453val_454 +421val_422 +5val_6 +249val_250 +323val_324 +467val_468 +411val_412 +175val_176 +429val_430 +281val_282 +185val_186 +147val_148 +119val_120 +473val_474 +347val_348 +213val_214 +393val_394 +427val_428 +291val_292 +65val_66 +121val_122 +375val_376 +191val_192 +129val_130 +197val_198 +491val_492 +369val_370 +351val_352 +23val_24 +349val_350 +497val_498 +487val_488 +331val_332 +409val_410 +475val_476 +463val_464 +183val_184 +177val_178 +399val_400 +15val_16 +149val_150 +77val_78 +275val_276 +87val_88 +161val_162 +75val_76 +443val_444 +407val_408 +189val_190 +305val_306 +367val_368 +349val_350 +51val_52 +355val_356 +363val_364 +19val_20 +117val_118 +47val_48 +121val_122 +241val_242 +405val_406 +239val_240 +119val_120 +11val_12 +49val_50 +143val_144 +153val_154 +341val_342 +21val_22 +105val_106 +157val_158 +199val_200 +375val_376 +89val_90 +363val_364 +395val_396 +347val_348 +77val_78 +293val_294 +89val_90 +461val_462 +313val_314 +381val_382 +385val_386 +259val_260 +69val_70 +303val_304 +245val_246 +241val_242 +469val_470 +481val_482 +303val_304 +93val_94 +277val_278 +451val_452 +53val_54 +351val_352 +443val_444 +399val_400 
+205val_206 +21val_22 +317val_318 +209val_210 +429val_430 +267val_268 +257val_258 +375val_376 +489val_490 +295val_296 +105val_106 +439val_440 +457val_458 +93val_94 +119val_120 +375val_376 +391val_392 +287val_288 +375val_376 +437val_438 +35val_36 +435val_436 +29val_30 +151val_152 +491val_492 +21val_22 +295val_296 +93val_94 +61val_62 +407val_408 +439val_440 +341val_342 +335val_336 +349val_350 +371val_372 +123val_124 +355val_356 +87val_88 +427val_428 +353val_354 +261val_262 +65val_66 +371val_372 +97val_98 +495val_496 +385val_386 +49val_50 +389val_390 +259val_260 +97val_98 +125val_126 +243val_244 +151val_152 +415val_416 +401val_402 +363val_364 +117val_118 +459val_460 +137val_138 +341val_342 +379val_380 +215val_216 +157val_158 +431val_432 +407val_408 +371val_372 +309val_310 +135val_136 +161val_162 +161val_162 +337val_338 +91val_92 +1val_2 +89val_90 +457val_458 +29val_30 +285val_286 +35val_36 +227val_228 +395val_396 +311val_311 +27val_27 +165val_165 +409val_409 +255val_255 +265val_265 +193val_193 +401val_401 +273val_273 +369val_369 +213val_213 +429val_429 +469val_469 +145val_145 +495val_495 +37val_37 +327val_327 +281val_281 +277val_277 +209val_209 +15val_15 +403val_403 +417val_417 +219val_219 +287val_287 +153val_153 +193val_193 +459val_459 +237val_237 +413val_413 +207val_207 +199val_199 +399val_399 +247val_247 +417val_417 +489val_489 +377val_377 +397val_397 +309val_309 +365val_365 +439val_439 +367val_367 +325val_325 +167val_167 +195val_195 +475val_475 +17val_17 +113val_113 +155val_155 +203val_203 +339val_339 +455val_455 +311val_311 +57val_57 +205val_205 +149val_149 +345val_345 +129val_129 +489val_489 +157val_157 +221val_221 +111val_111 +47val_47 +35val_35 +427val_427 +277val_277 +399val_399 +169val_169 +125val_125 +437val_437 +469val_469 +187val_187 +459val_459 +51val_51 +103val_103 +239val_239 +213val_213 +289val_289 +221val_221 +65val_65 +311val_311 +275val_275 +137val_137 +241val_241 +83val_83 +333val_333 +181val_181 +67val_67 +489val_489 +353val_353 +373val_373 
+217val_217 +411val_411 +463val_463 +431val_431 +179val_179 +129val_129 +119val_119 +197val_197 +393val_393 +199val_199 +191val_191 +165val_165 +327val_327 +205val_205 +131val_131 +51val_51 +43val_43 +469val_469 +95val_95 +481val_481 +457val_457 +197val_197 +187val_187 +409val_409 +137val_137 +369val_369 +169val_169 +413val_413 +85val_85 +77val_77 +87val_87 +179val_179 +395val_395 +419val_419 +15val_15 +307val_307 +19val_19 +435val_435 +277val_277 +273val_273 +309val_309 +389val_389 +327val_327 +369val_369 +331val_331 +401val_401 +177val_177 +5val_5 +497val_497 +317val_317 +395val_395 +35val_35 +95val_95 +11val_11 +229val_229 +233val_233 +143val_143 +195val_195 +321val_321 +119val_119 +489val_489 +41val_41 +223val_223 +149val_149 +449val_449 +453val_453 +209val_209 +69val_69 +33val_33 +103val_103 +113val_113 +367val_367 +167val_167 +219val_219 +239val_239 +485val_485 +223val_223 +263val_263 +487val_487 +401val_401 +191val_191 +5val_5 +467val_467 +229val_229 +469val_469 +463val_463 +35val_35 +283val_283 +331val_331 +235val_235 +193val_193 +321val_321 +335val_335 +175val_175 +403val_403 +483val_483 +53val_53 +105val_105 +257val_257 +409val_409 +401val_401 +203val_203 +201val_201 +217val_217 +431val_431 +125val_125 +431val_431 +187val_187 +5val_5 +397val_397 +291val_291 +351val_351 +255val_255 +163val_163 +119val_119 +491val_491 +237val_237 +439val_439 +479val_479 +305val_305 +417val_417 +199val_199 +429val_429 +169val_169 +443val_443 +323val_323 +325val_325 +277val_277 +317val_317 +333val_333 +493val_493 +207val_207 +249val_249 +265val_265 +83val_83 +353val_353 +233val_233 +133val_133 +175val_175 +189val_189 +375val_375 +401val_401 +421val_421 +407val_407 +67val_67 +379val_379 +9val_9 +341val_341 +285val_285 +167val_167 +273val_273 +183val_183 +281val_281 +97val_97 +469val_469 +315val_315 +37val_37 +307val_307 +477val_477 +169val_169 +403val_403 +97val_97 diff --git a/data/scripts/q_test_init.sql b/data/scripts/q_test_init.sql index 01d6d4c9dd96..45a1c494eb6b 100644 
--- a/data/scripts/q_test_init.sql +++ b/data/scripts/q_test_init.sql @@ -78,8 +78,8 @@ CREATE TABLE srcbucket (key INT, value STRING) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/srcbucket0.txt" INTO TABLE srcbucket_tmp; -LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/srcbucket1.txt" INTO TABLE srcbucket_tmp; +LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/bucketed_files/000000_0" INTO TABLE srcbucket_tmp; +LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/bucketed_files/000001_0" INTO TABLE srcbucket_tmp; INSERT INTO srcbucket SELECT * FROM srcbucket_tmp; DROP TABLE srcbucket_tmp; diff --git a/ql/src/test/queries/clientpositive/smb_mapjoin_1.q b/ql/src/test/queries/clientpositive/smb_mapjoin_1.q index b2394ad10f79..7cc96563a7fb 100644 --- a/ql/src/test/queries/clientpositive/smb_mapjoin_1.q +++ b/ql/src/test/queries/clientpositive/smb_mapjoin_1.q @@ -8,9 +8,9 @@ create table smb_bucket_1(key int, value string) CLUSTERED BY (key) SORTED BY (k create table smb_bucket_2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE; create table smb_bucket_3(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE; -load data local inpath '../../data/files/smbbucket_1.rc' overwrite into table smb_bucket_1; -load data local inpath '../../data/files/smbbucket_2.rc' overwrite into table smb_bucket_2; -load data local inpath '../../data/files/smbbucket_3.rc' overwrite into table smb_bucket_3; +load data local inpath '../../data/files/smb_rc1/000000_0' overwrite into table smb_bucket_1; +load data local inpath '../../data/files/smb_rc2/000000_0' overwrite into table smb_bucket_2; +load data local inpath '../../data/files/smb_rc3/000000_0' overwrite into table smb_bucket_3; desc formatted smb_bucket_1; select count(*) from smb_bucket_1; diff --git a/ql/src/test/queries/clientpositive/smb_mapjoin_10.q 
b/ql/src/test/queries/clientpositive/smb_mapjoin_10.q index ab8258f60bb5..df718fbc9ee0 100644 --- a/ql/src/test/queries/clientpositive/smb_mapjoin_10.q +++ b/ql/src/test/queries/clientpositive/smb_mapjoin_10.q @@ -8,11 +8,11 @@ alter table tmp_smb_bucket_10 add partition (ds = '2'); -- add dummy files to make sure that the number of files in each partition is same as number of buckets -load data local inpath '../../data/files/smbbucket_1.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='1'); -load data local inpath '../../data/files/smbbucket_2.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='1'); +load data local inpath '../../data/files/smb_rc1/000000_0' INTO TABLE tmp_smb_bucket_10 partition(ds='1'); +load data local inpath '../../data/files/smb_rc1/000001_0' INTO TABLE tmp_smb_bucket_10 partition(ds='1'); -load data local inpath '../../data/files/smbbucket_1.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='2'); -load data local inpath '../../data/files/smbbucket_2.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='2'); +load data local inpath '../../data/files/smb_rc1/000000_0' INTO TABLE tmp_smb_bucket_10 partition(ds='2'); +load data local inpath '../../data/files/smb_rc1/000001_0' INTO TABLE tmp_smb_bucket_10 partition(ds='2'); set hive.cbo.enable=false; set hive.optimize.bucketmapjoin = true; set hive.optimize.bucketmapjoin.sortedmerge = true; diff --git a/ql/src/test/queries/clientpositive/smb_mapjoin_2.q b/ql/src/test/queries/clientpositive/smb_mapjoin_2.q index 1b184d862904..43e51ed70c2f 100644 --- a/ql/src/test/queries/clientpositive/smb_mapjoin_2.q +++ b/ql/src/test/queries/clientpositive/smb_mapjoin_2.q @@ -8,10 +8,10 @@ create table smb_bucket_1(key int, value string) CLUSTERED BY (key) SORTED BY (k create table smb_bucket_2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE; create table smb_bucket_3(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE; -load data local inpath 
'../../data/files/smbbucket_1.rc' overwrite into table smb_bucket_1; -load data local inpath '../../data/files/smbbucket_2.rc' overwrite into table smb_bucket_2; -load data local inpath '../../data/files/smbbucket_3.rc' overwrite into table smb_bucket_3; - +load data local inpath '../../data/files/smb_rc1/000000_0' overwrite into table smb_bucket_1; +load data local inpath '../../data/files/smb_rc2/000000_0' overwrite into table smb_bucket_2; +load data local inpath '../../data/files/smb_rc3/000000_0' overwrite into table smb_bucket_3; + set hive.optimize.bucketmapjoin = true; set hive.optimize.bucketmapjoin.sortedmerge = true; set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; diff --git a/ql/src/test/queries/clientpositive/smb_mapjoin_25.q b/ql/src/test/queries/clientpositive/smb_mapjoin_25.q index 44da19b11386..491db2e50ee7 100644 --- a/ql/src/test/queries/clientpositive/smb_mapjoin_25.q +++ b/ql/src/test/queries/clientpositive/smb_mapjoin_25.q @@ -13,9 +13,9 @@ create table smb_bucket_1(key int, value string) CLUSTERED BY (key) SORTED BY (k create table smb_bucket_2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE; create table smb_bucket_3(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE; -load data local inpath '../../data/files/smbbucket_1.rc' overwrite into table smb_bucket_1; -load data local inpath '../../data/files/smbbucket_2.rc' overwrite into table smb_bucket_2; -load data local inpath '../../data/files/smbbucket_3.rc' overwrite into table smb_bucket_3; +load data local inpath '../../data/files/smb_rc1/000000_0' overwrite into table smb_bucket_1; +load data local inpath '../../data/files/smb_rc2/000000_0' overwrite into table smb_bucket_2; +load data local inpath '../../data/files/smb_rc3/000000_0' overwrite into table smb_bucket_3; explain select * from (select a.key from smb_bucket_1 a join smb_bucket_2 b on (a.key = b.key) where a.key = 5) 
t1 left outer join (select c.key from smb_bucket_2 c join smb_bucket_3 d on (c.key = d.key) where c.key=5) t2 on (t1.key=t2.key) where t2.key=5; diff --git a/ql/src/test/queries/clientpositive/smb_mapjoin_3.q b/ql/src/test/queries/clientpositive/smb_mapjoin_3.q index ecd38cc6e4b3..4c3bcc94d470 100644 --- a/ql/src/test/queries/clientpositive/smb_mapjoin_3.q +++ b/ql/src/test/queries/clientpositive/smb_mapjoin_3.q @@ -9,9 +9,9 @@ create table smb_bucket_1(key int, value string) CLUSTERED BY (key) SORTED BY (k create table smb_bucket_2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE; create table smb_bucket_3(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE; -load data local inpath '../../data/files/smbbucket_1.rc' overwrite into table smb_bucket_1; -load data local inpath '../../data/files/smbbucket_2.rc' overwrite into table smb_bucket_2; -load data local inpath '../../data/files/smbbucket_3.rc' overwrite into table smb_bucket_3; +load data local inpath '../../data/files/smb_rc1/000000_0' overwrite into table smb_bucket_1; +load data local inpath '../../data/files/smb_rc2/000000_0' overwrite into table smb_bucket_2; +load data local inpath '../../data/files/smb_rc3/000000_0' overwrite into table smb_bucket_3; set hive.optimize.bucketmapjoin = true; set hive.optimize.bucketmapjoin.sortedmerge = true; diff --git a/ql/src/test/queries/clientpositive/smb_mapjoin_4.q b/ql/src/test/queries/clientpositive/smb_mapjoin_4.q index 4e3fcaebf1ba..d63c7de32115 100644 --- a/ql/src/test/queries/clientpositive/smb_mapjoin_4.q +++ b/ql/src/test/queries/clientpositive/smb_mapjoin_4.q @@ -8,9 +8,9 @@ create table smb_bucket_1(key int, value string) CLUSTERED BY (key) SORTED BY (k create table smb_bucket_2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE; create table smb_bucket_3(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS 
RCFILE; -load data local inpath '../../data/files/smbbucket_1.rc' overwrite into table smb_bucket_1; -load data local inpath '../../data/files/smbbucket_2.rc' overwrite into table smb_bucket_2; -load data local inpath '../../data/files/smbbucket_3.rc' overwrite into table smb_bucket_3; +load data local inpath '../../data/files/smb_rc1/000000_0' overwrite into table smb_bucket_1; +load data local inpath '../../data/files/smb_rc2/000000_0' overwrite into table smb_bucket_2; +load data local inpath '../../data/files/smb_rc3/000000_0' overwrite into table smb_bucket_3; set hive.optimize.bucketmapjoin = true; set hive.optimize.bucketmapjoin.sortedmerge = true; diff --git a/ql/src/test/queries/clientpositive/smb_mapjoin_5.q b/ql/src/test/queries/clientpositive/smb_mapjoin_5.q index 0decdaffd4e9..46acbf9b9d68 100644 --- a/ql/src/test/queries/clientpositive/smb_mapjoin_5.q +++ b/ql/src/test/queries/clientpositive/smb_mapjoin_5.q @@ -8,9 +8,9 @@ create table smb_bucket_1(key int, value string) CLUSTERED BY (key) SORTED BY (k create table smb_bucket_2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE; create table smb_bucket_3(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE; -load data local inpath '../../data/files/smbbucket_1.rc' overwrite into table smb_bucket_1; -load data local inpath '../../data/files/smbbucket_2.rc' overwrite into table smb_bucket_2; -load data local inpath '../../data/files/smbbucket_3.rc' overwrite into table smb_bucket_3; +load data local inpath '../../data/files/smb_rc1/000000_0' overwrite into table smb_bucket_1; +load data local inpath '../../data/files/smb_rc2/000000_0' overwrite into table smb_bucket_2; +load data local inpath '../../data/files/smb_rc3/000000_0' overwrite into table smb_bucket_3; set hive.optimize.bucketmapjoin = true; set hive.optimize.bucketmapjoin.sortedmerge = true; diff --git a/ql/src/test/results/clientpositive/beeline/smb_mapjoin_1.q.out 
b/ql/src/test/results/clientpositive/beeline/smb_mapjoin_1.q.out index 8ca2c339c761..e4555248679f 100644 --- a/ql/src/test/results/clientpositive/beeline/smb_mapjoin_1.q.out +++ b/ql/src/test/results/clientpositive/beeline/smb_mapjoin_1.q.out @@ -22,27 +22,27 @@ POSTHOOK: query: create table smb_bucket_3(key int, value string) CLUSTERED BY ( POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@smb_bucket_3 -PREHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' overwrite into table smb_bucket_1 +PREHOOK: query: load data local inpath '../../data/files/smb_rc1/000000_0' overwrite into table smb_bucket_1 PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@smb_bucket_1 -POSTHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' overwrite into table smb_bucket_1 +POSTHOOK: query: load data local inpath '../../data/files/smb_rc1/000000_0' overwrite into table smb_bucket_1 POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@smb_bucket_1 -PREHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' overwrite into table smb_bucket_2 +PREHOOK: query: load data local inpath '../../data/files/smb_rc2/000000_0' overwrite into table smb_bucket_2 PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@smb_bucket_2 -POSTHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' overwrite into table smb_bucket_2 +POSTHOOK: query: load data local inpath '../../data/files/smb_rc2/000000_0' overwrite into table smb_bucket_2 POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@smb_bucket_2 -PREHOOK: query: load data local inpath '../../data/files/smbbucket_3.rc' overwrite into table smb_bucket_3 +PREHOOK: query: load data local inpath '../../data/files/smb_rc3/000000_0' overwrite into table smb_bucket_3 PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: 
default@smb_bucket_3 -POSTHOOK: query: load data local inpath '../../data/files/smbbucket_3.rc' overwrite into table smb_bucket_3 +POSTHOOK: query: load data local inpath '../../data/files/smb_rc3/000000_0' overwrite into table smb_bucket_3 POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@smb_bucket_3 diff --git a/ql/src/test/results/clientpositive/beeline/smb_mapjoin_10.q.out b/ql/src/test/results/clientpositive/beeline/smb_mapjoin_10.q.out index 6f1ba4f5cf9b..25e5eace540a 100644 --- a/ql/src/test/results/clientpositive/beeline/smb_mapjoin_10.q.out +++ b/ql/src/test/results/clientpositive/beeline/smb_mapjoin_10.q.out @@ -20,35 +20,35 @@ POSTHOOK: query: alter table tmp_smb_bucket_10 add partition (ds = '2') POSTHOOK: type: ALTERTABLE_ADDPARTS POSTHOOK: Output: default@tmp_smb_bucket_10 POSTHOOK: Output: default@tmp_smb_bucket_10@ds=2 -PREHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='1') +PREHOOK: query: load data local inpath '../../data/files/smb_rc1/000000_0' INTO TABLE tmp_smb_bucket_10 partition(ds='1') PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@tmp_smb_bucket_10@ds=1 -POSTHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='1') +POSTHOOK: query: load data local inpath '../../data/files/smb_rc1/000000_0' INTO TABLE tmp_smb_bucket_10 partition(ds='1') POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@tmp_smb_bucket_10@ds=1 -PREHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='1') +PREHOOK: query: load data local inpath '../../data/files/smb_rc1/000001_0' INTO TABLE tmp_smb_bucket_10 partition(ds='1') PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@tmp_smb_bucket_10@ds=1 -POSTHOOK: query: load data local inpath 
'../../data/files/smbbucket_2.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='1') +POSTHOOK: query: load data local inpath '../../data/files/smb_rc1/000001_0' INTO TABLE tmp_smb_bucket_10 partition(ds='1') POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@tmp_smb_bucket_10@ds=1 -PREHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='2') +PREHOOK: query: load data local inpath '../../data/files/smb_rc1/000000_0' INTO TABLE tmp_smb_bucket_10 partition(ds='2') PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@tmp_smb_bucket_10@ds=2 -POSTHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='2') +POSTHOOK: query: load data local inpath '../../data/files/smb_rc1/000000_0' INTO TABLE tmp_smb_bucket_10 partition(ds='2') POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@tmp_smb_bucket_10@ds=2 -PREHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='2') +PREHOOK: query: load data local inpath '../../data/files/smb_rc1/000001_0' INTO TABLE tmp_smb_bucket_10 partition(ds='2') PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@tmp_smb_bucket_10@ds=2 -POSTHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='2') +POSTHOOK: query: load data local inpath '../../data/files/smb_rc1/000001_0' INTO TABLE tmp_smb_bucket_10 partition(ds='2') POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@tmp_smb_bucket_10@ds=2 diff --git a/ql/src/test/results/clientpositive/beeline/smb_mapjoin_2.q.out b/ql/src/test/results/clientpositive/beeline/smb_mapjoin_2.q.out index 7dcc26b5b36a..1dfacda93c8a 100644 --- a/ql/src/test/results/clientpositive/beeline/smb_mapjoin_2.q.out +++ 
b/ql/src/test/results/clientpositive/beeline/smb_mapjoin_2.q.out @@ -22,27 +22,27 @@ POSTHOOK: query: create table smb_bucket_3(key int, value string) CLUSTERED BY ( POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@smb_bucket_3 -PREHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' overwrite into table smb_bucket_1 +PREHOOK: query: load data local inpath '../../data/files/smb_rc1/000000_0' overwrite into table smb_bucket_1 PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@smb_bucket_1 -POSTHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' overwrite into table smb_bucket_1 +POSTHOOK: query: load data local inpath '../../data/files/smb_rc1/000000_0' overwrite into table smb_bucket_1 POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@smb_bucket_1 -PREHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' overwrite into table smb_bucket_2 +PREHOOK: query: load data local inpath '../../data/files/smb_rc2/000000_0' overwrite into table smb_bucket_2 PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@smb_bucket_2 -POSTHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' overwrite into table smb_bucket_2 +POSTHOOK: query: load data local inpath '../../data/files/smb_rc2/000000_0' overwrite into table smb_bucket_2 POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@smb_bucket_2 -PREHOOK: query: load data local inpath '../../data/files/smbbucket_3.rc' overwrite into table smb_bucket_3 +PREHOOK: query: load data local inpath '../../data/files/smb_rc3/000000_0' overwrite into table smb_bucket_3 PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@smb_bucket_3 -POSTHOOK: query: load data local inpath '../../data/files/smbbucket_3.rc' overwrite into table smb_bucket_3 +POSTHOOK: query: load data local inpath 
'../../data/files/smb_rc3/000000_0' overwrite into table smb_bucket_3 POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@smb_bucket_3 diff --git a/ql/src/test/results/clientpositive/beeline/smb_mapjoin_3.q.out b/ql/src/test/results/clientpositive/beeline/smb_mapjoin_3.q.out index 7b4e491c228c..cf4c74410fea 100644 --- a/ql/src/test/results/clientpositive/beeline/smb_mapjoin_3.q.out +++ b/ql/src/test/results/clientpositive/beeline/smb_mapjoin_3.q.out @@ -22,27 +22,27 @@ POSTHOOK: query: create table smb_bucket_3(key int, value string) CLUSTERED BY ( POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@smb_bucket_3 -PREHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' overwrite into table smb_bucket_1 +PREHOOK: query: load data local inpath '../../data/files/smb_rc1/000000_0' overwrite into table smb_bucket_1 PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@smb_bucket_1 -POSTHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' overwrite into table smb_bucket_1 +POSTHOOK: query: load data local inpath '../../data/files/smb_rc1/000000_0' overwrite into table smb_bucket_1 POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@smb_bucket_1 -PREHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' overwrite into table smb_bucket_2 +PREHOOK: query: load data local inpath '../../data/files/smb_rc2/000000_0' overwrite into table smb_bucket_2 PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@smb_bucket_2 -POSTHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' overwrite into table smb_bucket_2 +POSTHOOK: query: load data local inpath '../../data/files/smb_rc2/000000_0' overwrite into table smb_bucket_2 POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@smb_bucket_2 -PREHOOK: query: load data local inpath 
'../../data/files/smbbucket_3.rc' overwrite into table smb_bucket_3 +PREHOOK: query: load data local inpath '../../data/files/smb_rc3/000000_0' overwrite into table smb_bucket_3 PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@smb_bucket_3 -POSTHOOK: query: load data local inpath '../../data/files/smbbucket_3.rc' overwrite into table smb_bucket_3 +POSTHOOK: query: load data local inpath '../../data/files/smb_rc3/000000_0' overwrite into table smb_bucket_3 POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@smb_bucket_3 diff --git a/ql/src/test/results/clientpositive/llap/smb_mapjoin_4.q.out b/ql/src/test/results/clientpositive/llap/smb_mapjoin_4.q.out index 92acf4b16e19..89bdfd89373d 100644 --- a/ql/src/test/results/clientpositive/llap/smb_mapjoin_4.q.out +++ b/ql/src/test/results/clientpositive/llap/smb_mapjoin_4.q.out @@ -22,27 +22,27 @@ POSTHOOK: query: create table smb_bucket_3(key int, value string) CLUSTERED BY ( POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@smb_bucket_3 -PREHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' overwrite into table smb_bucket_1 +PREHOOK: query: load data local inpath '../../data/files/smb_rc1/000000_0' overwrite into table smb_bucket_1 PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@smb_bucket_1 -POSTHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' overwrite into table smb_bucket_1 +POSTHOOK: query: load data local inpath '../../data/files/smb_rc1/000000_0' overwrite into table smb_bucket_1 POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@smb_bucket_1 -PREHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' overwrite into table smb_bucket_2 +PREHOOK: query: load data local inpath '../../data/files/smb_rc2/000000_0' overwrite into table smb_bucket_2 PREHOOK: type: LOAD #### A masked pattern was here #### 
PREHOOK: Output: default@smb_bucket_2 -POSTHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' overwrite into table smb_bucket_2 +POSTHOOK: query: load data local inpath '../../data/files/smb_rc2/000000_0' overwrite into table smb_bucket_2 POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@smb_bucket_2 -PREHOOK: query: load data local inpath '../../data/files/smbbucket_3.rc' overwrite into table smb_bucket_3 +PREHOOK: query: load data local inpath '../../data/files/smb_rc3/000000_0' overwrite into table smb_bucket_3 PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@smb_bucket_3 -POSTHOOK: query: load data local inpath '../../data/files/smbbucket_3.rc' overwrite into table smb_bucket_3 +POSTHOOK: query: load data local inpath '../../data/files/smb_rc3/000000_0' overwrite into table smb_bucket_3 POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@smb_bucket_3 diff --git a/ql/src/test/results/clientpositive/llap/smb_mapjoin_5.q.out b/ql/src/test/results/clientpositive/llap/smb_mapjoin_5.q.out index d7a75a397251..06e41732097e 100644 --- a/ql/src/test/results/clientpositive/llap/smb_mapjoin_5.q.out +++ b/ql/src/test/results/clientpositive/llap/smb_mapjoin_5.q.out @@ -22,27 +22,27 @@ POSTHOOK: query: create table smb_bucket_3(key int, value string) CLUSTERED BY ( POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@smb_bucket_3 -PREHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' overwrite into table smb_bucket_1 +PREHOOK: query: load data local inpath '../../data/files/smb_rc1/000000_0' overwrite into table smb_bucket_1 PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@smb_bucket_1 -POSTHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' overwrite into table smb_bucket_1 +POSTHOOK: query: load data local inpath '../../data/files/smb_rc1/000000_0' overwrite 
into table smb_bucket_1 POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@smb_bucket_1 -PREHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' overwrite into table smb_bucket_2 +PREHOOK: query: load data local inpath '../../data/files/smb_rc2/000000_0' overwrite into table smb_bucket_2 PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@smb_bucket_2 -POSTHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' overwrite into table smb_bucket_2 +POSTHOOK: query: load data local inpath '../../data/files/smb_rc2/000000_0' overwrite into table smb_bucket_2 POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@smb_bucket_2 -PREHOOK: query: load data local inpath '../../data/files/smbbucket_3.rc' overwrite into table smb_bucket_3 +PREHOOK: query: load data local inpath '../../data/files/smb_rc3/000000_0' overwrite into table smb_bucket_3 PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@smb_bucket_3 -POSTHOOK: query: load data local inpath '../../data/files/smbbucket_3.rc' overwrite into table smb_bucket_3 +POSTHOOK: query: load data local inpath '../../data/files/smb_rc3/000000_0' overwrite into table smb_bucket_3 POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@smb_bucket_3 diff --git a/ql/src/test/results/clientpositive/smb_mapjoin_1.q.out b/ql/src/test/results/clientpositive/smb_mapjoin_1.q.out index 2e0f55b57565..1182e5680b85 100644 --- a/ql/src/test/results/clientpositive/smb_mapjoin_1.q.out +++ b/ql/src/test/results/clientpositive/smb_mapjoin_1.q.out @@ -22,27 +22,27 @@ POSTHOOK: query: create table smb_bucket_3(key int, value string) CLUSTERED BY ( POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@smb_bucket_3 -PREHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' overwrite into table smb_bucket_1 +PREHOOK: query: load 
data local inpath '../../data/files/smb_rc1/000000_0' overwrite into table smb_bucket_1 PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@smb_bucket_1 -POSTHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' overwrite into table smb_bucket_1 +POSTHOOK: query: load data local inpath '../../data/files/smb_rc1/000000_0' overwrite into table smb_bucket_1 POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@smb_bucket_1 -PREHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' overwrite into table smb_bucket_2 +PREHOOK: query: load data local inpath '../../data/files/smb_rc2/000000_0' overwrite into table smb_bucket_2 PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@smb_bucket_2 -POSTHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' overwrite into table smb_bucket_2 +POSTHOOK: query: load data local inpath '../../data/files/smb_rc2/000000_0' overwrite into table smb_bucket_2 POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@smb_bucket_2 -PREHOOK: query: load data local inpath '../../data/files/smbbucket_3.rc' overwrite into table smb_bucket_3 +PREHOOK: query: load data local inpath '../../data/files/smb_rc3/000000_0' overwrite into table smb_bucket_3 PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@smb_bucket_3 -POSTHOOK: query: load data local inpath '../../data/files/smbbucket_3.rc' overwrite into table smb_bucket_3 +POSTHOOK: query: load data local inpath '../../data/files/smb_rc3/000000_0' overwrite into table smb_bucket_3 POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@smb_bucket_3 diff --git a/ql/src/test/results/clientpositive/smb_mapjoin_10.q.out b/ql/src/test/results/clientpositive/smb_mapjoin_10.q.out index 6f1ba4f5cf9b..25e5eace540a 100644 --- a/ql/src/test/results/clientpositive/smb_mapjoin_10.q.out +++ 
b/ql/src/test/results/clientpositive/smb_mapjoin_10.q.out @@ -20,35 +20,35 @@ POSTHOOK: query: alter table tmp_smb_bucket_10 add partition (ds = '2') POSTHOOK: type: ALTERTABLE_ADDPARTS POSTHOOK: Output: default@tmp_smb_bucket_10 POSTHOOK: Output: default@tmp_smb_bucket_10@ds=2 -PREHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='1') +PREHOOK: query: load data local inpath '../../data/files/smb_rc1/000000_0' INTO TABLE tmp_smb_bucket_10 partition(ds='1') PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@tmp_smb_bucket_10@ds=1 -POSTHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='1') +POSTHOOK: query: load data local inpath '../../data/files/smb_rc1/000000_0' INTO TABLE tmp_smb_bucket_10 partition(ds='1') POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@tmp_smb_bucket_10@ds=1 -PREHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='1') +PREHOOK: query: load data local inpath '../../data/files/smb_rc1/000001_0' INTO TABLE tmp_smb_bucket_10 partition(ds='1') PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@tmp_smb_bucket_10@ds=1 -POSTHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='1') +POSTHOOK: query: load data local inpath '../../data/files/smb_rc1/000001_0' INTO TABLE tmp_smb_bucket_10 partition(ds='1') POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@tmp_smb_bucket_10@ds=1 -PREHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='2') +PREHOOK: query: load data local inpath '../../data/files/smb_rc1/000000_0' INTO TABLE tmp_smb_bucket_10 partition(ds='2') PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: 
default@tmp_smb_bucket_10@ds=2 -POSTHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='2') +POSTHOOK: query: load data local inpath '../../data/files/smb_rc1/000000_0' INTO TABLE tmp_smb_bucket_10 partition(ds='2') POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@tmp_smb_bucket_10@ds=2 -PREHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='2') +PREHOOK: query: load data local inpath '../../data/files/smb_rc1/000001_0' INTO TABLE tmp_smb_bucket_10 partition(ds='2') PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@tmp_smb_bucket_10@ds=2 -POSTHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='2') +POSTHOOK: query: load data local inpath '../../data/files/smb_rc1/000001_0' INTO TABLE tmp_smb_bucket_10 partition(ds='2') POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@tmp_smb_bucket_10@ds=2 diff --git a/ql/src/test/results/clientpositive/smb_mapjoin_2.q.out b/ql/src/test/results/clientpositive/smb_mapjoin_2.q.out index 7dcc26b5b36a..1dfacda93c8a 100644 --- a/ql/src/test/results/clientpositive/smb_mapjoin_2.q.out +++ b/ql/src/test/results/clientpositive/smb_mapjoin_2.q.out @@ -22,27 +22,27 @@ POSTHOOK: query: create table smb_bucket_3(key int, value string) CLUSTERED BY ( POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@smb_bucket_3 -PREHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' overwrite into table smb_bucket_1 +PREHOOK: query: load data local inpath '../../data/files/smb_rc1/000000_0' overwrite into table smb_bucket_1 PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@smb_bucket_1 -POSTHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' overwrite into table smb_bucket_1 
+POSTHOOK: query: load data local inpath '../../data/files/smb_rc1/000000_0' overwrite into table smb_bucket_1 POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@smb_bucket_1 -PREHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' overwrite into table smb_bucket_2 +PREHOOK: query: load data local inpath '../../data/files/smb_rc2/000000_0' overwrite into table smb_bucket_2 PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@smb_bucket_2 -POSTHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' overwrite into table smb_bucket_2 +POSTHOOK: query: load data local inpath '../../data/files/smb_rc2/000000_0' overwrite into table smb_bucket_2 POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@smb_bucket_2 -PREHOOK: query: load data local inpath '../../data/files/smbbucket_3.rc' overwrite into table smb_bucket_3 +PREHOOK: query: load data local inpath '../../data/files/smb_rc3/000000_0' overwrite into table smb_bucket_3 PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@smb_bucket_3 -POSTHOOK: query: load data local inpath '../../data/files/smbbucket_3.rc' overwrite into table smb_bucket_3 +POSTHOOK: query: load data local inpath '../../data/files/smb_rc3/000000_0' overwrite into table smb_bucket_3 POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@smb_bucket_3 diff --git a/ql/src/test/results/clientpositive/smb_mapjoin_25.q.out b/ql/src/test/results/clientpositive/smb_mapjoin_25.q.out index 8e95f71f5040..b9838a3a1c3a 100644 --- a/ql/src/test/results/clientpositive/smb_mapjoin_25.q.out +++ b/ql/src/test/results/clientpositive/smb_mapjoin_25.q.out @@ -22,27 +22,27 @@ POSTHOOK: query: create table smb_bucket_3(key int, value string) CLUSTERED BY ( POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@smb_bucket_3 -PREHOOK: query: load data local inpath 
'../../data/files/smbbucket_1.rc' overwrite into table smb_bucket_1 +PREHOOK: query: load data local inpath '../../data/files/smb_rc1/000000_0' overwrite into table smb_bucket_1 PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@smb_bucket_1 -POSTHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' overwrite into table smb_bucket_1 +POSTHOOK: query: load data local inpath '../../data/files/smb_rc1/000000_0' overwrite into table smb_bucket_1 POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@smb_bucket_1 -PREHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' overwrite into table smb_bucket_2 +PREHOOK: query: load data local inpath '../../data/files/smb_rc2/000000_0' overwrite into table smb_bucket_2 PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@smb_bucket_2 -POSTHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' overwrite into table smb_bucket_2 +POSTHOOK: query: load data local inpath '../../data/files/smb_rc2/000000_0' overwrite into table smb_bucket_2 POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@smb_bucket_2 -PREHOOK: query: load data local inpath '../../data/files/smbbucket_3.rc' overwrite into table smb_bucket_3 +PREHOOK: query: load data local inpath '../../data/files/smb_rc3/000000_0' overwrite into table smb_bucket_3 PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@smb_bucket_3 -POSTHOOK: query: load data local inpath '../../data/files/smbbucket_3.rc' overwrite into table smb_bucket_3 +POSTHOOK: query: load data local inpath '../../data/files/smb_rc3/000000_0' overwrite into table smb_bucket_3 POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@smb_bucket_3 diff --git a/ql/src/test/results/clientpositive/smb_mapjoin_3.q.out b/ql/src/test/results/clientpositive/smb_mapjoin_3.q.out index 
7b4e491c228c..cf4c74410fea 100644 --- a/ql/src/test/results/clientpositive/smb_mapjoin_3.q.out +++ b/ql/src/test/results/clientpositive/smb_mapjoin_3.q.out @@ -22,27 +22,27 @@ POSTHOOK: query: create table smb_bucket_3(key int, value string) CLUSTERED BY ( POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@smb_bucket_3 -PREHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' overwrite into table smb_bucket_1 +PREHOOK: query: load data local inpath '../../data/files/smb_rc1/000000_0' overwrite into table smb_bucket_1 PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@smb_bucket_1 -POSTHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' overwrite into table smb_bucket_1 +POSTHOOK: query: load data local inpath '../../data/files/smb_rc1/000000_0' overwrite into table smb_bucket_1 POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@smb_bucket_1 -PREHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' overwrite into table smb_bucket_2 +PREHOOK: query: load data local inpath '../../data/files/smb_rc2/000000_0' overwrite into table smb_bucket_2 PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@smb_bucket_2 -POSTHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' overwrite into table smb_bucket_2 +POSTHOOK: query: load data local inpath '../../data/files/smb_rc2/000000_0' overwrite into table smb_bucket_2 POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@smb_bucket_2 -PREHOOK: query: load data local inpath '../../data/files/smbbucket_3.rc' overwrite into table smb_bucket_3 +PREHOOK: query: load data local inpath '../../data/files/smb_rc3/000000_0' overwrite into table smb_bucket_3 PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@smb_bucket_3 -POSTHOOK: query: load data local inpath 
'../../data/files/smbbucket_3.rc' overwrite into table smb_bucket_3 +POSTHOOK: query: load data local inpath '../../data/files/smb_rc3/000000_0' overwrite into table smb_bucket_3 POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@smb_bucket_3 diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_1.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_1.q.out index ecd7f2d1a623..ec10c4491021 100644 --- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_1.q.out +++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_1.q.out @@ -22,27 +22,27 @@ POSTHOOK: query: create table smb_bucket_3(key int, value string) CLUSTERED BY ( POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@smb_bucket_3 -PREHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' overwrite into table smb_bucket_1 +PREHOOK: query: load data local inpath '../../data/files/smb_rc1/000000_0' overwrite into table smb_bucket_1 PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@smb_bucket_1 -POSTHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' overwrite into table smb_bucket_1 +POSTHOOK: query: load data local inpath '../../data/files/smb_rc1/000000_0' overwrite into table smb_bucket_1 POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@smb_bucket_1 -PREHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' overwrite into table smb_bucket_2 +PREHOOK: query: load data local inpath '../../data/files/smb_rc2/000000_0' overwrite into table smb_bucket_2 PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@smb_bucket_2 -POSTHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' overwrite into table smb_bucket_2 +POSTHOOK: query: load data local inpath '../../data/files/smb_rc2/000000_0' overwrite into table smb_bucket_2 POSTHOOK: type: LOAD #### A masked pattern was 
here #### POSTHOOK: Output: default@smb_bucket_2 -PREHOOK: query: load data local inpath '../../data/files/smbbucket_3.rc' overwrite into table smb_bucket_3 +PREHOOK: query: load data local inpath '../../data/files/smb_rc3/000000_0' overwrite into table smb_bucket_3 PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@smb_bucket_3 -POSTHOOK: query: load data local inpath '../../data/files/smbbucket_3.rc' overwrite into table smb_bucket_3 +POSTHOOK: query: load data local inpath '../../data/files/smb_rc3/000000_0' overwrite into table smb_bucket_3 POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@smb_bucket_3 diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_10.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_10.q.out index 1bfb36d833a5..68d39522fccb 100644 --- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_10.q.out +++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_10.q.out @@ -20,35 +20,35 @@ POSTHOOK: query: alter table tmp_smb_bucket_10 add partition (ds = '2') POSTHOOK: type: ALTERTABLE_ADDPARTS POSTHOOK: Output: default@tmp_smb_bucket_10 POSTHOOK: Output: default@tmp_smb_bucket_10@ds=2 -PREHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='1') +PREHOOK: query: load data local inpath '../../data/files/smb_rc1/000000_0' INTO TABLE tmp_smb_bucket_10 partition(ds='1') PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@tmp_smb_bucket_10@ds=1 -POSTHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='1') +POSTHOOK: query: load data local inpath '../../data/files/smb_rc1/000000_0' INTO TABLE tmp_smb_bucket_10 partition(ds='1') POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@tmp_smb_bucket_10@ds=1 -PREHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' INTO TABLE 
tmp_smb_bucket_10 partition(ds='1') +PREHOOK: query: load data local inpath '../../data/files/smb_rc1/000001_0' INTO TABLE tmp_smb_bucket_10 partition(ds='1') PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@tmp_smb_bucket_10@ds=1 -POSTHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='1') +POSTHOOK: query: load data local inpath '../../data/files/smb_rc1/000001_0' INTO TABLE tmp_smb_bucket_10 partition(ds='1') POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@tmp_smb_bucket_10@ds=1 -PREHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='2') +PREHOOK: query: load data local inpath '../../data/files/smb_rc1/000000_0' INTO TABLE tmp_smb_bucket_10 partition(ds='2') PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@tmp_smb_bucket_10@ds=2 -POSTHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='2') +POSTHOOK: query: load data local inpath '../../data/files/smb_rc1/000000_0' INTO TABLE tmp_smb_bucket_10 partition(ds='2') POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@tmp_smb_bucket_10@ds=2 -PREHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='2') +PREHOOK: query: load data local inpath '../../data/files/smb_rc1/000001_0' INTO TABLE tmp_smb_bucket_10 partition(ds='2') PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@tmp_smb_bucket_10@ds=2 -POSTHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='2') +POSTHOOK: query: load data local inpath '../../data/files/smb_rc1/000001_0' INTO TABLE tmp_smb_bucket_10 partition(ds='2') POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: 
default@tmp_smb_bucket_10@ds=2 diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_2.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_2.q.out index da419fe38431..0bcd1672efc7 100644 --- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_2.q.out +++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_2.q.out @@ -22,27 +22,27 @@ POSTHOOK: query: create table smb_bucket_3(key int, value string) CLUSTERED BY ( POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@smb_bucket_3 -PREHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' overwrite into table smb_bucket_1 +PREHOOK: query: load data local inpath '../../data/files/smb_rc1/000000_0' overwrite into table smb_bucket_1 PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@smb_bucket_1 -POSTHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' overwrite into table smb_bucket_1 +POSTHOOK: query: load data local inpath '../../data/files/smb_rc1/000000_0' overwrite into table smb_bucket_1 POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@smb_bucket_1 -PREHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' overwrite into table smb_bucket_2 +PREHOOK: query: load data local inpath '../../data/files/smb_rc2/000000_0' overwrite into table smb_bucket_2 PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@smb_bucket_2 -POSTHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' overwrite into table smb_bucket_2 +POSTHOOK: query: load data local inpath '../../data/files/smb_rc2/000000_0' overwrite into table smb_bucket_2 POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@smb_bucket_2 -PREHOOK: query: load data local inpath '../../data/files/smbbucket_3.rc' overwrite into table smb_bucket_3 +PREHOOK: query: load data local inpath '../../data/files/smb_rc3/000000_0' overwrite into 
table smb_bucket_3 PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@smb_bucket_3 -POSTHOOK: query: load data local inpath '../../data/files/smbbucket_3.rc' overwrite into table smb_bucket_3 +POSTHOOK: query: load data local inpath '../../data/files/smb_rc3/000000_0' overwrite into table smb_bucket_3 POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@smb_bucket_3 diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_25.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_25.q.out index e9e837f0d5aa..f176c187589c 100644 --- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_25.q.out +++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_25.q.out @@ -22,27 +22,27 @@ POSTHOOK: query: create table smb_bucket_3(key int, value string) CLUSTERED BY ( POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@smb_bucket_3 -PREHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' overwrite into table smb_bucket_1 +PREHOOK: query: load data local inpath '../../data/files/smb_rc1/000000_0' overwrite into table smb_bucket_1 PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@smb_bucket_1 -POSTHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' overwrite into table smb_bucket_1 +POSTHOOK: query: load data local inpath '../../data/files/smb_rc1/000000_0' overwrite into table smb_bucket_1 POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@smb_bucket_1 -PREHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' overwrite into table smb_bucket_2 +PREHOOK: query: load data local inpath '../../data/files/smb_rc2/000000_0' overwrite into table smb_bucket_2 PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@smb_bucket_2 -POSTHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' overwrite into table smb_bucket_2 
+POSTHOOK: query: load data local inpath '../../data/files/smb_rc2/000000_0' overwrite into table smb_bucket_2 POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@smb_bucket_2 -PREHOOK: query: load data local inpath '../../data/files/smbbucket_3.rc' overwrite into table smb_bucket_3 +PREHOOK: query: load data local inpath '../../data/files/smb_rc3/000000_0' overwrite into table smb_bucket_3 PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@smb_bucket_3 -POSTHOOK: query: load data local inpath '../../data/files/smbbucket_3.rc' overwrite into table smb_bucket_3 +POSTHOOK: query: load data local inpath '../../data/files/smb_rc3/000000_0' overwrite into table smb_bucket_3 POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@smb_bucket_3 diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_3.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_3.q.out index 9362085dc627..ad879a8c94a4 100644 --- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_3.q.out +++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_3.q.out @@ -22,27 +22,27 @@ POSTHOOK: query: create table smb_bucket_3(key int, value string) CLUSTERED BY ( POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@smb_bucket_3 -PREHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' overwrite into table smb_bucket_1 +PREHOOK: query: load data local inpath '../../data/files/smb_rc1/000000_0' overwrite into table smb_bucket_1 PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@smb_bucket_1 -POSTHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' overwrite into table smb_bucket_1 +POSTHOOK: query: load data local inpath '../../data/files/smb_rc1/000000_0' overwrite into table smb_bucket_1 POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@smb_bucket_1 -PREHOOK: query: load 
data local inpath '../../data/files/smbbucket_2.rc' overwrite into table smb_bucket_2 +PREHOOK: query: load data local inpath '../../data/files/smb_rc2/000000_0' overwrite into table smb_bucket_2 PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@smb_bucket_2 -POSTHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' overwrite into table smb_bucket_2 +POSTHOOK: query: load data local inpath '../../data/files/smb_rc2/000000_0' overwrite into table smb_bucket_2 POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@smb_bucket_2 -PREHOOK: query: load data local inpath '../../data/files/smbbucket_3.rc' overwrite into table smb_bucket_3 +PREHOOK: query: load data local inpath '../../data/files/smb_rc3/000000_0' overwrite into table smb_bucket_3 PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@smb_bucket_3 -POSTHOOK: query: load data local inpath '../../data/files/smbbucket_3.rc' overwrite into table smb_bucket_3 +POSTHOOK: query: load data local inpath '../../data/files/smb_rc3/000000_0' overwrite into table smb_bucket_3 POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@smb_bucket_3 diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_4.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_4.q.out index 42161fee06c4..21171db8d7a7 100644 --- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_4.q.out +++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_4.q.out @@ -22,27 +22,27 @@ POSTHOOK: query: create table smb_bucket_3(key int, value string) CLUSTERED BY ( POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@smb_bucket_3 -PREHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' overwrite into table smb_bucket_1 +PREHOOK: query: load data local inpath '../../data/files/smb_rc1/000000_0' overwrite into table smb_bucket_1 PREHOOK: type: LOAD #### A masked 
pattern was here #### PREHOOK: Output: default@smb_bucket_1 -POSTHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' overwrite into table smb_bucket_1 +POSTHOOK: query: load data local inpath '../../data/files/smb_rc1/000000_0' overwrite into table smb_bucket_1 POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@smb_bucket_1 -PREHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' overwrite into table smb_bucket_2 +PREHOOK: query: load data local inpath '../../data/files/smb_rc2/000000_0' overwrite into table smb_bucket_2 PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@smb_bucket_2 -POSTHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' overwrite into table smb_bucket_2 +POSTHOOK: query: load data local inpath '../../data/files/smb_rc2/000000_0' overwrite into table smb_bucket_2 POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@smb_bucket_2 -PREHOOK: query: load data local inpath '../../data/files/smbbucket_3.rc' overwrite into table smb_bucket_3 +PREHOOK: query: load data local inpath '../../data/files/smb_rc3/000000_0' overwrite into table smb_bucket_3 PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@smb_bucket_3 -POSTHOOK: query: load data local inpath '../../data/files/smbbucket_3.rc' overwrite into table smb_bucket_3 +POSTHOOK: query: load data local inpath '../../data/files/smb_rc3/000000_0' overwrite into table smb_bucket_3 POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@smb_bucket_3 diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_5.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_5.q.out index 9d1b9df01702..348d165df7c6 100644 --- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_5.q.out +++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_5.q.out @@ -22,27 +22,27 @@ POSTHOOK: query: create table 
smb_bucket_3(key int, value string) CLUSTERED BY ( POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@smb_bucket_3 -PREHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' overwrite into table smb_bucket_1 +PREHOOK: query: load data local inpath '../../data/files/smb_rc1/000000_0' overwrite into table smb_bucket_1 PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@smb_bucket_1 -POSTHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' overwrite into table smb_bucket_1 +POSTHOOK: query: load data local inpath '../../data/files/smb_rc1/000000_0' overwrite into table smb_bucket_1 POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@smb_bucket_1 -PREHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' overwrite into table smb_bucket_2 +PREHOOK: query: load data local inpath '../../data/files/smb_rc2/000000_0' overwrite into table smb_bucket_2 PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@smb_bucket_2 -POSTHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' overwrite into table smb_bucket_2 +POSTHOOK: query: load data local inpath '../../data/files/smb_rc2/000000_0' overwrite into table smb_bucket_2 POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@smb_bucket_2 -PREHOOK: query: load data local inpath '../../data/files/smbbucket_3.rc' overwrite into table smb_bucket_3 +PREHOOK: query: load data local inpath '../../data/files/smb_rc3/000000_0' overwrite into table smb_bucket_3 PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@smb_bucket_3 -POSTHOOK: query: load data local inpath '../../data/files/smbbucket_3.rc' overwrite into table smb_bucket_3 +POSTHOOK: query: load data local inpath '../../data/files/smb_rc3/000000_0' overwrite into table smb_bucket_3 POSTHOOK: type: LOAD #### A masked pattern was here #### 
POSTHOOK: Output: default@smb_bucket_3 From a2ef2e5349bc97c48b946071d266abaed8d21165 Mon Sep 17 00:00:00 2001 From: djaiswal Date: Wed, 10 Jan 2018 15:56:44 -0800 Subject: [PATCH 2/3] HIVE-18409 : load data should rename files consistent with insert statements (bucketed tables only) Part11 (Deepak Jaiswal, reviewed by Eugene Koifman) --- data/files/test_dat/000000_0 | 6 ++++++ .../clientnegative/exim_11_nonpart_noncompat_sorting.q | 2 +- .../clientnegative/exim_11_nonpart_noncompat_sorting.q.out | 4 ++-- 3 files changed, 9 insertions(+), 3 deletions(-) create mode 100644 data/files/test_dat/000000_0 diff --git a/data/files/test_dat/000000_0 b/data/files/test_dat/000000_0 new file mode 100644 index 000000000000..cf0389a5afee --- /dev/null +++ b/data/files/test_dat/000000_0 @@ -0,0 +1,6 @@ +1 +2 +3 +4 +5 +6 \ No newline at end of file diff --git a/ql/src/test/queries/clientnegative/exim_11_nonpart_noncompat_sorting.q b/ql/src/test/queries/clientnegative/exim_11_nonpart_noncompat_sorting.q index 1d9ae35ced71..49d83f4e01a2 100644 --- a/ql/src/test/queries/clientnegative/exim_11_nonpart_noncompat_sorting.q +++ b/ql/src/test/queries/clientnegative/exim_11_nonpart_noncompat_sorting.q @@ -7,7 +7,7 @@ create table exim_department ( dep_id int comment "department id") clustered by (dep_id) sorted by (dep_id desc) into 10 buckets stored as textfile tblproperties("creator"="krishna"); -load data local inpath "../../data/files/test.dat" into table exim_department; +load data local inpath "../../data/files/test_dat/000000_0" into table exim_department; dfs ${system:test.dfs.mkdir} target/tmp/ql/test/data/exports/exim_department/temp; dfs -rmr target/tmp/ql/test/data/exports/exim_department; export table exim_department to 'ql/test/data/exports/exim_department'; diff --git a/ql/src/test/results/clientnegative/exim_11_nonpart_noncompat_sorting.q.out b/ql/src/test/results/clientnegative/exim_11_nonpart_noncompat_sorting.q.out index 9220c8e806a6..872d5e6ef51d 100644 --- 
a/ql/src/test/results/clientnegative/exim_11_nonpart_noncompat_sorting.q.out +++ b/ql/src/test/results/clientnegative/exim_11_nonpart_noncompat_sorting.q.out @@ -12,11 +12,11 @@ POSTHOOK: query: create table exim_department ( dep_id int comment "department i POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@exim_department -PREHOOK: query: load data local inpath "../../data/files/test.dat" into table exim_department +PREHOOK: query: load data local inpath "../../data/files/test_dat/000000_0" into table exim_department PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@exim_department -POSTHOOK: query: load data local inpath "../../data/files/test.dat" into table exim_department +POSTHOOK: query: load data local inpath "../../data/files/test_dat/000000_0" into table exim_department POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@exim_department From 4d9d29a34fd0f35dade2b24ce3eb77afcde499c8 Mon Sep 17 00:00:00 2001 From: Anishek Agarwal Date: Tue, 26 Dec 2017 19:41:39 +0530 Subject: [PATCH 3/3] HIVE-18341: Add repl load support for adding "raw" namespace for TDE with same encryption keys --- .../org/apache/hadoop/hive/conf/HiveConf.java | 4 + .../TestReplicationOnHDFSEncryptedZones.java | 144 ++++++++++++++++++ .../hive/ql/parse/WarehouseInstance.java | 36 ++++- .../hadoop/hive/ql/parse/repl/CopyUtils.java | 78 +++++++--- 4 files changed, 241 insertions(+), 21 deletions(-) create mode 100644 itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOnHDFSEncryptedZones.java diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 7e3e783c6c91..af78e30ec7b6 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -462,6 +462,10 @@ public static enum ConfVars { 
REPL_DUMPDIR_TTL("hive.repl.dumpdir.ttl", "7d", new TimeValidator(TimeUnit.DAYS), "TTL of dump dirs before cleanup."), + //https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs/TransparentEncryption.html#Running_as_the_superuser + REPL_ADD_RAW_RESERVED_NAMESPACE("hive.repl.add.raw.reserved.namespace", false, + "For TDE with same encryption keys on source and target, allow Distcp super user to access \n" + + "the raw bytes from filesystem without decrypting on source and then encrypting on target."), LOCALSCRATCHDIR("hive.exec.local.scratchdir", "${system:java.io.tmpdir}" + File.separator + "${system:user.name}", "Local scratch space for Hive jobs"), diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOnHDFSEncryptedZones.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOnHDFSEncryptedZones.java new file mode 100644 index 000000000000..fd05e99137e8 --- /dev/null +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOnHDFSEncryptedZones.java @@ -0,0 +1,144 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.parse; + +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.DFSTestUtil; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.shims.Utils; +import org.apache.hadoop.security.UserGroupInformation; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Ignore; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TestName; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.IOException; +import java.util.HashMap; + +import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_ENABLED; + +public class TestReplicationOnHDFSEncryptedZones { + private static String jksFile = System.getProperty("java.io.tmpdir") + "/test.jks"; + @Rule + public final TestName testName = new TestName(); + + protected static final Logger LOG = LoggerFactory.getLogger(TestReplicationScenarios.class); + private static WarehouseInstance primary; + private static String primaryDbName, replicatedDbName; + private static Configuration conf; + private static MiniDFSCluster miniDFSCluster; + + @BeforeClass + public static void beforeClassSetup() throws Exception { + conf = new Configuration(); + conf.set("dfs.client.use.datanode.hostname", "true"); + conf.set("hadoop.proxyuser." 
+ Utils.getUGI().getShortUserName() + ".hosts", "*"); + conf.set("hadoop.security.key.provider.path", "jceks://file" + jksFile); + conf.setBoolean("dfs.namenode.delegation.token.always-use", true); + + conf.setLong(HiveConf.ConfVars.HIVE_EXEC_COPYFILE_MAXSIZE.varname, 1); + conf.setLong(HiveConf.ConfVars.HIVE_EXEC_COPYFILE_MAXNUMFILES.varname, 0); + conf.setBoolean(METASTORE_AGGREGATE_STATS_CACHE_ENABLED.varname, false); + + miniDFSCluster = + new MiniDFSCluster.Builder(conf).numDataNodes(1).format(true).build(); + + DFSTestUtil.createKey("test_key", miniDFSCluster, conf); + primary = new WarehouseInstance(LOG, miniDFSCluster, new HashMap() {{ + put(HiveConf.ConfVars.HIVE_IN_TEST.varname, "false"); + put(HiveConf.ConfVars.HIVE_SERVER2_ENABLE_DOAS.varname, "false"); + }}, "test_key"); + } + + @AfterClass + public static void classLevelTearDown() throws IOException { + primary.close(); + FileUtils.deleteQuietly(new File(jksFile)); + } + + @Before + public void setup() throws Throwable { + primaryDbName = testName.getMethodName() + "_" + +System.currentTimeMillis(); + replicatedDbName = "replicated_" + primaryDbName; + primary.run("create database " + primaryDbName); + } + + @Test + public void targetAndSourceHaveDifferentEncryptionZoneKeys() throws Throwable { + DFSTestUtil.createKey("test_key123", miniDFSCluster, conf); + + WarehouseInstance replica = new WarehouseInstance(LOG, miniDFSCluster, + new HashMap() {{ + put(HiveConf.ConfVars.HIVE_IN_TEST.varname, "false"); + put(HiveConf.ConfVars.HIVE_SERVER2_ENABLE_DOAS.varname, "false"); + }}, "test_key123"); + + WarehouseInstance.Tuple tuple = + primary.run("use " + primaryDbName) + .run("create table encrypted_table (id int, value string)") + .run("insert into table encrypted_table values (1,'value1')") + .run("insert into table encrypted_table values (2,'value2')") + .dump(primaryDbName, null); + + replica + .run("repl load " + replicatedDbName + " from '" + tuple.dumpLocation + + "' 
with('hive.repl.add.raw.reserved.namespace'='true')") + .run("use " + replicatedDbName) + .run("repl status " + replicatedDbName) + .verifyResult(tuple.lastReplicationId) + .run("select value from encrypted_table") + .verifyFailure(new String[] { "value1", "value2" }); + } + + @Ignore("this is ignored as minidfs cluster as of writing this test looked like did not copy the " + + "files correctly") + @Test + public void targetAndSourceHaveSameEncryptionZoneKeys() throws Throwable { + WarehouseInstance replica = new WarehouseInstance(LOG, miniDFSCluster, + new HashMap() {{ + put(HiveConf.ConfVars.HIVE_IN_TEST.varname, "false"); + put(HiveConf.ConfVars.HIVE_SERVER2_ENABLE_DOAS.varname, "false"); + put(HiveConf.ConfVars.HIVE_DISTCP_DOAS_USER.varname, + UserGroupInformation.getCurrentUser().getUserName()); + }}, "test_key"); + + WarehouseInstance.Tuple tuple = + primary.run("use " + primaryDbName) + .run("create table encrypted_table (id int, value string)") + .run("insert into table encrypted_table values (1,'value1')") + .run("insert into table encrypted_table values (2,'value2')") + .dump(primaryDbName, null); + + replica + .run("repl load " + replicatedDbName + " from '" + tuple.dumpLocation + + "' with('hive.repl.add.raw.reserved.namespace'='true')") + .run("use " + replicatedDbName) + .run("repl status " + replicatedDbName) + .verifyResult(tuple.lastReplicationId) + .run("select value from encrypted_table") + .verifyResults(new String[] { "value1", "value2" }); + } +} diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/WarehouseInstance.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/WarehouseInstance.java index 061817fc7f7a..c7866f2931e8 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/WarehouseInstance.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/WarehouseInstance.java @@ -53,6 +53,7 @@ Licensed to the Apache Software Foundation (ASF) under one import 
java.util.stream.Collectors; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -68,8 +69,8 @@ class WarehouseInstance implements Closeable { private final static String LISTENER_CLASS = DbNotificationListener.class.getCanonicalName(); - WarehouseInstance(Logger logger, MiniDFSCluster cluster, Map overridesForHiveConf) - throws Exception { + WarehouseInstance(Logger logger, MiniDFSCluster cluster, Map overridesForHiveConf, + String keyNameForEncryptedZone) throws Exception { this.logger = logger; this.miniDFSCluster = cluster; assert miniDFSCluster.isClusterUp(); @@ -77,15 +78,28 @@ class WarehouseInstance implements Closeable { DistributedFileSystem fs = miniDFSCluster.getFileSystem(); Path warehouseRoot = mkDir(fs, "/warehouse" + uniqueIdentifier); + if (StringUtils.isNotEmpty(keyNameForEncryptedZone)) { + fs.createEncryptionZone(warehouseRoot, keyNameForEncryptedZone); + } Path cmRootPath = mkDir(fs, "/cmroot" + uniqueIdentifier); this.functionsRoot = mkDir(fs, "/functions" + uniqueIdentifier).toString(); initialize(cmRootPath.toString(), warehouseRoot.toString(), overridesForHiveConf); } - WarehouseInstance(Logger logger, MiniDFSCluster cluster) throws Exception { + WarehouseInstance(Logger logger, MiniDFSCluster cluster, String keyNameForEncryptedZone) + throws Exception { this(logger, cluster, new HashMap() {{ put(HiveConf.ConfVars.HIVE_IN_TEST.varname, "true"); - }}); + }}, keyNameForEncryptedZone); + } + + WarehouseInstance(Logger logger, MiniDFSCluster cluster, + Map overridesForHiveConf) throws Exception { + this(logger, cluster, overridesForHiveConf, null); + } + + WarehouseInstance(Logger logger, MiniDFSCluster cluster) throws Exception { + this(logger, cluster, (String) null); } private void initialize(String cmRoot, String warehouseRoot, @@ -218,6 +232,20 @@ WarehouseInstance verifyResults(String[] data) throws IOException { return 
this; } + WarehouseInstance verifyFailure(String[] data) throws IOException { + List results = getOutput(); + logger.info("Expecting {}", StringUtils.join(data, ",")); + logger.info("Got {}", results); + boolean dataMatched = (data.length == results.size()); + if (dataMatched) { + for (int i = 0; i < data.length; i++) { + dataMatched &= data[i].toLowerCase().equals(results.get(i).toLowerCase()); + } + } + assertFalse(dataMatched); + return this; + } + /** * verify's result without regard for ordering. */ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/CopyUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/CopyUtils.java index f24d1b65024d..4e61280c9eb7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/CopyUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/CopyUtils.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.parse.repl; +import com.google.common.collect.Lists; import org.apache.hadoop.fs.ContentSummary; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; @@ -31,25 +32,28 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.collect.Lists; - import javax.security.auth.login.LoginException; import java.io.IOException; +import java.net.URI; +import java.security.PrivilegedExceptionAction; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.stream.Collectors; public class CopyUtils { private static final Logger LOG = LoggerFactory.getLogger(CopyUtils.class); + // https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs/TransparentEncryption.html#Running_as_the_superuser + private static final String RAW_RESERVED_VIRTUAL_PATH = "/.reserved/raw/"; + private static final int MAX_COPY_RETRY = 3; private final HiveConf hiveConf; private final long maxCopyFileSize; private final long maxNumberOfFiles; private final boolean hiveInTest; private final String copyAsUser; - private 
final int MAX_COPY_RETRY = 3; public CopyUtils(String distCpDoAsUser, HiveConf hiveConf) { this.hiveConf = hiveConf; @@ -107,8 +111,7 @@ private void doCopyRetry(FileSystem sourceFs, List f FileSystem destinationFs, Path destination, boolean useRegularCopy) throws IOException, LoginException { int repeat = 0; - List pathList = Lists.transform(fileList, - fileInfo -> { return fileInfo.getEffectivePath(); }); + List pathList = Lists.transform(fileList, ReplChangeManager.FileInfo::getEffectivePath); while (!pathList.isEmpty() && (repeat < MAX_COPY_RETRY)) { try { doCopyOnce(sourceFs, pathList, destinationFs, destination, useRegularCopy); @@ -143,21 +146,62 @@ private void doCopyOnce(FileSystem sourceFs, List srcList, boolean useRegularCopy) throws IOException, LoginException { UserGroupInformation ugi = Utils.getUGI(); String currentUser = ugi.getShortUserName(); - boolean usePrivilegedDistCp = copyAsUser != null && !currentUser.equals(copyAsUser); + boolean usePrivilegedUser = copyAsUser != null && !currentUser.equals(copyAsUser); if (useRegularCopy) { - Path[] paths = srcList.toArray(new Path[] {}); - FileUtil.copy(sourceFs, paths, destinationFs, destination, false, true, hiveConf); + doRegularCopyOnce(sourceFs, srcList, destinationFs, destination, usePrivilegedUser); + } else { + doDistCpCopyOnce(sourceFs, srcList, destination, usePrivilegedUser); + } + } + + private void doDistCpCopyOnce(FileSystem sourceFs, List srcList, Path destination, + boolean usePrivilegedUser) throws IOException { + if (hiveConf.getBoolVar(HiveConf.ConfVars.REPL_ADD_RAW_RESERVED_NAMESPACE)) { + srcList = srcList.stream().map(path -> { + URI uri = path.toUri(); + return new Path(uri.getScheme(), uri.getAuthority(), + RAW_RESERVED_VIRTUAL_PATH + uri.getPath()); + }).collect(Collectors.toList()); + URI destinationUri = destination.toUri(); + destination = new Path(destinationUri.getScheme(), destinationUri.getAuthority(), + RAW_RESERVED_VIRTUAL_PATH + destinationUri.getPath()); + 
hiveConf.set("distcp.options.px",""); + } + + FileUtils.distCp( + sourceFs, // source file system + srcList, // list of source paths + destination, + false, + usePrivilegedUser ? copyAsUser : null, + hiveConf, + ShimLoader.getHadoopShims() + ); + } + + private void doRegularCopyOnce(FileSystem sourceFs, List srcList, FileSystem destinationFs, + Path destination, boolean usePrivilegedUser) throws IOException { + /* + even for regular copy we have to use the same user permissions that distCp will use since + hive-server user might be different than the super user required to copy relevant files. + */ + final Path[] paths = srcList.toArray(new Path[] {}); + if (usePrivilegedUser) { + final Path finalDestination = destination; + UserGroupInformation proxyUser = UserGroupInformation.createProxyUser( + copyAsUser, UserGroupInformation.getLoginUser()); + try { + proxyUser.doAs((PrivilegedExceptionAction) () -> { + FileUtil + .copy(sourceFs, paths, destinationFs, finalDestination, false, true, hiveConf); + return true; + }); + } catch (InterruptedException e) { + throw new IOException(e); + } } else { - FileUtils.distCp( - sourceFs, // source file system - srcList, // list of source paths - destination, - false, - usePrivilegedDistCp ? copyAsUser : null, - hiveConf, - ShimLoader.getHadoopShims() - ); + FileUtil.copy(sourceFs, paths, destinationFs, destination, false, true, hiveConf); } }