From 214386759e283c9a94f6f52ea276a4df278232e0 Mon Sep 17 00:00:00 2001 From: "harshit.gupta" Date: Thu, 28 Nov 2024 16:31:31 +0530 Subject: [PATCH 01/13] HADOOP-19254: Implement bulk delete command as hadoop fs command operation --- .../hadoop/fs/shell/BulkDeleteCommand.java | 87 +++++++++++++++++++ .../org/apache/hadoop/fs/shell/FsCommand.java | 1 + 2 files changed, 88 insertions(+) create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/BulkDeleteCommand.java diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/BulkDeleteCommand.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/BulkDeleteCommand.java new file mode 100644 index 0000000000000..0e116916ce8bd --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/BulkDeleteCommand.java @@ -0,0 +1,87 @@ +package org.apache.hadoop.fs.shell; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.BulkDelete; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.util.ArrayList; +import java.util.LinkedList; +import java.util.List; +import java.util.stream.Collectors; + +public class BulkDeleteCommand extends FsCommand { + Logger LOG = LoggerFactory.getLogger(BulkDeleteCommand.class.getName()); + public static void registerCommands(CommandFactory factory) { + factory.addClass(BulkDeleteCommand.class, "-bulkDelete"); + } + + public static final String name = "bulkDelete"; + + public static final String READ_FROM_FILE = "readFromFile"; + + public static final String USAGE = "-[ " + READ_FROM_FILE + "] [] [ ]"; + + public static final String DESCRIPTION = "Deletes the set of files under the given path. If a list of paths " + + "is provided then the paths are deleted directly. User can also point to the file where the paths are" + + "listed as full object names."; + + private String fileName; + + /* + Making the class stateful as the PathData initialization for all args is not needed + */ + LinkedList childArgs; + + protected BulkDeleteCommand() {} + + protected BulkDeleteCommand(Configuration conf) {super(conf);} + + @Override + protected void processOptions(LinkedList args) throws IOException { + CommandFormat cf = new CommandFormat(1, Integer.MAX_VALUE); + cf.addOptionWithValue(READ_FROM_FILE); + cf.parse(args); + fileName = cf.getOptValue(READ_FROM_FILE); + LOG.warn(fileName); + } + + @Override + protected LinkedList expandArguments(LinkedList args) throws IOException { + LOG.warn(args.toString()); + if(fileName == null && args.size() < 2) { + throw new IOException("Invalid Number of Arguments. 
Expected more"); + } + LinkedList pathData = new LinkedList<>(); + pathData.add(new PathData(args.get(0), getConf())); + args.remove(0); + this.childArgs = args; + return pathData; + } + + @Override + protected void processArguments(LinkedList args) throws IOException { + System.out.println(args.toString()); + PathData basePath = args.get(0); + out.println("Deleting files under:" + basePath); + List pathList = new ArrayList<>(); + if(fileName != null) { + LOG.warn("IN here"); + FileSystem localFile = FileSystem.get(getConf()); + BufferedReader br = new BufferedReader(new InputStreamReader(localFile.open(new Path(fileName)))); + String line; + while((line = br.readLine()) != null) { + pathList.add(new Path(line)); + } + } else { + pathList.addAll(this.childArgs.stream().map(Path::new).collect(Collectors.toList())); + } + BulkDelete bulkDelete = basePath.fs.createBulkDelete(basePath.path); + bulkDelete.bulkDelete(pathList); + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/FsCommand.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/FsCommand.java index 0abf0e3c587ad..7a2c8ea29a805 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/FsCommand.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/FsCommand.java @@ -71,6 +71,7 @@ public static void registerCommands(CommandFactory factory) { factory.registerCommands(SnapshotCommands.class); factory.registerCommands(XAttrCommands.class); factory.registerCommands(Concat.class); + factory.registerCommands(BulkDeleteCommand.class); } protected FsCommand() {} From 3d9cfb1a479084d392a0d7fa5cf5e5cc35043d5e Mon Sep 17 00:00:00 2001 From: "harshit.gupta" Date: Fri, 29 Nov 2024 20:08:43 +0530 Subject: [PATCH 02/13] HADOOP-19254: Implement bulk delete command as hadoop fs command operation --- .../hadoop/fs/shell/BulkDeleteCommand.java | 11 ++--- .../fs/shell/TestBulkDeleteCommand.java | 45 +++++++++++++++++++ 2 files changed, 49 insertions(+), 7 deletions(-) create mode 100644 hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestBulkDeleteCommand.java diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/BulkDeleteCommand.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/BulkDeleteCommand.java index 0e116916ce8bd..18116c138d242 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/BulkDeleteCommand.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/BulkDeleteCommand.java @@ -16,7 +16,6 @@ import java.util.stream.Collectors; public class BulkDeleteCommand extends FsCommand { - Logger LOG = LoggerFactory.getLogger(BulkDeleteCommand.class.getName()); public static void registerCommands(CommandFactory factory) { factory.addClass(BulkDeleteCommand.class, "-bulkDelete"); } @@ -38,22 +37,22 @@ public static void registerCommands(CommandFactory factory) { */ LinkedList childArgs; - protected BulkDeleteCommand() {} + protected BulkDeleteCommand() { + this.childArgs = new LinkedList<>(); + } protected BulkDeleteCommand(Configuration conf) {super(conf);} @Override protected void processOptions(LinkedList args) throws IOException { - CommandFormat cf = new CommandFormat(1, Integer.MAX_VALUE); + CommandFormat cf = new CommandFormat(0, Integer.MAX_VALUE); cf.addOptionWithValue(READ_FROM_FILE); cf.parse(args); fileName = cf.getOptValue(READ_FROM_FILE); 
- LOG.warn(fileName); } @Override protected LinkedList expandArguments(LinkedList args) throws IOException { - LOG.warn(args.toString()); if(fileName == null && args.size() < 2) { throw new IOException("Invalid Number of Arguments. Expected more"); } @@ -66,12 +65,10 @@ protected LinkedList expandArguments(LinkedList args) throws I @Override protected void processArguments(LinkedList args) throws IOException { - System.out.println(args.toString()); PathData basePath = args.get(0); out.println("Deleting files under:" + basePath); List pathList = new ArrayList<>(); if(fileName != null) { - LOG.warn("IN here"); FileSystem localFile = FileSystem.get(getConf()); BufferedReader br = new BufferedReader(new InputStreamReader(localFile.open(new Path(fileName)))); String line; diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestBulkDeleteCommand.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestBulkDeleteCommand.java new file mode 100644 index 0000000000000..c601af16d212f --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestBulkDeleteCommand.java @@ -0,0 +1,45 @@ +package org.apache.hadoop.fs.shell; + +import org.apache.hadoop.conf.Configuration; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; +import java.util.LinkedList; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +public class TestBulkDeleteCommand { + private static Configuration conf; + + @BeforeClass + public static void setup() throws IOException { + conf = new Configuration(); + } + + @Test + public void testDefaults() throws IOException { + LinkedList options = new LinkedList<>(); + BulkDeleteCommand bulkDeleteCommand = new BulkDeleteCommand(); + bulkDeleteCommand.processOptions(options); + assertTrue(bulkDeleteCommand.childArgs.isEmpty()); + } + + @Test + public void testArguments() throws IOException, URISyntaxException { + BulkDeleteCommand bulkDeleteCommand = new BulkDeleteCommand(conf); + LinkedList arguments = new LinkedList<>(); + String arg1 = "file:///file/name/1"; + String arg2 = "file:///file/name/2"; + arguments.add(arg1); + arguments.add(arg2); + LinkedList pathData = bulkDeleteCommand.expandArguments(arguments); + assertEquals(1, pathData.size()); + assertEquals(new URI(arg1).getPath(), pathData.get(0).path.toUri().getPath()); + assertEquals(1, bulkDeleteCommand.childArgs.size()); + assertEquals(arg2, bulkDeleteCommand.childArgs.get(0)); + } +} From cba5e494560587081496f83626cad07a675079c0 Mon Sep 17 00:00:00 2001 From: "harshit.gupta" Date: Wed, 4 Dec 2024 16:29:52 +0530 Subject: [PATCH 03/13] Basic Nit Review Fixes --- .../hadoop/fs/shell/BulkDeleteCommand.java | 34 ++++++++++---- .../org/apache/hadoop/fs/shell/FsCommand.java | 2 +- .../fs/shell/TestBulkDeleteCommand.java | 44 ++++++++++++++----- 3 files changed, 60 insertions(+), 20 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/BulkDeleteCommand.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/BulkDeleteCommand.java index 18116c138d242..0bdc8e7881d5a 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/BulkDeleteCommand.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/BulkDeleteCommand.java @@ -1,11 +1,22 @@ -package 
org.apache.hadoop.fs.shell; +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.BulkDelete; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +package org.apache.hadoop.fs.shell; import java.io.BufferedReader; import java.io.IOException; @@ -15,6 +26,11 @@ import java.util.List; import java.util.stream.Collectors; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.BulkDelete; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + public class BulkDeleteCommand extends FsCommand { public static void registerCommands(CommandFactory factory) { factory.addClass(BulkDeleteCommand.class, "-bulkDelete"); @@ -73,7 +89,9 @@ protected void processArguments(LinkedList args) throws IOException { BufferedReader br = new BufferedReader(new InputStreamReader(localFile.open(new Path(fileName)))); String line; while((line = br.readLine()) != null) { - pathList.add(new Path(line)); + if(!line.startsWith("#")) { + pathList.add(new Path(line)); + } } } else { pathList.addAll(this.childArgs.stream().map(Path::new).collect(Collectors.toList())); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/FsCommand.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/FsCommand.java index 7a2c8ea29a805..32889e01fa489 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/FsCommand.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/FsCommand.java @@ -51,6 +51,7 @@ abstract public class FsCommand extends Command { */ public static void registerCommands(CommandFactory factory) { factory.registerCommands(AclCommands.class); + factory.registerCommands(BulkDeleteCommand.class); factory.registerCommands(CopyCommands.class); factory.registerCommands(Count.class); factory.registerCommands(Delete.class); @@ -71,7 +72,6 @@ public static void registerCommands(CommandFactory factory) { factory.registerCommands(SnapshotCommands.class); factory.registerCommands(XAttrCommands.class); factory.registerCommands(Concat.class); - factory.registerCommands(BulkDeleteCommand.class); } protected FsCommand() {} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestBulkDeleteCommand.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestBulkDeleteCommand.java index c601af16d212f..faf3a81b69314 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestBulkDeleteCommand.java +++ 
b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestBulkDeleteCommand.java @@ -1,18 +1,35 @@ -package org.apache.hadoop.fs.shell; +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ -import org.apache.hadoop.conf.Configuration; -import org.junit.BeforeClass; -import org.junit.Test; +package org.apache.hadoop.fs.shell; import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; import java.util.LinkedList; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.test.HadoopTestBase; +import org.assertj.core.api.Assertions; +import org.junit.BeforeClass; +import org.junit.Test; -public class TestBulkDeleteCommand { +public class TestBulkDeleteCommand extends HadoopTestBase { private static Configuration conf; @BeforeClass @@ -37,9 +54,14 @@ public void testArguments() throws IOException, URISyntaxException { arguments.add(arg1); arguments.add(arg2); LinkedList pathData = bulkDeleteCommand.expandArguments(arguments); - assertEquals(1, pathData.size()); - assertEquals(new URI(arg1).getPath(), pathData.get(0).path.toUri().getPath()); - assertEquals(1, bulkDeleteCommand.childArgs.size()); - assertEquals(arg2, bulkDeleteCommand.childArgs.get(0)); + Assertions.assertThat(pathData.size()). + describedAs("Only one root path must be present").isEqualTo(1); + Assertions.assertThat(pathData.get(0).path.toUri().getPath()). + describedAs("Base path of the command should match").isEqualTo(new URI(arg1).getPath()); + Assertions.assertThat(bulkDeleteCommand.childArgs.size()). + describedAs("Only one other argument was passed to the command"). + isEqualTo(1); + Assertions.assertThat(bulkDeleteCommand.childArgs.get(0)). 
+ describedAs("Children arguments must match").isEqualTo(arg2); } } From d91fcb463082b045ed0e6e64f14d8c2e90aaf5e8 Mon Sep 17 00:00:00 2001 From: "harshit.gupta" Date: Fri, 6 Dec 2024 20:07:09 +0530 Subject: [PATCH 04/13] Basic Nit Review Fixes with Page Size Implementation --- .../hadoop/fs/shell/BulkDeleteCommand.java | 52 +++++++++++++++++-- 1 file changed, 47 insertions(+), 5 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/BulkDeleteCommand.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/BulkDeleteCommand.java index 0bdc8e7881d5a..79453b527c253 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/BulkDeleteCommand.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/BulkDeleteCommand.java @@ -21,9 +21,7 @@ import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; -import java.util.ArrayList; -import java.util.LinkedList; -import java.util.List; +import java.util.*; import java.util.stream.Collectors; import org.apache.hadoop.conf.Configuration; @@ -40,7 +38,10 @@ public static void registerCommands(CommandFactory factory) { public static final String READ_FROM_FILE = "readFromFile"; - public static final String USAGE = "-[ " + READ_FROM_FILE + "] [] [ ]"; + public static final String PAGE_SIZE = "pageSize"; + + public static final String USAGE = "-[ " + READ_FROM_FILE + "] [] [" + + PAGE_SIZE + "] [] [ ]"; public static final String DESCRIPTION = "Deletes the set of files under the given path. If a list of paths " + "is provided then the paths are deleted directly. User can also point to the file where the paths are" + @@ -48,6 +49,8 @@ public static void registerCommands(CommandFactory factory) { private String fileName; + private int pageSize; + /* Making the class stateful as the PathData initialization for all args is not needed */ @@ -63,8 +66,14 @@ protected BulkDeleteCommand() { protected void processOptions(LinkedList args) throws IOException { CommandFormat cf = new CommandFormat(0, Integer.MAX_VALUE); cf.addOptionWithValue(READ_FROM_FILE); + cf.addOptionWithValue(PAGE_SIZE); cf.parse(args); fileName = cf.getOptValue(READ_FROM_FILE); + if(cf.getOptValue(PAGE_SIZE) != null) { + pageSize = Integer.parseInt(cf.getOptValue(PAGE_SIZE)); + } else { + pageSize = 1; + } } @Override @@ -79,6 +88,13 @@ protected LinkedList expandArguments(LinkedList args) throws I return pathData; } + void deleteInBatches(BulkDelete bulkDelete, List paths) throws IOException { + Batch batches = new Batch<>(paths, pageSize); + while(batches.hasNext()) { + bulkDelete.bulkDelete(batches.next()); + } + } + @Override protected void processArguments(LinkedList args) throws IOException { PathData basePath = args.get(0); @@ -97,6 +113,32 @@ protected void processArguments(LinkedList args) throws IOException { pathList.addAll(this.childArgs.stream().map(Path::new).collect(Collectors.toList())); } BulkDelete bulkDelete = basePath.fs.createBulkDelete(basePath.path); - bulkDelete.bulkDelete(pathList); + deleteInBatches(bulkDelete, pathList); + } + + static class Batch { + List data; + int batchSize; + int currentLocation; + + Batch(List data, int batchSize) { + this.data = Collections.unmodifiableList(data); + this.batchSize = batchSize; + this.currentLocation = 0; + } + + boolean hasNext() { + return this.currentLocation < this.data.size(); + } + + List next() { + List ret = new ArrayList<>(); + int i = currentLocation; 
+ for(; i < Math.min(currentLocation + batchSize, data.size()); i++) { + ret.add(this.data.get(i)); + } + currentLocation = i; + return ret; + } } } From ba618cc84651b2b880562f80a845807ba3624294 Mon Sep 17 00:00:00 2001 From: "harshit.gupta" Date: Tue, 10 Dec 2024 22:47:28 +0530 Subject: [PATCH 05/13] Review Fixes --- .../hadoop/fs/shell/BulkDeleteCommand.java | 73 +++++++++++++++---- .../site/markdown/filesystem/bulkdelete.md | 12 +++ 2 files changed, 72 insertions(+), 13 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/BulkDeleteCommand.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/BulkDeleteCommand.java index 79453b527c253..1339bf3d9844c 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/BulkDeleteCommand.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/BulkDeleteCommand.java @@ -28,24 +28,40 @@ import org.apache.hadoop.fs.BulkDelete; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class BulkDeleteCommand extends FsCommand { + public static void registerCommands(CommandFactory factory) { factory.addClass(BulkDeleteCommand.class, "-bulkDelete"); } + private static final Logger LOG = LoggerFactory.getLogger(BulkDeleteCommand.class.getName()); + public static final String name = "bulkDelete"; + /** + * File Name parameter to be specified at command line + */ public static final String READ_FROM_FILE = "readFromFile"; + /** + * Page size parameter specified at command line + */ public static final String PAGE_SIZE = "pageSize"; + public static final String USAGE = "-[ " + READ_FROM_FILE + "] [] [" + PAGE_SIZE + "] [] [ ]"; - public static final String DESCRIPTION = "Deletes the set of files under the given path. If a list of paths " + - "is provided then the paths are deleted directly. User can also point to the file where the paths are" + - "listed as full object names."; + public static final String DESCRIPTION = "Deletes the set of files under the given .\n" + + "If a list of paths is provided at command line then the paths are deleted directly.\n" + + "User can also point to the file where the paths are listed as full object names using the \"fileName\"" + + "parameter. The presence of a file name takes precedence over the list of objects.\n" + + "Page size refers to the size of each bulk delete batch." + + "Users can specify the page size using \"pageSize\" command parameter." + + "Default value is 1.\n"; private String fileName; @@ -62,6 +78,11 @@ protected BulkDeleteCommand() { protected BulkDeleteCommand(Configuration conf) {super(conf);} + /** + * Processes the command line options and initialize the variables + * @param args the command line arguments + * @throws IOException + */ @Override protected void processOptions(LinkedList args) throws IOException { CommandFormat cf = new CommandFormat(0, Integer.MAX_VALUE); @@ -76,6 +97,12 @@ protected void processOptions(LinkedList args) throws IOException { } } + /** + * Processes the command line arguments and stores the child arguments in a list + * @param args strings to expand into {@link PathData} objects + * @return the base path of the bulk delete command. 
+ * @throws IOException if the wrong number of arguments specified + */ @Override protected LinkedList expandArguments(LinkedList args) throws IOException { if(fileName == null && args.size() < 2) { @@ -88,19 +115,27 @@ protected LinkedList expandArguments(LinkedList args) throws I return pathData; } + /** + * Deletes the objects using the bulk delete api + * @param bulkDelete Bulkdelete object exposing the API + * @param paths list of paths to be deleted in the base path + * @throws IOException on error in execution of the delete command + */ void deleteInBatches(BulkDelete bulkDelete, List paths) throws IOException { Batch batches = new Batch<>(paths, pageSize); while(batches.hasNext()) { - bulkDelete.bulkDelete(batches.next()); + List> result = bulkDelete.bulkDelete(batches.next()); + LOG.debug(result.toString()); } } @Override protected void processArguments(LinkedList args) throws IOException { PathData basePath = args.get(0); - out.println("Deleting files under:" + basePath); + LOG.info("Deleting files under:{}", basePath); List pathList = new ArrayList<>(); if(fileName != null) { + LOG.info("Reading from file:{}", fileName); FileSystem localFile = FileSystem.get(getConf()); BufferedReader br = new BufferedReader(new InputStreamReader(localFile.open(new Path(fileName)))); String line; @@ -112,14 +147,19 @@ protected void processArguments(LinkedList args) throws IOException { } else { pathList.addAll(this.childArgs.stream().map(Path::new).collect(Collectors.toList())); } + LOG.debug("Deleting:{}", pathList); BulkDelete bulkDelete = basePath.fs.createBulkDelete(basePath.path); deleteInBatches(bulkDelete, pathList); } + /** + * Batch class for deleting files in batches, once initialized the inner list can't be modified. + * @param template type for batches + */ static class Batch { - List data; - int batchSize; - int currentLocation; + private final List data; + private final int batchSize; + private int currentLocation; Batch(List data, int batchSize) { this.data = Collections.unmodifiableList(data); @@ -127,17 +167,24 @@ static class Batch { this.currentLocation = 0; } + /** + * @return If there is a next batch present + */ boolean hasNext() { - return this.currentLocation < this.data.size(); + return currentLocation < data.size(); } + /** + * @return Compute and return a new batch + */ List next() { List ret = new ArrayList<>(); - int i = currentLocation; - for(; i < Math.min(currentLocation + batchSize, data.size()); i++) { - ret.add(this.data.get(i)); + int i = 0; + while(i < batchSize && currentLocation < data.size()) { + ret.add(data.get(currentLocation)); + i++; + currentLocation++; } - currentLocation = i; return ret; } } diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/bulkdelete.md b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/bulkdelete.md index 14048da43a348..c59b21d0a5c79 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/bulkdelete.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/bulkdelete.md @@ -72,6 +72,18 @@ public interface BulkDelete extends IOStatisticsSource, Closeable { } ``` +### Shell Command `hadoop fs -bulkdelete base_path` + +This is the command line implementation of the bulkdelete API, the user can specify a base path and the underlying +paths for deletion. The list of paths can also be read from a file with each line containing +a separate path for deletion. + +```angular2html +hadoop fs -bulkdelete ..... +hadoop fs -bulkdelete -pageSize 10 ..... 
+hadoop fs -bulkdelete -fileName +``` + ### `bulkDelete(paths)` From 9df12c4de52772fb5ed79b366f338929f56044aa Mon Sep 17 00:00:00 2001 From: "harshit.gupta" Date: Wed, 11 Dec 2024 19:07:18 +0530 Subject: [PATCH 06/13] Added tests for local fileSystem deletion --- .../site/markdown/filesystem/bulkdelete.md | 2 +- .../fs/shell/TestBulkDeleteCommand.java | 68 ++++++++++++++++++- 2 files changed, 68 insertions(+), 2 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/bulkdelete.md b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/bulkdelete.md index c59b21d0a5c79..40b3128ad23b8 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/bulkdelete.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/bulkdelete.md @@ -81,7 +81,7 @@ a separate path for deletion. ```angular2html hadoop fs -bulkdelete ..... hadoop fs -bulkdelete -pageSize 10 ..... -hadoop fs -bulkdelete -fileName +hadoop fs -bulkdelete -readFromFile ``` diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestBulkDeleteCommand.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestBulkDeleteCommand.java index faf3a81b69314..ec078e8521655 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestBulkDeleteCommand.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestBulkDeleteCommand.java @@ -18,23 +18,41 @@ package org.apache.hadoop.fs.shell; -import java.io.IOException; +import java.io.*; import java.net.URI; import java.net.URISyntaxException; +import java.util.ArrayList; +import java.util.Arrays; import java.util.LinkedList; +import java.util.List; +import com.jcraft.jsch.Buffer; +import com.jcraft.jsch.IO; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.*; +import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.test.HadoopTestBase; import org.assertj.core.api.Assertions; +import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; public class TestBulkDeleteCommand extends HadoopTestBase { private static Configuration conf; + private static FsShell shell; + private static LocalFileSystem lfs; + private static Path testRootDir; @BeforeClass public static void setup() throws IOException { conf = new Configuration(); + shell = new FsShell(conf); + lfs = FileSystem.getLocal(conf); + testRootDir = lfs.makeQualified(new Path(GenericTestUtils.getTempPath( + "testFsShellBulkDelete"))); + lfs.delete(testRootDir, true); + lfs.mkdirs(testRootDir); + lfs.setWorkingDirectory(testRootDir); } @Test @@ -64,4 +82,52 @@ public void testArguments() throws IOException, URISyntaxException { Assertions.assertThat(bulkDeleteCommand.childArgs.get(0)). 
describedAs("Children arguments must match").isEqualTo(arg2); } + + @Test + public void testLocalFileDeletion() throws IOException { + String deletionDir = "toDelete"; + String baseFileName = "file_"; + Path baseDir = new Path(testRootDir, deletionDir); + List listOfPaths = new ArrayList<>(); + for(int i = 0; i < 100; i++) { + Path p = new Path(baseDir, baseFileName + i); + lfs.create(p); + listOfPaths.add(p.toUri().toString()); + } + List finalCommandList = new ArrayList<>(); + finalCommandList.add("-bulkDelete"); + finalCommandList.add(baseDir.toUri().toString()); + finalCommandList.addAll(listOfPaths); + shell.run(finalCommandList.toArray(new String[0])); + Assertions.assertThat(lfs.listFiles(baseDir, false).hasNext()) + .as("All the files should have been deleted").isEqualTo(false); + + } + + @Test + public void testLocalFileDeletionWithFileName() throws IOException { + String deletionDir = "toDelete"; + String baseFileName = "file_"; + Path baseDir = new Path(testRootDir, deletionDir); + Path fileWithDeletePaths = new Path(testRootDir, "fileWithDeletePaths"); + FSDataOutputStream fsDataOutputStream = lfs.create(fileWithDeletePaths, true); + BufferedWriter br = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream)); + for(int i = 0; i < 100; i++) { + Path p = new Path(baseDir, baseFileName + i); + lfs.create(p); + br.write(p.toUri().toString()); + br.newLine(); + } + br.flush(); // flush the file to write the contents + br.close(); // close the writer + List finalCommandList = new ArrayList<>(); + finalCommandList.add("-bulkDelete"); + finalCommandList.add("-readFromFile"); + finalCommandList.add(fileWithDeletePaths.toUri().toString()); + finalCommandList.add(baseDir.toUri().toString()); + shell.run(finalCommandList.toArray(new String[0])); + Assertions.assertThat(lfs.listFiles(baseDir, false).hasNext()) + .as("All the files should have been deleted").isEqualTo(false); + + } } From 5408e8b6bd88dc0bd3b8a0d0812499d889926e06 Mon Sep 17 00:00:00 2001 From: "harshit.gupta" Date: Thu, 12 Dec 2024 12:00:40 +0530 Subject: [PATCH 07/13] Yetus fixes --- .../java/org/apache/hadoop/fs/shell/BulkDeleteCommand.java | 7 +++++-- .../org/apache/hadoop/fs/shell/TestBulkDeleteCommand.java | 4 ---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/BulkDeleteCommand.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/BulkDeleteCommand.java index 1339bf3d9844c..af7efeef604be 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/BulkDeleteCommand.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/BulkDeleteCommand.java @@ -21,6 +21,7 @@ import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; import java.util.*; import java.util.stream.Collectors; @@ -81,7 +82,7 @@ protected BulkDeleteCommand() { /** * Processes the command line options and initialize the variables * @param args the command line arguments - * @throws IOException + * @throws IOException in case of wrong arguments passed */ @Override protected void processOptions(LinkedList args) throws IOException { @@ -137,13 +138,15 @@ protected void processArguments(LinkedList args) throws IOException { if(fileName != null) { LOG.info("Reading from file:{}", fileName); FileSystem localFile = FileSystem.get(getConf()); - BufferedReader br = new BufferedReader(new 
InputStreamReader(localFile.open(new Path(fileName)))); + BufferedReader br = new BufferedReader(new InputStreamReader(localFile.open(new Path(fileName)), + StandardCharsets.UTF_8)); String line; while((line = br.readLine()) != null) { if(!line.startsWith("#")) { pathList.add(new Path(line)); } } + br.close(); } else { pathList.addAll(this.childArgs.stream().map(Path::new).collect(Collectors.toList())); } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestBulkDeleteCommand.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestBulkDeleteCommand.java index ec078e8521655..78429686b3b24 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestBulkDeleteCommand.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestBulkDeleteCommand.java @@ -22,18 +22,14 @@ import java.net.URI; import java.net.URISyntaxException; import java.util.ArrayList; -import java.util.Arrays; import java.util.LinkedList; import java.util.List; -import com.jcraft.jsch.Buffer; -import com.jcraft.jsch.IO; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.*; import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.test.HadoopTestBase; import org.assertj.core.api.Assertions; -import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; From 2a3b0e4fbcae197ee7bab32611e6655ab5c6c218 Mon Sep 17 00:00:00 2001 From: "harshit.gupta" Date: Tue, 17 Dec 2024 14:18:08 +0530 Subject: [PATCH 08/13] Review Fixes --- .../hadoop/fs/shell/BulkDeleteCommand.java | 289 +++++++++--------- .../site/markdown/filesystem/bulkdelete.md | 2 +- .../fs/shell/TestBulkDeleteCommand.java | 184 ++++++----- 3 files changed, 246 insertions(+), 229 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/BulkDeleteCommand.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/BulkDeleteCommand.java index af7efeef604be..2ef9a79fcaf6e 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/BulkDeleteCommand.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/BulkDeleteCommand.java @@ -34,161 +34,162 @@ public class BulkDeleteCommand extends FsCommand { - public static void registerCommands(CommandFactory factory) { - factory.addClass(BulkDeleteCommand.class, "-bulkDelete"); + public static void registerCommands(CommandFactory factory) { + factory.addClass(BulkDeleteCommand.class, "-bulkDelete"); + } + + private static final Logger LOG = LoggerFactory.getLogger(BulkDeleteCommand.class.getName()); + + public static final String name = "bulkDelete"; + + /** + * File Name parameter to be specified at command line + */ + public static final String READ_FROM_FILE = "readFromFile"; + + /** + * Page size parameter specified at command line + */ + public static final String PAGE_SIZE = "pageSize"; + + + public static final String USAGE = "-[ " + READ_FROM_FILE + "] [] [" + PAGE_SIZE + "] [] [ ]"; + + public static final String DESCRIPTION = "Deletes the set of files under the given .\n" + "If a list of paths is provided at command line then the paths are deleted directly.\n" + "User can also point to the file where the paths are listed as full object names using the \"fileName\"" + "parameter. The presence of a file name takes precedence over the list of objects.\n" + "Page size refers to the size of each bulk delete batch." 
+ "Users can specify the page size using \"pageSize\" command parameter." + "Default value is 1.\n"; + + private String fileName; + + private int pageSize; + + /* + Making the class stateful as the PathData initialization for all args is not needed + */ LinkedList childArgs; + + protected BulkDeleteCommand() { + this.childArgs = new LinkedList<>(); + } + + protected BulkDeleteCommand(Configuration conf) { + super(conf); + } + + /** + * Processes the command line options and initialize the variables + * + * @param args the command line arguments + * @throws IOException in case of wrong arguments passed + */ + @Override + protected void processOptions(LinkedList args) throws IOException { + CommandFormat cf = new CommandFormat(0, Integer.MAX_VALUE); + cf.addOptionWithValue(READ_FROM_FILE); + cf.addOptionWithValue(PAGE_SIZE); + cf.parse(args); + fileName = cf.getOptValue(READ_FROM_FILE); + if (cf.getOptValue(PAGE_SIZE) != null) { + pageSize = Integer.parseInt(cf.getOptValue(PAGE_SIZE)); + } else { + pageSize = 1; } - - private static final Logger LOG = LoggerFactory.getLogger(BulkDeleteCommand.class.getName()); - - public static final String name = "bulkDelete"; - - /** - * File Name parameter to be specified at command line - */ - public static final String READ_FROM_FILE = "readFromFile"; - - /** - * Page size parameter specified at command line - */ - public static final String PAGE_SIZE = "pageSize"; - - - public static final String USAGE = "-[ " + READ_FROM_FILE + "] [] [" + - PAGE_SIZE + "] [] [ ]"; - - public static final String DESCRIPTION = "Deletes the set of files under the given .\n" + - "If a list of paths is provided at command line then the paths are deleted directly.\n" + - "User can also point to the file where the paths are listed as full object names using the \"fileName\"" + - "parameter. The presence of a file name takes precedence over the list of objects.\n" + - "Page size refers to the size of each bulk delete batch." + - "Users can specify the page size using \"pageSize\" command parameter." + - "Default value is 1.\n"; - - private String fileName; - - private int pageSize; - - /* - Making the class stateful as the PathData initialization for all args is not needed - */ - LinkedList childArgs; - - protected BulkDeleteCommand() { - this.childArgs = new LinkedList<>(); + } + + /** + * Processes the command line arguments and stores the child arguments in a list + * + * @param args strings to expand into {@link PathData} objects + * @return the base path of the bulk delete command. + * @throws IOException if the wrong number of arguments specified + */ + @Override + protected LinkedList expandArguments(LinkedList args) throws IOException { + if (fileName == null && args.size() < 2) { + throw new IOException("Invalid Number of Arguments. 
Expected more"); } - - protected BulkDeleteCommand(Configuration conf) {super(conf);} - - /** - * Processes the command line options and initialize the variables - * @param args the command line arguments - * @throws IOException in case of wrong arguments passed - */ - @Override - protected void processOptions(LinkedList args) throws IOException { - CommandFormat cf = new CommandFormat(0, Integer.MAX_VALUE); - cf.addOptionWithValue(READ_FROM_FILE); - cf.addOptionWithValue(PAGE_SIZE); - cf.parse(args); - fileName = cf.getOptValue(READ_FROM_FILE); - if(cf.getOptValue(PAGE_SIZE) != null) { - pageSize = Integer.parseInt(cf.getOptValue(PAGE_SIZE)); - } else { - pageSize = 1; - } + LinkedList pathData = new LinkedList<>(); + pathData.add(new PathData(args.get(0), getConf())); + args.remove(0); + this.childArgs = args; + return pathData; + } + + /** + * Deletes the objects using the bulk delete api + * + * @param bulkDelete Bulkdelete object exposing the API + * @param paths list of paths to be deleted in the base path + * @throws IOException on error in execution of the delete command + */ + void deleteInBatches(BulkDelete bulkDelete, List paths) throws IOException { + Batch batches = new Batch<>(paths, pageSize); + while (batches.hasNext()) { + try { + List> result = bulkDelete.bulkDelete(batches.next()); + LOG.debug("Deleted Result:{}", result.toString()); + } catch (IllegalArgumentException e) { + LOG.error("Caught exception while deleting", e); + } } - - /** - * Processes the command line arguments and stores the child arguments in a list - * @param args strings to expand into {@link PathData} objects - * @return the base path of the bulk delete command. - * @throws IOException if the wrong number of arguments specified - */ - @Override - protected LinkedList expandArguments(LinkedList args) throws IOException { - if(fileName == null && args.size() < 2) { - throw new IOException("Invalid Number of Arguments. Expected more"); + } + + @Override + protected void processArguments(LinkedList args) throws IOException { + PathData basePath = args.get(0); + LOG.info("Deleting files under:{}", basePath); + List pathList = new ArrayList<>(); + if (fileName != null) { + LOG.info("Reading from file:{}", fileName); + FileSystem localFile = FileSystem.get(getConf()); + BufferedReader br = new BufferedReader(new InputStreamReader(localFile.open(new Path(fileName)), StandardCharsets.UTF_8)); + String line; + while ((line = br.readLine()) != null) { + if (!line.startsWith("#")) { + pathList.add(new Path(line)); } - LinkedList pathData = new LinkedList<>(); - pathData.add(new PathData(args.get(0), getConf())); - args.remove(0); - this.childArgs = args; - return pathData; + } + br.close(); + } else { + pathList.addAll(this.childArgs.stream().map(Path::new).collect(Collectors.toList())); + } + LOG.debug("Deleting:{}", pathList); + BulkDelete bulkDelete = basePath.fs.createBulkDelete(basePath.path); + deleteInBatches(bulkDelete, pathList); + } + + /** + * Batch class for deleting files in batches, once initialized the inner list can't be modified. 
+ * + * @param template type for batches + */ + static class Batch { + private final List data; + private final int batchSize; + private int currentLocation; + + Batch(List data, int batchSize) { + this.data = Collections.unmodifiableList(data); + this.batchSize = batchSize; + this.currentLocation = 0; } /** - * Deletes the objects using the bulk delete api - * @param bulkDelete Bulkdelete object exposing the API - * @param paths list of paths to be deleted in the base path - * @throws IOException on error in execution of the delete command + * @return If there is a next batch present */ - void deleteInBatches(BulkDelete bulkDelete, List paths) throws IOException { - Batch batches = new Batch<>(paths, pageSize); - while(batches.hasNext()) { - List> result = bulkDelete.bulkDelete(batches.next()); - LOG.debug(result.toString()); - } - } - - @Override - protected void processArguments(LinkedList args) throws IOException { - PathData basePath = args.get(0); - LOG.info("Deleting files under:{}", basePath); - List pathList = new ArrayList<>(); - if(fileName != null) { - LOG.info("Reading from file:{}", fileName); - FileSystem localFile = FileSystem.get(getConf()); - BufferedReader br = new BufferedReader(new InputStreamReader(localFile.open(new Path(fileName)), - StandardCharsets.UTF_8)); - String line; - while((line = br.readLine()) != null) { - if(!line.startsWith("#")) { - pathList.add(new Path(line)); - } - } - br.close(); - } else { - pathList.addAll(this.childArgs.stream().map(Path::new).collect(Collectors.toList())); - } - LOG.debug("Deleting:{}", pathList); - BulkDelete bulkDelete = basePath.fs.createBulkDelete(basePath.path); - deleteInBatches(bulkDelete, pathList); + boolean hasNext() { + return currentLocation < data.size(); } /** - * Batch class for deleting files in batches, once initialized the inner list can't be modified. 
- * @param template type for batches + * @return Compute and return a new batch */ - static class Batch { - private final List data; - private final int batchSize; - private int currentLocation; - - Batch(List data, int batchSize) { - this.data = Collections.unmodifiableList(data); - this.batchSize = batchSize; - this.currentLocation = 0; - } - - /** - * @return If there is a next batch present - */ - boolean hasNext() { - return currentLocation < data.size(); - } - - /** - * @return Compute and return a new batch - */ - List next() { - List ret = new ArrayList<>(); - int i = 0; - while(i < batchSize && currentLocation < data.size()) { - ret.add(data.get(currentLocation)); - i++; - currentLocation++; - } - return ret; - } + List next() { + List ret = new ArrayList<>(); + int i = 0; + while (i < batchSize && currentLocation < data.size()) { + ret.add(data.get(currentLocation)); + i++; + currentLocation++; + } + return ret; } + } } diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/bulkdelete.md b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/bulkdelete.md index 40b3128ad23b8..dd65b6f5673fa 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/bulkdelete.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/bulkdelete.md @@ -74,7 +74,7 @@ public interface BulkDelete extends IOStatisticsSource, Closeable { ``` ### Shell Command `hadoop fs -bulkdelete base_path` -This is the command line implementation of the bulkdelete API, the user can specify a base path and the underlying +This is the command line implementation of the bulkdelete API, the user can specify a base path and the underlying paths for deletion. The list of paths can also be read from a file with each line containing a separate path for deletion. 
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestBulkDeleteCommand.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestBulkDeleteCommand.java index 78429686b3b24..4a5e91b5082c3 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestBulkDeleteCommand.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestBulkDeleteCommand.java @@ -33,97 +33,113 @@ import org.junit.BeforeClass; import org.junit.Test; -public class TestBulkDeleteCommand extends HadoopTestBase { - private static Configuration conf; - private static FsShell shell; - private static LocalFileSystem lfs; - private static Path testRootDir; +public class TestBulkDeleteCommand extends HadoopTestBase { + private static Configuration conf; + private static FsShell shell; + private static LocalFileSystem lfs; + private static Path testRootDir; - @BeforeClass - public static void setup() throws IOException { - conf = new Configuration(); - shell = new FsShell(conf); - lfs = FileSystem.getLocal(conf); - testRootDir = lfs.makeQualified(new Path(GenericTestUtils.getTempPath( - "testFsShellBulkDelete"))); - lfs.delete(testRootDir, true); - lfs.mkdirs(testRootDir); - lfs.setWorkingDirectory(testRootDir); - } + @BeforeClass + public static void setup() throws IOException { + conf = new Configuration(); + shell = new FsShell(conf); + lfs = FileSystem.getLocal(conf); + testRootDir = lfs.makeQualified(new Path(GenericTestUtils.getTempPath( + "testFsShellBulkDelete"))); + lfs.delete(testRootDir, true); + lfs.mkdirs(testRootDir); + lfs.setWorkingDirectory(testRootDir); + } - @Test - public void testDefaults() throws IOException { - LinkedList options = new LinkedList<>(); - BulkDeleteCommand bulkDeleteCommand = new BulkDeleteCommand(); - bulkDeleteCommand.processOptions(options); - assertTrue(bulkDeleteCommand.childArgs.isEmpty()); - } + @Test + public void testDefaults() throws IOException { + LinkedList options = new LinkedList<>(); + BulkDeleteCommand bulkDeleteCommand = new BulkDeleteCommand(); + bulkDeleteCommand.processOptions(options); + assertTrue(bulkDeleteCommand.childArgs.isEmpty()); + } - @Test - public void testArguments() throws IOException, URISyntaxException { - BulkDeleteCommand bulkDeleteCommand = new BulkDeleteCommand(conf); - LinkedList arguments = new LinkedList<>(); - String arg1 = "file:///file/name/1"; - String arg2 = "file:///file/name/2"; - arguments.add(arg1); - arguments.add(arg2); - LinkedList pathData = bulkDeleteCommand.expandArguments(arguments); - Assertions.assertThat(pathData.size()). - describedAs("Only one root path must be present").isEqualTo(1); - Assertions.assertThat(pathData.get(0).path.toUri().getPath()). - describedAs("Base path of the command should match").isEqualTo(new URI(arg1).getPath()); - Assertions.assertThat(bulkDeleteCommand.childArgs.size()). - describedAs("Only one other argument was passed to the command"). - isEqualTo(1); - Assertions.assertThat(bulkDeleteCommand.childArgs.get(0)). 
- describedAs("Children arguments must match").isEqualTo(arg2); - } + @Test + public void testArguments() throws IOException, URISyntaxException { + BulkDeleteCommand bulkDeleteCommand = new BulkDeleteCommand(conf); + LinkedList arguments = new LinkedList<>(); + String arg1 = "file:///file/name/1"; + String arg2 = "file:///file/name/2"; + arguments.add(arg1); + arguments.add(arg2); + LinkedList pathData = bulkDeleteCommand.expandArguments(arguments); + Assertions.assertThat(pathData.size()). + describedAs("Only one root path must be present").isEqualTo(1); + Assertions.assertThat(pathData.get(0).path.toUri().getPath()). + describedAs("Base path of the command should match").isEqualTo(new URI(arg1).getPath()); + Assertions.assertThat(bulkDeleteCommand.childArgs.size()). + describedAs("Only one other argument was passed to the command"). + isEqualTo(1); + Assertions.assertThat(bulkDeleteCommand.childArgs.get(0)). + describedAs("Children arguments must match").isEqualTo(arg2); + } - @Test - public void testLocalFileDeletion() throws IOException { - String deletionDir = "toDelete"; - String baseFileName = "file_"; - Path baseDir = new Path(testRootDir, deletionDir); - List listOfPaths = new ArrayList<>(); - for(int i = 0; i < 100; i++) { - Path p = new Path(baseDir, baseFileName + i); - lfs.create(p); - listOfPaths.add(p.toUri().toString()); - } - List finalCommandList = new ArrayList<>(); - finalCommandList.add("-bulkDelete"); - finalCommandList.add(baseDir.toUri().toString()); - finalCommandList.addAll(listOfPaths); - shell.run(finalCommandList.toArray(new String[0])); - Assertions.assertThat(lfs.listFiles(baseDir, false).hasNext()) - .as("All the files should have been deleted").isEqualTo(false); + @Test + public void testWrongArguments() throws IOException, URISyntaxException { + BulkDeleteCommand bulkDeleteCommand = new BulkDeleteCommand(conf); + LinkedList arguments = new LinkedList<>(); + String arg1 = "file:///file/name/1"; + arguments.add(arg1); + Assertions.assertThatThrownBy(() -> bulkDeleteCommand.expandArguments(arguments)). + describedAs("No children to be deleted specified in the command."). 
+ isInstanceOf(IOException.class); + } + @Test + public void testLocalFileDeletion() throws IOException { + String deletionDir = "toDelete"; + String baseFileName = "file_"; + Path baseDir = new Path(testRootDir, deletionDir); + List listOfPaths = new ArrayList<>(); + for (int i = 0; i < 100; i++) { + Path p = new Path(baseDir, baseFileName + i); + lfs.create(p); } + RemoteIterator remoteIterator = lfs.listFiles(baseDir, false); + while (remoteIterator.hasNext()) { + listOfPaths.add(remoteIterator.next().getPath().toUri().toString()); + } + List finalCommandList = new ArrayList<>(); + finalCommandList.add("-bulkDelete"); + finalCommandList.add(baseDir.toUri().toString()); + finalCommandList.addAll(listOfPaths); + shell.run(finalCommandList.toArray(new String[0])); + Assertions.assertThat(lfs.listFiles(baseDir, false).hasNext()) + .as("All the files should have been deleted under the path:" + + baseDir).isEqualTo(false); - @Test - public void testLocalFileDeletionWithFileName() throws IOException { - String deletionDir = "toDelete"; - String baseFileName = "file_"; - Path baseDir = new Path(testRootDir, deletionDir); - Path fileWithDeletePaths = new Path(testRootDir, "fileWithDeletePaths"); - FSDataOutputStream fsDataOutputStream = lfs.create(fileWithDeletePaths, true); - BufferedWriter br = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream)); - for(int i = 0; i < 100; i++) { - Path p = new Path(baseDir, baseFileName + i); - lfs.create(p); - br.write(p.toUri().toString()); - br.newLine(); - } - br.flush(); // flush the file to write the contents - br.close(); // close the writer - List finalCommandList = new ArrayList<>(); - finalCommandList.add("-bulkDelete"); - finalCommandList.add("-readFromFile"); - finalCommandList.add(fileWithDeletePaths.toUri().toString()); - finalCommandList.add(baseDir.toUri().toString()); - shell.run(finalCommandList.toArray(new String[0])); - Assertions.assertThat(lfs.listFiles(baseDir, false).hasNext()) - .as("All the files should have been deleted").isEqualTo(false); + } + @Test + public void testLocalFileDeletionWithFileName() throws IOException { + String deletionDir = "toDelete"; + String baseFileName = "file_"; + Path baseDir = new Path(testRootDir, deletionDir); + Path fileWithDeletePaths = new Path(testRootDir, "fileWithDeletePaths"); + FSDataOutputStream fsDataOutputStream = lfs.create(fileWithDeletePaths, true); + BufferedWriter br = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream)); + for (int i = 0; i < 100; i++) { + Path p = new Path(baseDir, baseFileName + i); + lfs.create(p); + br.write(p.toUri().toString()); + br.newLine(); } + br.flush(); // flush the file to write the contents + br.close(); // close the writer + List finalCommandList = new ArrayList<>(); + finalCommandList.add("-bulkDelete"); + finalCommandList.add("-readFromFile"); + finalCommandList.add(fileWithDeletePaths.toUri().toString()); + finalCommandList.add(baseDir.toUri().toString()); + shell.run(finalCommandList.toArray(new String[0])); + Assertions.assertThat(lfs.listFiles(baseDir, false).hasNext()) + .as("All the files should have been deleted under the path:" + + baseDir).isEqualTo(false); + + } } From a94d49ab489d81e6a44b2591f8a970f5bc080188 Mon Sep 17 00:00:00 2001 From: "harshit.gupta" Date: Wed, 18 Dec 2024 15:55:22 +0530 Subject: [PATCH 09/13] Yetus fixes --- .../hadoop/fs/shell/BulkDeleteCommand.java | 26 ++++++++++++------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git 
a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/BulkDeleteCommand.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/BulkDeleteCommand.java index 2ef9a79fcaf6e..fef0ad42e9638 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/BulkDeleteCommand.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/BulkDeleteCommand.java @@ -40,22 +40,29 @@ public static void registerCommands(CommandFactory factory) { private static final Logger LOG = LoggerFactory.getLogger(BulkDeleteCommand.class.getName()); - public static final String name = "bulkDelete"; + public static final String NAME = "bulkDelete"; /** - * File Name parameter to be specified at command line + * File Name parameter to be specified at command line. */ public static final String READ_FROM_FILE = "readFromFile"; /** - * Page size parameter specified at command line + * Page size parameter specified at command line. */ public static final String PAGE_SIZE = "pageSize"; - public static final String USAGE = "-[ " + READ_FROM_FILE + "] [] [" + PAGE_SIZE + "] [] [ ]"; + public static final String USAGE = "-[ " + READ_FROM_FILE + "] [] [" + PAGE_SIZE + + "] [] [ ]"; - public static final String DESCRIPTION = "Deletes the set of files under the given .\n" + "If a list of paths is provided at command line then the paths are deleted directly.\n" + "User can also point to the file where the paths are listed as full object names using the \"fileName\"" + "parameter. The presence of a file name takes precedence over the list of objects.\n" + "Page size refers to the size of each bulk delete batch." + "Users can specify the page size using \"pageSize\" command parameter." + "Default value is 1.\n"; + public static final String DESCRIPTION = "Deletes the set of files under the given .\n" + + "If a list of paths is provided at command line then the paths are deleted directly.\n" + + "User can also point to the file where the paths are listed as full object names using the \"fileName\"" + + "parameter. The presence of a file name takes precedence over the list of objects.\n" + + "Page size refers to the size of each bulk delete batch." + + "Users can specify the page size using \"pageSize\" command parameter." + + "Default value is 1.\n"; private String fileName; @@ -74,7 +81,7 @@ protected BulkDeleteCommand(Configuration conf) { } /** - * Processes the command line options and initialize the variables + * Processes the command line options and initialize the variables. * * @param args the command line arguments * @throws IOException in case of wrong arguments passed @@ -94,7 +101,7 @@ protected void processOptions(LinkedList args) throws IOException { } /** - * Processes the command line arguments and stores the child arguments in a list + * Processes the command line arguments and stores the child arguments in a list. * * @param args strings to expand into {@link PathData} objects * @return the base path of the bulk delete command. @@ -113,7 +120,7 @@ protected LinkedList expandArguments(LinkedList args) throws I } /** - * Deletes the objects using the bulk delete api + * Deletes the objects using the bulk delete api. 
    *
    * @param bulkDelete Bulkdelete object exposing the API
    * @param paths list of paths to be deleted in the base path
@@ -139,7 +146,8 @@ protected void processArguments(LinkedList<PathData> args) throws IOException {
     if (fileName != null) {
       LOG.info("Reading from file:{}", fileName);
       FileSystem localFile = FileSystem.get(getConf());
-      BufferedReader br = new BufferedReader(new InputStreamReader(localFile.open(new Path(fileName)), StandardCharsets.UTF_8));
+      BufferedReader br = new BufferedReader(new InputStreamReader(
+          localFile.open(new Path(fileName)), StandardCharsets.UTF_8));
       String line;
       while ((line = br.readLine()) != null) {
         if (!line.startsWith("#")) {
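Since -readFromFile consumes a flat list of object names, a sketch of what such an input file might look like (the bucket and key names are purely illustrative; per the parsing above, lines beginning with # are ignored, and a later commit in this series also trims whitespace and skips blank lines):

  # objects scheduled for deletion
  s3a://example-bucket/data/part-0000
  s3a://example-bucket/data/part-0001
  s3a://example-bucket/data/part-0002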
From b35692e46e6dbe8f01c32a5332ba658f85b2bc73 Mon Sep 17 00:00:00 2001
From: "harshit.gupta"
Date: Mon, 6 Jan 2025 13:33:12 +0530
Subject: [PATCH 10/13] Review Fixes

---
 .../hadoop/fs/shell/BulkDeleteCommand.java    | 12 +++++++---
 .../fs/shell/TestBulkDeleteCommand.java       | 23 +++++++++++++++----
 2 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/BulkDeleteCommand.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/BulkDeleteCommand.java
index fef0ad42e9638..8ddde70b3900f 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/BulkDeleteCommand.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/BulkDeleteCommand.java
@@ -68,9 +68,10 @@ public static void registerCommands(CommandFactory factory) {
 
   private int pageSize;
 
-  /*
-  Making the class stateful as the PathData initialization for all args is not needed
-  */
-  LinkedList<String> childArgs;
+  /**
+   * Making the class stateful as the PathData initialization for all args is not needed
+   */
+  LinkedList<String> childArgs;
 
   protected BulkDeleteCommand() {
     this.childArgs = new LinkedList<>();
@@ -78,6 +79,8 @@ protected BulkDeleteCommand() {
 
   protected BulkDeleteCommand(Configuration conf) {
     super(conf);
+    this.childArgs = new LinkedList<>();
+    this.pageSize = 1;
   }
 
   /**
@@ -134,6 +137,7 @@ void deleteInBatches(BulkDelete bulkDelete, List<Path> paths) throws IOException
       LOG.debug("Deleted Result:{}", result.toString());
     } catch (IllegalArgumentException e) {
       LOG.error("Caught exception while deleting", e);
+      throw new IOException(e);
     }
   }
 }
@@ -197,6 +201,8 @@ List<T> next() {
       i++;
       currentLocation++;
     }
+    LOG.warn(ret.toString());
+    assert(ret.size() == this.batchSize);
     return ret;
   }
 }
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestBulkDeleteCommand.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestBulkDeleteCommand.java
index 4a5e91b5082c3..fdcf8d1859616 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestBulkDeleteCommand.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestBulkDeleteCommand.java
@@ -27,6 +27,7 @@
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.*;
+import org.apache.hadoop.fs.contract.ContractTestUtils;
 import org.apache.hadoop.test.GenericTestUtils;
 import org.apache.hadoop.test.HadoopTestBase;
 import org.assertj.core.api.Assertions;
@@ -54,7 +55,7 @@ public static void setup() throws IOException {
   @Test
   public void testDefaults() throws IOException {
     LinkedList<String> options = new LinkedList<>();
-    BulkDeleteCommand bulkDeleteCommand = new BulkDeleteCommand();
+    BulkDeleteCommand bulkDeleteCommand = new BulkDeleteCommand(conf);
     bulkDeleteCommand.processOptions(options);
     assertTrue(bulkDeleteCommand.childArgs.isEmpty());
   }
@@ -64,7 +65,7 @@ public void testArguments() throws IOException, URISyntaxException {
     BulkDeleteCommand bulkDeleteCommand = new BulkDeleteCommand(conf);
     LinkedList<String> arguments = new LinkedList<>();
     String arg1 = "file:///file/name/1";
-    String arg2 = "file:///file/name/2";
+    String arg2 = "file:///file/name/1/2";
     arguments.add(arg1);
     arguments.add(arg2);
     LinkedList<PathData> pathData = bulkDeleteCommand.expandArguments(arguments);
@@ -98,7 +99,7 @@ public void testLocalFileDeletion() throws IOException {
     List<String> listOfPaths = new ArrayList<>();
     for (int i = 0; i < 100; i++) {
       Path p = new Path(baseDir, baseFileName + i);
-      lfs.create(p);
+      ContractTestUtils.touch(lfs, p);
     }
     RemoteIterator<LocatedFileStatus> remoteIterator = lfs.listFiles(baseDir, false);
     while (remoteIterator.hasNext()) {
@@ -125,7 +126,7 @@ public void testLocalFileDeletionWithFileName() throws IOException {
     BufferedWriter br = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream));
     for (int i = 0; i < 100; i++) {
       Path p = new Path(baseDir, baseFileName + i);
-      lfs.create(p);
+      ContractTestUtils.touch(lfs, p);
       br.write(p.toUri().toString());
       br.newLine();
     }
@@ -142,4 +143,18 @@ public void testLocalFileDeletionWithFileName() throws IOException {
         baseDir).isEqualTo(false);
 
   }
+
+  @Test
+  public void testWrongArgumentsWithNonChildFile() throws IOException, URISyntaxException {
+    BulkDeleteCommand bulkDeleteCommand = new BulkDeleteCommand(conf);
+    LinkedList<String> arguments = new LinkedList<>();
+    String arg1 = "file:///file/name/1";
+    String arg2 = "file:///file/name";
+    arguments.add(arg1);
+    arguments.add(arg2);
+    LinkedList<PathData> pathData = bulkDeleteCommand.expandArguments(arguments);
+    Assertions.assertThatThrownBy(() -> bulkDeleteCommand.processArguments(pathData)).
+        describedAs("Child paths must be contained inside the base path").
+        isInstanceOf(IOException.class);
+  }
 }
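A note on the paging behaviour the next two commits adjust: deleteInBatches() walks the path list in pages of at most pageSize entries, so the final page can be shorter, which is presumably why the assert introduced above is removed again in the "Bug Fixes" commit that follows. A minimal, self-contained sketch of that partitioning idea (this is not the patch's own Batch helper, just an illustration of the intended semantics):

  import java.util.ArrayList;
  import java.util.Arrays;
  import java.util.List;

  public class PagingSketch {
    // Split a list into consecutive sublists of at most pageSize elements.
    static <T> List<List<T>> pages(List<T> items, int pageSize) {
      List<List<T>> result = new ArrayList<>();
      for (int start = 0; start < items.size(); start += pageSize) {
        int end = Math.min(start + pageSize, items.size());
        result.add(new ArrayList<>(items.subList(start, end)));
      }
      return result;
    }

    public static void main(String[] args) {
      // Seven paths with pageSize 3 produce batches of sizes 3, 3 and 1.
      List<String> paths = Arrays.asList("a", "b", "c", "d", "e", "f", "g");
      pages(paths, 3).forEach(System.out::println);
    }
  }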
From 85e3966955e993d1a647106c4ff2fd94d6ed56aa Mon Sep 17 00:00:00 2001
From: "harshit.gupta"
Date: Mon, 6 Jan 2025 14:17:53 +0530
Subject: [PATCH 11/13] Bug Fixes

---
 .../main/java/org/apache/hadoop/fs/shell/BulkDeleteCommand.java | 2 --
 1 file changed, 2 deletions(-)

diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/BulkDeleteCommand.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/BulkDeleteCommand.java
index 8ddde70b3900f..e8159309c3097 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/BulkDeleteCommand.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/BulkDeleteCommand.java
@@ -201,8 +201,6 @@ List<T> next() {
       i++;
       currentLocation++;
     }
-    LOG.warn(ret.toString());
-    assert(ret.size() == this.batchSize);
     return ret;
   }
 }

From 3b86822e6fd7b6a4b787db7c53793fcbd30743a9 Mon Sep 17 00:00:00 2001
From: "harshit.gupta"
Date: Mon, 13 Jan 2025 12:10:41 +0530
Subject: [PATCH 12/13] Review Fixes

---
 .../hadoop/fs/shell/BulkDeleteCommand.java    | 20 ++++++---
 .../fs/shell/TestBulkDeleteCommand.java       | 43 +++++++++++--------
 2 files changed, 40 insertions(+), 23 deletions(-)

diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/BulkDeleteCommand.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/BulkDeleteCommand.java
index e8159309c3097..e14163a3bf788 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/BulkDeleteCommand.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/BulkDeleteCommand.java
@@ -22,7 +22,11 @@
 import java.io.IOException;
 import java.io.InputStreamReader;
 import java.nio.charset.StandardCharsets;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
 import java.util.stream.Collectors;
 
 import org.apache.hadoop.conf.Configuration;
@@ -69,7 +73,7 @@ public static void registerCommands(CommandFactory factory) {
 
   private int pageSize;
 
   /**
-   * Making the class stateful as the PathData initialization for all args is not needed
+   * Making the class stateful as the PathData initialization for all args is not needed.
    */
   LinkedList<String> childArgs;
 
@@ -134,9 +138,10 @@ void deleteInBatches(BulkDelete bulkDelete, List<Path> paths) throws IOException
     while (batches.hasNext()) {
       try {
         List<Map.Entry<Path, String>> result = bulkDelete.bulkDelete(batches.next());
+        LOG.warn("Number of failed deletions:{}", result.size());
         LOG.debug("Deleted Result:{}", result.toString());
       } catch (IllegalArgumentException e) {
-        LOG.error("Caught exception while deleting", e);
+        LOG.error("Exception while deleting", e);
         throw new IOException(e);
       }
     }
@@ -154,13 +159,16 @@ protected void processArguments(LinkedList<PathData> args) throws IOException {
           localFile.open(new Path(fileName)), StandardCharsets.UTF_8));
       String line;
       while ((line = br.readLine()) != null) {
-        if (!line.startsWith("#")) {
+        line = line.trim();
+        if (!line.isEmpty() && !line.startsWith("#")) {
           pathList.add(new Path(line));
         }
       }
       br.close();
     } else {
-      pathList.addAll(this.childArgs.stream().map(Path::new).collect(Collectors.toList()));
+      pathList.addAll(childArgs.stream().
+          map(Path::new).
+          collect(Collectors.toList()));
     }
     LOG.debug("Deleting:{}", pathList);
     BulkDelete bulkDelete = basePath.fs.createBulkDelete(basePath.path);
@@ -172,7 +180,7 @@ protected void processArguments(LinkedList<PathData> args) throws IOException {
    *
    * @param <T> template type for batches
    */
-  static class Batch<T> {
+  private static class Batch<T> {
     private final List<T> data;
     private final int batchSize;
     private int currentLocation;
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestBulkDeleteCommand.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestBulkDeleteCommand.java
index fdcf8d1859616..0f6b70d0e0463 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestBulkDeleteCommand.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestBulkDeleteCommand.java
@@ -18,7 +18,9 @@
 
 package org.apache.hadoop.fs.shell;
 
-import java.io.*;
+import java.io.BufferedWriter;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
 import java.net.URI;
 import java.net.URISyntaxException;
 import java.util.ArrayList;
@@ -26,15 +28,23 @@
 import java.util.List;
 
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.*;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FsShell;
+import org.apache.hadoop.fs.LocalFileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
 import org.apache.hadoop.fs.contract.ContractTestUtils;
+import org.apache.hadoop.test.AbstractHadoopTestBase;
 import org.apache.hadoop.test.GenericTestUtils;
-import org.apache.hadoop.test.HadoopTestBase;
 import org.assertj.core.api.Assertions;
 import org.junit.BeforeClass;
 import org.junit.Test;
 
-public class TestBulkDeleteCommand extends HadoopTestBase {
+import static org.apache.hadoop.test.LambdaTestUtils.intercept;
+
+public class TestBulkDeleteCommand extends AbstractHadoopTestBase {
   private static Configuration conf;
   private static FsShell shell;
   private static LocalFileSystem lfs;
@@ -57,7 +67,8 @@ public void testDefaults() throws IOException {
     LinkedList<String> options = new LinkedList<>();
     BulkDeleteCommand bulkDeleteCommand = new BulkDeleteCommand(conf);
     bulkDeleteCommand.processOptions(options);
-    assertTrue(bulkDeleteCommand.childArgs.isEmpty());
+    Assertions.assertThat(bulkDeleteCommand.childArgs).
+        describedAs("Children arguments should be empty").isEmpty();
   }
 
   @Test
@@ -69,26 +80,24 @@ public void testArguments() throws IOException, URISyntaxException {
     arguments.add(arg1);
     arguments.add(arg2);
     LinkedList<PathData> pathData = bulkDeleteCommand.expandArguments(arguments);
-    Assertions.assertThat(pathData.size()).
-        describedAs("Only one root path must be present").isEqualTo(1);
+    Assertions.assertThat(pathData).
+        describedAs("Only one root path must be present").hasSize(1);
     Assertions.assertThat(pathData.get(0).path.toUri().getPath()).
         describedAs("Base path of the command should match").isEqualTo(new URI(arg1).getPath());
-    Assertions.assertThat(bulkDeleteCommand.childArgs.size()).
+    Assertions.assertThat(bulkDeleteCommand.childArgs).
         describedAs("Only one other argument was passed to the command").
-        isEqualTo(1);
+        hasSize(1);
     Assertions.assertThat(bulkDeleteCommand.childArgs.get(0)).
        describedAs("Children arguments must match").isEqualTo(arg2);
   }
 
   @Test
-  public void testWrongArguments() throws IOException, URISyntaxException {
+  public void testWrongArguments() throws Exception {
     BulkDeleteCommand bulkDeleteCommand = new BulkDeleteCommand(conf);
     LinkedList<String> arguments = new LinkedList<>();
     String arg1 = "file:///file/name/1";
     arguments.add(arg1);
-    Assertions.assertThatThrownBy(() -> bulkDeleteCommand.expandArguments(arguments)).
-        describedAs("No children to be deleted specified in the command.").
-        isInstanceOf(IOException.class);
+    intercept(IOException.class, () -> bulkDeleteCommand.expandArguments(arguments));
   }
 
   @Test
@@ -100,6 +109,7 @@ public void testLocalFileDeletion() throws IOException {
     for (int i = 0; i < 100; i++) {
       Path p = new Path(baseDir, baseFileName + i);
       ContractTestUtils.touch(lfs, p);
+      ContractTestUtils.assertIsFile(lfs, p);
     }
     RemoteIterator<LocatedFileStatus> remoteIterator = lfs.listFiles(baseDir, false);
     while (remoteIterator.hasNext()) {
@@ -127,6 +137,7 @@ public void testLocalFileDeletionWithFileName() throws IOException {
     for (int i = 0; i < 100; i++) {
       Path p = new Path(baseDir, baseFileName + i);
       ContractTestUtils.touch(lfs, p);
+      ContractTestUtils.assertIsFile(lfs, p);
       br.write(p.toUri().toString());
       br.newLine();
     }
@@ -145,7 +156,7 @@ public void testLocalFileDeletionWithFileName() throws IOException {
   }
 
   @Test
-  public void testWrongArgumentsWithNonChildFile() throws IOException, URISyntaxException {
+  public void testWrongArgumentsWithNonChildFile() throws Exception {
     BulkDeleteCommand bulkDeleteCommand = new BulkDeleteCommand(conf);
     LinkedList<String> arguments = new LinkedList<>();
     String arg1 = "file:///file/name/1";
@@ -153,8 +164,6 @@ public void testWrongArgumentsWithNonChildFile() throws IOException, URISyntaxException {
     arguments.add(arg1);
     arguments.add(arg2);
     LinkedList<PathData> pathData = bulkDeleteCommand.expandArguments(arguments);
-    Assertions.assertThatThrownBy(() -> bulkDeleteCommand.processArguments(pathData)).
-        describedAs("Child paths must be contained inside the base path").
-        isInstanceOf(IOException.class);
+    intercept(IOException.class, () -> bulkDeleteCommand.processArguments(pathData));
   }
 }
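The final commit below reworks how per-path failures from the bulk delete call are reported. For orientation, a minimal sketch of driving the same underlying API that this command wraps (the base path is hypothetical; only calls already visible in this patch are used, and each bulkDelete() call may pass at most the store's bulk delete page size worth of paths, which is why the command pages with pageSize and catches IllegalArgumentException):

  import java.io.IOException;
  import java.util.Collections;
  import java.util.List;
  import java.util.Map;

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.BulkDelete;
  import org.apache.hadoop.fs.FileSystem;
  import org.apache.hadoop.fs.Path;

  public class BulkDeleteApiSketch {
    public static void main(String[] args) throws IOException {
      Configuration conf = new Configuration();
      Path base = new Path("file:///tmp/toDelete");        // illustrative base path
      FileSystem fs = base.getFileSystem(conf);
      BulkDelete bulkDelete = fs.createBulkDelete(base);    // same call as processArguments()
      List<Path> page = Collections.singletonList(new Path(base, "file_0"));
      // Each returned entry is a path that could not be deleted plus an error string.
      List<Map.Entry<Path, String>> failures = bulkDelete.bulkDelete(page);
      for (Map.Entry<Path, String> failure : failures) {
        System.err.println(failure.getKey() + ": " + failure.getValue());
      }
    }
  }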
From 5b1ba7f4edda9ee484bec5a5b1722fc6de19aa30 Mon Sep 17 00:00:00 2001
From: "harshit.gupta"
Date: Mon, 20 Jan 2025 11:44:28 +0530
Subject: [PATCH 13/13] Review Fixes

---
 .../apache/hadoop/fs/shell/BulkDeleteCommand.java | 13 ++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/BulkDeleteCommand.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/BulkDeleteCommand.java
index e14163a3bf788..5f79e6393d6d4 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/BulkDeleteCommand.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/BulkDeleteCommand.java
@@ -117,7 +117,7 @@ protected void processOptions(LinkedList<String> args) throws IOException {
   @Override
   protected LinkedList<PathData> expandArguments(LinkedList<String> args) throws IOException {
     if (fileName == null && args.size() < 2) {
-      throw new IOException("Invalid Number of Arguments. Expected more");
+      throw new IOException("Invalid Number of Arguments. Expected :" + USAGE);
     }
     LinkedList<PathData> pathData = new LinkedList<>();
     pathData.add(new PathData(args.get(0), getConf()));
@@ -138,11 +138,14 @@ void deleteInBatches(BulkDelete bulkDelete, List<Path> paths) throws IOException
     while (batches.hasNext()) {
       try {
         List<Map.Entry<Path, String>> result = bulkDelete.bulkDelete(batches.next());
-        LOG.warn("Number of failed deletions:{}", result.size());
-        LOG.debug("Deleted Result:{}", result.toString());
+        if(!result.isEmpty()) {
+          LOG.warn("Number of failed deletions:{}", result.size());
+          for(Map.Entry<Path, String> singleResult: result) {
+            LOG.info("{}: {}", singleResult.getKey(), singleResult.getValue());
+          }
+        }
       } catch (IllegalArgumentException e) {
-        LOG.error("Exception while deleting", e);
-        throw new IOException(e);
+        throw new IOException("Exception while deleting: ", e);
       }
     }
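Taken together, the series leaves -bulkDelete accepting either an explicit list of child paths after the base path or a -readFromFile file of object names (which takes precedence), with deletions issued in pages of at most pageSize entries (default 1; the page size is presumably supplied with a -pageSize option parsed like -readFromFile, per the PAGE_SIZE constant and description above). Child paths must resolve under the base path, as testWrongArgumentsWithNonChildFile asserts, and any entries the store refuses to delete are surfaced in the logs as path/error pairs.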