Skip to content

Commit

Permalink
Check for name collisions between files as they're added to the bag
Browse files Browse the repository at this point in the history
  • Loading branch information
ThomasThelen committed Mar 15, 2023
1 parent 22d8d51 commit 4351d79
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 22 deletions.
59 changes: 43 additions & 16 deletions src/main/java/org/dataone/speedbagit/SpeedBagIt.java
Expand Up @@ -128,7 +128,6 @@ public SpeedBagIt(double version,
this.dataManifestFile = new HashMap<> ();
this.tagManifestFile = new HashMap<> ();


this.properties = new Properties();
this.properties.load(Objects.requireNonNull(this.getClass().
getClassLoader().getResourceAsStream("speed-bagit.properties")));
Expand All @@ -138,19 +137,51 @@ public SpeedBagIt(double version,
* Adds a stream of data to the bag.
*
* @param file: The stream representing a file or data that will be placed in the bag
* @param bagPath: The path, relative to the bag root where the file belongs
* @param checksum: A MessageDigest object that will hold the checksum
* @param bagPath: The path, relative to the bag root where the file belongs
* @param checksum: A MessageDigest object that will hold the checksum
* @param isTagFile: Boolean set to True when the file is a tag file
*/
public void addFile(InputStream file, String bagPath, MessageDigest checksum, boolean isTagFile) {
public void addFile(InputStream file, String bagPath, MessageDigest checksum, boolean isTagFile)
throws SpeedBagException {
logger.debug(String.format("Adding %s to the bag", bagPath));
// Reject the file when a previously added file of the same kind (tag vs. data) already uses this bag path.
// NOTE(review): the message below always says "tag file", but this branch is also reached
// when isTagFile is false, i.e. for colliding data files.
if (this.hasPathCollisions(bagPath, isTagFile)) {
throw new SpeedBagException(
String.format("The tag file with path %s conflicts with another file.", bagPath)
);
}
// Bundle the stream, the supplied digest and the bag path into a SpeedFile.
SpeedFile newFile = new SpeedFile(new SpeedStream(file, checksum), bagPath, isTagFile);
// Tag files and data files are kept in separate collections.
if (isTagFile) {
this.tagFiles.add(newFile);
} else {
this.dataFiles.add(newFile);
if (isTagFile) {
this.tagFiles.add(newFile);
} else {
this.dataFiles.add(newFile);
}
}

/**
 * Reports whether a file of the same kind (tag or data) has already been added
 * under the given bag path. Tag files are only compared against other tag files
 * and data files only against other data files.
 *
 * @param path: The bag path to look for among the previously added files
 * @param isTagFile: True to search the tag files, false to search the data files
 * @return true when a previously added file of that kind has an equal path
 */
private boolean hasPathCollisions(String path, boolean isTagFile) {
    // Only the collection matching the file kind is searched.
    for (SpeedFile existing : (isTagFile ? this.tagFiles : this.dataFiles)) {
        if (Objects.equals(existing.getPath(), path)) {
            return true;
        }
    }
    return false;
}
}



/**
* Adds a stream of data to the bag.
Expand All @@ -159,15 +190,10 @@ public void addFile(InputStream file, String bagPath, MessageDigest checksum, bo
* @param bagPath: The path, relative to the bag root where the file belongs
* @param isTagFile: Boolean set to True when the file is a tag file
*/
public void addFile(InputStream file, String bagPath, boolean isTagFile) throws NoSuchAlgorithmException {
public void addFile(InputStream file, String bagPath, boolean isTagFile) throws NoSuchAlgorithmException, SpeedBagException {
// NOTE(review): the four-argument addFile called at the end logs this same message,
// so a single call to this overload appears to log it twice.
logger.debug(String.format("Adding %s to the bag", bagPath));
// Create a fresh digest for the algorithm named by this.checksumAlgorithm;
// getInstance throws NoSuchAlgorithmException when that name is not available.
MessageDigest newDigest = MessageDigest.getInstance(this.checksumAlgorithm);
SpeedFile newFile = new SpeedFile(new SpeedStream(file, newDigest), bagPath, isTagFile);
if (isTagFile) {
this.tagFiles.add(newFile);
} else {
this.dataFiles.add(newFile);
}
// Hand off to the overload that takes an explicit MessageDigest.
this.addFile(file, bagPath, newDigest, isTagFile);
}

/**
Expand Down Expand Up @@ -399,3 +425,4 @@ public static String bagFileToString(Map<String, String> mapFile) {
return builder.toString();
}
}

7 changes: 4 additions & 3 deletions src/test/java/org/dataone/speedbagit/ProfilingTest.java
Expand Up @@ -54,7 +54,7 @@ public class ProfilingTest {
*/
@Test
@Disabled
public void testLargeFiles() throws IOException, NoSuchAlgorithmException {
public void testLargeFiles() throws IOException, NoSuchAlgorithmException, SpeedBagException {
// Create 100, 1GB files
GenerateFiles("largeFiles/", 100, 1000000000L);
CreateBag("largeFiles/", "./bagged_data.zip");
Expand All @@ -66,7 +66,7 @@ public void testLargeFiles() throws IOException, NoSuchAlgorithmException {
*/
@Test
@Disabled
public void testSmallFiles() throws IOException, NoSuchAlgorithmException {
public void testSmallFiles() throws IOException, NoSuchAlgorithmException, SpeedBagException {
// Generate 5000 files under smallFiles/ (the third argument is presumably the
// per-file size in bytes - TODO confirm against GenerateFiles).
GenerateFiles("smallFiles/", 5000, 1000);
// Bag the generated directory into ./bagged_data.zip.
CreateBag("smallFiles/", "./bagged_data.zip");
}
Expand Down Expand Up @@ -118,7 +118,7 @@ public static void GenerateFiles(String targetDirectory, int fileCount, long tar
* @param PayloadPath The path to the data directory that will be bagged
* @param bagPath The path to the bagit archive that will be created
*/
public static void CreateBag(String PayloadPath, String bagPath) throws IOException, NoSuchAlgorithmException {
public static void CreateBag(String PayloadPath, String bagPath) throws IOException, NoSuchAlgorithmException, SpeedBagException {
SpeedBagIt bag = new SpeedBagIt(1.0, "MD5");
File dataDirectory = new File(PayloadPath);
File[] directoryListing = dataDirectory.listFiles();
Expand All @@ -140,3 +140,4 @@ public static void CreateBag(String PayloadPath, String bagPath) throws IOExcept
IOUtils.copy(bagStream, fos);
}
}

35 changes: 32 additions & 3 deletions src/test/java/org/dataone/speedbagit/SpeedBagItTest.java
Expand Up @@ -48,11 +48,13 @@
import org.junit.jupiter.api.io.TempDir;
import org.apache.commons.io.IOUtils;
import org.junit.jupiter.api.Test;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.fail;



/**
* Unit tests for the SpeedBagIt class. Because this class is the main interface for
* creating bags, most integrated unit tests are in this file.
Expand Down Expand Up @@ -137,7 +139,7 @@ public void testCtorMetadata() throws IOException {
* Helper method that creates a stock Bag
* @return The SpeedBag object
*/
public SpeedBagIt getStockBag() throws NoSuchAlgorithmException, IOException {
public SpeedBagIt getStockBag() throws NoSuchAlgorithmException, IOException, SpeedBagException {
double bagVersion = 1.0;
String checksumAlgorithm = "MD5";
Map<String, String> bagMetadata = new HashMap<>();
Expand Down Expand Up @@ -400,7 +402,7 @@ public void testDataBagExport() {
this.validateBagItFiles(zipFile, bagVersion, bag.getPayloadFileCount(), checksumAlgorithm);
Files.delete(bagFilePath);

} catch (IOException | NoSuchAlgorithmException e) {
} catch (IOException | NoSuchAlgorithmException | SpeedBagException e) {
fail(e);
}
}
Expand Down Expand Up @@ -441,8 +443,35 @@ public void testMetadataBagExport() {
// Make sure that the bag files are correct
this.validateBagItFiles(zipFile, bagVersion, bag.getPayloadFileCount(), checksumAlgorithm);
Files.delete(bagFilePath);
} catch (IOException | NoSuchAlgorithmException e) {
} catch (IOException | NoSuchAlgorithmException | SpeedBagException e) {
fail(e);
}
}

/**
 * Verifies that adding a second file under a bag path that is already in use
 * raises a SpeedBagException, both for data files and for tag files.
 */
@Test
void testDuplicateAddFile() throws IOException, NoSuchAlgorithmException, SpeedBagException {
    Map<String, String> metadata = new HashMap<>();
    SpeedBagIt bag = new SpeedBagIt(1.0, "MD5", metadata);

    InputStream firstStream =
            new ByteArrayInputStream("1234, 9876, 3845".getBytes(StandardCharsets.UTF_8));
    InputStream secondStream =
            new ByteArrayInputStream("trees, cars, bridges".getBytes(StandardCharsets.UTF_8));

    // Data files: the first add succeeds, the second add to the same path must fail.
    String dataPath = "data/data_file1.csv";
    bag.addFile(firstStream, dataPath, false);
    assertThrows(SpeedBagException.class, () -> bag.addFile(secondStream, dataPath, false));

    // Tag files: same expectation, using a path under tag/.
    String tagPath = "tag/data_file1.csv";
    bag.addFile(firstStream, tagPath, true);
    assertThrows(SpeedBagException.class, () -> bag.addFile(secondStream, tagPath, true));
}
}

0 comments on commit 4351d79

Please sign in to comment.