Skip to content

Commit

Permalink
Use HashMap for tracking SpeedFile objects and paths
Browse files Browse the repository at this point in the history
  • Loading branch information
ThomasThelen committed Mar 16, 2023
1 parent 4351d79 commit 378f591
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 52 deletions.
59 changes: 19 additions & 40 deletions src/main/java/org/dataone/speedbagit/SpeedBagIt.java
Expand Up @@ -33,10 +33,8 @@
import java.text.SimpleDateFormat;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
Expand Down Expand Up @@ -74,9 +72,10 @@ public class SpeedBagIt {
// Map of key-values that go in the bagit.txt file
public Map<String, String> bagitMetadata;

// A list holding all of the files in the bag
private List<SpeedFile> dataFiles;
private List<SpeedFile> tagFiles;
// Containers for keeping track of tag & data files, keyed off of
// their relative file path
private HashMap<String, SpeedFile> dataFiles;
private HashMap<String, SpeedFile> tagFiles;

// An ExecutorService to run the piped stream in another thread
private static ExecutorService executor = null;
Expand All @@ -101,8 +100,8 @@ public SpeedBagIt(double version,
Map<String, String> bagitMetadata) throws IOException {
this.version = version;
this.checksumAlgorithm = checksumAlgorithm;
this.dataFiles = new ArrayList<>();
this.tagFiles = new ArrayList<>();
this.dataFiles = new HashMap<>();
this.tagFiles = new HashMap<>();
this.bagitMetadata = bagitMetadata;
this.dataManifestFile = new HashMap<> ();
this.tagManifestFile = new HashMap<> ();
Expand All @@ -120,17 +119,7 @@ public SpeedBagIt(double version,
*/
public SpeedBagIt(double version,
String checksumAlgorithm) throws IOException {
this.version = version;
this.checksumAlgorithm = checksumAlgorithm;
this.dataFiles = new ArrayList<>();
this.tagFiles = new ArrayList<>();
this.bagitMetadata = new HashMap<> ();
this.dataManifestFile = new HashMap<> ();
this.tagManifestFile = new HashMap<> ();

this.properties = new Properties();
this.properties.load(Objects.requireNonNull(this.getClass().
getClassLoader().getResourceAsStream("speed-bagit.properties")));
this(version, checksumAlgorithm, new HashMap<>());
}

/**
Expand All @@ -152,9 +141,9 @@ public void addFile(InputStream file, String bagPath, MessageDigest checksum, bo
}
SpeedFile newFile = new SpeedFile(new SpeedStream(file, checksum), bagPath, isTagFile);
if (isTagFile) {
this.tagFiles.add(newFile);
this.tagFiles.put(bagPath, newFile);
} else {
this.dataFiles.add(newFile);
this.dataFiles.put(bagPath, newFile);
}
}

Expand All @@ -166,23 +155,13 @@ public void addFile(InputStream file, String bagPath, MessageDigest checksum, bo
*/
private boolean hasPathCollisions(String path, boolean isTagFile) {
if (isTagFile) {
for (SpeedFile tagFile : this.tagFiles) {
if (Objects.equals(tagFile.getPath(), path)) {
return true;
}
}
return this.tagFiles.containsKey(path);
} else {
for (SpeedFile dataFile : this.dataFiles) {
if (Objects.equals(dataFile.getPath(), path)) {
return true;
}
}
return this.dataFiles.containsKey(path);
}
return false;
}



/**
* Adds a stream of data to the bag.
*
Expand Down Expand Up @@ -317,9 +296,9 @@ public void run() {
String timeStamp = new SimpleDateFormat("yyyy.MM.dd.HH.mm.ss").format(new Date());
logger.info(String.format("Streaming bag at %s", timeStamp));
int totalSize = 0;
// Stream all of the files in the root 'data' directory
// Stream all the files in the root 'data' directory

for (SpeedFile streamingFile : dataFiles) {
for (SpeedFile streamingFile : dataFiles.values()) {
try {
streamFile(zos, streamingFile);
String checksum = new String(streamingFile.getStream().getChecksum());
Expand Down Expand Up @@ -351,8 +330,8 @@ public void run() {
fileStream = new ByteArrayInputStream(dataManifest.getBytes(StandardCharsets.UTF_8));
addFile(fileStream, fileName, MessageDigest.getInstance(checksumAlgorithm), true);

// Write all of the tag files
for (SpeedFile streamingFile : tagFiles) {
// Write all the tag files
for (SpeedFile streamingFile : tagFiles.values()) {
try {
streamFile(zos, streamingFile);
String checksum = streamingFile.getStream().getChecksum();
Expand Down Expand Up @@ -394,11 +373,11 @@ public int getPayloadFileCount() {
return this.dataFiles.size();
}
/**
* Returns all of the tag files that have been added to
* Returns all the tag files that have been added to
* the bag.
* @return List of tag files
* @return HashMap of tag files
*/
public List<SpeedFile> getTagFiles() {
public HashMap<String, SpeedFile> getTagFiles() {
return this.tagFiles;
}

Expand All @@ -407,7 +386,7 @@ public List<SpeedFile> getTagFiles() {
* to the bag. These are the files that belong under data/
* @return HashMap of data files, keyed by their relative file path
*/
public List<SpeedFile> getDataFiles() {
public HashMap<String, SpeedFile> getDataFiles() {
return this.dataFiles;
}

Expand Down
4 changes: 2 additions & 2 deletions src/test/java/org/dataone/speedbagit/ProfilingTest.java
Expand Up @@ -43,8 +43,8 @@

/**
* A suite of tests that should be run under a profiler and ignored by CI systems and ordinary builds.
* These should be run before each release to ensure that emory management is
* sane (ie entire files aren't loaded into memory at once).
* These should be run before each release to ensure that memory management is
* working properly (i.e., entire files aren't loaded into memory at once).
*/

public class ProfilingTest {
Expand Down
17 changes: 7 additions & 10 deletions src/test/java/org/dataone/speedbagit/SpeedBagItTest.java
Expand Up @@ -49,10 +49,7 @@
import org.apache.commons.io.IOUtils;
import org.junit.jupiter.api.Test;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.fail;

import static org.junit.jupiter.api.Assertions.*;


/**
Expand Down Expand Up @@ -340,30 +337,30 @@ public void testAddFile() throws SpeedBagException, NoSuchAlgorithmException, IO


SpeedBagIt bag = getStockBag();
List<SpeedFile> dataFiles = bag.getDataFiles();
HashMap<String, SpeedFile> dataFiles = bag.getDataFiles();
assert dataFiles.size() == 2;
for (SpeedFile dataFile: dataFiles) {
for (SpeedFile dataFile: dataFiles.values()) {
assert expectedDataPaths.contains(dataFile.getPath());
}

List<SpeedFile> metadataFiles = bag.getTagFiles();
HashMap<String, SpeedFile> metadataFiles = bag.getTagFiles();
assert metadataFiles.size() == 2;
for (SpeedFile tagFile: metadataFiles) {
for (SpeedFile tagFile: metadataFiles.values()) {
assert expecteMetadataPaths.contains(tagFile.getPath());
}
}

@Test
public void testGetDataFiles() throws SpeedBagException, NoSuchAlgorithmException, IOException {
SpeedBagIt bag = getStockBag();
List<SpeedFile> dataFiles = bag.getDataFiles();
HashMap<String, SpeedFile> dataFiles = bag.getDataFiles();
assert dataFiles.size() == 2;
}

@Test
public void testGetTagFiles() throws SpeedBagException, NoSuchAlgorithmException, IOException {
SpeedBagIt bag = getStockBag();
List<SpeedFile> metadataFiles = bag.getTagFiles();
HashMap<String, SpeedFile> metadataFiles = bag.getTagFiles();
assert metadataFiles.size() == 2;
}

Expand Down

0 comments on commit 378f591

Please sign in to comment.