Skip to content

Commit

Permalink
Added support for SHA256 checksums
Browse files Browse the repository at this point in the history
  • Loading branch information
jfarwer committed Oct 21, 2022
1 parent 0005853 commit bcf39e7
Show file tree
Hide file tree
Showing 4 changed files with 278 additions and 19 deletions.
68 changes: 68 additions & 0 deletions WebContent/profile-block.jspf
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,8 @@
<a href="edit-metadata?collection=user&archiveID=<%=archiveID%>" style="cursor:pointer;margin-left:75px;"><img style="height:25px" src="images/edit_summary.svg"/></a>
<% } %>

<button id="btn_delete" class="btn-default" onclick="updateHash(archiveID);" style="float: right;">Update checksum</button>

</div>
<hr/>
<!--div class="modal-footer"-->
Expand Down Expand Up @@ -263,5 +265,71 @@
});
} // End of delArchive

/**
 * Asks the user to confirm, then POSTs to the "ajax/hashUpdate" endpoint to
 * regenerate the checksums of the given archive.
 *
 * The collection summary modal is hidden while the confirmation dialog is open
 * and shown again once the user cancels or acknowledges the success message.
 *
 * @param {string} archive1 ID of the archive, sent as the "archive" request parameter.
 */
function updateHash (archive1) {
    $('#collection-summary-modal').modal("hide");
    BootstrapDialog.show ({
        title: 'Update Checksum',
        message: 'Confirm to update the checksum?',
        type: BootstrapDialog.TYPE_INFO,
        closable: false,
        buttons: [
            {
                id: 'btn-yes',
                label: 'Yes',
                cssClass: 'btn-default',
                autospin: false,
                action: function(dialogRef){
                    $.ajax({
                        type: "POST",
                        url: "ajax/hashUpdate",
                        data: { archive: archive1 },
                        success: function(response) {
                            if (response['result'] === "ok") {
                                BootstrapDialog.alert(
                                    {
                                        message: response['reason'],
                                        type: BootstrapDialog.TYPE_SUCCESS,
                                        // Runs once the alert is dismissed: bring the summary modal back.
                                        callback: function(result) {
                                            $('#collection-summary-modal').modal('show');
                                        }
                                    }
                                );
                                // The confirmation dialog has done its job; close it exactly once.
                                dialogRef.close();
                            } else {
                                // Server reported a failure; keep the confirmation dialog open so the user can retry or cancel.
                                BootstrapDialog.show({message: response['reason'], type: BootstrapDialog.TYPE_WARNING});
                            }
                        },
                        // jQuery passes (jqXHR, textStatus, errorThrown) here; there is no parsed
                        // response object in this callback, so build the message from those.
                        error: function(jqXHR, textStatus, errorThrown) {
                            var detail = errorThrown || textStatus || 'unknown error';
                            BootstrapDialog.show({
                                message: 'Failed to update the checksum: ' + detail,
                                type: BootstrapDialog.TYPE_WARNING
                            });
                        }
                    });
                }
            },
            {
                id: 'btn-cancel',
                label: 'No',
                cssClass: 'btn-default',
                autospin: false,
                action: function(dialogRef){
                    dialogRef.close();
                    $('#collection-summary-modal').modal('show');
                }
            }
        ]
    });
} // End of updateHash

</script>
<% } %>
135 changes: 135 additions & 0 deletions src/java/edu/stanford/epadd/util/HashUpdate.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
/*
Servlet to update checksum
*/
package edu.stanford.epadd.util;

import java.io.IOException;
import java.io.PrintWriter;
import javax.servlet.ServletException;
import javax.servlet.annotation.WebServlet;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.io.File;
import org.json.JSONObject;
import org.apache.commons.io.FileUtils;
import edu.stanford.muse.index.Archive;
import edu.stanford.muse.index.ArchiveReaderWriter;
import edu.stanford.muse.util.Util;
import edu.stanford.muse.webapp.JSPHelper;

import gov.loc.repository.bagit.creator.BagCreator;
import gov.loc.repository.bagit.domain.Bag;
import gov.loc.repository.bagit.hash.StandardSupportedAlgorithms;
import java.util.Arrays;
import java.nio.file.Paths;
import java.security.NoSuchAlgorithmException;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.LogManager;

/**
 * Servlet mapped to {@code /ajax/hashUpdate} that rebuilds the BagIt bag around an
 * archive so that its manifests carry both MD5 and SHA-256 checksums.
 *
 * <p>The request must carry an {@code archive} parameter holding the archive ID.
 * The response is a JSON object with two keys: {@code result} ("ok" or "failed")
 * and {@code reason} (a human-readable message).
 */
@WebServlet(name = "HashUpdate", urlPatterns = {"/ajax/hashUpdate"})
public class HashUpdate extends HttpServlet {

    private static final Logger log = LogManager.getLogger(HashUpdate.class);

    /**
     * Processes requests for both HTTP <code>GET</code> and <code>POST</code>
     * methods.
     *
     * @param request servlet request
     * @param response servlet response
     * @throws ServletException if a servlet-specific error occurs
     * @throws IOException if an I/O error occurs
     */
    protected void processRequest(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {
        String jsonResult = "failed";
        String jsonReason;

        String archiveID = request.getParameter("archive");
        Archive archive = ArchiveReaderWriter.getArchiveForArchiveID(archiveID);

        if (archive == null) {
            jsonReason = "Archive not found";
        } else {
            try {
                rebuildBag(archive);
                jsonResult = "ok";
                jsonReason = "Checksum is updated";
            } catch (IOException | NoSuchAlgorithmException e1) {
                log.error("Checksum update failed for archive " + archiveID, e1);
                // getMessage() may be null, and JSONObject.put(key, null) removes the key,
                // which would leave the client without any reason to display.
                jsonReason = (e1.getMessage() != null) ? e1.getMessage() : e1.toString();
            }
        }

        JSONObject json = new JSONObject();
        json.put("result", jsonResult);
        json.put("reason", jsonReason);

        // Content type and encoding must be set before getWriter(); setting the
        // character encoding afterwards has no effect.
        response.setContentType("application/json");
        response.setCharacterEncoding("UTF-8");
        try (PrintWriter out = response.getWriter()) {
            out.print(json.toString());
            out.flush();
        } catch (Exception e) {
            Util.print_exception("HashUpdate.PrintWriter", e, JSPHelper.log);
        }
    }

    /**
     * Strips the existing bag structure from the archive's base directory and bags it
     * again in place with MD5 and SHA-256 manifests.
     *
     * <p>The payload folder is moved to a temporary location, the base directory is
     * deleted, and the payload is moved back as the new base directory before bagging.
     * NOTE(review): if a step after the first move fails, the payload is left in the
     * temporary directory and the base directory may be missing - confirm whether a
     * recovery path is needed.
     */
    private static void rebuildBag(Archive archive) throws IOException, NoSuchAlgorithmException {
        StandardSupportedAlgorithms[] algorithm = {StandardSupportedAlgorithms.MD5, StandardSupportedAlgorithms.SHA256};
        boolean includeHiddenFiles = false;

        archive.close();

        // Only the unique temp path is needed: moveDirectory requires that the
        // destination does not exist yet, so the freshly created directory is removed.
        File tmp = Util.createTempDirectory();
        if (!tmp.delete() && tmp.exists()) {
            throw new IOException("Could not prepare temporary directory " + tmp.getPath());
        }

        File payloadDir = Paths.get(archive.baseDir + File.separatorChar + Archive.BAG_DATA_FOLDER).toFile();
        FileUtils.moveDirectory(payloadDir, tmp);

        File wheretocopy = Paths.get(archive.baseDir).toFile();
        Util.deleteDir(wheretocopy.getPath(), log);
        FileUtils.moveDirectory(tmp, wheretocopy);

        Bag bag = BagCreator.bagInPlace(Paths.get(archive.baseDir), Arrays.asList(algorithm), includeHiddenFiles);
        archive.openForRead();
        archive.setArchiveBag(bag);
    }

    // <editor-fold defaultstate="collapsed" desc="HttpServlet methods. Click on the + sign on the left to edit the code.">
    /**
     * Handles the HTTP <code>GET</code> method.
     *
     * @param request servlet request
     * @param response servlet response
     * @throws ServletException if a servlet-specific error occurs
     * @throws IOException if an I/O error occurs
     */
    @Override
    protected void doGet(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {
        processRequest(request, response);
    }

    /**
     * Handles the HTTP <code>POST</code> method.
     *
     * @param request servlet request
     * @param response servlet response
     * @throws ServletException if a servlet-specific error occurs
     * @throws IOException if an I/O error occurs
     */
    @Override
    protected void doPost(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {
        processRequest(request, response);
    }

    /**
     * Returns a short description of the servlet.
     *
     * @return a String containing servlet description
     */
    @Override
    public String getServletInfo() {
        return "Updates the checksums of an archive bag";
    }// </editor-fold>

}
89 changes: 71 additions & 18 deletions src/java/edu/stanford/muse/index/Archive.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
2022-09-09 Fixed bug in generateExportableAssetsNormalizedMbox
2022-10-03 Changed the fileformat to IMAP for IMAP import
2022-10-03 Corrected function isMBOX() to check if the source is MBOX
2022-10-04 Added file ID same as MBOX for IMAP
2022-10-13 Added SHA-256 in updateFileInBag
*/
package edu.stanford.muse.index;

Expand Down Expand Up @@ -119,7 +121,7 @@ public class Archive implements Serializable {

public static final String TEMP_SUBDIR = System.getProperty("java.io.tmpdir");
public static final String INDEXES_SUBDIR = "indexes";
public static final String SESSIONS_SUBDIR = "sessions"; // original idea was that there would be different sessions on the same archive (index). but in practice we only have one session
public static final String SESSIONS_SUBDIR = "sessions"; // original idea was that there would be different sessions on the same archive (index). but in practice we only have one session
public static final String LEXICONS_SUBDIR = "lexicons";
private static final String FEATURES_SUBDIR = "mixtures";
public static final String IMAGES_SUBDIR = "images";
Expand Down Expand Up @@ -725,7 +727,9 @@ public void setFileMetadatasIMAP(Archive archive, String[] importedFiles) {

fm = new Archive.FileMetadata();
fm.filename = doc1.emailSource;
fm.fileID = folderName;
// fm.fileID = folderName;
// 2022-10-04
fm.fileID = "Collection/IMAP/" + StringUtils.leftPad("" + count, 4, "0") + "/" + folderName;
if (epaddPremis != null)
{
epaddPremis.setFileID(fm.fileID, folderName);
Expand Down Expand Up @@ -2532,14 +2536,24 @@ public static void saveArchiveBag(String destDir){
}
*/

// 2022-10-13
//needed to make this method because sometimes we want to update a bag without loading the whole archive.
public static void updateFileInBag(Bag archiveBag, String fileOrDirectoryName, String baseDir){
String[] manifestinfofilename = {"manifest-md5.txt", "manifest-sha256.txt"};
String[] algorithms = {"MD5", "SHA-256"};
Path filepathname = Paths.get(fileOrDirectoryName);
Path baginfofile = Paths.get(baseDir+File.separatorChar+"bag-info.txt");
Path manifestinfofile = Paths.get(baseDir +File.separatorChar+"manifest-md5.txt");
// Path manifestinfofile = Paths.get(baseDir +File.separatorChar+"manifest-md5.txt");
Path[] manifestinfofile = new Path[2];
for (int i=0; i<2; i++) manifestinfofile[i] = Paths.get(baseDir +File.separatorChar+manifestinfofilename[i]);

//updatePayloadManifests(bag, algorithms, includeHidden);
MessageDigest messageDigest = null;
Map<Manifest, MessageDigest> manifestToMessageDigest= new HashMap<>();
// MessageDigest messageDigest = null;
// Map<Manifest, MessageDigest> manifestToMessageDigest= new HashMap<>();
MessageDigest[] messageDigest = {null, null};
Map<Manifest, MessageDigest>[] manifestToMessageDigest = new Map[2];
manifestToMessageDigest[0] = new HashMap<>();
manifestToMessageDigest[1] = new HashMap<>();
boolean includeHiddenFiles = false;
//updateMetadataFile(bag, metadata);

Expand All @@ -2554,9 +2568,14 @@ public static void updateFileInBag(Bag archiveBag, String fileOrDirectoryName, S
e.printStackTrace();
}

MessageDigest[] finalMessageDigest = new MessageDigest[2];
try {
messageDigest = MessageDigest.getInstance(StandardSupportedAlgorithms.MD5.name());
MessageDigest finalMessageDigest = messageDigest;
// messageDigest = MessageDigest.getInstance(StandardSupportedAlgorithms.MD5.name());
// MessageDigest finalMessageDigest = messageDigest;
for (int i=0; i<2; i++) {
messageDigest[i] = MessageDigest.getInstance(algorithms[i]);
finalMessageDigest[i] = messageDigest[i];
}
/*A very subtle bug was introduced. The case is as following; a file gets deleted from the directory but the manifest has entry for the deleted file as well.
When the manifest construction algorithm is iterated over the directory tree then it recomputes the hash for all files present but it does not remove the hash for deleted files. As a result,
although the file is deleted, its entry remains in the tag manifest file resulting in the failure of checksum. For fix; we remove entry for all those files
Expand All @@ -2568,15 +2587,26 @@ public static void updateFileInBag(Bag archiveBag, String fileOrDirectoryName, S
!entry.getKey().startsWith(new File(fileOrDirectoryName).toPath())
).collect(Collectors.toMap(Map.Entry::getKey,Map.Entry::getValue));
manifest.setFileToChecksumMap(mm);
manifestToMessageDigest.put(manifest, finalMessageDigest);
// manifestToMessageDigest.put(manifest, finalMessageDigest);
if ( manifest.getAlgorithm().toString().equals(algorithms[0])) manifestToMessageDigest[0].put(manifest, finalMessageDigest[0]);
else manifestToMessageDigest[1].put(manifest, finalMessageDigest[1]);
});
CreatePayloadManifestsVistor sut = new CreatePayloadManifestsVistor(manifestToMessageDigest, includeHiddenFiles);
Files.walkFileTree(filepathname, sut);
// CreatePayloadManifestsVistor sut = new CreatePayloadManifestsVistor(manifestToMessageDigest, includeHiddenFiles);
// Files.walkFileTree(filepathname, sut);
CreatePayloadManifestsVistor sut1 = new CreatePayloadManifestsVistor(manifestToMessageDigest[0], includeHiddenFiles);
Files.walkFileTree(filepathname, sut1);
//Files.walkFileTree(baginfofile,sut);
/////Now write payload manifest
// archiveBag.getPayLoadManifests().clear();
// archiveBag.getPayLoadManifests().addAll(manifestToMessageDigest.keySet());
// ManifestWriter.writePayloadManifests(archiveBag.getPayLoadManifests(), PathUtils.getBagitDir(archiveBag),archiveBag.getRootDir(),archiveBag.getFileEncoding());
CreatePayloadManifestsVistor sut2 = new CreatePayloadManifestsVistor(manifestToMessageDigest[1], includeHiddenFiles);
Files.walkFileTree(filepathname, sut2);
archiveBag.getPayLoadManifests().clear();
archiveBag.getPayLoadManifests().addAll(manifestToMessageDigest.keySet());
archiveBag.getPayLoadManifests().addAll(manifestToMessageDigest[0].keySet());
archiveBag.getPayLoadManifests().addAll(manifestToMessageDigest[1].keySet());
ManifestWriter.writePayloadManifests(archiveBag.getPayLoadManifests(), PathUtils.getBagitDir(archiveBag),archiveBag.getRootDir(),archiveBag.getFileEncoding());

} catch (NoSuchAlgorithmException e) {
e.printStackTrace();
} catch (IOException e) {
Expand All @@ -2585,16 +2615,39 @@ public static void updateFileInBag(Bag archiveBag, String fileOrDirectoryName, S

//updateTagManifests(bag, algorithms, includeHidden);
try {
MessageDigest finalMessageDigest = messageDigest;
manifestToMessageDigest.clear();
archiveBag.getTagManifests().forEach(manifest->manifestToMessageDigest.put(manifest, finalMessageDigest));
final CreateTagManifestsVistor tagVistor = new CreateTagManifestsVistor(manifestToMessageDigest, includeHiddenFiles);
// MessageDigest finalMessageDigest = messageDigest;
// manifestToMessageDigest.clear();
manifestToMessageDigest[0].clear();
manifestToMessageDigest[1].clear();
// archiveBag.getTagManifests().forEach(manifest->manifestToMessageDigest.put(manifest, finalMessageDigest));
archiveBag.getTagManifests().forEach( manifest->{
if ( manifest.getAlgorithm().toString().equals(algorithms[0])) manifestToMessageDigest[0].put(manifest, finalMessageDigest[0]);
else manifestToMessageDigest[1].put(manifest, finalMessageDigest[1]);
}
);

// final CreateTagManifestsVistor tagVistor = new CreateTagManifestsVistor(manifestToMessageDigest, includeHiddenFiles);
final CreateTagManifestsVistor tagVistor1 = new CreateTagManifestsVistor(manifestToMessageDigest[0], includeHiddenFiles);
//Files.walkFileTree(filepathname, tagVistor);
Files.walkFileTree(baginfofile,tagVistor);
Files.walkFileTree(manifestinfofile,tagVistor);
// Files.walkFileTree(baginfofile,tagVistor);
// Files.walkFileTree(manifestinfofile,tagVistor);
Files.walkFileTree(baginfofile,tagVistor1);
Files.walkFileTree(manifestinfofile[0],tagVistor1);
Files.walkFileTree(manifestinfofile[1],tagVistor1);
//update bag'stagemanifest
// archiveBag.getTagManifests().clear();
// archiveBag.getTagManifests().addAll(manifestToMessageDigest.keySet());
// ManifestWriter.writeTagManifests(archiveBag.getTagManifests(), PathUtils.getBagitDir(archiveBag), archiveBag.getRootDir(), archiveBag.getFileEncoding());

final CreateTagManifestsVistor tagVistor2 = new CreateTagManifestsVistor(manifestToMessageDigest[1], includeHiddenFiles);
//Files.walkFileTree(filepathname, tagVistor);
Files.walkFileTree(baginfofile,tagVistor2);
Files.walkFileTree(manifestinfofile[0],tagVistor2);
Files.walkFileTree(manifestinfofile[1],tagVistor2);
//update bag'stagemanifest
archiveBag.getTagManifests().clear();
archiveBag.getTagManifests().addAll(manifestToMessageDigest.keySet());
archiveBag.getTagManifests().addAll(manifestToMessageDigest[0].keySet());
archiveBag.getTagManifests().addAll(manifestToMessageDigest[1].keySet());
ManifestWriter.writeTagManifests(archiveBag.getTagManifests(), PathUtils.getBagitDir(archiveBag), archiveBag.getRootDir(), archiveBag.getFileEncoding());
} catch (IOException e) {
e.printStackTrace();
Expand Down
5 changes: 4 additions & 1 deletion src/java/edu/stanford/muse/index/ArchiveReaderWriter.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/*
2022-09-13 fixed bug in readArchiveIfPresent
2022-10-13 added SHA256 checksum
*/
package edu.stanford.muse.index;

Expand Down Expand Up @@ -392,7 +393,9 @@ public static boolean saveArchive(String baseDir, String name, Archive archive,
}*/
//if archivesave mode is freshcreation then create a bag around basedir and set bag as this one..
if(mode== Archive.Save_Archive_Mode.FRESH_CREATION){
StandardSupportedAlgorithms algorithm = StandardSupportedAlgorithms.MD5;
// 2022-10-13
// StandardSupportedAlgorithms algorithm = StandardSupportedAlgorithms.MD5;
StandardSupportedAlgorithms algorithm[] = { StandardSupportedAlgorithms.MD5, StandardSupportedAlgorithms.SHA256};
boolean includeHiddenFiles = false;
try {
archive.close();
Expand Down

0 comments on commit bcf39e7

Please sign in to comment.