Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions src/main/java/edu/ucsd/msjava/mzml/StaxMzMLParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -179,12 +179,52 @@ private synchronized void preloadAllSpectra() throws IOException, XMLStreamExcep
} finally {
reader.close();
}
} catch (XMLStreamException e) {
throw annotate(e, "preload");
}
allLoaded = true;
long elapsed = System.currentTimeMillis() - startTime;
System.out.println("StAX mzML preload: " + cache.size() + " spectra loaded in " + elapsed + " ms");
}

/**
 * Build a new {@link XMLStreamException} that wraps {@code e} with a
 * context-rich message (file path, parse phase, original parser text).
 * If the underlying error looks like a BOM or XML-prolog / encoding issue
 * (the most common cause of "ParseError in XML prolog" on Windows), the
 * message also suggests the concrete fix.
 *
 * <p>The exception is returned, not thrown; callers are expected to
 * {@code throw annotate(e, ...)}.
 *
 * @param e the original Stax exception; attached as the cause of the result
 * @param phase short tag identifying the parse phase ("index", "preload")
 * @return a new exception whose {@code getCause()} is {@code e} and which
 *         carries {@code e}'s location when one is available
 */
private XMLStreamException annotate(XMLStreamException e, String phase) {
    String msg = e.getMessage() == null ? "" : e.getMessage();
    StringBuilder sb = new StringBuilder();
    sb.append("Could not parse mzML file '").append(specFile.getAbsolutePath()).append("' during ").append(phase).append(".");
    if (looksLikeBomOrPrologIssue(msg)) {
        sb.append(" This usually means the file has a byte-order mark (BOM) or an encoding mismatch in the XML prolog. Verify that the file starts with `<?xml version=\"1.0\" encoding=\"UTF-8\"?>` with no leading whitespace or BOM (on Linux/macOS: `head -c 3 \"")
                .append(specFile.getName()).append("\" | xxd`; a BOM shows as `ef bb bf`). Re-converting the raw file with ThermoRawFileParser or MSConvert usually resolves it. See docs/Troubleshooting.md for details.");
    }
    sb.append(" Underlying parser error: ").append(msg);

    javax.xml.stream.Location location = e.getLocation();
    if (location == null) {
        // XMLStreamException(String, Location) dereferences the location to
        // build its "ParseError at [row,col]" prefix, so passing null would
        // raise a NullPointerException and hide the real parse failure.
        // The (String, Throwable) constructor needs no location and already
        // registers the cause, so initCause() must not be called again.
        return new XMLStreamException(sb.toString(), e);
    }

    // Note: the location-taking constructors do NOT invoke
    // Throwable.initCause, so getCause() would return null. Call
    // initCause() explicitly so standard Java chaining (printStackTrace,
    // causal frames) works.
    XMLStreamException wrapped = new XMLStreamException(sb.toString(), location);
    wrapped.initCause(e);
    return wrapped;
}

/**
 * Heuristic check for parser messages that point at a byte-order mark,
 * XML prolog, or encoding problem. Matching is case-insensitive and
 * substring-based.
 *
 * @param msg the parser's error message; may be {@code null}
 * @return {@code true} if the lower-cased message contains any of the known
 *         marker phrases, {@code false} otherwise (including for {@code null})
 */
private static boolean looksLikeBomOrPrologIssue(String msg) {
    if (msg == null) {
        return false;
    }
    // Locale.ROOT keeps the lower-casing independent of the platform locale.
    final String lowered = msg.toLowerCase(java.util.Locale.ROOT);
    final String[] markers = {
        "prolog",
        "bom",
        "byte order mark",
        "encoding",
        "invalid character",
        "content is not allowed"
    };
    for (String marker : markers) {
        if (lowered.contains(marker)) {
            return true;
        }
    }
    return false;
}

/** Parse and return the full spectrum by its string ID. */
public Spectrum getSpectrumById(String specId) {
SpectrumIndex si = indexById.get(specId);
Expand Down Expand Up @@ -251,6 +291,8 @@ private void buildIndex() throws IOException, XMLStreamException {
} finally {
reader.close();
}
} catch (XMLStreamException e) {
throw annotate(e, "index");
}
}

Expand Down
80 changes: 80 additions & 0 deletions src/test/java/msgfplus/TestStaxMzMLParserErrorContext.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
package msgfplus;

import edu.ucsd.msjava.mzml.StaxMzMLParser;
import org.junit.Assert;
import org.junit.Test;

import javax.xml.stream.XMLStreamException;
import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;

/**
 * Covers Q8: when the mzML has a byte-order mark (BOM) or a malformed XML
 * prolog, the constructor's {@link XMLStreamException} is re-thrown with an
 * actionable message instead of Stax's terse "ParseError in XML prolog".
 */
public class TestStaxMzMLParserErrorContext {

    /**
     * Write {@code bytes} to a fresh temporary {@code .mzML} file.
     *
     * @param bytes raw file content
     * @return the temporary file, registered for deletion at JVM exit
     * @throws IOException if the file cannot be created or written
     */
    private File writeBytesToTempMzml(byte[] bytes) throws IOException {
        Path tmp = Files.createTempFile("msgfplus-stax-context-", ".mzML");
        File file = tmp.toFile();
        // Register cleanup before writing so a failed write does not leak the file.
        file.deleteOnExit();
        Files.write(tmp, bytes);
        return file;
    }

    /**
     * A BOM-prefixed file, if rejected by the parser, must yield a message
     * carrying the file path, the BOM / prolog hint, and the docs pointer.
     */
    @Test
    public void bomPrefixedMzmlGivesActionableMessage() throws Exception {
        // UTF-8 BOM (EF BB BF) followed by a plausible-looking mzML prolog.
        byte[] bom = new byte[]{(byte) 0xEF, (byte) 0xBB, (byte) 0xBF};
        byte[] prolog = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><mzML/>".getBytes(StandardCharsets.UTF_8);
        byte[] content = new byte[bom.length + prolog.length];
        System.arraycopy(bom, 0, content, 0, bom.length);
        System.arraycopy(prolog, 0, content, bom.length, prolog.length);

        File mzml = writeBytesToTempMzml(content);

        try {
            new StaxMzMLParser(mzml);
            // Note: some Stax implementations tolerate a UTF-8 BOM. If this one
            // does, the test becomes a no-op — we can't force the parser to
            // fail, so just return.
        } catch (XMLStreamException e) {
            String msg = e.getMessage();
            Assert.assertNotNull("Wrapped XMLStreamException should carry a message", msg);
            Assert.assertTrue("Message should include the full file path for context",
                    msg.contains(mzml.getAbsolutePath()));
            Assert.assertTrue("Message should mention the BOM / prolog / encoding hint",
                    msg.contains("byte-order mark") || msg.contains("BOM")
                            || msg.contains("XML prolog") || msg.contains("encoding"));
            Assert.assertTrue("Message should point at Troubleshooting.md",
                    msg.contains("Troubleshooting.md"));
        }
    }

    /**
     * Non-XML content must always fail, and the failure must carry the phase
     * tag, the file path, the original parser text, and the original
     * exception as its cause.
     */
    @Test
    public void garbledPrologAlwaysProducesAnnotatedMessage() throws Exception {
        // Definitely-malformed XML (just random text, no prolog at all).
        // Every Stax impl rejects this.
        byte[] garbage = "this is not xml at all".getBytes(StandardCharsets.UTF_8);
        File mzml = writeBytesToTempMzml(garbage);

        try {
            new StaxMzMLParser(mzml);
            Assert.fail("Parsing random bytes as mzML should not succeed");
        } catch (XMLStreamException e) {
            String msg = e.getMessage();
            Assert.assertNotNull(msg);
            Assert.assertTrue("Message should include the index phase tag",
                    msg.contains("during index"));
            Assert.assertTrue("Message should include the file path",
                    msg.contains(mzml.getAbsolutePath()));
            Assert.assertTrue("Original parser error should be preserved in the message",
                    msg.contains("Underlying parser error"));
            // Check for a cause first so a missing one fails as an assertion,
            // not as a NullPointerException on getClass().
            Throwable cause = e.getCause();
            Assert.assertNotNull("Wrapped exception should expose the original as its cause", cause);
            // JUnit's argument order is (message, expected, actual).
            Assert.assertSame("Original exception should be the cause",
                    XMLStreamException.class, cause.getClass());
        }
    }
}
Loading