Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
878b0cb
docs(plans): start astral-speed-improvements; fold shipped plans into…
ypriverol Apr 28, 2026
eee9fa6
docs(plans): consolidate to 5x roadmap; adopt milestone-commit shippi…
ypriverol Apr 28, 2026
960c664
docs(plans): Phase A retrospective; revert in-tree code via separate …
ypriverol Apr 28, 2026
684abef
docs(plans): Phase E retrospective — parallelism win not replicable u…
ypriverol Apr 28, 2026
86ff529
docs(plans): Phase E GC-pressure follow-up — bigger heap helps 8t but…
ypriverol Apr 29, 2026
019facd
docs(plans): Phase E final disproof — anti-scaling and ForkJoin win w…
ypriverol Apr 29, 2026
7a684f2
fix(bench-ci): unbreak PXD001819 CI after PR #23 mzIdentML removal
ypriverol Apr 29, 2026
781738e
feat(phase-b-telemetry): add opt-in counter for pairing fan-out verif…
ypriverol Apr 29, 2026
05ec066
fix(calibrator): isolate pre-pass at iso=[0,0] + outlier-filter resid…
ypriverol Apr 29, 2026
7c027f8
feat(phase-b): expose tightening formula constants as system properties
ypriverol Apr 29, 2026
aac389c
feat(calibrator): stratify residuals by spec_eValue, keep top MIN_CON…
ypriverol Apr 29, 2026
f1a6e62
docs(plans): record Phase B Astral win after stratification fix
ypriverol Apr 29, 2026
8070e79
docs(plans): SHIPPED.md Active section reflects Phase B win
ypriverol Apr 29, 2026
d85399b
docs(plans): three-dataset Phase B validation table in SHIPPED.md
ypriverol Apr 29, 2026
957a6e9
docs(plans): Experiment 2 design — exact prefix mass-interval pruning
ypriverol Apr 29, 2026
4241fbb
feat(experiment-2): mass-interval pruning scaffold (off by default; C…
ypriverol Apr 29, 2026
f7310e9
docs(plans): Experiment 2 status header — kill gate hit on wall
ypriverol Apr 29, 2026
0c697dd
perf(experiment-2): replace TreeMap.subMap with binary-search on sort…
ypriverol Apr 29, 2026
a19b17f
docs(plans): Experiment 2 status header reflects Checkpoint 3 result
ypriverol Apr 29, 2026
8478651
perf(experiment-2): gate bound test on peptideLengthIndex >= minPepti…
ypriverol Apr 29, 2026
af65dd2
docs(plans): Experiment 2 Checkpoint 4 — gate-on-minPeptideLength shi…
ypriverol Apr 29, 2026
7a4a512
docs(plans): Experiment 2 Checkpoint 4 confirmation — 5-trial bench, …
ypriverol Apr 30, 2026
aa4aaae
chore: remove non-shippable runtime scaffolding; keep Phase B as the …
ypriverol Apr 30, 2026
5d9482d
fix(phase-b): isolate Spectrum state during calibration pre-pass
ypriverol May 1, 2026
6b8a177
chore: align .claude/plans/ and benchmark/ci/ with dev (drop from PR …
ypriverol May 1, 2026
0434bd1
feat(calibrator): expose maxSampled and minConfidentPsms as system pr…
ypriverol May 1, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 59 additions & 12 deletions src/main/java/edu/ucsd/msjava/cli/MSGFPlus.java
Original file line number Diff line number Diff line change
Expand Up @@ -350,12 +350,15 @@ private static String runMSGFPlus(int ioIndex, SpecFileFormat specFormat, File o

// Achievement B — two-pass precursor mass calibration (P2-cal).
// Runs a sampled pre-pass over the current file's SpecKeys to learn
// a per-file ppm shift, then stores it on DBSearchIOFiles so every
// task-local ScoredSpectraMap picks it up. OFF mode is a strict
// no-op: we skip the pre-pass entirely and never call the setter,
// so DBSearchIOFiles.precursorMassShiftPpm stays at its 0.0 default
// and ScoredSpectraMap.applyShift() takes its exact-zero fast path.
// a per-file ppm shift and a robust residual spread estimate. The
// shift is stored on DBSearchIOFiles so every task-local
// ScoredSpectraMap picks it up. When the user tolerance is ppm-based
// and the residuals are reliable, we also tighten the effective
// precursor window for the main pass. OFF mode is a strict no-op:
// we skip the pre-pass entirely, never call the setter, and keep the
// original tolerance objects unchanged.
DBSearchIOFiles currentIoFiles = params.getDBSearchIOList().get(ioIndex);
MassCalibrator.CalibrationStats calibrationStats = null;
if (params.getPrecursorCalMode() != SearchParams.PrecursorCalMode.OFF) {
long calStart = System.currentTimeMillis();
MassCalibrator calibrator = new MassCalibrator(
Expand All @@ -366,22 +369,66 @@ private static String runMSGFPlus(int ioIndex, SpecFileFormat specFormat, File o
specKeyList,
leftPrecursorMassTolerance,
rightPrecursorMassTolerance,
minIsotopeError,
maxIsotopeError,
specDataType);
double shiftPpm = calibrator.learnPrecursorShiftPpm(ioIndex);
calibrationStats = calibrator.learnCalibrationStats(ioIndex);
double shiftPpm = calibrationStats.getShiftPpm();
boolean applyLearnedShift = shiftPpm != 0.0
|| params.getPrecursorCalMode() == SearchParams.PrecursorCalMode.ON;
if (applyLearnedShift) {
currentIoFiles.setPrecursorMassShiftPpm(shiftPpm);
System.out.printf("Precursor mass shift learned: %.3f ppm (elapsed: %.2f sec)%n",
shiftPpm, (System.currentTimeMillis() - calStart) / 1000.0);
}
if (calibrationStats != null && calibrationStats.hasReliableStats()) {
System.out.printf("Precursor mass shift learned: %.3f ppm from %d confident PSMs (robust sigma %.3f ppm; elapsed: %.2f sec)%n",
shiftPpm,
calibrationStats.getConfidentPsmCount(),
calibrationStats.getRobustSigmaPpm(),
(System.currentTimeMillis() - calStart) / 1000.0);
} else {
System.out.printf("Precursor mass calibration skipped (insufficient confident PSMs; elapsed: %.2f sec)%n",
(System.currentTimeMillis() - calStart) / 1000.0);
}
}
double precursorMassShiftPpm = currentIoFiles.getPrecursorMassShiftPpm();
Tolerance resolvedLeftPrecursorMassTolerance = leftPrecursorMassTolerance;
Tolerance resolvedRightPrecursorMassTolerance = rightPrecursorMassTolerance;
if (calibrationStats != null
&& calibrationStats.hasReliableStats()
&& leftPrecursorMassTolerance.isTolerancePPM()
&& rightPrecursorMassTolerance.isTolerancePPM()) {
// The tightening formula constants are configurable via system properties for
// falsification sweeps (e.g. -Dmsgfplus.tighteningSigmaMultiplier=2 to test
// whether a 2-sigma envelope yields a real wall-clock improvement on Astral).
// Defaults match MassCalibrator.DEFAULT_TIGHTENED_WINDOW_*. Production OFF-mode
// semantics are unchanged.
float sigmaMultiplier = Float.parseFloat(System.getProperty(
"msgfplus.tighteningSigmaMultiplier",
String.valueOf(MassCalibrator.DEFAULT_TIGHTENED_WINDOW_SIGMA_MULTIPLIER)));
float floorPpm = Float.parseFloat(System.getProperty(
"msgfplus.tighteningFloorPpm",
String.valueOf(MassCalibrator.DEFAULT_TIGHTENED_WINDOW_FLOOR_PPM)));
float marginPpm = Float.parseFloat(System.getProperty(
"msgfplus.tighteningMarginPpm",
String.valueOf(MassCalibrator.DEFAULT_TIGHTENED_WINDOW_MARGIN_PPM)));
float tightenedLeftPpm = MassCalibrator.tightenedTolerancePpm(
leftPrecursorMassTolerance.getValue(),
calibrationStats.getRobustSigmaPpm(),
sigmaMultiplier, floorPpm, marginPpm);
float tightenedRightPpm = MassCalibrator.tightenedTolerancePpm(
rightPrecursorMassTolerance.getValue(),
calibrationStats.getRobustSigmaPpm(),
sigmaMultiplier, floorPpm, marginPpm);
boolean tightened = tightenedLeftPpm < leftPrecursorMassTolerance.getValue()
|| tightenedRightPpm < rightPrecursorMassTolerance.getValue();
if (tightened) {
resolvedLeftPrecursorMassTolerance = new Tolerance(tightenedLeftPpm, true);
resolvedRightPrecursorMassTolerance = new Tolerance(tightenedRightPpm, true);
System.out.printf("Tightened precursor tolerance for main pass: left %.3f ppm -> %.3f ppm, right %.3f ppm -> %.3f ppm%n",
leftPrecursorMassTolerance.getValue(), tightenedLeftPpm,
rightPrecursorMassTolerance.getValue(), tightenedRightPpm);
}
}
final Tolerance effectiveLeftPrecursorMassTolerance = resolvedLeftPrecursorMassTolerance;
final Tolerance effectiveRightPrecursorMassTolerance = resolvedRightPrecursorMassTolerance;

List<MSGFPlusMatch> resultList;

Expand Down Expand Up @@ -468,8 +515,8 @@ private static String runMSGFPlus(int ioIndex, SpecFileFormat specFormat, File o
ScoredSpectraMap specScanner = new ScoredSpectraMap(
specAcc,
specKeyList.subList(taskStartIndex, taskEndIndex),
leftPrecursorMassTolerance,
rightPrecursorMassTolerance,
effectiveLeftPrecursorMassTolerance,
effectiveRightPrecursorMassTolerance,
minIsotopeError,
maxIsotopeError,
specDataType,
Expand Down
34 changes: 17 additions & 17 deletions src/main/java/edu/ucsd/msjava/msdbsearch/DBScanner.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,41 +15,41 @@

public class DBScanner {

private int minPeptideLength;
private int maxPeptideLength;
private int maxMissedCleavages;
protected int minPeptideLength;
protected int maxPeptideLength;
protected int maxMissedCleavages;

/**
* Maximum number of isoforms (variants) to consider per peptide.
* Constants.NUM_VARIANTS_PER_PEPTIDE is 128.
*/
private int maxNumVariantsPerPeptide;
protected int maxNumVariantsPerPeptide;

private AminoAcidSet aaSet;
protected AminoAcidSet aaSet;
private double[] aaMass;
private int[] intAAMass;

private Enzyme enzyme;
private int numPeptidesPerSpec;
protected Enzyme enzyme;
protected int numPeptidesPerSpec;

private final CompactSuffixArray sa;
private final int size;
protected final CompactSuffixArray sa;
protected final int size;
// to scan the database partially
// Input spectra
private final ScoredSpectraMap specScanner;
protected final ScoredSpectraMap specScanner;

private int minDeNovoScore;
private boolean ignoreNTermMetCleavage;
protected int minDeNovoScore;
protected boolean ignoreNTermMetCleavage;

// DB search results
private Map<SpecKey, PriorityQueue<DatabaseMatch>> specKeyDBMatchMap;
private Map<Integer, PriorityQueue<DatabaseMatch>> specIndexDBMatchMap;
protected Map<SpecKey, PriorityQueue<DatabaseMatch>> specKeyDBMatchMap;
protected Map<Integer, PriorityQueue<DatabaseMatch>> specIndexDBMatchMap;

private ProgressData progress;
private PrintStream output;
protected ProgressData progress;
protected PrintStream output;

// For output
private String threadName = "";
protected String threadName = "";

public DBScanner(
ScoredSpectraMap specScanner,
Expand Down
Loading
Loading