Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Adding itraq scan summarizer for Ebbing's data.

  • Loading branch information...
commit 5448734c7a260cd72cbd93d408b2671f7ea11b44 1 parent 8eafe0d
@jmchilton authored
View
86 projects/TropixProteomicsCore/src/main/edu/umn/msi/tropix/proteomics/conversion/Scan.java
@@ -16,15 +16,22 @@
package edu.umn.msi.tropix.proteomics.conversion;
+import java.util.Iterator;
+
import org.apache.commons.io.FilenameUtils;
+import com.google.common.base.Optional;
+import com.google.common.collect.ImmutableList;
+
import edu.umn.msi.tropix.proteomics.conversion.DtaNameUtils.DtaNameSummary;
+import edu.umn.msi.tropix.proteomics.conversion.Scan.Peak;
// TODO: Construct Scan objects using the builder pattern
-public class Scan implements Cloneable {
+public class Scan implements Cloneable, Iterable<Peak> {
private short precursorCharge = 0;
private float precursorIntensity = 0.0f;
private float precursorMz = 0.0f;
+ private float rt = 0.0f;
private final int msLevel;
private final int number;
private int alt = 0;
@@ -38,11 +45,19 @@ public boolean isPrecursorChargeSet() {
}
public boolean isPrecursorIntensitySet() {
- return precursorIntensity > 0.0;
+ return precursorIntensity > 0.0f;
}
public boolean isPrecursorMzSet() {
- return precursorMz > 0.0;
+ return precursorMz > 0.0f;
+ }
+
+  /**
+   * @return true if the rt value of this scan is greater than zero (the field starts out as 0.0f).
+   */
+  public boolean isRtSet() {
+    final boolean hasPositiveRt = 0.0f < rt;
+    return hasPositiveRt;
+  }
+
+  /**
+   * @return The rt value of this scan; 0.0f is the field's initial value.
+   */
+  public float getRt() {
+    return this.rt;
   }
public Scan(final int msLevel, final int number, final double[] peaks) {
@@ -64,6 +79,18 @@ public Scan clone() {
return peaks;
}
+  /**
+   * Iterates over the peaks of this scan, as produced by {@link #getPeakObjects()}.
+   */
+  public Iterator<Peak> iterator() {
+    final ImmutableList<Peak> peakObjects = getPeakObjects();
+    return peakObjects.iterator();
+  }
+
+  /**
+   * Builds one Peak per consecutive pair of values in the peaks array, passing the even-indexed
+   * value as m/z and the following value as intensity.
+   *
+   * @return An immutable list of the peaks, in array order. A trailing value without a partner
+   *         (odd-length array) is ignored.
+   */
+  public ImmutableList<Peak> getPeakObjects() {
+    final ImmutableList.Builder<Peak> peakListBuilder = ImmutableList.builder();
+    // Stop at the last complete pair so an odd-length array cannot index past its end.
+    for(int i = 0; i + 1 < peaks.length; i += 2) {
+      peakListBuilder.add(buildPeak(peaks[i], peaks[i + 1]));
+    }
+    return peakListBuilder.build();
+  }
+
/**
* @return The precursorCharge of this scan, or 0 if it is unknown.
*/
@@ -136,6 +163,10 @@ public void setAlt(final int alt) {
this.alt = alt;
}
+ public void setRt(final float rt) { // Stores the rt value returned by getRt(); isRtSet() reports true only for values above zero.
+ this.rt = rt;
+ }
+
public void setParentFileName(final String parentFilePath) {
this.parentFileName = FilenameUtils.getName(parentFilePath);
if(DtaNameUtils.isDtaName(parentFileName)) {
@@ -166,4 +197,53 @@ public String toString() {
return org.apache.commons.lang.builder.ToStringBuilder.reflectionToString(this);
}
+  /**
+   * Finds the peak of greatest intensity whose m/z lies in the closed interval [fromMz, toMz].
+   * When several peaks share the greatest intensity, the first one encountered is kept.
+   *
+   * @return The most intense peak in range, or an absent Optional if no peak falls in the range.
+   */
+  public Optional<Peak> mostIntensePeak(final double fromMz, final double toMz) {
+    Peak best = null;
+    for(final Peak candidate : this) {
+      if(!candidate.onMzRange(fromMz, toMz)) {
+        continue;
+      }
+      if(best == null || candidate.moreIntense(best)) {
+        best = candidate;
+      }
+    }
+    return Optional.fromNullable(best);
+  }
+
+ private Peak buildPeak(final double mz, final double intensity) {
+ return new Peak(mz, intensity, this);
+ }
+
+  /**
+   * A single (m/z, intensity) pair together with the Scan it was built from.
+   */
+  public static class Peak implements Cloneable {
+    private final double intensity;
+    private final double mz;
+    private final Scan scan;
+
+    public Peak(final double mz, final double intensity, Scan scan) {
+      this.mz = mz;
+      this.intensity = intensity;
+      this.scan = scan;
+    }
+
+    /**
+     * @return true when fromMz <= m/z <= toMz (both bounds inclusive).
+     */
+    public boolean onMzRange(final double fromMz, final double toMz) {
+      final boolean atOrAboveLower = mz >= fromMz;
+      final boolean atOrBelowUpper = mz <= toMz;
+      return atOrAboveLower && atOrBelowUpper;
+    }
+
+    /**
+     * @return The intensity of this peak.
+     */
+    public double getIntensity() {
+      return this.intensity;
+    }
+
+    /**
+     * @return The m/z of this peak.
+     */
+    public double getMz() {
+      return this.mz;
+    }
+
+    /**
+     * @return The scan passed to the constructor.
+     */
+    public Scan getScan() {
+      return this.scan;
+    }
+
+    /**
+     * @return true only when this peak's intensity is strictly greater than the other's.
+     */
+    public boolean moreIntense(final Peak other) {
+      return other.intensity < intensity;
+    }
+
+  }
+
}
View
22 projects/TropixProteomicsCore/src/main/edu/umn/msi/tropix/proteomics/conversion/impl/MgfParseUtils.java
@@ -17,33 +17,43 @@
private static final Pattern SPOT_SET_PATTERN = Pattern.compile(".*Spot Set: \\w+\\\\(\\w+).*");
private static final Pattern AB_SCIEX_TITLE_PATTERN = Pattern.compile(".*Label:.*Peak_List_Id: (\\d+).*");
private static final Pattern READW_4_MASCOT_TITLE_PATTERN = Pattern.compile("Scan:(\\d+) .*");
-
+ private static final Pattern READW_4_MASCOT_RT_PATTERN = Pattern.compile(".*\\sRT:([\\d\\.]+)\\s.*");
+
// Reports whether the entire line matches AB_SCIEX_TITLE_PATTERN.
static boolean isAbSciexTitle(final String line) {
  final Matcher abSciexMatcher = AB_SCIEX_TITLE_PATTERN.matcher(line);
  return abSciexMatcher.matches();
}
-
+
// Reports whether the entire line matches READW_4_MASCOT_TITLE_PATTERN.
static boolean isReadw4MascotTitle(final String line) {
  final Matcher titleMatcher = READW_4_MASCOT_TITLE_PATTERN.matcher(line);
  return titleMatcher.matches();
}
-
+
/**
 * Extracts the scan number captured by the first group of READW_4_MASCOT_TITLE_PATTERN.
 * The match itself is enforced with Preconditions.checkState, so callers are expected to
 * have verified the line with isReadw4MascotTitle first.
 */
static int getReadw4MascotScanNumber(final String line) {
  final Matcher titleMatcher = READW_4_MASCOT_TITLE_PATTERN.matcher(line);
  final boolean isReadwTitle = titleMatcher.matches();
  Preconditions.checkState(isReadwTitle);
  final String scanNumber = titleMatcher.group(1);
  return Integer.parseInt(scanNumber);
}
-
+
+  /**
+   * Extracts the value captured by READW_4_MASCOT_RT_PATTERN (the text following "RT:").
+   *
+   * @return The parsed value, or null when the line does not match the pattern or the captured
+   *         text is not a valid float (the pattern's character class also admits strings such
+   *         as "1.2.3").
+   */
+  static Float getReadw4MascotRt(final String line) {
+    final Matcher matcher = READW_4_MASCOT_RT_PATTERN.matcher(line);
+    if(!matcher.matches()) {
+      return null;
+    }
+    try {
+      return Float.valueOf(matcher.group(1));
+    } catch(final NumberFormatException e) {
+      // A missing RT is already reported as null, so a malformed one is treated as absent
+      // instead of aborting the parse of the whole title.
+      return null;
+    }
+  }
+
/**
 * Extracts the number captured by the first group of AB_SCIEX_TITLE_PATTERN
 * (the digits following "Peak_List_Id: "). The match is enforced with
 * Preconditions.checkArgument.
 */
static int getAbSciexScanNumber(final String line) {
  final Matcher titleMatcher = AB_SCIEX_TITLE_PATTERN.matcher(line);
  final boolean isAbSciex = titleMatcher.matches();
  Preconditions.checkArgument(isAbSciex);
  final String peakListId = titleMatcher.group(1);
  return Integer.parseInt(peakListId);
}
-
+
static Optional<String> parseDefaultParentName(final String line) {
Optional<String> parentName = Optional.absent();
if(COM_PATTERN.matcher(line).matches()) {
final Matcher spotSetMatcher = SPOT_SET_PATTERN.matcher(line);
if(spotSetMatcher.matches()) {
-
+
parentName = Optional.of(spotSetMatcher.group(1));
}
}
View
5 projects/TropixProteomicsCore/src/main/edu/umn/msi/tropix/proteomics/conversion/impl/MgfScanExtracter.java
@@ -29,6 +29,7 @@
private Optional<String> defaultTitleStr = Optional.<String>absent();
private float precursorMz = 0.0f;
private float precursorIntensity = 0.0f;
+ private Float rt = null;
private int end = 0;
private int start = 0;
private List<Short> charges = null;
@@ -103,6 +104,7 @@ private void handleTitleLine(final String line) {
this.titleStr = defaultTitleStr.get();
} else if(MgfParseUtils.isReadw4MascotTitle(titleStr)) {
this.end = MgfParseUtils.getReadw4MascotScanNumber(titleStr);
+ this.rt = MgfParseUtils.getReadw4MascotRt(titleStr);
// this.titleStr = defaultTitleStr.get();
// These files usually don't specify a charge state, so we need to guess.
this.guessChargeState = true;
@@ -182,6 +184,9 @@ private void buildPeaksArray() {
templateScan.setPrecursorMz(precursorMz);
templateScan.setPrecursorIntensity(precursorIntensity);
templateScan.setParentFileName(titleStr);
+ if(rt != null) {
+ templateScan.setRt(rt.floatValue());
+ }
final List<Scan> scansToCache = Lists.newArrayList();
if(charges == null && templateScan.isPrecursorChargeSet()) {
View
10 ...ts/TropixProteomicsCore/src/test/edu/umn/msi/tropix/proteomics/conversion/impl/MgfScanExtracterTest.java
@@ -24,18 +24,19 @@ public void testReadMsmTitle() {
assert Math.abs(scan.getPrecursorMz() - 408.31064f) < 0.0001;
assert scan.getNumber() == 5 : scan.getNumber();
}
-
+
@Test(groups = "unit")
public void testReadw4MascotTitle() {
  final Scan scan = getOnlyScan(getReadw4MascotLines());
  assert scan.getNumber() == 67 : scan.getNumber();
  assert Math.abs(scan.getPrecursorMz() - 461.8420f) < 0.0001;
  assert scan.getPrecursorCharge() == (short) 1;
+  // Compare with a tolerance, like the precursor m/z check above, rather than exact float equality.
+  assert Math.abs(scan.getRt() - 0.012f) < 0.0001 : scan.getRt();
}
-
+
private Scan getOnlyScan(final Iterator<String> mgfSectionLines) {
final MgfScanExtracter extracter = new MgfScanExtracter(mgfSectionLines, null, Optional.of("030911_fallo002_baldr001_10035_CvsW_4plx"));
- final Scan scan = Iterables.getOnlyElement(extracter.extractScans());
+ final Scan scan = Iterables.getOnlyElement(extracter.extractScans());
return scan;
}
@@ -67,6 +68,7 @@ public void testAbSciexMgf() {
assert scan.getParentFileName().equals("030911_fallo002_baldr001_10035_CvsW_4plx") : scan.getParentFileName();
assert scan.getPrecursorCharge() == 1;
assert scan.getNumber() == 2720588 : scan.getNumber();
+
}
private Iterator<String> getAbSciexLines() {
@@ -81,7 +83,7 @@ public void testAbSciexMgf() {
final String resourceName = "msm_scan_section.txt";
return getResourceLines(resourceName);
}
-
+
// Supplies the lines of the readw4mascot_section.txt test resource.
private Iterator<String> getReadw4MascotLines() {
  final String resourceName = "readw4mascot_section.txt";
  return getResourceLines(resourceName);
}
View
83 projects/TropixProteomicsCore/src/test/edu/umn/msi/tropix/proteomics/test/ITraqScanSummarizer.java
@@ -0,0 +1,83 @@
+package edu.umn.msi.tropix.proteomics.test;
+
+import java.io.File;
+import java.io.FileFilter;
+import java.io.InputStream;
+import java.io.PrintWriter;
+import java.util.Iterator;
+
+import com.google.common.base.Optional;
+
+import edu.umn.msi.tropix.common.io.FileUtils;
+import edu.umn.msi.tropix.common.io.FileUtilsFactory;
+import edu.umn.msi.tropix.common.io.IOUtils;
+import edu.umn.msi.tropix.common.io.IOUtilsFactory;
+import edu.umn.msi.tropix.proteomics.conversion.Scan;
+import edu.umn.msi.tropix.proteomics.conversion.Scan.Peak;
+import edu.umn.msi.tropix.proteomics.conversion.impl.MgfParser;
+
+public class ITraqScanSummarizer {
+ private static final FileUtils FILE_UTILS = FileUtilsFactory.getInstance();
+ private static final IOUtils IO_UTILS = IOUtilsFactory.getInstance();
+ private static MgfParser mgfParser = new MgfParser();
+
+ public static void main(final String[] args) {
+ final File directory = new File("/home/msi/chilton/to_summarize");
+ final File[] mzxmlFiles = directory.listFiles(new FileFilter() {
+ public boolean accept(final File pathname) {
+ return pathname.getName().toLowerCase().endsWith("mgf");
+ }
+ });
+ for(final File mzxmlFile : mzxmlFiles) {
+ summarize(mzxmlFile);
+ }
+ }
+
+ private static void summarize(final Iterator<Scan> scans, final PrintWriter writer) {
+ while(scans.hasNext()) {
+ final Scan scan = scans.next();
+ summarize(scan, writer);
+ writer.append("\n");
+ }
+ }
+
+ private static void summarize(final Scan scan, final PrintWriter writer) {
+ final Optional<Peak> peak114 = scan.mostIntensePeak(113.9, 114.3);
+ final Optional<Peak> peak117 = scan.mostIntensePeak(116.9, 117.3);
+
+ appendValue(scan.getNumber(), writer);
+ summarizePeak(peak114, writer);
+ summarizePeak(peak117, writer);
+ appendValue(scan.getPrecursorMz(), writer);
+ appendValue(scan.getRt(), writer);
+ }
+
+ private static void summarizePeak(final Optional<Peak> peak, final PrintWriter writer) {
+ if(peak.isPresent()) {
+ appendValue(peak.get().getIntensity(), writer);
+ appendValue(peak.get().getMz(), writer);
+ } else {
+ appendValue("NA", writer);
+ appendValue("NA", writer);
+ }
+ }
+
+ private static void appendValue(final Object value, final PrintWriter writer) {
+ writer.append(value.toString());
+ writer.append("\t");
+ }
+
+ private static void summarize(final File mzxmlFile) {
+ final File summaryFile = new File(mzxmlFile.getPath() + "_summary.csv");
+ final InputStream mzxmlStream = FILE_UTILS.getFileInputStream(mzxmlFile);
+ final PrintWriter summaryWriter = new PrintWriter(FILE_UTILS.getFileWriter(summaryFile));
+ try {
+ final Iterator<Scan> scanIterator = mgfParser.parserMgf(mzxmlStream);
+ summarize(scanIterator, summaryWriter);
+ } finally {
+ IO_UTILS.closeQuietly(mzxmlStream);
+ IO_UTILS.closeQuietly(summaryWriter);
+ }
+ }
+
+}
Please sign in to comment.
Something went wrong with that request. Please try again.