Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/compomics/colims
Browse files Browse the repository at this point in the history
  • Loading branch information
nielshulstaert committed Sep 11, 2018
2 parents be1acb2 + 29d96ec commit 4bd855e
Show file tree
Hide file tree
Showing 10 changed files with 86 additions and 31 deletions.
@@ -1,6 +1,7 @@
package com.compomics.colims.core.io.fasta;

import com.compomics.colims.model.FastaDb;
import com.compomics.colims.model.Protein;
import com.compomics.colims.model.enums.SearchEngineType;
import com.compomics.util.protein.Header;
import org.slf4j.Logger;
Expand Down Expand Up @@ -42,8 +43,8 @@ public class FastaDbParser {
* @return the proteins map (key: protein accession; value: protein holding the sequence and, if available, the description)
* @throws IOException thrown in case of an input/output related problem
*/
public Map<String, String> parse(LinkedHashMap<FastaDb, Path> fastaDbs) throws IOException {
Map<String, String> proteinSequences = new HashMap<>();
public Map<String, Protein> parse(LinkedHashMap<FastaDb, Path> fastaDbs) throws IOException {
Map<String, Protein> proteinSequences = new HashMap<>();
try {
for (Map.Entry<FastaDb, Path> entry : fastaDbs.entrySet()) {
FastaDb fastaDb = entry.getKey();
Expand Down Expand Up @@ -153,7 +154,7 @@ public LinkedHashMap<String, String> testParseRule(Path fastaPath, String parseR
* @param fastaPath the FASTA path
* @throws IOException in case of file reading related problem
*/
private void parseWithRule(Map<String, String> proteinSequences, FastaDb fastaDb, Path fastaPath) throws IOException {
private void parseWithRule(Map<String, Protein> proteinSequences, FastaDb fastaDb, Path fastaPath) throws IOException {
try (BufferedReader bufferedReader = Files.newBufferedReader(fastaPath)) {
//compile the pattern
Pattern pattern;
Expand All @@ -170,11 +171,23 @@ private void parseWithRule(Map<String, String> proteinSequences, FastaDb fastaDb
if (line.startsWith(BLOCK_SEPARATOR)) {
//add limiting check for protein store to avoid growing
if (sequenceBuilder.length() > 0) {
Protein protein = new Protein();
protein.setSequence(sequenceBuilder.toString().trim());
String regex = "(?<!\\\\)" + Pattern.quote("|");
if(fastaHeader.substring(1).split(regex).length > 2){
if(fastaHeader.substring(1).split(regex)[2].split(SPLITTER).length >1){
String description =String.join(SPLITTER, Arrays.copyOfRange(fastaHeader.substring(1).split(regex)[2].split(SPLITTER), 1, fastaHeader.substring(1).split(regex)[2].split(SPLITTER).length));
if(description.contains(" OS=")){
protein.setDescription(description.substring(0, description.indexOf(" OS=")));
}
}

}
Matcher matcher = pattern.matcher(fastaHeader.substring(1));
if (matcher.find()) {
proteinSequences.putIfAbsent(matcher.group(1), sequenceBuilder.toString().trim());
proteinSequences.putIfAbsent(matcher.group(1), protein);
} else {
proteinSequences.putIfAbsent(fastaHeader.substring(1).split(SPLITTER)[0], sequenceBuilder.toString().trim());
proteinSequences.putIfAbsent(fastaHeader.substring(1).split(SPLITTER)[0], protein);
}
sequenceBuilder.setLength(0);
}
Expand All @@ -185,11 +198,19 @@ private void parseWithRule(Map<String, String> proteinSequences, FastaDb fastaDb
}
//last line
if (sequenceBuilder.length() > 0) {
Protein protein = new Protein();
protein.setSequence(sequenceBuilder.toString().trim());
if(fastaHeader.substring(1).split("|").length > 2 && fastaHeader.substring(1).split("|")[2].split(SPLITTER).length >1){
String description = fastaHeader.substring(1).split("|")[2].split(SPLITTER)[1];
if(description.contains(" OS=")){
protein.setDescription(description.substring(0, description.indexOf("OS=")));
}
}
Matcher matcher = pattern.matcher(fastaHeader.substring(1).split(SPLITTER)[0]);
if (matcher.find()) {
proteinSequences.putIfAbsent(matcher.group(1), sequenceBuilder.toString().trim());
proteinSequences.putIfAbsent(matcher.group(1), protein);
} else {
proteinSequences.putIfAbsent(fastaHeader.substring(1).split(SPLITTER)[0], sequenceBuilder.toString().trim());
proteinSequences.putIfAbsent(fastaHeader.substring(1).split(SPLITTER)[0], protein);
}
sequenceBuilder.setLength(0);
}
Expand All @@ -203,7 +224,7 @@ private void parseWithRule(Map<String, String> proteinSequences, FastaDb fastaDb
* @param fastaPath the FASTA path
* @throws IOException in case of file reading related problem
*/
private void parseWithoutRule(Map<String, String> proteinSequences, Path fastaPath) throws IOException {
private void parseWithoutRule(Map<String, Protein> proteinSequences, Path fastaPath) throws IOException {
try (BufferedReader bufferedReader = Files.newBufferedReader(fastaPath)) {
//start reading the file
final StringBuilder sequenceBuilder = new StringBuilder();
Expand All @@ -213,7 +234,10 @@ private void parseWithoutRule(Map<String, String> proteinSequences, Path fastaPa
if (line.startsWith(BLOCK_SEPARATOR)) {
//add limiting check for protein store to avoid growing
if (sequenceBuilder.length() > 0) {
proteinSequences.putIfAbsent(fastaHeader.substring(1).split(SPLITTER)[0], sequenceBuilder.toString().trim());
Protein protein = new Protein();
protein.setSequence(sequenceBuilder.toString().trim());
// protein.setDescription(fastaHeader.substring(1).split(SPLITTER)[1]);
proteinSequences.putIfAbsent(fastaHeader.substring(1).split(SPLITTER)[0], protein);
sequenceBuilder.setLength(0);
}
fastaHeader = line;
Expand All @@ -223,7 +247,9 @@ private void parseWithoutRule(Map<String, String> proteinSequences, Path fastaPa
}
//last line
if (sequenceBuilder.length() > 0) {
proteinSequences.putIfAbsent(fastaHeader.substring(1).split(SPLITTER)[0], sequenceBuilder.toString().trim());
Protein protein = new Protein();
protein.setSequence(sequenceBuilder.toString().trim());
proteinSequences.putIfAbsent(fastaHeader.substring(1).split(SPLITTER)[0], protein);
sequenceBuilder.setLength(0);
}
}
Expand Down
Expand Up @@ -57,7 +57,7 @@ public static void main(final String[] args) throws IOException, ClassNotFoundEx
testFastaDb.setDatabaseName("test db");
LinkedHashMap<FastaDb, Path> fastaDbs = new LinkedHashMap<>();
fastaDbs.put(testFastaDb, Paths.get(testFastaDb.getFilePath()));
Map<String, String> parse = fastaDbParser.parse(fastaDbs);
// Map<String, String> parse = fastaDbParser.parse(fastaDbs);
System.out.println("=---------------");

}
Expand Down
Expand Up @@ -22,9 +22,10 @@ public interface ProteinService extends GenericService<Protein, Long> {
* looks for the protein in the cache first before querying the database.
*
* @param sequence the protein sequence
* @param description the protein description
* @return the found Protein instance
*/
Protein getProtein(String sequence);
Protein getProtein(String sequence, String description);

/**
* Clear the resources used by this resource.
Expand Down
Expand Up @@ -67,12 +67,13 @@ public void remove(Protein entity) {
}

@Override
public Protein getProtein(String sequence) {
public Protein getProtein(String sequence, String description) {
Protein targetProtein;

//first, look in the newly added proteins map
//@todo configure hibernate cache and check performance
targetProtein = cachedProteins.get(sequence);

if (targetProtein == null) {
//check if the protein is found in the db
targetProtein = findBySequence(sequence);
Expand All @@ -82,8 +83,13 @@ public Protein getProtein(String sequence) {
targetProtein = new Protein(sequence);

}
if(targetProtein.getDescription() == null && description != null){
targetProtein.setDescription(description);
}
//add to cached proteins
cachedProteins.put(sequence, targetProtein);
}else if(targetProtein.getDescription() == null && description != null){
targetProtein.setDescription(description);
}
return targetProtein;
}
Expand Down
@@ -1,6 +1,7 @@
package com.compomics.colims.core.io.fasta;

import com.compomics.colims.model.FastaDb;
import com.compomics.colims.model.Protein;
import com.compomics.colims.model.enums.SearchEngineType;
import org.junit.Assert;
import org.junit.Test;
Expand Down Expand Up @@ -52,7 +53,7 @@ public void testParse() throws IOException {
fastaDbs.put(testFastaDb, Paths.get(testFastaDb.getFilePath()));
fastaDbs.put(contaminantsFastaDb, Paths.get(contaminantsFastaDb.getFilePath()));

Map<String, String> parsedFastas = fastaDbParser.parse(fastaDbs);
Map<String, Protein> parsedFastas = fastaDbParser.parse(fastaDbs);

Assert.assertFalse(parsedFastas.containsKey(null));
Assert.assertEquals(20381, parsedFastas.size());
Expand All @@ -64,7 +65,8 @@ public void testParse() throws IOException {
Assert.assertTrue(parsedFastas.containsKey("Q2KJ03"));
Assert.assertEquals("MEEKTTQSVEGLKQYCLVPEREMKHIERHIHQTGKAGEFKNKPFRQVLQPPNETKLP" +
"KIMPEGHGIQNAQRRKQVNEREQMQTKDHQERMIRGRELAEQRLKERILRRSQSQLLTYEKHERVKEIK" +
"EFERVIAYLLFQPCSRSRIKVSILMDKSQNGEKVNTIVKPYQRKFLAMPPFLRSQIGKIRD", parsedFastas.get("Q2KJ03"));
"EFERVIAYLLFQPCSRSRIKVSILMDKSQNGEKVNTIVKPYQRKFLAMPPFLRSQIGKIRD", parsedFastas.get("Q2KJ03").getSequence());
Assert.assertEquals("Putative uncharacterized protein ZNRD1-AS1", parsedFastas.get("Q2KJ03").getDescription());
//look for 3 contaminants proteins
Assert.assertTrue(parsedFastas.containsKey("P00761"));
Assert.assertTrue(parsedFastas.containsKey("Q9TRI1"));
Expand All @@ -81,7 +83,9 @@ public void testParse() throws IOException {
"DLDSIIAEVRAQYEEIAQRSKAEAEALYQTKVQQLQISVDQHGDNLKNTKSEIAELNRMIQRLRAEIE" +
"NIKKQCQTLQVSVADAEQRGENALKDAHSKRVELEAALQQAKEELARMLREYQELMSVKLALDIEIAT" +
"YRKLLEGEEYRMSGECQSAVSISVVSGSTSTGGISGGLGSGSGFGLSSGFGSGSGSGFGFGGSVSGSS" +
"SSKIISTTTLNKRR", parsedFastas.get("P19013"));
"SSKIISTTTLNKRR", parsedFastas.get("P19013").getSequence());
Assert.assertEquals("Keratin, type II cytoskeletal 4", parsedFastas.get("P19013").getDescription());

}

@Test
Expand Down
Expand Up @@ -52,7 +52,7 @@ public class MaxQuantProteinGroupsParser {
* The map of parsed proteins (key: protein accession; value: the parsed
* protein with its sequence and, if available, its description).
*/
private Map<String, String> proteinSequences = new HashMap<>();
private Map<String, Protein> proteinSequences = new HashMap<>();
/**
* The quantification method.
*/
Expand Down Expand Up @@ -165,8 +165,8 @@ private ProteinGroup parseProteinGroup(Map<String, String> proteinGroupsEntry, b
strippedAccession = org.apache.commons.lang3.StringUtils.substringAfter(strippedAccession, ProteinGroupHasProtein.CONTAMINANT_PREFIX);
}
//get the protein by its accession
String sequence = getProteinSequence(strippedAccession);
proteinGroup.getProteinGroupHasProteins().add(createProteinGroupHasProtein(sequence, accession, isMainGroup, proteinGroup));
Protein protein = getProteinSequence(strippedAccession);
proteinGroup.getProteinGroupHasProteins().add(createProteinGroupHasProtein(protein, accession, isMainGroup, proteinGroup));

if (isMainGroup) {
isMainGroup = false;
Expand Down Expand Up @@ -208,7 +208,7 @@ private ProteinGroup parseProteinGroup(Map<String, String> proteinGroupsEntry, b
* @return the found protein
* @throws IllegalArgumentException if the accession key is not found
*/
private String getProteinSequence(String accession) {
private Protein getProteinSequence(String accession) {
if (proteinSequences.containsKey(accession)) {
return proteinSequences.get(accession);
} else {
Expand All @@ -224,12 +224,13 @@ private String getProteinSequence(String accession) {
* @param mainGroup whether this is the main protein of the group
* @return a ProteinGroupHasProtein object
*/
private ProteinGroupHasProtein createProteinGroupHasProtein(String sequence, String accession, boolean mainGroup, ProteinGroup proteinGroup) {
private ProteinGroupHasProtein createProteinGroupHasProtein(Protein pr, String accession, boolean mainGroup, ProteinGroup proteinGroup) {
ProteinGroupHasProtein proteinGroupHasProtein = new ProteinGroupHasProtein();
proteinGroupHasProtein.setIsMainGroupProtein(mainGroup);

//get protein
Protein protein = proteinService.getProtein(sequence);
Protein protein = proteinService.getProtein(pr.getSequence(), pr.getDescription());
protein.setDescription(pr.getDescription());

//set protein accession
proteinGroupHasProtein.setProteinAccession(accession);
Expand Down Expand Up @@ -262,15 +263,15 @@ private void createProteinGroupQuant(ProteinGroup proteinGroup, AnalyticalRun an
//set the analytical run
proteinGroupQuant.setAnalyticalRun(analyticalRun);
//set the intensity
if (intensity != null) {
if (intensity != null && !Double.isNaN(Double.parseDouble(intensity))) {
proteinGroupQuant.setIntensity(Double.parseDouble(intensity));
}
//set the LFQ intensity
if (lfqIntensity != null) {
if (lfqIntensity != null && !Double.isNaN(Double.parseDouble(lfqIntensity))) {
proteinGroupQuant.setLfqIntensity(Double.parseDouble(lfqIntensity));
}
//set the iBAQ
if (ibaq != null) {
if (ibaq != null && !Double.isNaN(Double.parseDouble(ibaq))) {
proteinGroupQuant.setIbaq(Double.parseDouble(ibaq));
}
//set the MSMS Count
Expand Down
Expand Up @@ -67,7 +67,7 @@ public void map(final ProteinMatch proteinMatch, final PSParameter proteinGroupS
//get the utilities Protein from SequenceFactory
com.compomics.util.experiment.biology.Protein sourceProtein = SequenceFactory.getInstance().getProtein(proteinAccession);
//get protein
Protein matchedProtein = proteinService.getProtein(sourceProtein.getSequence());
Protein matchedProtein = proteinService.getProtein(sourceProtein.getSequence(), null);

if (proteinAccession.equals(proteinMatch.getMainMatch())) {
//set the is main protein group flag to true
Expand Down
25 changes: 20 additions & 5 deletions colims-model/src/main/java/com/compomics/colims/model/Protein.java
Expand Up @@ -28,13 +28,17 @@ public class Protein extends DatabaseEntity {
@Column(name = "protein_sequence", nullable = false)
// @Field(index=Index.YES, analyze=Analyze.NO, store=Store.NO)
private String sequence;

@Basic(optional = true)
@Column(name = "description", nullable = true)
private String description;
/**
* The ProteinGroupHasProtein instances from the join table between the protein group and protein tables.
* The ProteinGroupHasProtein instances from the join table between the
* protein group and protein tables.
*/
@OneToMany(mappedBy = "protein")
private List<ProteinGroupHasProtein> proteinGroupHasProteins = new ArrayList<>();


/**
* No-arg constructor.
*/
Expand All @@ -58,6 +62,14 @@ public void setSequence(String sequence) {
this.sequence = sequence;
}

/**
* Get the protein description.
*
* @return the protein description, can be null as the field is optional
*/
public String getDescription() {
return description;
}

/**
* Set the protein description.
*
* @param description the protein description, null is allowed as the field is optional
*/
public void setDescription(String description) {
this.description = description;
}

/**
* Get the ProteinGroupHasProtein instances from the join table between the
* protein group and protein tables.
*
* @return the list of ProteinGroupHasProtein instances (the list held by this entity, not a copy)
*/
public List<ProteinGroupHasProtein> getProteinGroupHasProteins() {
return proteinGroupHasProteins;
}
Expand All @@ -66,11 +78,14 @@ public void setProteinGroupHasProteins(List<ProteinGroupHasProtein> proteinGroup
this.proteinGroupHasProteins = proteinGroupHasProteins;
}


@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}

Protein protein = (Protein) o;

Expand Down
Expand Up @@ -243,6 +243,7 @@ create table project_has_user (
-- protein table: stores the mandatory protein sequence and an optional (nullable) description.
-- NOTE(review): description is limited to 200 characters; confirm that longer descriptions cannot be stored.
create table protein (
id bigint not null auto_increment,
protein_sequence longtext not null,
description varchar(200),
primary key (id)
);

Expand Down
Expand Up @@ -246,6 +246,7 @@ create table project_has_user (
-- protein table: stores the mandatory protein sequence and an optional (nullable) description.
-- NOTE(review): description is limited to 200 characters; confirm that longer descriptions cannot be stored.
create table protein (
id bigint not null auto_increment,
protein_sequence longtext not null,
description varchar(200),
primary key (id)
);

Expand Down

0 comments on commit 4bd855e

Please sign in to comment.