Skip to content

Commit

Permalink
GitHub issue 843 - GenBank parser (#919)
Browse files Browse the repository at this point in the history
* Github issue 843

* Changes

* Add transl_except to parser

* Test

* Test changes
  • Loading branch information
MaxGreil committed Mar 26, 2021
1 parent 15ce47d commit 4d1cf58
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,8 @@ private void parseFeatureTag(List<String[]> section) {
Qualifier q = new Qualifier(key, val.replace('\n', ' '));
gbFeature.addQualifier(key, q);
} else {
if (key.equalsIgnoreCase("translation")) {
if (key.equalsIgnoreCase("translation") || key.equals("anticodon")
|| key.equals("transl_except")) {
// strip spaces from sequence
val = val.replaceAll("\\s+", "");
Qualifier q = new Qualifier(key, val);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,7 @@
import org.slf4j.LoggerFactory;

import java.io.*;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.*;

import static org.hamcrest.CoreMatchers.is;
import static org.junit.Assert.*;
Expand Down Expand Up @@ -333,6 +330,35 @@ public void readSequenceWithZeroSpanFeature() throws IOException, CompoundNotFou
assertEquals(Strand.NEGATIVE, fLocation.getStrand());
}

/**
 * Regression test for https://github.com/biojava/biojava/issues/843:
 * BioJava failed to parse {@code anticodon} and {@code transl_except} feature qualifiers
 * when their values wrap onto a continuation line.
 *
 * <p>Reads the {@code /NC_018080.gb} test resource, takes the first parsed sequence and
 * checks that both qualifiers of its first {@code tRNA} feature are returned as a single
 * value with the line break and surrounding whitespace removed.
 *
 * @throws Exception if the resource cannot be read or parsed
 */
@Test
public void testGithub843() throws Exception {
    // Assert on the raw resource stream: an object created with `new` is never null, so
    // checking the wrapper would not detect a resource missing from the classpath.
    InputStream resource = this.getClass().getResourceAsStream("/NC_018080.gb");
    assertNotNull("Test resource /NC_018080.gb not found on the classpath", resource);
    CheckableInputStream inStream = new CheckableInputStream(resource);

    GenbankReader<DNASequence, NucleotideCompound> genbankDNA
            = new GenbankReader<>(
                    inStream,
                    new GenericGenbankHeaderParser<>(),
                    new DNASequenceCreator(DNACompoundSet.getDNACompoundSet())
            );

    Map<String, DNASequence> dnaSequences = genbankDNA.process();
    assertNotNull(dnaSequences);
    // Fail with a clear message instead of an exception when nothing was parsed.
    assertFalse("No sequence was parsed from /NC_018080.gb", dnaSequences.isEmpty());

    DNASequence dna = dnaSequences.values().iterator().next();
    assertNotNull(dna);

    FeatureInterface<AbstractSequence<NucleotideCompound>, NucleotideCompound> tRNAFeature
            = dna.getFeaturesByType("tRNA").get(0);

    // In the resource file both qualifier values are split across two lines.
    String anticodon = tRNAFeature.getQualifiers().get("anticodon").get(0).getValue();
    assertEquals("(pos:complement(1123552..1123554),aa:Leu,seq:caa)", anticodon);

    String translExcept = tRNAFeature.getQualifiers().get("transl_except").get(0).getValue();
    assertEquals("(pos:complement(1123552..1123554),aa:Leu)", translExcept);
}

/**
* Helper class to be able to verify the closed state of the input stream.
*/
Expand Down
28 changes: 28 additions & 0 deletions biojava-core/src/test/resources/NC_018080.gb
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
LOCUS NC_018080 6402658 bp DNA circular CON 27-OCT-2020
DEFINITION Pseudomonas aeruginosa DK2
ACCESSION
VERSION .0
KEYWORDS .
FEATURES Location/Qualifiers
source 1..6402658
/organism="Pseudomonas aeruginosa DK2"
/mol_type="genomic DNA"
/strain="DK2"
/db_xref="taxon:1093787"
gene complement(1123502..1123588)
/locus_tag="PADK2_RS05265"
/old_locus_tag="PADK2_t29613"
tRNA complement(1123502..1123588)
/locus_tag="PADK2_RS05265"
/old_locus_tag="PADK2_t29613"
/product="tRNA-Leu"
/inference="COORDINATES: profile:tRNAscan-SE:2.0.6"
/note="Derived by automated computational analysis using
gene prediction method: tRNAscan-SE."
/anticodon=(pos:complement(1123552..1123554),aa:Leu,
seq:caa)
/transl_except=(pos:complement(1123552..1123554),
aa:Leu)
ORIGIN
1 tttaaagaga ccggcgattc tagtgaaatc gaacgggcag gtcaatttcc aaccagcgat
//

0 comments on commit 4d1cf58

Please sign in to comment.