Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Dak’s new unit test that demonstrates a problem when parsing FASTA files
- Loading branch information
1 parent
906eb6e
commit 9de8e36
Showing
3 changed files
with
97 additions
and
0 deletions.
There are no files selected for viewing
69 changes: 69 additions & 0 deletions
69
biojava-core/src/test/java/org/biojava/nbio/core/fasta/TestFASTAReader.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
package org.biojava.nbio.core.fasta; | ||
|
||
import java.io.InputStream; | ||
import java.util.LinkedHashMap; | ||
|
||
import static org.junit.Assert.* ; | ||
import static org.hamcrest.CoreMatchers.* ; | ||
|
||
import org.biojava.nbio.core.sequence.ProteinSequence; | ||
import org.biojava.nbio.core.sequence.compound.AminoAcidCompound; | ||
import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet; | ||
import org.biojava.nbio.core.sequence.io.FastaReader; | ||
import org.biojava.nbio.core.sequence.io.GenericFastaHeaderParser; | ||
import org.biojava.nbio.core.sequence.io.ProteinSequenceCreator; | ||
import org.biojava.nbio.core.sequence.io.util.ClasspathResource; | ||
import org.junit.Test; | ||
|
||
|
||
public class TestFASTAReader { | ||
|
||
@Test | ||
public void testProcessAll() throws Exception { | ||
ClasspathResource r = new ClasspathResource("org/biojava/nbio/core/fasta/P02768.fasta"); | ||
FastaReader<ProteinSequence, AminoAcidCompound> fastaReader = null ; | ||
try( InputStream inStream = r.getInputStream() ) { | ||
fastaReader = new FastaReader<ProteinSequence, AminoAcidCompound>( | ||
inStream, | ||
new GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound>(), | ||
new ProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet())); | ||
LinkedHashMap<String, ProteinSequence> sequences = fastaReader.process(); | ||
assertThat(sequences,is(notNullValue())); | ||
assertThat(sequences.size(),is(1)); | ||
} finally { | ||
if(fastaReader != null) fastaReader.close(); | ||
} | ||
} | ||
@Test | ||
public void testProcess1() throws Exception { | ||
ClasspathResource r = new ClasspathResource("org/biojava/nbio/core/fasta/P02768.fasta"); | ||
FastaReader<ProteinSequence, AminoAcidCompound> fastaReader = null ; | ||
try( InputStream inStream = r.getInputStream() ) { | ||
fastaReader = new FastaReader<ProteinSequence, AminoAcidCompound>( | ||
inStream, | ||
new GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound>(), | ||
new ProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet())); | ||
assertThat(fastaReader.process(1),is(notNullValue())); | ||
assertThat(fastaReader.process(1),is(nullValue())); | ||
} finally { | ||
if(fastaReader != null) fastaReader.close(); | ||
} | ||
} | ||
@Test | ||
public void testProcess1v2() throws Exception { | ||
ClasspathResource r = new ClasspathResource("org/biojava/nbio/core/fasta/TwoSequences.fasta"); | ||
FastaReader<ProteinSequence, AminoAcidCompound> fastaReader = null ; | ||
try( InputStream inStream = r.getInputStream() ) { | ||
fastaReader = new FastaReader<ProteinSequence, AminoAcidCompound>( | ||
inStream, | ||
new GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound>(), | ||
new ProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet())); | ||
assertThat(fastaReader.process(1),is(notNullValue())); | ||
assertThat(fastaReader.process(1),is(notNullValue())); | ||
assertThat(fastaReader.process(1),is(nullValue())); | ||
} finally { | ||
if(fastaReader != null) fastaReader.close(); | ||
} | ||
} | ||
|
||
} |
12 changes: 12 additions & 0 deletions
12
biojava-core/src/test/resources/org/biojava/nbio/core/fasta/P02768.fasta
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
>sp|P02768|ALBU_HUMAN Serum albumin OS=Homo sapiens GN=ALB PE=1 SV=2 | ||
MKWVTFISLLFLFSSAYSRGVFRRDAHKSEVAHRFKDLGEENFKALVLIAFAQYLQQCPF | ||
EDHVKLVNEVTEFAKTCVADESAENCDKSLHTLFGDKLCTVATLRETYGEMADCCAKQEP | ||
ERNECFLQHKDDNPNLPRLVRPEVDVMCTAFHDNEETFLKKYLYEIARRHPYFYAPELLF | ||
FAKRYKAAFTECCQAADKAACLLPKLDELRDEGKASSAKQRLKCASLQKFGERAFKAWAV | ||
ARLSQRFPKAEFAEVSKLVTDLTKVHTECCHGDLLECADDRADLAKYICENQDSISSKLK | ||
ECCEKPLLEKSHCIAEVENDEMPADLPSLAADFVESKDVCKNYAEAKDVFLGMFLYEYAR | ||
RHPDYSVVLLLRLAKTYETTLEKCCAAADPHECYAKVFDEFKPLVEEPQNLIKQNCELFE | ||
QLGEYKFQNALLVRYTKKVPQVSTPTLVEVSRNLGKVGSKCCKHPEAKRMPCAEDYLSVV | ||
LNQLCVLHEKTPVSDRVTKCCTESLVNRRPCFSALEVDETYVPKEFNAETFTFHADICTL | ||
SEKERQIKKQTALVELVKHKPKATKEQLKAVMDDFAAFVEKCCKADDKETCFAEEGKKLV | ||
AASQAALGL |
16 changes: 16 additions & 0 deletions
16
biojava-core/src/test/resources/org/biojava/nbio/core/fasta/TwoSequences.fasta
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
>sp|P02768|ALBU_HUMAN Serum albumin OS=Homo sapiens GN=ALB PE=1 SV=2 | ||
MKWVTFISLLFLFSSAYSRGVFRRDAHKSEVAHRFKDLGEENFKALVLIAFAQYLQQCPF | ||
EDHVKLVNEVTEFAKTCVADESAENCDKSLHTLFGDKLCTVATLRETYGEMADCCAKQEP | ||
ERNECFLQHKDDNPNLPRLVRPEVDVMCTAFHDNEETFLKKYLYEIARRHPYFYAPELLF | ||
FAKRYKAAFTECCQAADKAACLLPKLDELRDEGKASSAKQRLKCASLQKFGERAFKAWAV | ||
ARLSQRFPKAEFAEVSKLVTDLTKVHTECCHGDLLECADDRADLAKYICENQDSISSKLK | ||
ECCEKPLLEKSHCIAEVENDEMPADLPSLAADFVESKDVCKNYAEAKDVFLGMFLYEYAR | ||
RHPDYSVVLLLRLAKTYETTLEKCCAAADPHECYAKVFDEFKPLVEEPQNLIKQNCELFE | ||
QLGEYKFQNALLVRYTKKVPQVSTPTLVEVSRNLGKVGSKCCKHPEAKRMPCAEDYLSVV | ||
LNQLCVLHEKTPVSDRVTKCCTESLVNRRPCFSALEVDETYVPKEFNAETFTFHADICTL | ||
SEKERQIKKQTALVELVKHKPKATKEQLKAVMDDFAAFVEKCCKADDKETCFAEEGKKLV | ||
AASQAALGL | ||
>sp|P00698|LYSC_CHICK Lysozyme C OS=Gallus gallus GN=LYZ PE=1 SV=1 | ||
MRSLLILVLCFLPLAALGKVFGRCELAAAMKRHGLDNYRGYSLGNWVCAAKFESNFNTQA | ||
TNRNTDGSTDYGILQINSRWWCNDGRTPGSRNLCNIPCSALLSSDITASVNCAKKIVSDG | ||
NGMNAWVAWRNRCKGTDVQAWIRGCRL |