Skip to content

Commit

Permalink
Dak’s new unit test that demonstrates a problem when parsing FASTA files
Browse files Browse the repository at this point in the history
  • Loading branch information
andreasprlic committed Apr 19, 2017
1 parent 906eb6e commit 9de8e36
Show file tree
Hide file tree
Showing 3 changed files with 97 additions and 0 deletions.
@@ -0,0 +1,69 @@
package org.biojava.nbio.core.fasta;

import java.io.InputStream;
import java.util.LinkedHashMap;

import static org.junit.Assert.* ;
import static org.hamcrest.CoreMatchers.* ;

import org.biojava.nbio.core.sequence.ProteinSequence;
import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet;
import org.biojava.nbio.core.sequence.io.FastaReader;
import org.biojava.nbio.core.sequence.io.GenericFastaHeaderParser;
import org.biojava.nbio.core.sequence.io.ProteinSequenceCreator;
import org.biojava.nbio.core.sequence.io.util.ClasspathResource;
import org.junit.Test;


/**
 * Tests for {@link FastaReader} over protein FASTA resources loaded from the classpath.
 *
 * <p>Covers reading a whole file with {@code process()} and reading one record at a
 * time with {@code process(1)}; the tests expect {@code process(1)} to yield
 * {@code null} once every record has been consumed.
 */
public class TestFASTAReader {

    /** Classpath location of the fixture the tests expect to contain one sequence. */
    private static final String SINGLE_SEQUENCE_RESOURCE = "org/biojava/nbio/core/fasta/P02768.fasta";

    /** Classpath location of the fixture the tests expect to contain two sequences. */
    private static final String TWO_SEQUENCES_RESOURCE = "org/biojava/nbio/core/fasta/TwoSequences.fasta";

    /**
     * Builds a protein FASTA reader over the given stream, using the generic header
     * parser and the default amino acid compound set.
     *
     * @param inStream stream to read FASTA records from; the caller remains responsible
     *                 for closing it
     * @return a new reader bound to {@code inStream}
     * @throws Exception if the reader cannot be constructed
     */
    private static FastaReader<ProteinSequence, AminoAcidCompound> newProteinReader(InputStream inStream)
            throws Exception {
        return new FastaReader<ProteinSequence, AminoAcidCompound>(
                inStream,
                new GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound>(),
                new ProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet()));
    }

    /**
     * Reads the single-sequence fixture in one call to {@code process()} and expects
     * a non-null map holding exactly one entry.
     */
    @Test
    public void testProcessAll() throws Exception {
        ClasspathResource r = new ClasspathResource(SINGLE_SEQUENCE_RESOURCE);
        try (InputStream inStream = r.getInputStream()) {
            FastaReader<ProteinSequence, AminoAcidCompound> fastaReader = newProteinReader(inStream);
            try {
                LinkedHashMap<String, ProteinSequence> sequences = fastaReader.process();
                assertThat(sequences, is(notNullValue()));
                assertThat(sequences.size(), is(1));
            } finally {
                // Close the reader here so it is released before the stream it wraps.
                fastaReader.close();
            }
        }
    }

    /**
     * Reads the single-sequence fixture one record at a time: the first
     * {@code process(1)} must return a result, the second must return {@code null}.
     */
    @Test
    public void testProcess1() throws Exception {
        ClasspathResource r = new ClasspathResource(SINGLE_SEQUENCE_RESOURCE);
        try (InputStream inStream = r.getInputStream()) {
            FastaReader<ProteinSequence, AminoAcidCompound> fastaReader = newProteinReader(inStream);
            try {
                assertThat(fastaReader.process(1), is(notNullValue()));
                assertThat(fastaReader.process(1), is(nullValue()));
            } finally {
                // Close the reader here so it is released before the stream it wraps.
                fastaReader.close();
            }
        }
    }

    /**
     * Reads the two-sequence fixture one record at a time: the first two
     * {@code process(1)} calls must return a result, the third must return {@code null}.
     */
    @Test
    public void testProcess1v2() throws Exception {
        ClasspathResource r = new ClasspathResource(TWO_SEQUENCES_RESOURCE);
        try (InputStream inStream = r.getInputStream()) {
            FastaReader<ProteinSequence, AminoAcidCompound> fastaReader = newProteinReader(inStream);
            try {
                assertThat(fastaReader.process(1), is(notNullValue()));
                assertThat(fastaReader.process(1), is(notNullValue()));
                assertThat(fastaReader.process(1), is(nullValue()));
            } finally {
                // Close the reader here so it is released before the stream it wraps.
                fastaReader.close();
            }
        }
    }

}
@@ -0,0 +1,12 @@
>sp|P02768|ALBU_HUMAN Serum albumin OS=Homo sapiens GN=ALB PE=1 SV=2
MKWVTFISLLFLFSSAYSRGVFRRDAHKSEVAHRFKDLGEENFKALVLIAFAQYLQQCPF
EDHVKLVNEVTEFAKTCVADESAENCDKSLHTLFGDKLCTVATLRETYGEMADCCAKQEP
ERNECFLQHKDDNPNLPRLVRPEVDVMCTAFHDNEETFLKKYLYEIARRHPYFYAPELLF
FAKRYKAAFTECCQAADKAACLLPKLDELRDEGKASSAKQRLKCASLQKFGERAFKAWAV
ARLSQRFPKAEFAEVSKLVTDLTKVHTECCHGDLLECADDRADLAKYICENQDSISSKLK
ECCEKPLLEKSHCIAEVENDEMPADLPSLAADFVESKDVCKNYAEAKDVFLGMFLYEYAR
RHPDYSVVLLLRLAKTYETTLEKCCAAADPHECYAKVFDEFKPLVEEPQNLIKQNCELFE
QLGEYKFQNALLVRYTKKVPQVSTPTLVEVSRNLGKVGSKCCKHPEAKRMPCAEDYLSVV
LNQLCVLHEKTPVSDRVTKCCTESLVNRRPCFSALEVDETYVPKEFNAETFTFHADICTL
SEKERQIKKQTALVELVKHKPKATKEQLKAVMDDFAAFVEKCCKADDKETCFAEEGKKLV
AASQAALGL
@@ -0,0 +1,16 @@
>sp|P02768|ALBU_HUMAN Serum albumin OS=Homo sapiens GN=ALB PE=1 SV=2
MKWVTFISLLFLFSSAYSRGVFRRDAHKSEVAHRFKDLGEENFKALVLIAFAQYLQQCPF
EDHVKLVNEVTEFAKTCVADESAENCDKSLHTLFGDKLCTVATLRETYGEMADCCAKQEP
ERNECFLQHKDDNPNLPRLVRPEVDVMCTAFHDNEETFLKKYLYEIARRHPYFYAPELLF
FAKRYKAAFTECCQAADKAACLLPKLDELRDEGKASSAKQRLKCASLQKFGERAFKAWAV
ARLSQRFPKAEFAEVSKLVTDLTKVHTECCHGDLLECADDRADLAKYICENQDSISSKLK
ECCEKPLLEKSHCIAEVENDEMPADLPSLAADFVESKDVCKNYAEAKDVFLGMFLYEYAR
RHPDYSVVLLLRLAKTYETTLEKCCAAADPHECYAKVFDEFKPLVEEPQNLIKQNCELFE
QLGEYKFQNALLVRYTKKVPQVSTPTLVEVSRNLGKVGSKCCKHPEAKRMPCAEDYLSVV
LNQLCVLHEKTPVSDRVTKCCTESLVNRRPCFSALEVDETYVPKEFNAETFTFHADICTL
SEKERQIKKQTALVELVKHKPKATKEQLKAVMDDFAAFVEKCCKADDKETCFAEEGKKLV
AASQAALGL
>sp|P00698|LYSC_CHICK Lysozyme C OS=Gallus gallus GN=LYZ PE=1 SV=1
MRSLLILVLCFLPLAALGKVFGRCELAAAMKRHGLDNYRGYSLGNWVCAAKFESNFNTQA
TNRNTDGSTDYGILQINSRWWCNDGRTPGSRNLCNIPCSALLSSDITASVNCAKKIVSDG
NGMNAWVAWRNRCKGTDVQAWIRGCRL

0 comments on commit 9de8e36

Please sign in to comment.