Skip to content

Commit

Permalink
Add IUPAC amino acid and nucleotide base alphabet with ambiguity.
Browse files Browse the repository at this point in the history
  • Loading branch information
heuermh committed Apr 29, 2022
1 parent 84044cc commit c96a992
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ trait Alphabet {
/**
 * A single alphabet entry pairing a character with its complement.
 *
 * @param label the character that identifies this symbol
 * @param complement the character that is the complement of `label`
 */
case class Symbol(
  label: Char,
  complement: Char
)

/**
* The standard DNA alphabet with A,T,C, and G
* The standard DNA alphabet with A,T,C, and G.
*/
class DNAAlphabet extends Alphabet {

Expand All @@ -113,9 +113,36 @@ class DNAAlphabet extends Alphabet {
)
}

/**
 * The IUPAC nucleotide base alphabet, including ambiguity codes.
 *
 * Only the 15 nucleotide codes are defined here: the four unambiguous bases
 * plus eleven ambiguity codes. No amino acid codes are included. Each symbol
 * is paired with the code that denotes the complementary set of bases, so
 * complementing twice returns the original symbol.
 */
class IUPACAlphabet extends Alphabet {

  // Labels are matched case-insensitively, so lower-case sequence input is accepted.
  override val caseSensitive: Boolean = false

  override val symbols: Seq[Symbol] = Seq(
    // unambiguous bases
    Symbol('A', 'T'),
    Symbol('T', 'A'),
    Symbol('G', 'C'),
    Symbol('C', 'G'),
    // two-base ambiguity codes
    Symbol('M', 'K'), // M = A or C (amino)      <-> K = G or T (keto)
    Symbol('R', 'Y'), // R = A or G (purine)     <-> Y = C or T (pyrimidine)
    Symbol('W', 'W'), // W = A or T (weak), self-complementary
    Symbol('S', 'S'), // S = C or G (strong), self-complementary
    Symbol('Y', 'R'), // Y = C or T (pyrimidine) <-> R = A or G (purine)
    Symbol('K', 'M'), // K = G or T (keto)       <-> M = A or C (amino)
    // three-base ambiguity codes
    Symbol('V', 'B'), // V = A, C or G (not T)   <-> B = C, G or T (not A)
    Symbol('H', 'D'), // H = A, C or T (not G)   <-> D = A, G or T (not C)
    Symbol('D', 'H'), // D = A, G or T (not C)   <-> H = A, C or T (not G)
    Symbol('B', 'V'), // B = C, G or T (not A)   <-> V = A, C or G (not T)
    // any base
    Symbol('N', 'N')  // N = A, C, G or T, self-complementary
  )
}

/**
 * Singleton holding one shared instance of every supported alphabet.
 */
object Alphabet {

  /** Shared instance of the standard DNA alphabet. */
  val dna: DNAAlphabet = new DNAAlphabet()

  /** Shared instance of the IUPAC alphabet with ambiguity codes. */
  val iupac: IUPACAlphabet = new IUPACAlphabet()
}
Original file line number Diff line number Diff line change
Expand Up @@ -101,5 +101,12 @@ class AlphabetSuite extends ADAMFunSuite {
assert(4 == Alphabet.dna.size)
assert("CGNATAT" == Alphabet.dna.reverseComplement("ATATxcg", (c: Char) => Symbol('N', 'N')))
}

test("IUPAC alphabet") {
  val iupac = Alphabet.iupac

  // four unambiguous bases plus eleven ambiguity codes
  assert(15 == iupac.size)

  // lower-case input is accepted and complemented to upper-case symbols
  assert("CGCGATAT" == iupac.reverseComplement("atatcgcg"))

  // a character outside the alphabet ('x') is passed through unchanged
  assert("CGxATAT" == iupac.reverseComplement("ATATxcg"))

  // every symbol in the alphabet maps to its complement, in reversed order
  assert("NVHDBMRSWYKACGT" == iupac.reverseComplement("ACGTMRWSYKVHDBN"))
}
}

0 comments on commit c96a992

Please sign in to comment.