Skip to content

Commit

Permalink
Merge 1a5a317 into 8407b40
Browse files Browse the repository at this point in the history
  • Loading branch information
IsmailM committed Nov 7, 2014
2 parents 8407b40 + 1a5a317 commit 9b81de0
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 27 deletions.
70 changes: 48 additions & 22 deletions lib/bionode-seq.js
Expand Up @@ -78,35 +78,61 @@ function mirrorAndLowerCaseMatrices(matricesArray) {

// ### Check sequence type

// Takes a sequence string and checks if it's DNA, RNA or protein. Follows [IUPAC notation](http://en.wikipedia.org/wiki/Nucleic_acid_notation#IUPAC_notation) which allows ambiguous sequence notation. In this case the sequence is labelled as ambiguous nucleotide rather than amino acid sequence.
// Takes a sequence string and checks if it's DNA, RNA or protein (returns 'dna', 'rna', 'protein' or undefined). Other optional arguments include threshold, length and index (see below).
//
// seq.checkType("ATGACCCTGAGAAGAGCACCG");
// => "dna"
// seq.checkType("AUGACCCUGAAGGUGAAUGAA");
// => "rna"
// seq.checkType("MAYKSGKRPTFFEVFKAHCSDS");
// => "protein"
// seq.checkType("AMTGACCCTGAGAAGAGCACCG");
// => "ambiguousDna"
// seq.checkType("AMUGACCCUGAAGGUGAAUGAA");
// => "ambiguousRna"
seq.checkType = function(sequence) {
var acgMatch = sequence.match(/[ACG]/i);
var tMatch = sequence.match(/[T]/i);
var nMatch = sequence.match(/[N]/i);
var uMatch = sequence.match(/[U]/i);
var potentialNucleotideMatch = sequence.match(/[WSMKRYBDHV]/i);
var proteinMatch = sequence.match(/[EFIJLOPQZX\*]/i);
if (proteinMatch) {
return "protein";
} else if (acgMatch && !potentialNucleotideMatch && !uMatch) {
return "dna";
} else if (acgMatch && potentialNucleotideMatch && !uMatch) {
return "ambiguousDna";
} else if (acgMatch && !potentialNucleotideMatch && uMatch && !tMatch) {
return "rna";
} else if (acgMatch && potentialNucleotideMatch && uMatch && !tMatch) {
return "ambiguousRna";
// seq.checkType("1234567891234567ATGACC");
// => undefined
//
// By default, the method has a 90% threshold, however, this can be altered as required.
//
// seq.checkType("1234567891234567ATGACC", 0.8);
// => undefined
// seq.checkType("--------MAYKSGKRPTFFEV", 0.6);
// => "protein"
//
// The length value specifies how many characters of the sequence are analysed (default 10000). If your sequence is extremely long, you may want to analyse a shorter sub-section to reduce the computational burden.
//
// seq.checkType('A Very Long Sequence', 0.9, 1000);
// => Type based on the first 1000 characters
//
// The index value specifies the (1-based) position in the sequence from which the analysis starts. This is useful if, for example, you know that there are a lot of gaps at the start of the sequence.
//
// seq.checkType("--------MAYKSGKRPTFFEV", 0.9, 10000, 8);
// => "protein"
//
// Classifies `sequence` as 'dna', 'rna' or 'protein' from the fraction of
// characters in the analysed window that belong to each alphabet. Returns
// undefined when no alphabet reaches `threshold`.
//
// - `threshold` (default 0.9): minimum fraction of matching characters.
// - `length` (default 10000): maximum number of characters to analyse.
// - `index` (default 1): 1-based position at which the analysed window starts.
seq.checkType = function (sequence, threshold, length, index) {
  if (threshold === undefined) {
    threshold = 0.9
  }
  if (length === undefined) {
    length = 10000
  }
  if (index === undefined) {
    index = 1
  }

  // `index` is 1-based. `slice` takes an END position, so the end must be
  // offset by the start for `length` to mean "number of characters analysed".
  var start = index - 1
  var sample = sequence.slice(start, start + length)
  var total = sample.length

  // Nothing to classify in an empty window (also avoids 0/0 = NaN ratios).
  if (total === 0) {
    return undefined
  }

  // Fraction of the window matched by each (case-insensitive) character class.
  var acgRatio = (sample.match(/[ACG]/gi) || []).length / total
  var tRatio = (sample.match(/[T]/gi) || []).length / total
  var uRatio = (sample.match(/[U]/gi) || []).length / total
  var proteinRatio = (sample.match(/[ARNDCQEGHILKMFPSTWYV\*]/gi) || []).length / total

  // Nucleotides are tested first: A, C, G and T are also amino-acid letters,
  // so a nucleotide sequence would otherwise pass the protein check as well.
  if ((acgRatio + tRatio) >= threshold || (acgRatio + uRatio) >= threshold) {
    // T marks DNA and U marks RNA; a tie (e.g. neither present) counts as DNA.
    return tRatio >= uRatio ? 'dna' : 'rna'
  }

  if (proteinRatio >= threshold) {
    return 'protein'
  }
}

Expand Down
8 changes: 4 additions & 4 deletions test/bionode-seq.js
Expand Up @@ -4,10 +4,10 @@ var data = require('./data')

// checkType classifies by character-frequency threshold and only ever returns
// 'dna', 'rna', 'protein' or undefined, so there are no 'ambiguous*' results
// to assert on. The number of assertions must equal the t.plan() count.
test("check sequence type", function(t) {
  t.plan(5)
  t.equal(seq.checkType(data.dnaSequence), 'dna', "should return strings 'dna' for sequence following DNA sequences.")
  t.equal(seq.checkType(data.rnaSequence), 'rna', "should return strings 'rna' for sequence following RNA sequences.")
  // Ambiguity codes are tolerated as long as they stay under the default 10% noise allowance.
  t.equal(seq.checkType(data.ambiguousDnaSequence), 'dna', "should return strings 'dna' for ambiguous DNA sequence (if within 0.9 threshold).")
  t.equal(seq.checkType(data.junkSequence), undefined, "should return strings 'undefined' for JUNK sequence.")
  t.equal(seq.checkType(data.exon1Protein), 'protein', "should return strings 'protein' for protein sequence")
})

Expand Down

0 comments on commit 9b81de0

Please sign in to comment.