In [1]:
from test_data import gapped_sequences, sequences, motif_1
from benchmark import Benchmark
from skbio.sequence import DNASequence,  genetic_code
import skbio.io
skbio_023_benchmark = Benchmark("scikit-bio 0.2.3")
from qiime_default_reference import get_reference_sequences

In [2]:
from skbio import DNA
skbio_seqs = [DNA(seq, id=str(id)) for id_, seq in sequences]
skbio_gapped_seqs = [DNA(seq, id=str(id)) for id_, seq in gapped_sequences]

In [3]:
@skbio_023_benchmark
def object_creation():
    for id_, seq in sequences:
        DNA(seq, id=id_)

1000 loops, best of 3: 476 µs per loop


In [4]:
@skbio_023_benchmark
def object_creation_validate():
    for id_, seq in sequences:
        DNA(seq, id=id_, validate=True)

10 loops, best of 3: 38.9 ms per loop


In [5]:
@skbio_023_benchmark
def read_fasta_file():
    list(skbio.io.read(get_reference_sequences(), format='fasta'))

1 loops, best of 3: 621 ms per loop


In [6]:
@skbio_023_benchmark
def reverse_complement():
    for s in skbio_seqs:
        s.reverse_complement()

10 loops, best of 3: 83.5 ms per loop


In [7]:
@skbio_023_benchmark
def search_for_motif():
    for s in skbio_seqs:
        s.regex_iter("("+motif_1+")")

1000 loops, best of 3: 244 µs per loop


In [8]:
@skbio_023_benchmark
def translate():
    gen_code = genetic_code(1)
    for s in skbio_seqs:
        gen_code.translate(s)
        

1 loops, best of 3: 1.19 s per loop


In [9]:
# @skbio_023_benchmark
# def reverse_translate():
#     gen_code = genetic_code(1)
#     for s in skbio_seqs:
#         gen_code.translate(''.join(reversed(s)))

In [10]:
@skbio_023_benchmark
def filter_fasta_to_no_gaps():
    [s for s in skbio_gapped_seqs if not s.is_gapped()]

1000 loops, best of 3: 506 µs per loop


In [11]:
@skbio_023_benchmark
def degap_all():
    for s in skbio_gapped_seqs:
        s.degap()

1 loops, best of 3: 396 ms per loop


In [12]:
@skbio_023_benchmark
def kmer_count_5():
    for s in skbio_seqs:
        s.k_word_counts(5, overlapping=True)

1 loops, best of 3: 3.54 s per loop


In [13]:
@skbio_023_benchmark
def kmer_count_25():
    for s in skbio_seqs:
        s.k_word_counts(25, overlapping=True)

1 loops, best of 3: 3.55 s per loop


In [14]:
@skbio_023_benchmark
def validate_chars():
    for s in skbio_seqs:
        s.is_valid()

10 loops, best of 3: 38.1 ms per loop


In [15]:
@skbio_023_benchmark
def filter_invalid_seqs():
    [s for s in skbio_seqs if s.is_valid()]

10 loops, best of 3: 37.8 ms per loop


In [16]:
@skbio_023_benchmark
def expand_degenerates():
    for s in skbio_seqs:
        list(s.nondegenerates())

1 loops, best of 3: 276 ms per loop


In [17]:
@skbio_023_benchmark
def gc_content():
    for s in skbio_seqs:
        gc = 0
        for i in s:
            if i in ['G', 'C']:
                gc += 1
        gc * 1.0 / len(s)

10 loops, best of 3: 43 ms per loop


In [18]:
@skbio_023_benchmark
def find_motif_in_gapped():
    m_len = len(motif_1)
    for s in skbio_gapped_seqs:
        s_ungapped = str(s.degap())
        m = s.gap_maps()
        found = s_ungapped.find(motif_1)
        if found > 0:
            start = m[0][found]
            end   = start + m_len
            s[start:end]

1 loops, best of 3: 3.11 s per loop


In [19]:
@skbio_023_benchmark
def slice_at_midpoint():
    for e in skbio_seqs:
        e[:len(e)]

100 loops, best of 3: 1.83 ms per loop


In [20]:
skbio_023_benchmark.record("skbio023.csv")