In [4]:
from test_data import gapped_sequences, sequences, motif_1
from benchmark import Benchmark
from skbio.sequence import genetic_code, DNA
from qiime_default_reference import get_reference_sequences

from skbio.io import read

# Benchmark collector for this notebook; it is applied as a decorator to every
# benchmark function below and its results are recorded in the final cell.
skbio_benchmark = Benchmark("scikit-bio master")
# Wrap the motif in parentheses so it forms a single regex group; this pattern
# is passed to `slices_from_regex` in the motif-search benchmarks.
motif_1_regex = '(' + motif_1 + ')'


In [5]:
from skbio import DNA
# Pre-built DNA objects shared by the benchmarks below that iterate over
# `skbio_seqs` / `skbio_gapped_seqs`.
# NOTE: the identifier must come from the loop variable `id_`. Using the
# builtin `id` here would stringify the builtin function itself and give every
# sequence the same meaningless id ("<built-in function id>").
skbio_seqs = [DNA(seq, id=str(id_)) for id_, seq in sequences]
skbio_gapped_seqs = [DNA(seq, id=str(id_)) for id_, seq in gapped_sequences]

In [6]:
@skbio_benchmark
def object_creation():
    """Build a DNA object for every (id, sequence) pair with validate=False."""
    for seq_id, raw_seq in sequences:
        DNA(raw_seq, id=seq_id, validate=False)

100 loops, best of 3: 4.48 ms per loop


In [7]:
@skbio_benchmark
def object_creation_validate():
    """Build a DNA object for every (id, sequence) pair using DNA's defaults.

    Unlike `object_creation`, `validate=False` is not passed here.
    """
    for seq_id, raw_seq in sequences:
        DNA(raw_seq, id=seq_id)

100 loops, best of 3: 14.2 ms per loop


In [8]:
@skbio_benchmark
def reverse_complement():
    """Call `reverse_complement()` on each pre-built DNA object."""
    for dna in skbio_seqs:
        dna.reverse_complement()

1 loops, best of 3: 13.3 s per loop


In [9]:
@skbio_benchmark
def degap_all():
    """Call `degap()` on each pre-built gapped DNA object."""
    for gapped in skbio_gapped_seqs:
        gapped.degap()

10 loops, best of 3: 43.6 ms per loop


In [10]:
# John
# NOTE(review): the tag above is kept from the original cell; presumably it
# names the person responsible for this benchmark -- confirm.
# Genetic code with id 1, built once outside the timed function.
sgc = genetic_code(1)
@skbio_benchmark
def translate():
    """Translate each pre-built DNA object with the shared genetic code.

    NOTE(review): the meaning of the second positional argument (1) to
    `sgc.translate` is not visible here -- confirm against the skbio API.
    """
    for dna in skbio_seqs:
        sgc.translate(dna, 1)
 

1 loops, best of 3: 6.71 s per loop


In [11]:
@skbio_benchmark
def search_for_motif():
    """Exhaust `slices_from_regex` for the motif pattern on each sequence."""
    for dna in skbio_seqs:
        # list() forces the result to be fully consumed.
        list(dna.slices_from_regex(motif_1_regex))

10 loops, best of 3: 24.4 ms per loop


In [12]:
@skbio_benchmark
def kmer_count_5():
    """Call `kmer_frequencies(5)` on each pre-built DNA object."""
    for dna in skbio_seqs:
        dna.kmer_frequencies(5)

1 loops, best of 3: 18.4 s per loop


In [13]:
@skbio_benchmark
def kmer_count_25():
    """Call `kmer_frequencies(25)` on each pre-built DNA object."""
    for dna in skbio_seqs:
        dna.kmer_frequencies(25)

1 loops, best of 3: 19.4 s per loop


In [14]:
@skbio_benchmark
def validate_chars():
    """Pass each existing DNA object back through the DNA constructor.

    No `validate=False` is given, so the constructor's default applies
    (presumably validation is on by default -- confirm).
    """
    for dna in skbio_seqs:
        DNA(dna)

100 loops, best of 3: 19.2 ms per loop


In [15]:
# Holds the sequences that were constructed successfully by the most recent
# call to `filter_invalid_seqs`.
l = []
@skbio_benchmark
def filter_invalid_seqs():
    """Collect into `l` every sequence that DNA() accepts, skipping the rest.

    The list is emptied in place at the start of each call. Without this,
    repeated invocations would keep appending to the same module-level list,
    so it would grow without bound and retain every object ever built.
    """
    del l[:]  # in-place reset; keeps the module-level name `l` bound to the same list
    for s in skbio_seqs:
        try:
            l.append(DNA(s))
        except ValueError:
            # A ValueError from the constructor marks the sequence as invalid;
            # dropping it is the filtering step, not an ignored failure.
            pass

100 loops, best of 3: 20.4 ms per loop


In [16]:
@skbio_benchmark
def expand_degenerates():
    """Exhaust `expand_degenerates()` for each pre-built DNA object."""
    for dna in skbio_seqs:
        # list() forces the result to be fully consumed.
        list(dna.expand_degenerates())

1 loops, best of 3: 13.1 s per loop


In [17]:
@skbio_benchmark
def gc_content():
    """Compute (count of "G" + count of "C") / length for each sequence.

    The ratio is computed and discarded; nothing is returned.
    """
    for dna in skbio_seqs:
        gc_total = dna.count("G") + dna.count("C")
        # float() keeps this a true division regardless of Python version.
        float(gc_total) / len(dna)

10 loops, best of 3: 18.3 ms per loop


In [18]:
@skbio_benchmark
def find_motif_in_gapped():
    """Exhaust the motif regex search on each gapped sequence.

    The result of `gaps()` for the sequence is passed as `ignore`.
    """
    for gapped in skbio_gapped_seqs:
        gaps = gapped.gaps()
        list(gapped.slices_from_regex(motif_1_regex, ignore=gaps))

10 loops, best of 3: 90.1 ms per loop


In [19]:
@skbio_benchmark
def read_fasta_file():
    """Read the default reference sequences as FASTA and consume all records."""
    reference = get_reference_sequences()
    # list() forces every record produced by `read` to be consumed.
    list(read(reference, format='fasta'))

1 loops, best of 3: 1.87 s per loop


In [20]:
@skbio_benchmark
def slice_at_midpoint():
    """Slice each pre-built DNA object from its start up to its midpoint.

    The stop index is `len(e) // 2`. Using `len(e)` as the stop would slice
    the entire sequence, which is not what this benchmark's name describes.
    """
    for e in skbio_seqs:
        # Floor division keeps the index an int on both Python 2 and 3.
        e[:len(e) // 2]

100 loops, best of 3: 11.1 ms per loop


In [21]:
# @skbio_benchmark
# def reverse_translate():
#     pass

In [22]:
# @skbio_benchmark
# def filter_fasta_to_no_gaps():
#     pass

In [23]:

# @skbio_benchmark
# def rc_find_motif():
#     pass

In [24]:
# Hand the collected benchmark results to `record` with the target "skbio.csv"
# (presumably written as a CSV file relative to the working directory -- confirm).
skbio_benchmark.record("skbio.csv")