In [87]:
from test_data import gapped_sequences, sequences, motif_1
from benchmark import Benchmark
from skbio.sequence import genetic_code, DNA
from qiime_default_reference import get_reference_sequences

from skbio.io import read

# Benchmark object used as the decorator on every benchmark function in this
# notebook; it is constructed with the label "scikit-bio master".
skbio_benchmark = Benchmark("scikit-bio master")
# Wrap the imported motif pattern in parentheses (a regex group).
motif_1_regex = ''.join(('(', motif_1, ')'))


In [88]:
from skbio import DNA

# Pre-built DNA objects shared by the benchmarks that operate on existing
# sequences. Each (id_, seq) pair becomes one DNA object whose id is the
# string form of id_.
# The loop variable is `id_`; using the builtin `id` here would give every
# sequence the same meaningless id ("<built-in function id>").
skbio_seqs = [DNA(seq, id=str(id_)) for id_, seq in sequences]
skbio_gapped_seqs = [DNA(seq, id=str(id_)) for id_, seq in gapped_sequences]

In [89]:
@skbio_benchmark
def object_creation():
    """Construct a DNA object for every (id, sequence) pair with validate=False."""
    for seq_id, raw_seq in sequences:
        DNA(raw_seq, id=seq_id, validate=False)

100 loops, best of 3: 3.08 ms per loop


In [90]:
@skbio_benchmark
def object_creation_validate():
    """Construct a DNA object for every (id, sequence) pair using the constructor's default validation setting."""
    for seq_id, raw_seq in sequences:
        DNA(raw_seq, id=seq_id)

100 loops, best of 3: 11.3 ms per loop


In [91]:
@skbio_benchmark
def reverse_complement():
    """Call reverse_complement() on every pre-built DNA object; results are discarded."""
    for dna in skbio_seqs:
        dna.reverse_complement()

1 loops, best of 3: 7.84 s per loop


In [92]:
@skbio_benchmark
def degap_all():
    """Call degap() on every pre-built gapped DNA object; results are discarded."""
    for gapped in skbio_gapped_seqs:
        gapped.degap()

10 loops, best of 3: 39.4 ms per loop


In [93]:
#John
# Genetic code looked up once, outside the timed function, so only translation
# itself is inside the benchmark body.
# NOTE(review): table id 1 is presumably the standard genetic code - confirm.
sgc = genetic_code(1)
@skbio_benchmark
def translate():
    """Translate every pre-built DNA object with the module-level genetic code.

    NOTE(review): the second positional argument (1) is passed through to
    sgc.translate unchanged; its meaning is not visible here - confirm.
    """
    for dna in skbio_seqs:
        sgc.translate(dna, 1)
 

1 loops, best of 3: 3.94 s per loop


In [94]:
@skbio_benchmark
def search_for_motif():
    """Exhaust slices_from_regex(motif_1_regex) on every pre-built DNA object."""
    for dna in skbio_seqs:
        # list() forces the result to be fully consumed.
        list(dna.slices_from_regex(motif_1_regex))

100 loops, best of 3: 16.4 ms per loop


In [None]:
@skbio_benchmark
def kmer_count_5():
    """Call kmer_frequencies(5) on every pre-built DNA object; results are discarded."""
    for dna in skbio_seqs:
        dna.kmer_frequencies(5)

1 loops, best of 3: 11.6 s per loop


In [None]:
@skbio_benchmark
def kmer_count_25():
    """Call kmer_frequencies(25) on every pre-built DNA object; results are discarded."""
    for dna in skbio_seqs:
        dna.kmer_frequencies(25)

In [None]:
@skbio_benchmark
def validate_chars():
    """Re-wrap every pre-built DNA object in a new DNA(...) call using the constructor's default settings."""
    for dna in skbio_seqs:
        DNA(dna)

In [None]:
# Sequences accepted by the most recent filter_invalid_seqs() invocation.
l = []
@skbio_benchmark
def filter_invalid_seqs():
    """Rebuild each pre-built sequence via DNA(...), keeping those that construct without ValueError.

    Accepted objects are appended to the module-level list `l`. The list is
    emptied in place at the start of every invocation: the timing outputs
    recorded in this notebook show the benchmark body being run many times,
    and without the reset `l` would grow on each run and keep every
    constructed object alive.
    """
    del l[:]
    for s in skbio_seqs:
        try:
            l.append(DNA(s))
        except ValueError:
            # Deliberate: a sequence that fails construction is filtered out.
            pass

In [None]:
@skbio_benchmark
def expand_degenerates():
    """Exhaust the result of expand_degenerates() on every pre-built DNA object."""
    for dna in skbio_seqs:
        # list() forces the result to be fully consumed.
        list(dna.expand_degenerates())

In [None]:
@skbio_benchmark
def gc_content():
    """Compute (count of "G" + count of "C") / length for every pre-built DNA object; results are discarded."""
    for dna in skbio_seqs:
        gc_total = dna.count("G") + dna.count("C")
        # float() keeps this a true division regardless of Python version.
        float(gc_total) / len(dna)

In [None]:
@skbio_benchmark
def find_motif_in_gapped():
    """Exhaust slices_from_regex on every gapped DNA object, passing its own gaps() as `ignore`."""
    for gapped in skbio_gapped_seqs:
        # list() forces the result to be fully consumed.
        list(gapped.slices_from_regex(motif_1_regex, ignore=gapped.gaps()))

In [None]:
@skbio_benchmark
def read_fasta_file():
    """Read whatever get_reference_sequences() returns as FASTA and exhaust the result."""
    # NOTE(review): presumably get_reference_sequences() returns a file path - confirm.
    reference = get_reference_sequences()
    list(read(reference, format='fasta'))

In [None]:
# @skbio_benchmark
# def reverse_translate():
#     pass

In [None]:
# @skbio_benchmark
# def filter_fasta_to_no_gaps():
#     pass

In [None]:

# @skbio_benchmark
# def rc_find_motif():
#     pass

In [None]:
# Final step of the notebook: call record() on the benchmark object with the
# filename "skbio.csv".
# NOTE(review): presumably this writes the collected timings to that file as
# CSV - confirm against benchmark.Benchmark.record.
skbio_benchmark.record("skbio.csv")