In [2]:
from test_data import gapped_sequences, sequences, motif1
from cogent.core.sequence import NucleicAcidSequence, DnaSequence
from cogent.core.alphabet import AlphabetError
from cogent.core.genetic_code import DEFAULT as standard_code
from benchmark import Benchmark
cogent_benchmark = Benchmark("pycogent")

cogent_sequences = [DnaSequence(seq) for _, seq in sequences]
cogent_gapped_sequences = [DnaSequence(seq) for _, seq in gapped_sequences]

In [40]:
@cogent_benchmark
def object_creation():
    for id_, seq in sequences:
        DnaSequence(seq)

100 loops, best of 3: 3.17 ms per loop


In [41]:
@cogent_benchmark
def object_creation_validate():
    for id_, seq in sequences:
        DNA.makeSequence(seq)

100 loops, best of 3: 3.24 ms per loop


In [42]:
@cogent_benchmark
def reverse_complement():
    for e in cogent_sequences:
        DnaSequence(e).rc()

100 loops, best of 3: 13.7 ms per loop


In [43]:
@cogent_benchmark
def degap_all():
    for e in cogent_gapped_sequences:
        DnaSequence(e).degap()

100 loops, best of 3: 15.9 ms per loop


In [40]:
@cogent_benchmark
def translate():
    for e in cogent_sequences:
        try:
            e.getTranslation()
        except AlphabetError:
            pass

100 loops, best of 3: 10.2 ms per loop


In [None]:
@cogent_benchmark
def search_for_motif():
    for e in cogent_sequences:
        str(e).find(motif)

In [20]:
from collections import defaultdict

@cogent_benchmark
def kmer_count_5():
    for e in cogent_sequences:
        d = defaultdict(int)
        for i in range(len(e)-5+1):
            d[e[i:i+5]] += 1

1 loops, best of 3: 19.2 s per loop


In [21]:
@cogent_benchmark
def kmer_count_25():
    for e in cogent_sequences:
        d = defaultdict(int)
        for i in range(len(e)-25+1):
            d[e[i:i+25]] += 1

1 loops, best of 3: 19 s per loop


In [22]:
x = [DnaSequence(seq, check=False) for _, seq in sequences]

@cogent_benchmark
def validate_chars():
    for e in x:
        e.isValid()

10 loops, best of 3: 39.3 ms per loop


In [9]:
@cogent_benchmark
def filter_invalid_seqs():
    l = []
    for id_, seq in sequences:
        try:
            l.append(DnaSequence(seq, check=True))
        except AlphabetError:
            pass

10 loops, best of 3: 23.4 ms per loop


In [None]:
@cogent_benchmark
def rc_find_motif():
    pass

In [24]:
from cogent.seqsim.sequence_generators import SequenceGenerator, IUPAC_DNA

def expand_degeneracies(raw_primer):
    """Cut from PrimerProspector: 
    http://sourceforge.net/p/pprospector/code/HEAD/tree/tags/1.0.1-release/primerprospector/check_primer_barcode_dimers.py
    """
    
    primers = SequenceGenerator(template=raw_primer, alphabet=IUPAC_DNA)
    expanded_primers = []
    for primer in primers:
        expanded_primers.append(primer)
        
    return expanded_primers

@cogent_benchmark
def expand_degenerates():
    for e in cogent_sequences:
        expand_degeneracies(e)
        

1 loops, best of 3: 562 ms per loop


In [13]:
@cogent_benchmark
def gc_content():
    for e in cogent_sequences:
        (e.count('G') + e.count('C')) / len(e)

100 loops, best of 3: 4.32 ms per loop


In [25]:
@cogent_benchmark
def find_motif_in_gapped():
    for e in cogent_sequences:
        gapped, ungapped = e.gapMaps()
        degapped_e = e.degap()
        start = str(degapped_e).find(motif)
        e[ungapped[start]:ungapped[start+len(motif)]]
        

NameError: global name 'motif' is not defined

In [None]:
@cogent_benchmark
def read_fasta_file():
    pass

In [None]:
cogent_sequences[0][1].translate

['ACGTTT',
 'ACGTTC',
 'ACGTTA',
 'ACGTTG',
 'ACGTCT',
 'ACGTCC',
 'ACGTCA',
 'ACGTCG']

In [45]:
cogent_benchmark.record("cogent.csv")