In [None]:
from util import *
from search import search
from agnostic_search import search as agnostic_search
from prototype import *
from random import shuffle

In [None]:
# Build the benchmark inputs: sample sequences from the FASTA file, digest
# them into peptides, and turn every peptide into a spectrum.
seqs = load_fasta_records_as_str("uniprot_sprot.fasta")
# In-place shuffle followed by a slice keeps a random sample of at most
# 10000 sequences. No seed is set in this cell, so the sample can differ
# from run to run.
shuffle(seqs)
seqs = seqs[:10000]
peps = collapse_second_order_list(map(digest_trypsin, seqs))
# Discard every peptide that contains an 'X'.
peps = [pep for pep in peps if 'X' not in pep]
# np.unique leaves each spectrum as a sorted array of its distinct values.
# specs[k] is derived from peps[k], so the two lists stay index-aligned.
specs = [np.unique(generate_spectrum_and_list_mz(pep)) for pep in peps]
alphabet = AMINO_MASS_MONO
tolerance = 2 * AVERAGE_MASS_DIFFERENCE

## Timing and Sanity Checks

In [None]:
from time import time
def duration(fn, specs, *args):
    """Return the total elapsed seconds spent calling ``fn`` on every spectrum.

    Parameters
    ----------
    fn : callable
        Invoked as ``fn(spec, *args)``; its return value is discarded.
    specs : iterable
        Items passed to ``fn`` one at a time, in iteration order.
    *args
        Extra positional arguments forwarded unchanged to every call.

    Returns
    -------
    float
        Elapsed seconds for the whole loop, loop overhead included.
    """
    # perf_counter is a monotonic, high-resolution clock meant for measuring
    # intervals; a difference of time.time() readings can be skewed (or even
    # go negative) when the system clock is adjusted mid-measurement.
    from time import perf_counter

    init_t = perf_counter()
    for spec in specs:
        fn(spec, *args)
    return perf_counter() - init_t

In [None]:
# Elapsed seconds for calling old_search_overlap(spec, tolerance, alphabet)
# once per spectrum in `specs`; the value is shown as the cell output.
duration(old_search_overlap,specs,tolerance,alphabet)

In [None]:
# Elapsed seconds for calling old_search_overlap_alt(spec, tolerance, alphabet)
# once per spectrum in `specs`; the value is shown as the cell output.
duration(old_search_overlap_alt,specs,tolerance,alphabet)

In [None]:
# Elapsed seconds for calling search(spec, "overlap", alphabet, tolerance)
# once per spectrum in `specs`. Note the argument order: `search` takes the
# alphabet before the tolerance, unlike the old_search_* calls.
duration(search,specs,"overlap",alphabet,tolerance)

In [None]:
# Elapsed seconds for calling search(spec, "overlap_alt", alphabet, tolerance)
# once per spectrum in `specs`. Note the argument order: `search` takes the
# alphabet before the tolerance, unlike the old_search_* calls.
duration(search,specs,"overlap_alt",alphabet,tolerance)

## Validation

In [None]:
from random import randint

In [None]:
# Spot-check the pivot estimate on one randomly chosen spectrum and the
# peptide stored at the same index.
i = randint(0, len(specs) - 1)
spec, pep = specs[i], peps[i]

# Estimated pivot for the spectrum.
p = locate_pivot_point(spec, tolerance)
print(p)

# Symmetry score about the estimated pivot, printed next to (n - 1) / n for
# a spectrum of n values (presumably the score to compare against - TODO confirm).
symmetry = measure_mirror_symmetry(spec, p)
print(symmetry, (len(spec) - 1) / len(spec))

# Reference pivot: mean of the first two b-series entries together with the
# third-to-last and second-to-last y-series entries.
b = get_b_ion_series(pep)
y = get_y_ion_series(pep)
true_pivot = np.mean(list(b[:2]) + list(y[-3:-1]))

# Absolute error of the estimate against the reference pivot.
print(abs(true_pivot - p))

In [None]:
import os

# Save a random subset of the records in uniprot_sprot.fasta to a new,
# uniquely numbered FASTA file.
sprot = load_fasta_records("uniprot_sprot.fasta")
# In-place shuffle; no seed is set in this cell, so the subset can differ
# from run to run.
shuffle(sprot)
# Keep 10,000 records, as the `sprot_10k` name and the output filename
# promise (int(1e5) would keep 100,000).
sprot_10k = sprot[:10000]

# Take the first index whose file does not exist yet, so earlier
# sprot_10k_<n>.fasta outputs are never overwritten.
counter = 0
filename = "sprot_10k_{}.fasta"
while os.path.isfile(filename.format(counter)):
    counter += 1
filename = filename.format(counter)
print(filename)

with open(filename,"w") as handle:
    SeqIO.write(sprot_10k, handle, "fasta")

## Miscellaneous

In [None]:
from agnostic_search import _find_gapped_pairs

In [None]:
def get_gap(x):
    """Return the second element of pair ``x`` minus its first element."""
    return x[1] - x[0]


# Gapped pairs found in the first spectrum, ordered by increasing gap.
# NOTE(review): min()/max() iterate AMINO_MASS_MONO directly; if it is a
# mapping this yields its keys rather than its values - confirm.
candidate_pairs = sorted(
    _find_gapped_pairs(
        specs[0],
        min(AMINO_MASS_MONO),
        max(AMINO_MASS_MONO),
        2 * AVERAGE_MASS_DIFFERENCE,
    ),
    key=get_gap,
)
# Cell output: the gap of every candidate pair, in sorted order.
[get_gap(pair) for pair in candidate_pairs]