In [None]:
import joblib
import numba as nb
import pandas as pd
import spectrum_utils.utils as suu

In [None]:
@nb.njit(parallel=True)
def count_candidates(precursor_mzs, ppm_tol):
    """
    Count pairs of precursor m/z values that fall within a mass tolerance.

    For each position, the values after it are scanned in order until the
    first one whose mass difference to the value at that position is not
    below `ppm_tol`. Every value passed over before that point counts as one
    pair. Because the scan stops at the first failure, the input is expected
    to be sorted in ascending order. Each pair is counted once (only later
    positions are examined).

    Parameters
    ----------
    precursor_mzs : array-like of float
        Precursor m/z values, sorted ascending.
    ppm_tol : float
        Exclusive upper bound on the mass difference. `suu.mass_diff` is
        called with its third argument set to `False`, which presumably
        selects ppm rather than Dalton units (confirm against the
        spectrum_utils documentation).

    Returns
    -------
    int
        The total number of pairs found.
    """
    num_mzs = len(precursor_mzs)
    count = 0
    for anchor in nb.prange(num_mzs):
        # Tally the neighbours of this anchor locally and fold the tally into
        # the shared total once per outer iteration.
        num_within = 0
        neighbor = anchor + 1
        while (neighbor < num_mzs and
               suu.mass_diff(precursor_mzs[neighbor], precursor_mzs[anchor], False) < ppm_tol):
            num_within += 1
            neighbor += 1
        count += num_within
    return count

In [None]:
# Load the spectrum metadata table. The path is relative to the notebook's
# working directory. The analysis below reads the 'precursor_charge' and
# 'precursor_mz' columns from it.
spectra = pd.read_parquet(path='kim2014_spectra.parquet')

In [None]:
# Precursor mass tolerance passed to count_candidates (reported as ppm below).
ppm_tol = 20
# Pairs are only counted between spectra with the same precursor charge, so
# each charge group is handled on its own. count_candidates stops scanning at
# the first value outside the tolerance, hence the sort before each call.
mzs_by_charge = spectra.groupby('precursor_charge')['precursor_mz']
# Naive approach where each spectrum is compared to all its neighbors:
# count_candidates counts every pair once, so double the total to account for
# both members of a pair performing the comparison.
num_brute_force = 2 * sum(
    count_candidates(charge_mzs.sort_values().values, ppm_tol)
    for _, charge_mzs in mzs_by_charge
)

In [None]:
# Hard-coded pair counts for falcon, summed into a single total.
# NOTE(review): the origin of the individual numbers is not recorded in this
# notebook (presumably copied from falcon's output) — confirm and document.
num_falcon = sum((
    25_664_862,
    727_350_893,
    329_854_885,
    74_245_350,
    18_032_994,
    3_565_450,
    416_811,
    50_290,
    11_026,
    3_862,
    2_463,
    1_863,
    113,
    4,
    175,
))

In [None]:
# Report the dataset size, then the total and per-spectrum average number of
# comparisons for each approach.
num_spectra = len(spectra)
print(f'Number of spectra: {num_spectra:,d}')
for method, num_pairs in (('brute-force', num_brute_force),
                          ('falcon', num_falcon)):
    print(f'Number of {method} spectrum pairs ({ppm_tol} ppm): {num_pairs:,d}')
    print(f'Average number of {method} comparisons per spectrum: '
          f'{round(num_pairs / num_spectra)}')