# Example notebook for making the performance and accuracy benchmarking figure

In [1]:
# Stretch the notebook container to the full browser width.
# `display` and `HTML` are imported from the public `IPython.display` module;
# importing them from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

# Put the parent directory first on the import path so `import blink` below
# resolves from there.
import sys
sys.path.insert(0, '../')

import blink
from importlib import reload
# Re-import blink so edits to its source are picked up without a kernel restart.
reload(blink)

import pandas as pd
import numpy as np
import scipy
from scipy import sparse as sp

from matplotlib import pyplot as plt
import seaborn as sns

import matplotlib
# Font type 42 embeds TrueType fonts in PDF/PS output, which keeps the text
# editable in vector-graphics editors.
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42

# Load data

In [2]:
def _discretize(msms_table):
    """Discretize one MS/MS table's spectra with the settings used throughout this notebook."""
    return blink.discretize_spectra(msms_table.spectrum,
                                    msms_table.precursor_mz,
                                    intensity_power=0.5)


# Read the two example MGF files.
small = blink.open_msms_file('./small.mgf')
medium = blink.open_msms_file('./medium.mgf')

# Discretized forms of the two spectrum sets; these are the inputs to BLINK scoring.
S1 = _discretize(small)
S2 = _discretize(medium)

# Run BLINK on 1 million comparisons

In [3]:
%%time
# Score the discretized spectra in S1 against those in S2 with BLINK.
# The result is indexed later in this notebook by 'mzi' (plotted as the BLINK
# score) and 'mzc' (plotted as the BLINK matching-ion count); both entries are
# converted with `.toarray()` there.
S12 = blink.score_sparse_spectra(S1, S2, tolerance=0.01)

CPU times: user 89.8 ms, sys: 13.1 ms, total: 103 ms
Wall time: 103 ms


# Run MatchMS on the same comparisons

In [4]:
import matchms as mms
from matchms.similarity import CosineGreedy, ModifiedCosine


def _to_matchms_spectra(msms_table):
    """Build one ``mms.Spectrum`` per row of an MS/MS table.

    For each row, elements 0 and 1 of its ``spectrum`` entry are passed as the
    first and second positional arguments of ``mms.Spectrum``, and the row's
    ``precursor_mz`` is attached as metadata.
    """
    converted = []
    for _, row in msms_table.iterrows():
        converted.append(
            mms.Spectrum(row.spectrum[0],
                         row.spectrum[1],
                         metadata={'precursor_mz': row.precursor_mz})
        )
    return converted


MMS1 = _to_matchms_spectra(small)
MMS2 = _to_matchms_spectra(medium)

# Both scorers share the same settings.
# NOTE(review): BLINK is run with tolerance=0.01 while MatchMS gets 0.0099;
# this looks deliberate (boundary handling?) — confirm.
scorer_settings = dict(tolerance=0.0099, intensity_power=0.5)
cos = CosineGreedy(**scorer_settings)
mod = ModifiedCosine(**scorer_settings)

In [None]:
%%time
MMS12 = {}
MMS12['cos'] = cos.matrix(references=MMS1, queries=MMS2)
MMS12['mod'] = mod.matrix(references=MMS1, queries=MMS2)

# Load precomputed timings for various numbers of comparisons for MatchMS and BLINK

In [None]:
# Collapse the benchmark rows to one median row per
# (comparisons, algorithm) pair; both keys end up in the index.
benchmark_df = (
    pd.read_csv('./blink_benchmark.csv')
    .groupby(['comparisons', 'algorithm'])
    .median()
)

# Generate Figure in Manuscript

In [None]:
%matplotlib inline
x = MMS12['cos']['score'].flatten()
y = S12['mzi'].toarray().flatten()
x,y = x[(x>0)&(y>0)], y[(x>0)&(y>0)]


fig, ax = plt.subplots(figsize=(17, 6),nrows=1,ncols=3)
ax = ax.flatten()
sns.scatterplot(x=x, y=y, s=70, label='Equal: {}'.format(len(x)), alpha=.5,ax=ax[0])
x,y = x[(y+1e-3<x)|(x<y-1e-3)], y[(x+1e-3<y)|(y<x-1e-3)]
sns.scatterplot(x=x, y=y, s=70, label='Unequal: {}'.format(len(x)),ax=ax[0])
ax[0].set_xlabel('MatchMS Score',fontsize=20)
ax[0].set_ylabel('BLINK Score',fontsize=20)


x = MMS12['cos']['matches'].flatten()
y = S12['mzc'].toarray().flatten()
x,y = x[(x>0)&(y>0)], y[(x>0)&(y>0)]

sns.scatterplot(x=x, y=y, s=70, label='Equal: {}'.format(len(x)), alpha=.5,ax=ax[1])
x,y = x[(y+.5<x)|(x<y-.5)], y[(x+.5<y)|(y<x-.5)]
sns.scatterplot(x=x, y=y, s=70, label='Unequal: {}'.format(len(x)),ax=ax[1])
ax[1].set_xlabel('MatchMS # Matching Ions',fontsize=20)
ax[1].set_ylabel('BLINK # Matching Ions',fontsize=20)

 
sns.lineplot(data=benchmark_df, x='comparisons', y='seconds', 
             hue='algorithm', style='algorithm', 
             markers=True, dashes=False,ax=ax[2],markersize=15)

ax[2].set_xscale('log')
ax[2].set_yscale('log')
ax[2].set_ylabel('Compute time (sec)',fontsize=20)
ax[2].set_xlabel('# Comparisons',fontsize=20)

for i,a in enumerate(ax):
    a.grid()
    if i==2:
        a.legend(fontsize=18,labels=['BLINK','MatchMS'])
    else:
        a.legend(fontsize=18,loc='upper left')
    
    for label in (a.get_xticklabels() + a.get_yticklabels()):
        label.set_fontsize(18)
    a.text(0.925,0.05,chr(97+i),fontsize=30,transform=a.transAxes)

plt.tight_layout()
plt.show()

fig.savefig('accuracy_benchmarking.pdf')

In [None]:
# Flatten the (comparisons, algorithm) index into ordinary columns on a copy.
# `benchmark_df` itself is left untouched so that re-running this cell always
# starts from the same frame and produces the same `timing_df`.
benchmark_flat = benchmark_df.reset_index()

# Split by algorithm and suffix every column with the algorithm name so the
# two halves can sit side by side after the merge.
blink_timing = benchmark_flat[benchmark_flat['algorithm'] == 'blink'].add_suffix('_blink')
matchms_timing = benchmark_flat[benchmark_flat['algorithm'] == 'matchms'].add_suffix('_matchms')

# One row per comparison count that was benchmarked for both algorithms.
timing_df = pd.merge(blink_timing,
                     matchms_timing,
                     left_on='comparisons_blink',
                     right_on='comparisons_matchms',
                     how='inner')
timing_df.head()

In [None]:
# Speed-up of BLINK over MatchMS at each benchmarked comparison count
# (MatchMS seconds divided by BLINK seconds).
timing_df['seconds_matchms'].div(timing_df['seconds_blink'])