similarity_to(spec2, round_precision=0)[source]

    Compares two spectra and returns their cosine similarity.

Parameters

    spec2 (Spectrum) – another pymzml spectrum that is compared to the current spectrum.

Keyword Arguments
    
    round_precision (int) – precision mzs are rounded to, i.e. round( mz, round_precision )

Returns
    
    value between 0 and 1, i.e. the cosine between the two spectra.

Return type
    
    cosine (float)

highest_peaks(n)[source]

Function to retrieve the n-highest centroided peaks of the spectrum.

In [1]:
import pymzml
import matplotlib.pyplot as plt
import numpy as np

import pyteomics
from pyteomics import mzml, auxiliary

import plotly.graph_objects as go

import tqdm

In [63]:
# Location of the mzML run analysed in this notebook (machine-specific absolute path).
path = 'D:/UW/massmotif/mzml/DRO_DIE_1ppm_29h_exp1_method_1.mzML'

In [83]:
# Open the same file with the pyteomics mzML reader; `f` is indexed by position
# further down to inspect the raw record of a single spectrum.
f=mzml.MzML(path)

In [3]:
def get_scans(path, ms_all = False, ms_lv = 1):
    """Read spectra from the file at ``path`` with ``pymzml.run.Reader``.

    Parameters
    ----------
    path :
        Passed straight to ``pymzml.run.Reader``.
    ms_all :
        When equal to ``True`` every spectrum is returned; when equal to
        ``False`` only spectra whose ``ms_level`` equals ``ms_lv`` are kept.
        Any other value yields an empty list.
    ms_lv :
        MS level to keep when ``ms_all`` is ``False``.

    Returns
    -------
    list
        The selected spectra, in reader order.
    """
    reader = pymzml.run.Reader(path)

    # Level-filtered selection is the default mode.
    if ms_all == False:
        return [spectrum for spectrum in reader if spectrum.ms_level == ms_lv]

    # Unfiltered selection: keep every spectrum the reader yields.
    if ms_all == True:
        return [spectrum for spectrum in reader]

    # Neither flag value matched: nothing is collected.
    return []

In [64]:
# Keep only the scans whose ms_level is 2 (ms_all=False, ms_lv=2).
ms2s = get_scans(path, False, 2)

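- Use the first 5 fragments as neutral-loss anchors rather than the precursor alone -> require a baseline intensity of 100 for those 5 fragments -> only consider fragments down to (precursor - 100).
- Group the results by precursor and retention time (0.5 min window).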

In [178]:
def motif_seek(ms2_scans, motifs, error = 0.002, noise_level = 10, precursor_base = 500, top_frags = 5, precursor_dist = 50):
    """Find scans whose neutral losses contain every requested motif mass.

    For each scan, peaks with intensity ``<= noise_level`` are ignored and the
    remaining peaks with m/z below the first selected precursor are treated as
    fragments. Neutral losses are then collected from two kinds of anchors:

    * the precursor itself (``precursor - fragment``), and
    * up to ``top_frags`` of the highest-m/z fragments whose intensity is
      ``>= precursor_base`` and whose m/z lies within ``precursor_dist`` below
      the precursor (``anchor - fragment``, positive differences only).

    A scan is reported when every motif has at least one neutral loss strictly
    inside ``(motif - error, motif + error)``.

    The input scans are only read, never modified, so the function can be
    called repeatedly (e.g. with different ``noise_level`` values) on the same
    scan list.

    Parameters
    ----------
    ms2_scans : iterable
        Scan objects exposing ``selected_precursors`` (list of dicts with an
        ``'mz'`` key), ``mz``, ``i``, ``scan_time`` (indexable, element 0 is
        used) and ``ID``.
    motifs : sequence of float
        Neutral-loss masses that must all be present.
    error : float
        Half-width of the matching window around each motif.
    noise_level : float
        Peaks with intensity at or below this value are ignored.
    precursor_base : float
        Minimum intensity for a fragment to serve as an additional anchor.
    top_frags : int
        Maximum number of additional anchors; ``0`` or less disables them.
    precursor_dist : float
        Additional anchors must have m/z ``>= precursor - precursor_dist``.

    Returns
    -------
    list
        Sorted list of ``[precursor m/z, scan time rounded to 2 decimals,
        scan ID]`` for every matching scan. Scans without a selected precursor
        are skipped.
    """
    motif_windows = [(motif - error, motif + error) for motif in motifs]
    motif_result = []

    for scan in ms2_scans:
        precursors = scan.selected_precursors
        if not precursors:
            # No precursor to compute neutral losses from.
            continue
        precursor = precursors[0]['mz']

        # Work on local views so the caller's scan keeps its full peak list.
        mz = np.asarray(scan.mz)
        intensity = np.asarray(scan.i)

        # A single mask keeps m/z and intensity aligned even if the peak list
        # is not sorted by m/z.
        is_fragment = ~(intensity <= noise_level) & (mz < precursor)
        frag = mz[is_fragment]
        frag_i = intensity[is_fragment]

        # Additional anchors: the highest-m/z fragments above the intensity
        # baseline, restricted to the window just below the precursor.
        anchors = np.sort(frag[frag_i >= precursor_base])
        # Guard against `[-0:]`, which would select everything.
        anchors = anchors[-top_frags:] if top_frags > 0 else anchors[:0]
        anchors = anchors[anchors >= precursor - precursor_dist]

        losses = [precursor - frag]
        for anchor in anchors:
            from_anchor = anchor - frag
            losses.append(from_anchor[from_anchor > 0])
        neutral_loss = np.concatenate(losses)

        every_motif_found = all(
            np.any((low < neutral_loss) & (neutral_loss < high))
            for low, high in motif_windows
        )
        if every_motif_found:
            motif_result.append([precursor, round(scan.scan_time[0], 2), scan.ID])

    return sorted(motif_result)

In [179]:
# Neutral-loss masses that must all be found in a scan for it to be reported
# (presumably C2H3N and H2O losses -- TODO confirm with the author).
motif = [41.0265, 18.0105]
# Single-motif alternative: motif = [41.0265]
# `result` holds the sorted [precursor m/z, scan time, scan ID] hits.
result = motif_seek(ms2s, motif, error = 0.01, noise_level = 10, precursor_base = 500)

In [180]:
len(result)

658

In [46]:
def motif_simp(motif_result, rtrange=[0, 20], mzrange=[0, 500]):
    """Collapse motif hits to one entry per precursor m/z and filter by range.

    For every distinct precursor m/z in ``motif_result`` only the first entry
    (in input order) is kept. The surviving entries are then returned in
    ascending precursor m/z order, restricted to those whose retention time
    and m/z fall strictly inside ``rtrange`` and ``mzrange``.

    Note that de-duplication happens before filtering: if the first entry of
    a precursor lies outside ``rtrange``, that precursor is dropped even when
    later entries would fall inside it.

    Parameters
    ----------
    motif_result : list
        Entries of the form ``[precursor m/z, retention time, scan ID]``.
    rtrange : sequence
        ``(low, high)`` retention-time bounds, both exclusive. The default
        list is only read, never modified.
    mzrange : sequence
        ``(low, high)`` precursor m/z bounds, both exclusive. The default
        list is only read, never modified.

    Returns
    -------
    list
        The filtered entries (the same list objects as in ``motif_result``).
    """
    # Use the argument itself (not a notebook-level variable) and remember the
    # first hit seen for each precursor m/z in a single pass.
    first_hit = {}
    for hit in motif_result:
        first_hit.setdefault(hit[0], hit)

    filtered_list = []
    for precursor_mz in sorted(first_hit):
        hit = first_hit[precursor_mz]
        in_rt_window = rtrange[0] < hit[1] < rtrange[1]
        in_mz_window = mzrange[0] < hit[0] < mzrange[1]
        if in_rt_window and in_mz_window:
            filtered_list.append(hit)
    return filtered_list

In [77]:
result

[[100.932026817685, 17.3, 1037835],
 [100.932026817685, 19.03, 1142045],
 [100.932026817685, 19.04, 1142232],
 [100.932026817685, 19.3, 1158052],
 [100.932026817685, 20.24, 1214350],
 [100.932026817685, 20.24, 1214433],
 [100.932026817685, 20.35, 1220864],
 [100.932026817685, 20.49, 1229233],
 [100.932026817685, 20.58, 1234605],
 [100.932026817685, 20.79, 1247318],
 [102.973678588867, 21.1, 1265797],
 [111.000689094449, 10.39, 623172],
 [111.000689094449, 10.51, 630815],
 [111.000689094449, 10.98, 658808],
 [111.000689094449, 11.4, 684286],
 [111.000689094449, 12.33, 740015],
 [111.000689094449, 12.44, 746593],
 [111.000689094449, 12.56, 753813],
 [112.982522735596, 0.51, 30499],
 [112.982522735596, 4.17, 250481],
 [112.982522735596, 4.26, 255629],
 [112.982522735596, 21.46, 1287659],
 [130.009956832481, 4.24, 254392],
 [130.009956832481, 9.44, 566168],
 [130.009956832481, 10.07, 604134],
 [130.009956832481, 11.8, 707779],
 [130.009956832481, 12.12, 726951],
 [130.009956832481, 12.31, 

In [72]:
def find_scan(ms2s, scanid, interactive = True):
    """Print a short summary of one scan and plot its spectrum.

    Parameters
    ----------
    ms2s : iterable
        Scan objects exposing ``ID``, ``mz``, ``i``, ``scan_time``,
        ``selected_precursors`` and ``ms_level``.
    scanid :
        Value compared against each scan's ``ID``; the first match is used.
    interactive : bool
        Truthy: draw an interactive plotly bar chart with a range slider.
        Falsy: draw a static matplotlib bar chart.

    Returns
    -------
    None
        The function is called for its printed summary and its plot.

    Raises
    ------
    ValueError
        If no scan in ``ms2s`` has the requested ID (instead of silently
        plotting whichever scan happened to be last).
    """
    scan = next((candidate for candidate in ms2s if candidate.ID == scanid), None)
    if scan is None:
        raise ValueError('No scan with ID {!r} found'.format(scanid))

    print('Precursor m/z: {:0.2f}, Scan time: {:0.1f} minute'.format(scan.selected_precursors[0]['mz'], scan.scan_time[0]))

    mz = scan.mz
    ints = scan.i

    if interactive:
        # No matplotlib call here: clearing the current matplotlib figure only
        # produced a stray empty figure next to the plotly chart.
        fig = go.Figure([go.Bar(x=mz, y=ints, width = 0.5,
                        hovertemplate =
                        'Int: %{y}'+
                        '<br>m/z: %{x}<br>')])
        fig.update_traces(marker_color='rgb(158,202,225)', marker_line_color='rgb(0,0,0)',
                  marker_line_width=0.5, opacity=1)
        fig.update_layout(
                template = 'simple_white',
                width = 1000,
                height = 600,
                xaxis = dict(title = 'm/z ratio',
                        rangeslider=dict(
            visible = True
        )),
                yaxis = dict(
                    title = 'Intensity'))
        fig.show()

    else:
        plt.figure(figsize=(10,5))
        plt.bar(mz, ints, width = 1.0)
        plt.ticklabel_format(style='sci', axis='y', scilimits=(0,0))
        plt.xlabel('m/z')
        plt.ylabel('Intensity')
        # Label the plot with the scan's actual MS level.
        plt.title('MS{} spectrum'.format(scan.ms_level))
        # NOTE(review): fixed x-range hides peaks above m/z 340 -- confirm intended.
        plt.xlim(0,340)

    return

In [95]:
# Plot scan 354961 interactively; find_scan ends in a bare `return`, so test1 is None.
test1 = find_scan(ms2s, 354961, True)

Precursor m/z: 326.17, Scan time: 5.9 minute


<Figure size 432x288 with 0 Axes>

In [124]:
def cos_one(input_scan, ms2_scans, score_thres = 0.9):
    """Count the scans whose similarity to ``input_scan`` reaches a threshold.

    Each scan in ``ms2_scans`` is scored with
    ``input_scan.similarity_to(scan, round_precision=0)``. Whenever a score is
    ``>= score_thres`` the running count is printed; a final
    ``'one scan finished'`` line is printed once all scans are processed.

    Returns the number of scans that reached the threshold.
    """
    hits = 0

    # Lazily yield only the scans that pass, so scoring and printing stay
    # interleaved scan by scan.
    passing = (
        candidate
        for candidate in ms2_scans
        if input_scan.similarity_to(candidate, round_precision=0) >= score_thres
    )
    for hits, _ in enumerate(passing, start=1):
        print(hits)

    print('one scan finished')
    return hits

2

In [94]:
f[60]

{'index': 60,
 'id': 'scanId=14573',
 'defaultArrayLength': 461,
 'scanList': {'count': 1,
  'scan': [{'scanWindowList': {'count': 1,
     'scanWindow': [{'scan window lower limit': 100.96910038153 m/z,
       'scan window upper limit': 665.34001163575 m/z}]},
    'scan start time': 0.24275 minute}],
  'no combination': ''},
 'negative scan': '',
 'base peak m/z': 112.982404490911 m/z,
 'base peak intensity': 170394.688 number of detector counts,
 'total ion current': 776947.5 number of detector counts,
 'ms level': 1,
 'MS1 spectrum': '',
 'centroid spectrum': '',
 'lowest observed m/z': 100.96910038153 m/z,
 'highest observed m/z': 665.34001163575 m/z,
 'spectrum title': 'DRO DIE 1ppm 29h exp1_method 1.14573.14573. File:"DRO DIE 1ppm 29h exp1_method 1.d", NativeID:"scanId=14573"',
 'count': 2,
 'm/z array': array([100.9691  , 100.99335 , 101.02367 , 101.036156, 101.53244 ,
        102.033936, 102.05219 , 102.91956 , 102.97579 , 103.00047 ,
        103.04059 , 103.069336, 104.28123 , 